diff --git a/.gitignore b/.gitignore
index 04093524cf8..99d24a91ec9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,6 +52,8 @@ cli/openbb_cli/assets/styles/user/*
 
 # Platform
 openbb_platform/core/openbb/package/*
+openbb_platform/providers/oecd/openbb_oecd/assets/*
+openbb_platform/providers/imf/openbb_imf/assets/*
 
 # Dev Container env
 obb/*
diff --git a/openbb_platform/extensions/economy/integration/test_economy_api.py b/openbb_platform/extensions/economy/integration/test_economy_api.py
index 6dd3ffd3a81..84fe9f5c446 100644
--- a/openbb_platform/extensions/economy/integration/test_economy_api.py
+++ b/openbb_platform/extensions/economy/integration/test_economy_api.py
@@ -288,6 +288,15 @@ def test_economy_gdp_real(params, headers):
                 "provider": "fred",
             }
         ),
+        (
+            {
+                "country": "united_states",
+                "start_date": None,
+                "end_date": None,
+                "provider": "oecd",
+                "frequency": "quarterly",
+            }
+        ),
     ],
 )
 @pytest.mark.integration
@@ -582,6 +591,20 @@ def test_economy_fred_regional(params, headers):
                 "pivot": False,
             }
         ),
+        (
+            {
+                "provider": "oecd",
+                "country": "united_states",
+                "symbol": "DF_PRICES_ALL::CPI",
+                "start_date": "2026-01-01",
+                "end_date": None,
+                "frequency": "month",
+                "transform": None,
+                "dimension_values": None,
+                "limit": 1,
+                "pivot": False,
+            }
+        ),
     ],
 )
 @pytest.mark.integration
@@ -609,6 +632,15 @@ def test_economy_indicators(params, headers):
                 "symbol": None,
             }
         ),
+        (
+            {
+                "provider": "oecd",
+                "topic": None,
+                "query": "balance+trade",
+                "dataflows": None,
+                "keywords": None,
+            }
+        ),
     ],
 )
 @pytest.mark.integration
diff --git a/openbb_platform/extensions/economy/integration/test_economy_python.py b/openbb_platform/extensions/economy/integration/test_economy_python.py
index 60162806ec1..57094083b25 100644
--- a/openbb_platform/extensions/economy/integration/test_economy_python.py
+++ b/openbb_platform/extensions/economy/integration/test_economy_python.py
@@ -266,6 +266,15 @@ def test_economy_gdp_real(params, obb):
                 "provider": "fred",
             }
         ),
+        (
+            {
+                "country": "united_states",
+                "start_date": None,
+                "end_date": None,
+                "provider": "oecd",
+                "frequency": "quarterly",
+            }
+        ),
     ],
 )
 @pytest.mark.integration
@@ -549,6 +558,15 @@ def test_economy_country_profile(params, obb):
                 "symbol": None,
             }
         ),
+        (
+            {
+                "provider": "oecd",
+                "topic": None,
+                "query": "balance+trade",
+                "dataflows": None,
+                "keywords": None,
+            }
+        ),
     ],
 )
 @pytest.mark.integration
@@ -603,6 +621,20 @@ def test_economy_available_indicators(params, obb):
                 "pivot": False,
             }
         ),
+        (
+            {
+                "provider": "oecd",
+                "country": "united_states",
+                "symbol": "DF_PRICES_ALL::CPI",
+                "start_date": "2026-01-01",
+                "end_date": None,
+                "frequency": "month",
+                "transform": None,
+                "dimension_values": None,
+                "limit": 1,
+                "pivot": False,
+            }
+        ),
     ],
 )
 @pytest.mark.integration
diff --git a/openbb_platform/extensions/economy/openbb_economy/economy_router.py b/openbb_platform/extensions/economy/openbb_economy/economy_router.py
index 3d7e1c12a4d..52b0dc16773 100644
--- a/openbb_platform/extensions/economy/openbb_economy/economy_router.py
+++ b/openbb_platform/extensions/economy/openbb_economy/economy_router.py
@@ -117,6 +117,8 @@ async def risk_premium(
         APIEx(parameters={"provider": "fred", "country": "brazil"}),
         APIEx(parameters={"provider": "ecb"}),
         APIEx(parameters={"report_type": "summary", "provider": "ecb"}),
+        APIEx(parameters={"provider": "oecd"}),
+        APIEx(parameters={"provider": "oecd", 
"country": "japan"}), APIEx( description="The `country` parameter will override the `report_type`.", parameters={"country": "united_states", "provider": "ecb"}, diff --git a/openbb_platform/extensions/tests/utils/integration_tests_testers.py b/openbb_platform/extensions/tests/utils/integration_tests_testers.py index a604f45c939..efea1f82dc7 100644 --- a/openbb_platform/extensions/tests/utils/integration_tests_testers.py +++ b/openbb_platform/extensions/tests/utils/integration_tests_testers.py @@ -5,16 +5,33 @@ import os from collections.abc import Callable from typing import ( + Annotated, Any, Literal, + get_args, + get_origin, ) +try: + from fastapi.params import Depends as FastAPIDepends +except ImportError: + FastAPIDepends = type(None) + from openbb_core.app.provider_interface import ProviderInterface from openbb_core.app.router import CommandMap from .integration_tests_generator import find_extensions +def _is_depends_param(param: inspect.Parameter) -> bool: + if isinstance(param.default, FastAPIDepends): + return True + if get_origin(param.annotation) is Annotated: + metadata = get_args(param.annotation)[1:] + return any(isinstance(item, FastAPIDepends) for item in metadata) + return False + + def get_integration_tests( test_type: Literal["api", "python"], filter_charting_ext: bool | None = True ) -> list[Any]: @@ -260,10 +277,12 @@ def check_integration_tests( for function in processing_functions: if route.replace("/", "_")[1:] == function.replace("test_", ""): sig = inspect.signature(cm.map[route]) - param_names = list(sig.parameters.keys()) + ["return"] - processing_command_params = [ - {k: "" for k in param_names} - ] + param_names = [ + name + for name, param in sig.parameters.items() + if not _is_depends_param(param) + ] + ["return"] + processing_command_params = [{k: "" for k in param_names}] if ( not processing_command_params or len(functions[function].pytestmark) < 2 diff --git a/openbb_platform/providers/oecd/README.md b/openbb_platform/providers/oecd/README.md index bd1fb9e1716..ca8ae3f4044 100644 --- a/openbb_platform/providers/oecd/README.md +++ b/openbb_platform/providers/oecd/README.md @@ -1,13 +1,190 @@ -# OpenBB OECD Provider +# OpenBB OECD Provider Extension -This extension integrates the [OECD](https://stats.oecd.org) data provider into the OpenBB Platform. +This package adds the `openbb-oecd` provider extension to the Open Data Platform by OpenBB. + +It provides everything you need — endpoints, tools, and metadata — to access and explore the entirety of +https://data-explorer.oecd.org, without any previous experience working with it. ## Installation -To install the extension: +Install from PyPI with: -```bash +```sh pip install openbb-oecd ``` -Documentation available [here](https://docs.openbb.co/platform/developer_guide/contributing). +Then build the Python static assets by running: + +```sh +openbb-build +``` + +## Quick Start + +The fastest way to get started is by connecting to the OpenBB Workspace as a custom backend. + +### Start Server + +```sh +openbb-api +``` + +This starts the FastAPI server over localhost on port 6900. + +### Add to Workspace + +See the documentation [here](https://docs.openbb.co/python/quickstart/workspace) for more details. + +### Click to Open App + +Once added, click on the app to open the dashboard. + +The dashboard contains widgets with metadata and information, as well as ones for exploring and retrieving the data. 
+
+## Implementation Details
+
+OECD publishes data through an SDMX v2 REST API and organizes everything into "dataflows". You can think of
+these as databases, each with its own dimension definitions, codelists, and observation attributes.
+Some definitions are shared across dataflows, while others are domain-specific.
+
+The extension ships with a bundled metadata cache that covers all OECD dataflows. It contains dimension
+definitions, codelist mappings, content constraints, and the full topic taxonomy. When anything is missing
+from the bundled cache, it is fetched on first use and added to a user-writable cache file that persists
+across sessions.
+
+Input is validated against the dataflow's content constraints. Codes are resolved to human-readable labels
+in the output, and dataset and series metadata are returned alongside the observations.
+
+### Indicators
+
+In this library, we use the term "indicator" to refer to indicator-like dimensions within individual dataflows.
+These are dimensions that represent what is being measured, such as `MEASURE`, `SUBJECT`, `TRANSACTION`, or
+`ACTIVITY`. The specific dimension varies by dataflow.
+
+The OECD codes for these values — `B1GQ`, `CPI`, `LI`, etc. — are used to construct ticker-like symbols.
+
+### Symbology
+
+The Open Data Platform refers to all time series IDs as a `symbol`.
+Requesting data requires a symbol constructed from the dataflow's short ID and an identifier, joined with `::`.
+
+The identifier can be either:
+
+- An indicator-like code, such as `CPI`, `LI`, or `B1GQ`
+- A presentation table identifier, such as `T0101`
+
+```
+DF_PRICES_ALL::CPI — Consumer Price Index, all items
+DF_QNA::B1GQ — GDP, expenditure approach (Quarterly National Accounts)
+DF_CLI::LI — Composite Leading Indicator
+DF_EO::GDPV_USD — GDP forecast, volume (Economic Outlook)
+DF_BOP::B6_USD — Current account balance
+DF_QNA::T0101 — Quarterly National Accounts presentation table
+```
+
+Multiple indicators from the same dataflow can be comma-separated:
+
+```
+DF_PRICES_ALL::CPI,DF_PRICES_ALL::HICP
+```
+
+Use `obb.economy.available_indicators(provider="oecd")` to search for or list indicator symbols.
+
+Use `obb.oecd_utils.list_tables()` and `obb.oecd_utils.get_table_detail()` to discover presentation tables and inspect their dimensions.
+
+Use `obb.oecd_utils.get_dataflow_parameters()` to see all dimensions and valid codes for any dataflow.
+
+### Metadata Cache
+
+The library ships with a bundled base cache (`oecd_cache.msgpack.xz`) containing:
+
+- All dataflow IDs, names, and version metadata
+- DSD dimension definitions and codelist references for every dataflow
+- All codelist code-to-label mappings
+- Content constraints (valid value sets per dimension)
+- The full OECD topic taxonomy (category scheme and categorisations)
+
+When a structure is missing, it is fetched on demand and merged into a user-level cache stored in the
+OpenBB user cache directory as `oecd_cache.msgpack.gz`. If that directory cannot be resolved from OpenBB
+settings, the fallback location is `~/.openbb_platform/cache/oecd_cache.msgpack.gz`.
+
+## Coverage
+
+All data available from https://data-explorer.oecd.org can be retrieved via `obb.economy.indicators(provider='oecd', **kwargs)`. A short usage sketch follows at the end of this section.
+
+The extension also exposes specialized fetchers for the most commonly used OECD datasets.
+
+The extension creates a router path, `oecd_utils`, that exposes utility functions for UI integrations
+and metadata lookup.
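+
+As referenced above, a minimal sketch of the generic `indicators` call, combining two comma-separated
+indicator symbols from the same dataflow; the country and frequency values here are illustrative:
+
+```python
+from openbb import obb
+
+# Request two price indices from the same dataflow in a single call.
+data = obb.economy.indicators(
+    provider="oecd",
+    symbol="DF_PRICES_ALL::CPI,DF_PRICES_ALL::HICP",
+    country="united_states",
+    frequency="month",
+)
+print(data.to_df().head())
+```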
+ +### Endpoints + +**Economy** + +- `obb.economy.available_indicators` — search all OECD indicator symbols +- `obb.economy.indicators` — fetch data for any OECD indicator symbol +- `obb.economy.balance_of_payments` — Balance of Payments +- `obb.economy.composite_leading_indicator` — Composite Leading Indicators +- `obb.economy.cpi` — Consumer Price Indices +- `obb.economy.country_interest_rates` — Short and long-term interest rates +- `obb.economy.gdp.nominal` — Nominal GDP +- `obb.economy.gdp.real` — Real GDP +- `obb.economy.gdp.forecast` — GDP forecasts (Economic Outlook) +- `obb.economy.house_price_index` — Residential property price indices +- `obb.economy.share_price_index` — Share price indices +- `obb.economy.unemployment` — Unemployment rates + +**Utilities** + +- `obb.oecd_utils.list_topic_choices` — topic dropdown choices for UI widgets +- `obb.oecd_utils.list_subtopic_choices` — subtopic dropdown choices for a selected topic +- `obb.oecd_utils.list_dataflows` — list all OECD dataflows with topic breadcrumbs +- `obb.oecd_utils.list_dataflow_choices` — dropdown choices for UI widgets +- `obb.oecd_utils.list_topics` — browse OECD topics and subtopics with dataflow counts +- `obb.oecd_utils.get_dataflow_parameters` — dimensions and valid codes for a dataflow +- `obb.oecd_utils.list_tables` — search OECD presentation tables +- `obb.oecd_utils.get_table_detail` — inspect a table's dimensions and indicator hierarchy +- `obb.oecd_utils.list_table_choices` — table dropdown choices for UI widgets +- `obb.oecd_utils.presentation_table_choices` — progressive choices for the presentation table widget +- `obb.oecd_utils.presentation_table` — retrieve formatted OECD presentation tables + +"Choices" endpoints are used by OpenBB Workspace to populate widget dropdown menus. 
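+
+A minimal sketch of what a choices endpoint returns; the exact record fields shown are an assumption
+based on typical label/value dropdown payloads:
+
+```python
+from openbb import obb
+
+# Each choice is a small record suitable for a dropdown menu.
+topics = obb.oecd_utils.list_topic_choices()
+print(topics[:3])  # e.g. [{"label": "Economy", "value": "ECO"}, ...]
+```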
+ +### Example + +```python +from openbb import obb + +# List all available OECD indicators to find what you need +indicators = obb.economy.available_indicators(provider="oecd", query="GDP") +print(indicators.to_df()) + +# Fetch real GDP growth for the US, UK, and Germany +data = obb.economy.indicators( + provider="oecd", + symbol="DF_QNA::B1GQ", + country="USA,GBR,DEU", + frequency="quarterly", +) +print(data.to_df()) + +# Fetch a presentation table directly +table = obb.economy.indicators( + provider="oecd", + symbol="DF_QNA::T0101", + country="USA", + frequency="Q", + limit=4, +) +print(table.to_df()) + +# Inspect all dimensions and valid values for a dataflow +params = obb.oecd_utils.get_dataflow_parameters( + dataflow_id="DF_PRICES_ALL", output_format="json" +) +print(params.results) + +# Search available presentation tables +tables = obb.oecd_utils.list_tables(query="GDP") +print(tables.results) +``` diff --git a/openbb_platform/providers/oecd/integration/__init__.py b/openbb_platform/providers/oecd/integration/__init__.py new file mode 100644 index 00000000000..1436fcb097f --- /dev/null +++ b/openbb_platform/providers/oecd/integration/__init__.py @@ -0,0 +1 @@ +"""OECD Utilities Router Module Integration Tests.""" diff --git a/openbb_platform/providers/oecd/integration/test_oecd_utils_api.py b/openbb_platform/providers/oecd/integration/test_oecd_utils_api.py new file mode 100644 index 00000000000..a84b97aea24 --- /dev/null +++ b/openbb_platform/providers/oecd/integration/test_oecd_utils_api.py @@ -0,0 +1,346 @@ +"""Test OECD Utils API endpoints.""" + +import base64 + +import pytest +import requests +from openbb_core.env import Env +from openbb_core.provider.utils.helpers import get_querystring + +# pylint: disable=redefined-outer-name + + +@pytest.fixture(scope="session") +def headers(): + """Get the headers for the API request.""" + userpass = f"{Env().API_USERNAME}:{Env().API_PASSWORD}" + userpass_bytes = userpass.encode("ascii") + base64_bytes = base64.b64encode(userpass_bytes) + + return {"Authorization": f"Basic {base64_bytes.decode('ascii')}"} + + +@pytest.mark.parametrize("params", [{}]) +@pytest.mark.integration +def test_oecd_utils_list_topic_choices(params, headers): + """Test oecd_utils_list_topic_choices endpoint.""" + params = {p: v for p, v in params.items() if v} + + query_str = get_querystring(params, []) + url = f"http://localhost:8000/api/v1/oecd_utils/list_topic_choices?{query_str}" + result = requests.get(url, headers=headers, timeout=10) + assert isinstance(result, requests.Response) + assert result.status_code == 200 + + +@pytest.mark.parametrize( + "params", + [ + { + "topic": None, + }, + { + "topic": "ECO", + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_list_subtopic_choices(params, headers): + """Test oecd_utils_list_subtopic_choices endpoint.""" + params = {p: v for p, v in params.items() if v} + + query_str = get_querystring(params, []) + url = f"http://localhost:8000/api/v1/oecd_utils/list_subtopic_choices?{query_str}" + result = requests.get(url, headers=headers, timeout=10) + assert isinstance(result, requests.Response) + assert result.status_code == 200 + + +@pytest.mark.parametrize( + "params", + [ + { + "topic": None, + "subtopic": None, + }, + { + "topic": "ECO", + "subtopic": None, + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_list_dataflows(params, headers): + """Test oecd_utils_list_dataflows endpoint.""" + params = {p: v for p, v in params.items() if v} + + query_str = get_querystring(params, []) + url = 
f"http://localhost:8000/api/v1/oecd_utils/list_dataflows?{query_str}" + result = requests.get(url, headers=headers, timeout=10) + assert isinstance(result, requests.Response) + assert result.status_code == 200 + + +@pytest.mark.parametrize("params", [{}]) +@pytest.mark.integration +def test_oecd_utils_list_dataflow_choices(params, headers): + """Test oecd_utils_list_dataflow_choices endpoint.""" + params = {p: v for p, v in params.items() if v} + + query_str = get_querystring(params, []) + url = f"http://localhost:8000/api/v1/oecd_utils/list_dataflow_choices?{query_str}" + result = requests.get(url, headers=headers, timeout=10) + assert isinstance(result, requests.Response) + assert result.status_code == 200 + + +@pytest.mark.parametrize( + "params", + [ + { + "query": None, + }, + { + "query": "health", + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_list_topics(params, headers): + """Test oecd_utils_list_topics endpoint.""" + params = {p: v for p, v in params.items() if v} + + query_str = get_querystring(params, []) + url = f"http://localhost:8000/api/v1/oecd_utils/list_topics?{query_str}" + result = requests.get(url, headers=headers, timeout=10) + assert isinstance(result, requests.Response) + assert result.status_code == 200 + + +@pytest.mark.parametrize( + "params", + [ + { + "dataflow_id": "DF_PRICES_ALL", + "output_format": "json", + }, + { + "dataflow_id": "DF_PRICES_ALL", + "output_format": "markdown", + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_get_dataflow_parameters(params, headers): + """Test oecd_utils_get_dataflow_parameters endpoint.""" + params = {p: v for p, v in params.items() if v} + + query_str = get_querystring(params, []) + url = f"http://localhost:8000/api/v1/oecd_utils/get_dataflow_parameters?{query_str}" + result = requests.get(url, headers=headers, timeout=10) + assert isinstance(result, requests.Response) + assert result.status_code == 200 + + +@pytest.mark.parametrize( + "params", + [ + { + "query": None, + "topic": None, + "subtopic": None, + "dataflow_id": None, + }, + { + "query": "GDP", + "topic": None, + "subtopic": None, + "dataflow_id": None, + }, + { + "query": None, + "topic": "HEA", + "subtopic": None, + "dataflow_id": None, + }, + { + "query": None, + "topic": None, + "subtopic": None, + "dataflow_id": "DF_PRICES_ALL", + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_list_tables(params, headers): + """Test oecd_utils_list_tables endpoint.""" + params = {p: v for p, v in params.items() if v} + + query_str = get_querystring(params, []) + url = f"http://localhost:8000/api/v1/oecd_utils/list_tables?{query_str}" + result = requests.get(url, headers=headers, timeout=10) + assert isinstance(result, requests.Response) + assert result.status_code == 200 + + +@pytest.mark.parametrize( + "params", + [ + { + "table_id": "DF_PRICES_ALL", + }, + { + "table_id": "DF_T725R_Q", + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_get_table_detail(params, headers): + """Test oecd_utils_get_table_detail endpoint.""" + params = {p: v for p, v in params.items() if v} + + query_str = get_querystring(params, []) + url = f"http://localhost:8000/api/v1/oecd_utils/get_table_detail?{query_str}" + result = requests.get(url, headers=headers, timeout=10) + assert isinstance(result, requests.Response) + assert result.status_code == 200 + + +@pytest.mark.parametrize( + "params", + [ + { + "topic": None, + }, + { + "topic": "ECO", + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_list_table_choices(params, headers): + """Test 
oecd_utils_list_table_choices endpoint.""" + params = {p: v for p, v in params.items() if v} + + query_str = get_querystring(params, []) + url = f"http://localhost:8000/api/v1/oecd_utils/list_table_choices?{query_str}" + result = requests.get(url, headers=headers, timeout=10) + assert isinstance(result, requests.Response) + assert result.status_code == 200 + + +@pytest.mark.parametrize( + "params", + [ + { + "symbol": "DF_PRICES_ALL::CPI", + "country": None, + "frequency": None, + "transform": None, + "dimension_values": None, + }, + { + "symbol": "DF_PRICES_ALL::CPI", + "country": "true", + "frequency": None, + "transform": None, + "dimension_values": None, + }, + { + "symbol": "DF_PRICES_ALL::CPI", + "country": "USA", + "frequency": "true", + "transform": None, + "dimension_values": None, + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_indicator_choices(params, headers): + """Test oecd_utils_indicator_choices endpoint.""" + params = {p: v for p, v in params.items() if v} + + query_str = get_querystring(params, []) + url = f"http://localhost:8000/api/v1/oecd_utils/indicator_choices?{query_str}" + result = requests.get(url, headers=headers, timeout=10) + assert isinstance(result, requests.Response) + assert result.status_code == 200 + + +@pytest.mark.parametrize( + "params", + [ + { + "topic": None, + "subtopic": None, + "table": None, + "country": None, + "frequency": None, + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_presentation_table_choices(params, headers): + """Test oecd_utils_presentation_table_choices endpoint.""" + params = {p: v for p, v in params.items() if v} + + query_str = get_querystring(params, []) + url = f"http://localhost:8000/api/v1/oecd_utils/presentation_table_choices?{query_str}" + result = requests.get(url, headers=headers, timeout=10) + assert isinstance(result, requests.Response) + assert result.status_code == 200 + + +@pytest.mark.parametrize( + "params", + [ + { + "table": "DF_QNA::T0101", + "country": "USA", + "dimension": "unit_measure", + "frequency": None, + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_presentation_table_dim_choices(params, headers): + """Test oecd_utils_presentation_table_dim_choices endpoint.""" + params = {p: v for p, v in params.items() if v} + + query_str = get_querystring(params, []) + url = f"http://localhost:8000/api/v1/oecd_utils/presentation_table_dim_choices?{query_str}" + result = requests.get(url, headers=headers, timeout=30) + assert isinstance(result, requests.Response) + assert result.status_code == 200 + + +@pytest.mark.parametrize( + "params", + [ + { + "topic": None, + "subtopic": None, + "table": "DF_PRICES_ALL", + "country": "USA", + "counterpart": None, + "frequency": "M", + "unit_measure": None, + "adjustment": None, + "transformation": None, + "dimension_values": None, + "limit": 2, + "start_date": None, + "end_date": None, + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_presentation_table(params, headers): + """Test oecd_utils_presentation_table endpoint.""" + params = {p: v for p, v in params.items() if v} + + query_str = get_querystring(params, []) + url = f"http://localhost:8000/api/v1/oecd_utils/presentation_table?{query_str}" + result = requests.get(url, headers=headers, timeout=30) + assert isinstance(result, requests.Response) + assert result.status_code == 200 diff --git a/openbb_platform/providers/oecd/integration/test_oecd_utils_python.py b/openbb_platform/providers/oecd/integration/test_oecd_utils_python.py new file mode 100644 index 
00000000000..2b1ce8c21ae --- /dev/null +++ b/openbb_platform/providers/oecd/integration/test_oecd_utils_python.py @@ -0,0 +1,308 @@ +"""OECD Utilities module integration tests.""" + +import pytest +from openbb_core.app.model.obbject import OBBject + +# pylint: disable=redefined-outer-name + + +@pytest.fixture(scope="session") +def obb(pytestconfig): # pylint: disable=inconsistent-return-statements + """Fixture to setup obb.""" + + if pytestconfig.getoption("markexpr") != "not integration": + import openbb # pylint: disable=import-outside-toplevel + + return openbb.obb + + +@pytest.mark.parametrize("params", [{}]) +@pytest.mark.integration +def test_oecd_utils_list_topic_choices(params, obb): + """Test oecd_utils_list_topic_choices endpoint.""" + params = {p: v for p, v in params.items() if v} + result = obb.oecd_utils.list_topic_choices(**params) + assert result + assert isinstance(result, list) + assert len(result) > 0 + + +@pytest.mark.parametrize( + "params", + [ + { + "topic": None, + }, + { + "topic": "ECO", + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_list_subtopic_choices(params, obb): + """Test oecd_utils_list_subtopic_choices endpoint.""" + params = {p: v for p, v in params.items() if v} + result = obb.oecd_utils.list_subtopic_choices(**params) + assert isinstance(result, list) + + +@pytest.mark.parametrize( + "params", + [ + { + "topic": None, + "subtopic": None, + }, + { + "topic": "ECO", + "subtopic": None, + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_list_dataflows(params, obb): + """Test oecd_utils_list_dataflows endpoint.""" + params = {p: v for p, v in params.items() if v} + result = obb.oecd_utils.list_dataflows(**params) + assert result + assert isinstance(result, OBBject) + assert len(result.results) > 0 # type: ignore + + +@pytest.mark.parametrize("params", [{}]) +@pytest.mark.integration +def test_oecd_utils_list_dataflow_choices(params, obb): + """Test oecd_utils_list_dataflow_choices endpoint.""" + params = {p: v for p, v in params.items() if v} + result = obb.oecd_utils.list_dataflow_choices(**params) + assert result + assert isinstance(result, list) + assert len(result) > 0 + + +@pytest.mark.parametrize( + "params", + [ + { + "query": None, + }, + { + "query": "health", + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_list_topics(params, obb): + """Test oecd_utils_list_topics endpoint.""" + params = {p: v for p, v in params.items() if v} + result = obb.oecd_utils.list_topics(**params) + assert result + assert isinstance(result, OBBject) + assert len(result.results) > 0 # type: ignore + + +@pytest.mark.parametrize( + "params", + [ + { + "dataflow_id": "DF_PRICES_ALL", + "output_format": "json", + }, + { + "dataflow_id": "DF_PRICES_ALL", + "output_format": "markdown", + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_get_dataflow_parameters(params, obb): + """Test oecd_utils_get_dataflow_parameters endpoint.""" + params = {p: v for p, v in params.items() if v} + result = obb.oecd_utils.get_dataflow_parameters(**params) + assert result + assert isinstance(result, OBBject) + assert result.results + + +@pytest.mark.parametrize( + "params", + [ + { + "query": None, + "topic": None, + "subtopic": None, + "dataflow_id": None, + }, + { + "query": "GDP", + "topic": None, + "subtopic": None, + "dataflow_id": None, + }, + { + "query": None, + "topic": "HEA", + "subtopic": None, + "dataflow_id": None, + }, + { + "query": None, + "topic": None, + "subtopic": None, + "dataflow_id": "DF_PRICES_ALL", + }, + ], +) 
+@pytest.mark.integration +def test_oecd_utils_list_tables(params, obb): + """Test oecd_utils_list_tables endpoint.""" + params = {p: v for p, v in params.items() if v} + result = obb.oecd_utils.list_tables(**params) + assert result + assert isinstance(result, OBBject) + assert len(result.results) > 0 # type: ignore + + +@pytest.mark.parametrize( + "params", + [ + { + "table_id": "DF_PRICES_ALL", + }, + { + "table_id": "DF_T725R_Q", + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_get_table_detail(params, obb): + """Test oecd_utils_get_table_detail endpoint.""" + params = {p: v for p, v in params.items() if v} + result = obb.oecd_utils.get_table_detail(**params) + assert result + assert isinstance(result, OBBject) + assert result.results + + +@pytest.mark.parametrize( + "params", + [ + { + "topic": None, + }, + { + "topic": "ECO", + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_list_table_choices(params, obb): + """Test oecd_utils_list_table_choices endpoint.""" + params = {p: v for p, v in params.items() if v} + result = obb.oecd_utils.list_table_choices(**params) + assert result + assert isinstance(result, list) + assert len(result) > 0 + + +@pytest.mark.parametrize( + "params", + [ + { + "symbol": "DF_PRICES_ALL::CPI", + "country": None, + "frequency": None, + "transform": None, + "dimension_values": None, + }, + { + "symbol": "DF_PRICES_ALL::CPI", + "country": "true", + "frequency": None, + "transform": None, + "dimension_values": None, + }, + { + "symbol": "DF_PRICES_ALL::CPI", + "country": "USA", + "frequency": "true", + "transform": None, + "dimension_values": None, + }, + ], +) +@pytest.mark.integration +@pytest.mark.skip(reason="Not included in Python interface (include_in_schema=False)") +def test_oecd_utils_indicator_choices(params, obb): + """Test oecd_utils_indicator_choices endpoint.""" + params = {p: v for p, v in params.items() if v} + result = obb.oecd_utils.indicator_choices(**params) + assert isinstance(result, list) + + +@pytest.mark.parametrize( + "params", + [ + { + "topic": None, + "subtopic": None, + "table": None, + "country": None, + "frequency": None, + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_presentation_table_choices(params, obb): + """Test oecd_utils_presentation_table_choices endpoint.""" + params = {p: v for p, v in params.items() if v} + result = obb.oecd_utils.presentation_table_choices(**params) + assert isinstance(result, list) + + +@pytest.mark.parametrize( + "params", + [ + { + "table": "DF_QNA::T0101", + "country": "USA", + "dimension": "unit_measure", + "frequency": None, + }, + ], +) +@pytest.mark.integration +@pytest.mark.skip(reason="Not included in Python interface (include_in_schema=False)") +def test_oecd_utils_presentation_table_dim_choices(params, obb): + """Test oecd_utils_presentation_table_dim_choices endpoint.""" + params = {p: v for p, v in params.items() if v} + result = obb.oecd_utils.presentation_table_dim_choices(**params) + assert isinstance(result, list) + + +@pytest.mark.parametrize( + "params", + [ + { + "topic": None, + "subtopic": None, + "table": "DF_PRICES_ALL", + "country": "USA", + "counterpart": None, + "frequency": "M", + "unit_measure": None, + "adjustment": None, + "transformation": None, + "dimension_values": None, + "limit": 2, + "start_date": None, + "end_date": None, + }, + ], +) +@pytest.mark.integration +def test_oecd_utils_presentation_table(params, obb): + """Test oecd_utils_presentation_table endpoint.""" + params = {p: v for p, v in params.items() if v} + result = 
obb.oecd_utils.presentation_table(**params) + assert result diff --git a/openbb_platform/providers/oecd/openbb_oecd/__init__.py b/openbb_platform/providers/oecd/openbb_oecd/__init__.py index c9c1dd45a57..a4688854127 100644 --- a/openbb_platform/providers/oecd/openbb_oecd/__init__.py +++ b/openbb_platform/providers/oecd/openbb_oecd/__init__.py @@ -1,11 +1,14 @@ """OECD provider module.""" from openbb_core.provider.abstract.provider import Provider +from openbb_oecd.models.available_indicators import OecdAvailableIndicatorsFetcher +from openbb_oecd.models.balance_of_payments import OECDBalanceOfPaymentsFetcher from openbb_oecd.models.composite_leading_indicator import ( OECDCompositeLeadingIndicatorFetcher, ) from openbb_oecd.models.consumer_price_index import OECDCPIFetcher from openbb_oecd.models.country_interest_rates import OecdCountryInterestRatesFetcher +from openbb_oecd.models.economic_indicators import OecdEconomicIndicatorsFetcher from openbb_oecd.models.gdp_forecast import OECDGdpForecastFetcher from openbb_oecd.models.gdp_nominal import OECDGdpNominalFetcher from openbb_oecd.models.gdp_real import OECDGdpRealFetcher @@ -16,9 +19,15 @@ oecd_provider = Provider( name="oecd", website="https://data-explorer.oecd.org/", - description="""OECD Data Explorer includes data and metadata for OECD countries and selected -non-member economies.""", + description="""Access OECD data via the SDMX REST API. +Covers all OECD dataflows including GDP, CPI, unemployment, +interest rates, and hundreds more.""", fetcher_dict={ + # Generic fetchers + "AvailableIndicators": OecdAvailableIndicatorsFetcher, + "EconomicIndicators": OecdEconomicIndicatorsFetcher, + # Specialized fetchers + "BalanceOfPayments": OECDBalanceOfPaymentsFetcher, "CompositeLeadingIndicator": OECDCompositeLeadingIndicatorFetcher, "ConsumerPriceIndex": OECDCPIFetcher, "CountryInterestRates": OecdCountryInterestRatesFetcher, diff --git a/openbb_platform/providers/oecd/openbb_oecd/apps.json b/openbb_platform/providers/oecd/openbb_oecd/apps.json new file mode 100644 index 00000000000..225e320481f --- /dev/null +++ b/openbb_platform/providers/oecd/openbb_oecd/apps.json @@ -0,0 +1,1024 @@ +[ + { + "name": "OECD Data Explorer", + "img": "https://moneyandbanking.co.th/wp-content/uploads/2024/06/oecd.webp", + "img_dark": "https://moneyandbanking.co.th/wp-content/uploads/2024/06/oecd.webp", + "img_light": "https://moneyandbanking.co.th/wp-content/uploads/2024/06/oecd.webp", + "description": "Unofficial OECD Data Explorer (https://data-explorer.oecd.org/). 
Explore topics, indicators, time series, survey data, and presentation tables.", + "allowCustomization": true, + "tabs": { + "metadata": { + "id": "metadata", + "name": "Metadata", + "layout": [ + { + "i": "oecd_utils_list_tables_custom_obb", + "x": 0, + "y": 13, + "w": 22, + "h": 24, + "state": { + "params": { + "query": "CPI" + }, + "chartView": { + "enabled": false, + "chartType": "line" + }, + "columnState": { + "default": { + "columnOrder": { + "orderedColIds": [ + "table_id", + "name", + "topic", + "subtopic", + "dataflow_id", + "topic_id", + "subtopic_id" + ] + }, + "rowSelection": [ + "0" + ] + } + } + }, + "groups": [] + }, + { + "i": "oecd_utils_get_table_detail_custom_obb", + "x": 22, + "y": 13, + "w": 18, + "h": 24, + "state": { + "params": { + "table_id": "DF_CPI_CTRY_WEIGHTS" + } + }, + "groups": [] + }, + { + "i": "oecd_utils_list_topics_custom_obb", + "x": 0, + "y": 2, + "w": 40, + "h": 11, + "state": { + "chartView": { + "enabled": false, + "chartType": "line" + }, + "columnState": { + "default": { + "columnOrder": { + "orderedColIds": [ + "topic_id", + "topic", + "subtopic_id", + "subtopic", + "dataflows" + ] + } + } + } + }, + "groups": [] + } + ] + }, + "gdp": { + "id": "gdp", + "name": "GDP", + "layout": [ + { + "i": "economy_gdp_nominal_oecd_obb", + "x": 0, + "y": 2, + "w": 40, + "h": 15, + "state": { + "params": { + "start_date": "2000-01-01", + "country": [ + "canada" + ], + "units": "capita" + }, + "chartModel": { + "modelType": "range", + "chartType": "line", + "chartOptions": {}, + "cellRange": { + "columns": [ + "date", + "value" + ] + }, + "suppressChartRanges": true + }, + "chartView": { + "enabled": true, + "chartType": "line" + }, + "columnState": { + "default": { + "columnPinning": { + "leftColIds": [ + "date" + ], + "rightColIds": [] + }, + "columnOrder": { + "orderedColIds": [ + "date", + "country", + "value" + ] + } + } + } + }, + "groups": [ + "Group 1", + "Group 2", + "Group 3" + ] + }, + { + "i": "economy_gdp_real_oecd_obb", + "x": 0, + "y": 17, + "w": 40, + "h": 15, + "state": { + "params": { + "start_date": "2000-01-01", + "country": [ + "canada" + ] + }, + "chartModel": { + "modelType": "range", + "chartType": "line", + "chartOptions": {}, + "cellRange": { + "columns": [ + "date", + "value" + ] + }, + "suppressChartRanges": true + }, + "chartView": { + "enabled": true, + "chartType": "line" + }, + "columnState": { + "default": { + "pivot": { + "pivotMode": false, + "pivotColIds": [ + "country" + ] + }, + "columnPinning": { + "leftColIds": [ + "date" + ], + "rightColIds": [] + }, + "columnOrder": { + "orderedColIds": [ + "date", + "country", + "value" + ] + } + }, + "default_pivot": { + "rowGroup": { + "groupColIds": [ + "date", + "country" + ] + }, + "aggregation": { + "aggregationModel": [ + { + "colId": "value", + "aggFunc": "sum" + } + ] + }, + "pivot": { + "pivotMode": true, + "pivotColIds": [ + "country" + ] + }, + "columnPinning": { + "leftColIds": [ + "date" + ], + "rightColIds": [] + }, + "columnOrder": { + "orderedColIds": [ + "date", + "country", + "value", + "ag-Grid-AutoColumn", + "pivot_country_United States_value" + ] + } + } + } + }, + "groups": [ + "Group 1", + "Group 2", + "Group 3" + ] + }, + { + "i": "economy_gdp_forecast_oecd_obb", + "x": 0, + "y": 32, + "w": 40, + "h": 15, + "state": { + "params": { + "start_date": "2000-01-01", + "country": [ + "canada" + ], + "frequency": "quarter", + "units": "growth" + }, + "chartModel": { + "modelType": "range", + "chartType": "line", + "chartOptions": {}, + "cellRange": { + "columns": [ + 
"date", + "value" + ] + }, + "suppressChartRanges": true + }, + "chartView": { + "enabled": true, + "chartType": "line" + }, + "columnState": { + "default": { + "columnPinning": { + "leftColIds": [ + "date" + ], + "rightColIds": [] + }, + "columnOrder": { + "orderedColIds": [ + "date", + "country", + "value" + ] + } + } + } + }, + "groups": [ + "Group 1", + "Group 2", + "Group 3" + ] + } + ] + }, + "cpi": { + "id": "cpi", + "name": "CPI", + "layout": [ + { + "i": "economy_cpi_oecd_obb", + "x": 0, + "y": 2, + "w": 40, + "h": 25, + "state": { + "params": { + "country": [ + "canada" + ], + "start_date": "2000-01-01", + "expenditure": [ + "all_non_food_non_energy" + ] + }, + "chartModel": { + "modelType": "range", + "chartType": "line", + "chartOptions": {}, + "cellRange": { + "columns": [ + "date", + "value" + ] + }, + "suppressChartRanges": true + }, + "chartView": { + "enabled": true, + "chartType": "line" + }, + "columnState": { + "default": { + "columnPinning": { + "leftColIds": [ + "date" + ], + "rightColIds": [] + }, + "columnVisibility": { + "hiddenColIds": [ + "unit_multiplier", + "country_code", + "series_id", + "title", + "order" + ] + }, + "columnOrder": { + "orderedColIds": [ + "date", + "country", + "value", + "unit", + "unit_multiplier", + "country_code", + "series_id", + "expenditure", + "title", + "order" + ] + } + } + } + }, + "groups": [ + "Group 2", + "Group 3" + ] + } + ] + }, + "unemployment": { + "id": "unemployment", + "name": "Unemployment", + "layout": [ + { + "i": "economy_unemployment_oecd_obb", + "x": 0, + "y": 2, + "w": 40, + "h": 25, + "state": { + "params": { + "country": [ + "canada" + ], + "start_date": "2000-01-01", + "seasonal_adjustment": "true" + }, + "chartModel": { + "modelType": "range", + "chartType": "line", + "chartOptions": {}, + "cellRange": { + "columns": [ + "date", + "value" + ] + }, + "suppressChartRanges": true + }, + "chartView": { + "enabled": true, + "chartType": "line" + }, + "columnState": { + "default": { + "columnPinning": { + "leftColIds": [ + "date" + ], + "rightColIds": [] + }, + "columnOrder": { + "orderedColIds": [ + "date", + "country", + "value" + ] + } + } + } + }, + "groups": [ + "Group 2", + "Group 3" + ] + } + ] + }, + "house-price-index": { + "id": "house-price-index", + "name": "House Price Index", + "layout": [ + { + "i": "economy_house_price_index_oecd_obb", + "x": 0, + "y": 2, + "w": 40, + "h": 26, + "state": { + "params": { + "country": [ + "canada" + ] + }, + "chartModel": { + "modelType": "range", + "chartType": "line", + "chartOptions": {}, + "cellRange": { + "columns": [ + "date", + "value" + ] + }, + "suppressChartRanges": true + }, + "chartView": { + "enabled": true, + "chartType": "line" + }, + "columnState": { + "default": { + "columnPinning": { + "leftColIds": [ + "date" + ], + "rightColIds": [] + }, + "columnOrder": { + "orderedColIds": [ + "date", + "country", + "value" + ] + } + } + } + }, + "groups": [ + "Group 2" + ] + } + ] + }, + "composite-leading-indicator": { + "id": "composite-leading-indicator", + "name": "Composite Leading Indicator", + "layout": [ + { + "i": "economy_composite_leading_indicator_oecd_obb", + "x": 0, + "y": 2, + "w": 40, + "h": 26, + "state": { + "params": { + "start_date": "2000-01-01", + "country": [ + "canada" + ] + }, + "chartModel": { + "modelType": "range", + "chartType": "line", + "chartOptions": {}, + "cellRange": { + "columns": [ + "date", + "value" + ] + }, + "suppressChartRanges": true + }, + "chartView": { + "enabled": true, + "chartType": "line" + }, + "columnState": { 
+ "default": { + "columnPinning": { + "leftColIds": [ + "date" + ], + "rightColIds": [] + }, + "columnOrder": { + "orderedColIds": [ + "date", + "value", + "country" + ] + } + } + } + }, + "groups": [ + "Group 2", + "Group 3" + ] + } + ] + }, + "share-price-index": { + "id": "share-price-index", + "name": "Share Price Index", + "layout": [ + { + "i": "economy_share_price_index_oecd_obb", + "x": 0, + "y": 2, + "w": 40, + "h": 26, + "state": { + "params": { + "country": [ + "canada" + ], + "start_date": "2000-01-01" + }, + "chartModel": { + "modelType": "range", + "chartType": "line", + "chartOptions": {}, + "cellRange": { + "columns": [ + "date", + "value" + ] + }, + "suppressChartRanges": true + }, + "chartView": { + "enabled": true, + "chartType": "line" + }, + "columnState": { + "default": { + "columnPinning": { + "leftColIds": [ + "date" + ], + "rightColIds": [] + }, + "columnOrder": { + "orderedColIds": [ + "date", + "country", + "value" + ] + } + } + } + }, + "groups": [ + "Group 2", + "Group 3" + ] + } + ] + }, + "interest-rates": { + "id": "interest-rates", + "name": "Interest Rates", + "layout": [ + { + "i": "economy_interest_rates_oecd_obb", + "x": 0, + "y": 2, + "w": 40, + "h": 27, + "state": { + "params": { + "country": [ + "canada" + ], + "start_date": "2000-01-01", + "duration": "long" + }, + "chartModel": { + "modelType": "range", + "chartType": "line", + "chartOptions": {}, + "cellRange": { + "columns": [ + "date", + "value" + ] + }, + "suppressChartRanges": true + }, + "chartView": { + "enabled": true, + "chartType": "line" + }, + "columnState": { + "default": { + "columnPinning": { + "leftColIds": [ + "date" + ], + "rightColIds": [] + }, + "columnOrder": { + "orderedColIds": [ + "date", + "value", + "country" + ] + } + } + } + }, + "groups": [ + "Group 2", + "Group 3" + ] + } + ] + }, + "balance-of-payments": { + "id": "balance-of-payments", + "name": "Balance of Payments", + "layout": [ + { + "i": "economy_balance_of_payments_oecd_obb", + "x": 0, + "y": 2, + "w": 40, + "h": 27, + "state": { + "params": { + "frequency": "quarterly", + "country": [ + "canada" + ], + "start_date": "2000-01-01" + }, + "chartView": { + "enabled": false, + "chartType": "line" + }, + "columnState": { + "default": { + "columnOrder": { + "orderedColIds": [ + "country", + "period", + "balance_percent_of_gdp", + "balance_total", + "balance_total_services", + "balance_total_secondary_income", + "balance_total_goods", + "balance_total_primary_income", + "credits_services_percent_of_goods_and_services", + "credits_services_percent_of_current_account", + "credits_total_services", + "credits_total_goods", + "credits_total_primary_income", + "credits_total_secondary_income", + "credits_total", + "debits_services_percent_of_goods_and_services", + "debits_services_percent_of_current_account", + "debits_total_services", + "debits_total_goods", + "debits_total_primary_income", + "debits_total", + "debits_total_secondary_income" + ] + }, + "columnPinning": { + "leftColIds": [ + "period" + ], + "rightColIds": [] + }, + "columnVisibility": { + "hiddenColIds": [ + "country" + ] + } + } + } + }, + "groups": [ + "Group 2", + "Group 3" + ] + } + ] + }, + "indicators": { + "id": "indicators", + "name": "Indicators", + "layout": [ + { + "i": "economy_available_indicators_oecd_obb", + "x": 0, + "y": 2, + "w": 40, + "h": 18, + "state": { + "params": { + "dataflows": "DF_KEI" + }, + "chartView": { + "enabled": false, + "chartType": "line" + }, + "columnState": { + "default": { + "columnPinning": { + "leftColIds": [ + 
"symbol" + ], + "rightColIds": [] + }, + "columnOrder": { + "orderedColIds": [ + "symbol_root", + "symbol", + "description", + "label", + "dataflow_id", + "dataflow_name", + "dimension_id", + "frequencies", + "transformations", + "member_of" + ] + }, + "rowSelection": [ + "0" + ], + "columnVisibility": { + "hiddenColIds": [ + "symbol_root" + ] + } + } + } + }, + "groups": [] + }, + { + "i": "economy_indicators_oecd_obb", + "x": 0, + "y": 20, + "w": 40, + "h": 15, + "state": { + "params": { + "symbol": "DF_KEI::ULC", + "country": [], + "start_date": "2000-01-01", + "transform": "_Z", + "pivot": "true" + }, + "chartView": { + "enabled": false, + "chartType": "line" + }, + "columnState": { + "default": { + "columnPinning": { + "leftColIds": [ + "country" + ], + "rightColIds": [] + }, + "columnOrder": { + "orderedColIds": [ + "country", + "2000-01-01", + "2000-04-01", + "2000-07-01", + "2000-10-01", + "2001-01-01", + "2001-04-01", + "2001-07-01", + "2001-10-01", + "2002-01-01", + "2002-04-01", + "2002-07-01", + "2002-10-01", + "2003-01-01", + "2003-04-01", + "2003-07-01", + "2003-10-01", + "2004-01-01", + "2004-04-01", + "2004-07-01", + "2004-10-01", + "2005-01-01", + "2005-04-01", + "2005-07-01", + "2005-10-01", + "2006-01-01", + "2006-04-01", + "2006-07-01", + "2006-10-01", + "2007-01-01", + "2007-04-01", + "2007-07-01", + "2007-10-01", + "2008-01-01", + "2008-04-01", + "2008-07-01", + "2008-10-01", + "2009-01-01", + "2009-04-01", + "2009-07-01", + "2009-10-01", + "2010-01-01", + "2010-04-01", + "2010-07-01", + "2010-10-01", + "2011-01-01", + "2011-04-01", + "2011-07-01", + "2011-10-01", + "2012-01-01", + "2012-04-01", + "2012-07-01", + "2012-10-01", + "2013-01-01", + "2013-04-01", + "2013-07-01", + "2013-10-01", + "2014-01-01", + "2014-04-01", + "2014-07-01", + "2014-10-01", + "2015-01-01", + "2015-04-01", + "2015-07-01", + "2015-10-01", + "2016-01-01", + "2016-04-01", + "2016-07-01", + "2016-10-01", + "2017-01-01", + "2017-04-01", + "2017-07-01", + "2017-10-01", + "2018-01-01", + "2018-04-01", + "2018-07-01", + "2018-10-01", + "2019-01-01", + "2019-04-01", + "2019-07-01", + "2019-10-01", + "2020-01-01", + "2020-04-01", + "2020-07-01", + "2020-10-01", + "2021-01-01", + "2021-04-01", + "2021-07-01", + "2021-10-01", + "2022-01-01", + "2022-04-01", + "2022-07-01", + "2022-10-01", + "2023-01-01", + "2023-04-01", + "2023-07-01", + "2023-10-01", + "2024-01-01", + "2024-04-01", + "2024-07-01", + "2024-10-01", + "2025-01-01", + "2025-04-01", + "2025-07-01", + "2025-10-01" + ] + } + } + } + }, + "groups": [ + "Group 3" + ] + } + ] + }, + "presentation-tables": { + "id": "presentation-tables", + "name": "Presentation Tables", + "layout": [ + { + "i": "oecd_utils_presentation_table_custom_obb", + "x": 0, + "y": 2, + "w": 40, + "h": 26, + "state": { + "params": { + "topic": "ECO", + "subtopic": "ECO_NAD", + "table": "DF_QNA_INCOME::T0107", + "country": [ + "AUS" + ], + "frequency": "Q", + "end_date": "2020-01-01" + }, + "chartView": { + "enabled": false, + "chartType": "line" + }, + "columnState": { + "default": { + "columnOrder": { + "orderedColIds": [ + "title", + "2025-10-01", + "2025-07-01", + "2025-04-01", + "2025-01-01", + "2024-10-01", + "2024-07-01", + "2024-04-01", + "2024-01-01", + "2023-10-01", + "2023-07-01", + "2023-04-01", + "2023-01-01", + "2022-10-01", + "2022-07-01", + "2022-04-01", + "2022-01-01", + "2021-10-01", + "2021-07-01", + "2021-04-01", + "2021-01-01", + "2020-10-01", + "2020-07-01", + "2020-04-01", + "2020-01-01" + ] + } + } + } + }, + "groups": [] + } + ] + } + }, + 
"groups": [ + { + "name": "Group 1", + "type": "param", + "paramName": "frequency", + "defaultValue": "quarter" + }, + { + "name": "Group 2", + "type": "param", + "paramName": "country", + "defaultValue": [ + "canada" + ] + }, + { + "name": "Group 3", + "type": "param", + "paramName": "start_date", + "defaultValue": "2000-01-01" + } + ] + } +] \ No newline at end of file diff --git a/openbb_platform/providers/oecd/openbb_oecd/assets/oecd_cache.json.xz b/openbb_platform/providers/oecd/openbb_oecd/assets/oecd_cache.json.xz new file mode 100644 index 00000000000..b49195dc9a1 Binary files /dev/null and b/openbb_platform/providers/oecd/openbb_oecd/assets/oecd_cache.json.xz differ diff --git a/openbb_platform/providers/oecd/openbb_oecd/models/available_indicators.py b/openbb_platform/providers/oecd/openbb_oecd/models/available_indicators.py new file mode 100644 index 00000000000..7d9be8aacec --- /dev/null +++ b/openbb_platform/providers/oecd/openbb_oecd/models/available_indicators.py @@ -0,0 +1,313 @@ +"""OECD Available Indicators Model.""" + +# pylint: disable=unused-argument + +from typing import Any + +from openbb_core.app.service.system_service import SystemService +from openbb_core.provider.abstract.fetcher import Fetcher +from openbb_core.provider.standard_models.available_indicators import ( + AvailableIndicatorsData, + AvailableIndicesQueryParams, +) +from pydantic import ConfigDict, Field + +api_prefix = SystemService().system_settings.api_settings.prefix + + +def _build_also_in( + indicator: str, + df_id: str, + code_to_dataflows: dict[str, list[str]], + df_name_cache: dict[str, str], + metadata, +) -> list[str]: + """Build 'also_in' cross-reference list from pre-built reverse index.""" + if not indicator or indicator not in code_to_dataflows: + return [] + + # Deduplicate while preserving order. + seen: set[str] = set() + unique: list[str] = [] + for d in code_to_dataflows[indicator]: + if d != df_id and d not in seen: + seen.add(d) + unique.append(d) + + labeled: list[str] = [] + for other_id in unique[:10]: + name = df_name_cache.get(other_id, "") + if not name: + try: + full_id = metadata._resolve_dataflow_id( # noqa: SLF001 # pylint: disable=W0212 + other_id + ) + name = df_name_cache.get(full_id, "") + except Exception: # noqa: BLE001, S110 + pass + labeled.append(f"{other_id} ({name})" if name else other_id) + if len(unique) > 10: + labeled.append(f"... and {len(unique) - 10} more") + return labeled + + +class OecdAvailableIndicatorsQueryParams(AvailableIndicesQueryParams): + """OECD Available Indicators Query. + + Search the OECD SDMX catalogue for indicators across all dataflows. + Supports AND/OR/quoted-phrase queries and dataflow filtering. + """ + + __json_schema_extra__ = { + "topic": { + "x-widget_config": { + "label": "Topic", + "description": "Filter by topic. Leave blank for all topics.", + "type": "endpoint", + "optionsEndpoint": f"{api_prefix}/oecd_utils/list_topic_choices", + "style": {"popupWidth": 600}, + "optional": True, + }, + }, + "dataflow": { + "multiple_items_allowed": True, + "x-widget_config": { + "label": "Dataflow", + "description": "Filter to specific dataflow(s). Leave blank for all.", + "type": "endpoint", + "multiSelect": True, + "multiple": False, + "optionsEndpoint": f"{api_prefix}/oecd_utils/list_dataflow_choices", + "style": {"popupWidth": 700}, + "optional": True, + }, + }, + } + topic: str | None = Field( + default=None, + description="Filter to a topic ID (e.g. 'ECO', 'HEA'). 
Use list_topics() to see available topics.", + ) + dataflows: str | list[str] | None = Field( + default=None, + description="Filter by dataflow ID(s). Comma-separated or list. e.g. 'DF_KEI' or 'DF_KEI,DF_QNA'.", + ) + query: str | None = Field( + default=None, + description=( + "Search string. Use quotes for exact phrases, + for AND, | for OR. " + "e.g. 'GDP growth', 'CPI | inflation', 'balance +trade'." + ), + ) + keywords: str | list[str] | None = Field( + default=None, + description="Single-word keyword filter(s). Prefix with 'not' to exclude.", + ) + + +class OecdAvailableIndicatorsData(AvailableIndicatorsData): + """OECD Available Indicators Data.""" + + model_config = ConfigDict(extra="ignore") + + __alias_dict__ = { + "symbol": "series_id", + "symbol_root": "indicator", + } + + label: str | None = Field( + default=None, description="Short human-readable name of the indicator." + ) + dataflow_id: str = Field(description="Dataflow identifier.") + dataflow_name: str | None = Field( + default=None, description="Human-readable dataflow name." + ) + dimension_id: str | None = Field( + default=None, description="Dimension containing the indicator." + ) + parent: str | None = Field( + default=None, description="Parent indicator code in the codelist hierarchy." + ) + frequencies: list[str] = Field( + default_factory=list, + description="Available observation frequencies (e.g. Annual, Quarterly, Monthly).", + ) + transformations: list[str] = Field( + default_factory=list, + description="Available data transformations (e.g. Growth rate, Index).", + ) + member_of: list[str] = Field( + default_factory=list, + description="Other OECD dataflows that also contain this indicator code.", + ) + + +class OecdAvailableIndicatorsFetcher( + Fetcher[OecdAvailableIndicatorsQueryParams, list[OecdAvailableIndicatorsData]] +): + """OECD Available Indicators Fetcher.""" + + @staticmethod + def transform_query( + params: dict[str, Any], + ) -> OecdAvailableIndicatorsQueryParams: + """Transform the query.""" + return OecdAvailableIndicatorsQueryParams(**params) + + @staticmethod + def _build_enrichment_indexes(metadata): # noqa: PLR0912 + """Build reverse indexes from the in-memory cache for fast enrichment. + + Returns code_to_dataflows, indicator_to_tables, constrained_cache, + df_name_cache — all derived from already-loaded data with NO API calls. + """ + # Reverse index: indicator_code → list of short dataflow IDs. + code_to_dataflows: dict[str, list[str]] = {} + for ( + _full_id, + inds, + ) in ( + metadata._dataflow_indicators_cache.items() # noqa: SLF001 # pylint: disable=W0212 + ): + for ind in inds: + code = ind.get("indicator", "") + if code: + code_to_dataflows.setdefault(code, []).append( + ind.get("dataflow_id", _full_id) + ) + + # Dataflow name lookup. + df_name_cache: dict[str, str] = {} + for fid, meta_entry in metadata.dataflows.items(): + short = meta_entry.get("short_id", "") + name = meta_entry.get("name", "") + if short: + df_name_cache[short] = name + df_name_cache[fid] = name + + return code_to_dataflows, df_name_cache + + @staticmethod + def _enrich_results( # noqa: PLR0912 + results: list[dict], + metadata, + code_to_dataflows: dict[str, list[str]], + df_name_cache: dict[str, str], + ) -> list[dict]: + """Enrich search results with symbol, membership, and cross-refs. + + Uses only pre-built indexes and already-cached structures. + """ + # Build freq/transformation labels from cached constraints + codelists. 
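+        # Distinct dataflow IDs present in the results, dropping empty IDs.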
+ target_dfs = {r.get("dataflow_id", "") for r in results} - {""} + freq_labels: dict[str, list[str]] = {} # df_id → ["A (Annual)", ...] + transform_labels: dict[str, list[str]] = {} # df_id → ["G1 (Growth ...)", ...] + + for df_id in target_dfs: + full_id = ( + metadata._short_id_map.get( # noqa: SLF001 # pylint: disable=W0212 + df_id + ) + ) + if not full_id: + full_id = df_id if df_id in metadata.datastructures else None + if not full_id or full_id not in metadata.datastructures: + continue + constraints = metadata._dataflow_constraints.get( # noqa: SLF001 # pylint: disable=W0212 + full_id, {} + ) + dsd = metadata.datastructures.get(full_id, {}) + dim_cl: dict[str, str] = { + d["id"]: d.get("codelist_id", "") for d in dsd.get("dimensions", []) + } + for dim_key, target_dict in [ + ("FREQ", freq_labels), + ("TRANSFORMATION", transform_labels), + ]: + codes = constraints.get(dim_key, []) + if not codes: + continue + cl_id = dim_cl.get(dim_key, "") + cl = metadata.codelists.get(cl_id, {}) if cl_id else {} # noqa: SLF001 + target_dict[df_id] = [f"{c} ({cl.get(c, c)})" for c in codes] + + for row in results: + df_id = row.get("dataflow_id", "") + indicator = row.get("indicator", "") + + # Clean symbol: DATAFLOW::INDICATOR_CODE. + row["series_id"] = ( + f"{df_id}::{indicator}" if df_id and indicator else (indicator or df_id) + ) + row["symbol"] = row["series_id"] + + # Cross-reference: other dataflows containing this indicator. + row["member_of"] = _build_also_in( + indicator, df_id, code_to_dataflows, df_name_cache, metadata + ) + + row["frequencies"] = freq_labels.get(df_id, []) + row["transformations"] = transform_labels.get(df_id, []) + + return results + + @staticmethod + def extract_data( + query: OecdAvailableIndicatorsQueryParams, + credentials: dict[str, str] | None, + **kwargs: Any, + ) -> list[dict]: + """Search OECD indicators using the metadata catalogue.""" + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.metadata import OecdMetadata + + metadata = OecdMetadata() + dataflows = query.dataflows + + if isinstance(dataflows, str): + dataflows = [d.strip() for d in dataflows.split(",") if d.strip()] + + # If a topic filter is given, expand it to the dataflows in that topic + # and intersect with any explicit dataflow filter. + if query.topic: + topic_dfs = [e["value"] for e in metadata.list_dataflows(topic=query.topic)] + if dataflows: + intersected = [ + d + for d in topic_dfs + if any(d.endswith(f) or d == f for f in dataflows) + ] + dataflows = intersected if intersected else dataflows + else: + dataflows = topic_dfs + + keywords = query.keywords + + if isinstance(keywords, str): + keywords = [k.strip() for k in keywords.split(",") if k.strip()] + + results = metadata.search_indicators( + query=query.query or "", + dataflows=dataflows, + keywords=keywords, + ) + + if not results: + return results + + # Build reverse indexes ONCE from already-loaded cache (no API calls). 
+ code_to_dataflows, df_name_cache = ( + OecdAvailableIndicatorsFetcher._build_enrichment_indexes(metadata) + ) + + return OecdAvailableIndicatorsFetcher._enrich_results( + results, metadata, code_to_dataflows, df_name_cache + ) + + @staticmethod + def transform_data( + query: OecdAvailableIndicatorsQueryParams, + data: list[dict], + **kwargs: Any, + ) -> list[OecdAvailableIndicatorsData]: + """Transform the data.""" + return [OecdAvailableIndicatorsData.model_validate(d) for d in data] diff --git a/openbb_platform/providers/oecd/openbb_oecd/models/balance_of_payments.py b/openbb_platform/providers/oecd/openbb_oecd/models/balance_of_payments.py new file mode 100644 index 00000000000..cea6333a917 --- /dev/null +++ b/openbb_platform/providers/oecd/openbb_oecd/models/balance_of_payments.py @@ -0,0 +1,300 @@ +"""OECD Balance of Payments (BOP6) Model.""" + +# pylint: disable=unused-argument + +from datetime import date as dateType +from typing import Any, Literal + +from openbb_core.app.model.abstract.error import OpenBBError +from openbb_core.provider.abstract.fetcher import Fetcher +from openbb_core.provider.standard_models.balance_of_payments import ( + BalanceOfPaymentsQueryParams, + BP6BopUsdData, +) +from openbb_core.provider.utils.descriptions import QUERY_DESCRIPTIONS +from openbb_core.provider.utils.errors import EmptyDataError +from openbb_oecd.utils.constants import BOP_COUNTRIES +from pydantic import Field, field_validator + +# Map (MEASURE, ACCOUNTING_ENTRY, UNIT_MEASURE) → BP6BopUsdData field name. +_COL_MAP: dict[tuple[str, str, str], str] = { + ("CA", "B", "PT_B1GQ"): "balance_percent_of_gdp", + ("CA", "B", "USD_EXC"): "balance_total", + ("S", "B", "USD_EXC"): "balance_total_services", + ("IN2", "B", "USD_EXC"): "balance_total_secondary_income", + ("G", "B", "USD_EXC"): "balance_total_goods", + ("IN1", "B", "USD_EXC"): "balance_total_primary_income", + ("S", "C", "PT_GS"): "credits_services_percent_of_goods_and_services", + ("S", "C", "PT_CA"): "credits_services_percent_of_current_account", + ("S", "C", "USD_EXC"): "credits_total_services", + ("G", "C", "USD_EXC"): "credits_total_goods", + ("IN1", "C", "USD_EXC"): "credits_total_primary_income", + ("IN2", "C", "USD_EXC"): "credits_total_secondary_income", + ("CA", "C", "USD_EXC"): "credits_total", + ("S", "D", "PT_GS"): "debits_services_percent_of_goods_and_services", + ("S", "D", "PT_CA"): "debits_services_percent_of_current_account", + ("S", "D", "USD_EXC"): "debits_total_services", + ("G", "D", "USD_EXC"): "debits_total_goods", + ("IN1", "D", "USD_EXC"): "debits_total_primary_income", + ("CA", "D", "USD_EXC"): "debits_total", + ("IN2", "D", "USD_EXC"): "debits_total_secondary_income", +} + +_FREQ_MAP = {"annual": "A", "quarterly": "Q"} +_Q_MAP = { + 1: "Q1", + 2: "Q1", + 3: "Q1", + 4: "Q2", + 5: "Q2", + 6: "Q2", + 7: "Q3", + 8: "Q3", + 9: "Q3", + 10: "Q4", + 11: "Q4", + 12: "Q4", +} + + +def _format_start_period(d: dateType, freq: str) -> str: + """Format a date as an SDMX startPeriod for the given frequency.""" + if freq == "A": + return str(d.year) + if freq == "Q": + return f"{d.year}-{_Q_MAP[d.month]}" + return f"{d.year}-{d.month:02d}" + + +def _format_end_period(d: dateType, freq: str) -> str: + """Format a date as an SDMX endPeriod for the given frequency.""" + if freq == "A": + return str(d.year) + if freq == "Q": + return f"{d.year}-{_Q_MAP[d.month]}" + return f"{d.year}-{d.month:02d}" + + +class OECDBalanceOfPaymentsQueryParams(BalanceOfPaymentsQueryParams): + """OECD Balance of Payments Query. 
+ + Source: https://data-explorer.oecd.org/?lc=en + """ + + __json_schema_extra__ = { + "country": { + "multiple_items_allowed": True, + "choices": list(BOP_COUNTRIES) + ["all"], + }, + } + + country: str = Field( + default="united_states", + description=QUERY_DESCRIPTIONS.get("country", ""), + ) + frequency: Literal["annual", "quarterly"] = Field( + default="quarterly", + description="Frequency of the data.", + ) + start_date: dateType | None = Field( + default=None, + description=QUERY_DESCRIPTIONS.get("start_date", ""), + ) + end_date: dateType | None = Field( + default=None, + description=QUERY_DESCRIPTIONS.get("end_date", ""), + ) + + @field_validator("country", mode="before", check_fields=False) + @classmethod + def validate_country(cls, c: str): + """Validate country.""" + return c.replace(" ", "_").strip().lower() + + +class OECDBalanceOfPaymentsData(BP6BopUsdData): + """OECD Balance of Payments Data.""" + + __alias_dict__ = {"period": "date"} + + +class OECDBalanceOfPaymentsFetcher( + Fetcher[OECDBalanceOfPaymentsQueryParams, list[OECDBalanceOfPaymentsData]] +): + """OECD Balance of Payments Fetcher.""" + + @staticmethod + def transform_query( + params: dict[str, Any], + ) -> OECDBalanceOfPaymentsQueryParams: + """Transform the query.""" + return OECDBalanceOfPaymentsQueryParams(**params) + + @staticmethod + def extract_data( + query: OECDBalanceOfPaymentsQueryParams, + credentials: dict[str, str] | None, + **kwargs: Any, + ) -> list[dict]: + """Return raw data from the OECD BOP endpoint.""" + # pylint: disable=import-outside-toplevel + from io import StringIO + + from openbb_core.provider.utils.helpers import make_request + from openbb_oecd.utils.metadata import OecdMetadata + from pandas import read_csv, to_numeric + from pandas.api.types import is_string_dtype + + meta = OecdMetadata() + countries = meta.resolve_country_codes("DF_BOP", query.country) + country_str = "+".join(countries) if countries else "" + freq_code = _FREQ_MAP[query.frequency] + + # Only request the measures/entries/units we actually map. + measures = sorted({k[0] for k in _COL_MAP}) + entries = sorted({k[1] for k in _COL_MAP}) + units = sorted({k[2] for k in _COL_MAP}) + + dim_filter = f"{country_str}..{'+'.join(measures)}.{'+'.join(entries)}..{freq_code}.{'+'.join(units)}.Y" + url = f"https://sdmx.oecd.org/public/rest/data/OECD.SDD.TPS,DSD_BOP@DF_BOP,1.0/{dim_filter}" + params: list[str] = [] + + if query.start_date: + params.append( + f"startPeriod={_format_start_period(query.start_date, freq_code)}" + ) + + if query.end_date: + params.append(f"endPeriod={_format_end_period(query.end_date, freq_code)}") + + if params: + url += "?" + "&".join(params) + + headers = { + "Accept": "application/vnd.sdmx.data+csv; version=2.0.0; labels=both", + "User-Agent": "OpenBB/1.0", + } + response = make_request(url, headers=headers, timeout=120) + + if response.status_code != 200: + raise OpenBBError( + f"OECD BOP request failed ({response.status_code}): {response.reason}" + ) + + text = response.text + + if not text or not text.strip(): + raise OpenBBError( + EmptyDataError(f"Empty response from OECD BOP. URL: {url}") + ) + + try: + df = read_csv(StringIO(text)) + except Exception as exc: + raise OpenBBError( + f"Failed to parse OECD BOP CSV: {exc}\nURL: {url}" + ) from exc + + if df.empty: + raise OpenBBError(EmptyDataError(f"No BOP data rows. URL: {url}")) + + # Strip "CODE: Label" columns to just "CODE". 
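+        # With labels=both, headers and cells arrive as "CODE: Label", e.g.
+        # a REF_AREA cell of "USA: United States"; the splits below keep the
+        # code and move the label into a companion "<COL>_label" column.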
+ rename_map: dict[str, str] = {} + + for col in df.columns: + if ": " in col: + rename_map[col] = col.split(":")[0].strip() + else: + rename_map[col] = col + + df = df.rename(columns=rename_map) + skip_cols = { + "TIME_PERIOD", + "OBS_VALUE", + "DATAFLOW", + "STRUCTURE", + "STRUCTURE_ID", + "ACTION", + } + + for col in [ + c for c in df.columns if c not in skip_cols and is_string_dtype(df[c]) + ]: + sample = df[col].dropna().head(10) + + if sample.empty: + continue + + if sample.str.contains(": ", regex=False).any(): + split = df[col].str.split(": ", n=1, expand=True) + df[col] = split[0].str.strip() + + if split.shape[1] > 1: + df[f"{col}_label"] = split[1].str.strip() + else: + df[f"{col}_label"] = df[col] + + if "OBS_VALUE" in df.columns: + df["OBS_VALUE"] = to_numeric(df["OBS_VALUE"], errors="coerce") + + return df.to_dict(orient="records") + + @staticmethod + def transform_data( + query: OECDBalanceOfPaymentsQueryParams, + data: list[dict], + **kwargs: Any, + ) -> list[OECDBalanceOfPaymentsData]: + """Pivot long OECD rows into wide BP6 format indexed by date.""" + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.helpers import oecd_date_to_python_date + + # Group values by (date, country) and map each row to its column. + groups: dict[tuple, dict[str, Any]] = {} + + for row in data: + value = row.get("OBS_VALUE") + + if value is None or value == "": + continue + + d = oecd_date_to_python_date(row.get("TIME_PERIOD", "")) + + if d is None: + continue + + if query.start_date and d < query.start_date: + continue + + if query.end_date and d > query.end_date: + continue + + measure = row.get("MEASURE", "") + entry = row.get("ACCOUNTING_ENTRY", "") + unit = row.get("UNIT_MEASURE", "") + col_name = _COL_MAP.get((measure, entry, unit)) + + if col_name is None: + continue + + key = (d, row.get("REF_AREA_label", row.get("REF_AREA", ""))) + + if key not in groups: + groups[key] = {"date": d, "country": key[1]} + + val = float(value) + + if col_name == "balance_percent_of_gdp" or "percent" in col_name: + val = val / 100 + + groups[key][col_name] = val + + output: list[OECDBalanceOfPaymentsData] = [] + + for rec in groups.values(): + output.append(OECDBalanceOfPaymentsData.model_validate(rec)) + + return sorted( + output, + key=lambda r: r.period or dateType.min, + ) diff --git a/openbb_platform/providers/oecd/openbb_oecd/models/composite_leading_indicator.py b/openbb_platform/providers/oecd/openbb_oecd/models/composite_leading_indicator.py index e98bcfc2600..59cb3324a05 100644 --- a/openbb_platform/providers/oecd/openbb_oecd/models/composite_leading_indicator.py +++ b/openbb_platform/providers/oecd/openbb_oecd/models/composite_leading_indicator.py @@ -12,59 +12,10 @@ CompositeLeadingIndicatorData, CompositeLeadingIndicatorQueryParams, ) -from openbb_core.provider.utils.errors import EmptyDataError +from openbb_oecd.utils.constants import CLI_COUNTRIES from pydantic import Field, field_validator -COUNTRIES = { - "g20": "G20", - "g7": "G7", - "asia5": "A5M", - "north_america": "NAFTA", - "europe4": "G4E", - "australia": "AUS", - "brazil": "BRA", - "canada": "CAN", - "china": "CHN", - "france": "FRA", - "germany": "DEU", - "india": "IND", - "indonesia": "IDN", - "italy": "ITA", - "japan": "JPN", - "mexico": "MEX", - "spain": "ESP", - "south_africa": "ZAF", - "south_korea": "KOR", - "turkey": "TUR", - "united_states": "USA", - "united_kingdom": "GBR", -} -COUNTRY_CHOICES = list(COUNTRIES) + ["all"] -Countries = Literal[ - "g20", - "g7", - "asia5", - "north_america", - "europe4", - 
"australia", - "brazil", - "canada", - "china", - "france", - "germany", - "india", - "indonesia", - "italy", - "japan", - "mexico", - "south_africa", - "south_korea", - "spain", - "turkey", - "united_kingdom", - "united_states", - "all", -] +COUNTRY_CHOICES = list(CLI_COUNTRIES) + ["all"] class OECDCompositeLeadingIndicatorQueryParams(CompositeLeadingIndicatorQueryParams): @@ -77,7 +28,7 @@ class OECDCompositeLeadingIndicatorQueryParams(CompositeLeadingIndicatorQueryPar }, } - country: Countries | str = Field( + country: str = Field( description="Country to get the CLI for, default is G20.", default="g20", ) @@ -85,6 +36,7 @@ class OECDCompositeLeadingIndicatorQueryParams(CompositeLeadingIndicatorQueryPar default="amplitude", description="Adjustment of the data, either 'amplitude' or 'normalized'." + " Default is amplitude.", + json_schema_extra={"choices": ["amplitude", "normalized"]}, ) growth_rate: bool = Field( default=False, @@ -149,78 +101,47 @@ def transform_query( return OECDCompositeLeadingIndicatorQueryParams(**transformed_params) @staticmethod - async def aextract_data( + def extract_data( query: OECDCompositeLeadingIndicatorQueryParams, credentials: dict[str, str] | None, **kwargs: Any, ) -> list[dict]: """Return the raw data from the OECD endpoint.""" # pylint: disable=import-outside-toplevel - from io import StringIO # noqa - from openbb_oecd.utils.helpers import oecd_date_to_python_date - from pandas import read_csv - from openbb_core.provider.utils.helpers import amake_request - - COUNTRY_MAP = {v: k.replace("_", " ").title() for k, v in COUNTRIES.items()} + from openbb_oecd.utils.query_builder import OecdQueryBuilder + qb = OecdQueryBuilder() growth_rate = "GY" if query.growth_rate is True else "IX" adjustment = "AA" if query.adjustment == "amplitude" else "NOR" if growth_rate == "GY": adjustment = "" - def country_string(input_str: str): - if input_str == "all": - return "" - _countries = input_str.split(",") - return "+".join([COUNTRIES[country.lower()] for country in _countries]) - - country = country_string(query.country) if query.country else "" - url = ( - "https://sdmx.oecd.org/public/rest/data/OECD.SDD.STES,DSD_STES@DF_CLI,4.1" - + f"/{country}.M.LI...{adjustment}.{growth_rate}..H" - + f"?startPeriod={query.start_date}&endPeriod={query.end_date}" - + "&dimensionAtObservation=TIME_PERIOD&detail=dataonly&format=csvfile" - ) - - async def response_callback(response, _): - """Response callback.""" - if response.status != 200: - raise OpenBBError(f"Error with the OECD request: {response.status}") - return await response.text() - - headers = {"Accept": "application/vnd.sdmx.data+csv; charset=utf-8"} - response = await amake_request( - url, timeout=30, headers=headers, response_callback=response_callback - ) - - df = read_csv(StringIO(response)).get(["REF_AREA", "TIME_PERIOD", "OBS_VALUE"]) # type: ignore - - if df.empty: # type: ignore - raise EmptyDataError("No data was found.") - - df = df.rename(columns={"REF_AREA": "country", "TIME_PERIOD": "date", "OBS_VALUE": "value"}) # type: ignore - df.country = [ - ( - COUNTRY_MAP.get(d, d) - .replace("Asia5", "Major 5 Asian Economies") - .replace("Europe4", "Major 4 European Economies") + countries = qb.metadata.resolve_country_codes("DF_CLI", query.country) + country_str = "+".join(countries) if countries else "" + + try: + result = qb.fetch_data( + dataflow="DF_CLI", + start_date=str(query.start_date) if query.start_date else None, + end_date=str(query.end_date) if query.end_date else None, + _skip_validation=True, + 
REF_AREA=country_str, + FREQ="M", + MEASURE="LI", + ADJUSTMENT=adjustment, + TRANSFORMATION=growth_rate, + METHODOLOGY="H", ) - for d in df.country - ] - df.date = df.date.apply(oecd_date_to_python_date) + except Exception as exc: + raise OpenBBError(f"Error fetching OECD data: {exc}") from exc - if query.growth_rate is True: - df.value = df.value.astype(float) / 100 + records = result["data"] - df = ( - df.query("value.notnull()") - .set_index(["date", "country"]) - .sort_index() - .reset_index() - ) + if not records: + raise OpenBBError("No data returned from OECD for the given query.") - return df.to_dict("records") + return records @staticmethod def transform_data( @@ -229,4 +150,34 @@ def transform_data( **kwargs: Any, ) -> list[OECDCompositeLeadingIndicatorData]: """Transform the data from the OECD endpoint.""" - return [OECDCompositeLeadingIndicatorData.model_validate(d) for d in data] + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.helpers import oecd_date_to_python_date + + is_growth = query.growth_rate is True + output: list[OECDCompositeLeadingIndicatorData] = [] + + for row in data: + d = oecd_date_to_python_date(row.get("TIME_PERIOD", "")) + + if d is None: + continue + + value = row.get("OBS_VALUE") + + if value is None or value == "": + continue + + value = float(value) + + if is_growth: + value = value / 100 + + output.append( + OECDCompositeLeadingIndicatorData( + date=d, + country=row.get("REF_AREA_label", row.get("REF_AREA", "")), + value=value, + ) + ) + + return sorted(output, key=lambda x: (x.date, x.country or "")) diff --git a/openbb_platform/providers/oecd/openbb_oecd/models/consumer_price_index.py b/openbb_platform/providers/oecd/openbb_oecd/models/consumer_price_index.py index 304283304c0..9050814caff 100644 --- a/openbb_platform/providers/oecd/openbb_oecd/models/consumer_price_index.py +++ b/openbb_platform/providers/oecd/openbb_oecd/models/consumer_price_index.py @@ -11,30 +11,34 @@ ConsumerPriceIndexData, ConsumerPriceIndexQueryParams, ) -from openbb_core.provider.utils.helpers import check_item -from openbb_oecd.utils.constants import ( - CODE_TO_COUNTRY_CPI, - COUNTRY_TO_CODE_CPI, -) +from openbb_oecd.utils.constants import CPI_COUNTRIES from pydantic import Field, field_validator -countries = tuple(CODE_TO_COUNTRY_CPI.values()) + ("all",) -CountriesList = list(countries) # type: ignore - +# Domain-specific expenditure mappings (COICOP codes → human labels). +# These are NOT country dicts — they are indicator-level constants specific to the +# CPI dataflow and are kept here intentionally. 
expenditure_dict_rev = { + # --- Main COICOP categories (CP01-CP12) --- "_T": "total", "CP01": "food_non_alcoholic_beverages", "CP02": "alcoholic_beverages_tobacco_narcotics", "CP03": "clothing_footwear", "CP04": "housing_water_electricity_gas", + "CP041": "actual_rentals", + "CP042": "imputed_rentals", + "CP043": "maintenance_repair_dwelling", + "CP044": "water_supply_other_services", + "CP045": "electricity_gas_other_fuels", "CP05": "furniture_household_equipment", "CP06": "health", "CP07": "transport", + "CP0722": "fuels_lubricants_personal", "CP08": "communication", "CP09": "recreation_culture", "CP10": "education", "CP11": "restaurants_hotels", "CP12": "miscellaneous_goods_services", + # --- Aggregate / special categories --- "CP045_0722": "energy", "GD": "goods", "CP041T043": "housing", @@ -45,66 +49,86 @@ "SERV": "services", "_TXNRG_01_02": "overall_excl_energy_food_alcohol_tobacco", "CPRES": "residuals", - "CP0722": "fuels_lubricants_personal", - "CP041": "actual_rentals", - "CP042": "imputed_rentals", - "CP043": "maintenance_repair_dwelling", - "CP044": "water_supply_other_services", - "CP045": "electricity_gas_other_fuels", } expenditure_dict = {v: k for k, v in expenditure_dict_rev.items()} -expenditures = tuple(expenditure_dict.keys()) + ("all",) -expenditure_choices = [ - "total", - "all", - "actual_rentals", - "alcoholic_beverages_tobacco_narcotics", - "all_non_food_non_energy", - "clothing_footwear", - "communication", - "education", - "electricity_gas_other_fuels", - "energy", - "overall_excl_energy_food_alcohol_tobacco", - "food_non_alcoholic_beverages", - "fuels_lubricants_personal", - "furniture_household_equipment", - "goods", - "housing", - "housing_excluding_rentals", - "housing_water_electricity_gas", - "health", - "imputed_rentals", - "maintenance_repair_dwelling", - "miscellaneous_goods_services", - "recreation_culture", - "residuals", - "restaurants_hotels", - "services_less_housing", - "services_less_house_excl_rentals", - "services", - "transport", - "water_supply_other_services", -] +expenditure_choices = sorted(expenditure_dict.keys()) + ["all"] transform_choices = ["index", "yoy", "period"] +# CPI transformation codes: user-facing name → TRANSFORMATION dimension value. +_TRANSFORM_MAP = { + "index": "_Z", # Index — no transformation + "yoy": "GY", # Growth rate, over 1 year + "period": "G1", # Growth rate, period on period +} + +# Frequency codes. +_FREQ_MAP = {"annual": "A", "quarter": "Q", "monthly": "M"} + +# User-friendly unit labels keyed by transform name. +_UNIT_LABELS = { + "index": "Index", + "yoy": "Year-over-year (YOY) percent change", + "period": "Period-over-period percent change", +} + +# Sort order for expenditure categories (COICOP codes). +_EXPENDITURE_ORDER: dict[str, int] = { + # Main COICOP categories + "_T": 0, + "CP01": 1, + "CP02": 2, + "CP03": 3, + "CP04": 4, + "CP041": 5, + "CP042": 6, + "CP043": 7, + "CP044": 8, + "CP045": 9, + "CP05": 10, + "CP06": 11, + "CP07": 12, + "CP0722": 13, + "CP08": 14, + "CP09": 15, + "CP10": 16, + "CP11": 17, + "CP12": 18, + # Aggregate / special categories + "CP045_0722": 19, + "GD": 20, + "CP041T043": 21, + "CP041T043X042": 22, + "_TXCP01_NRG": 23, + "SERVXCP041_042_0432": 24, + "SERVXCP041_0432": 25, + "SERV": 26, + "_TXNRG_01_02": 27, + "CPRES": 28, +} + class OECDCPIQueryParams(ConsumerPriceIndexQueryParams): """OECD CPI Query. 
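+
+    Price index levels and growth rates by COICOP expenditure category
+    (Classification of Individual Consumption According to Purpose),
+    sourced from the DF_PRICES_ALL dataflow.
+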
+ Notes + ----- Source: https://data-explorer.oecd.org/?lc=en """ __json_schema_extra__ = { "country": { "multiple_items_allowed": True, - "choices": CountriesList, + "choices": list(CPI_COUNTRIES) + ["all"], + }, + "frequency": { + "choices": ["annual", "quarter", "monthly"], }, "transform": { "choices": transform_choices, }, "expenditure": { + "multiple_items_allowed": True, "choices": expenditure_choices, }, } @@ -118,28 +142,36 @@ class OECDCPIQueryParams(ConsumerPriceIndexQueryParams): @classmethod def validate_country(cls, c: str): """Validate country.""" - result: list = [] - values = c.replace(" ", "_").split(",") - for v in values: - check_item(v.lower(), CountriesList) - result.append(v.lower()) - return ",".join(result) + return c.replace(" ", "_").strip().lower() @field_validator("expenditure", mode="before", check_fields=False) @classmethod def validate_expenditure(cls, v): """Validate expenditure.""" - if v.lower() not in expenditure_choices: - raise ValueError( - f"Expenditure '{v}' is not a valid choice. Valid choices:\n\n{expenditure_choices}" - ) - return v + items = [s.strip().lower() for s in str(v).split(",") if s.strip()] + for item in items: + if item not in expenditure_choices: + raise ValueError( + f"Expenditure '{item}' is not a valid choice. Valid choices:\n\n{expenditure_choices}" + ) + return ",".join(items) class OECDCPIData(ConsumerPriceIndexData): """OECD CPI Data.""" + unit: str = Field(description="Unit of measurement.") + unit_multiplier: int | float = Field( + description="Unit multiplier for the observation value.", + ) + country_code: str = Field(description="ISO3 country code.") + series_id: str = Field(description="OECD series identifier.") expenditure: str = Field(description="Expenditure component of CPI.") + title: str = Field(description="Complete reference title for the series.") + order: int | None = Field( + default=None, + description="Sort order for expenditure categories.", + ) class OECDCPIFetcher(Fetcher[OECDCPIQueryParams, list[OECDCPIData]]): @@ -166,96 +198,129 @@ def extract_data( ) -> list[dict]: """Return the raw data from the OECD endpoint.""" # pylint: disable=import-outside-toplevel - from requests.exceptions import HTTPError # noqa - from openbb_oecd.utils import helpers # noqa + from openbb_oecd.utils.query_builder import OecdQueryBuilder + qb = OecdQueryBuilder() + countries = qb.metadata.resolve_country_codes("DF_PRICES_ALL", query.country) + country_str = "+".join(countries) if countries else "" methodology = "HICP" if query.harmonized is True else "N" - unit = "mom" if query.transform == "period" else query.transform - query.frequency = ( - "monthly" - if query.harmonized is True and query.frequency == "quarter" - else query.frequency - ) - frequency = query.frequency[0].upper() - units = { - "index": "IX", - "yoy": "PA", - "mom": "PC", - }[unit] - expenditure = ( - "" if query.expenditure == "all" else expenditure_dict[query.expenditure] - ) + # Harmonized data not available quarterly — force monthly + freq = query.frequency + + if query.harmonized is True and freq == "quarter": + freq = "monthly" + + freq_code = _FREQ_MAP.get(freq, freq[0].upper() if freq else "M") + transform_code = _TRANSFORM_MAP.get(query.transform, "_Z") + exp_items = [s.strip() for s in query.expenditure.split(",") if s.strip()] + if "all" in exp_items: + expenditure_code = "" + else: + codes = [expenditure_dict.get(e, e) for e in exp_items] + expenditure_code = "+".join(codes) - def country_string(input_str: str): - if input_str == "all": - return 
"" - _countries = input_str.split(",") - return "+".join([COUNTRY_TO_CODE_CPI[country] for country in _countries]) - - country = country_string(query.country) - # For caching, include this in the key - query_dict = { - k: v - for k, v in query.__dict__.items() - if k not in ["start_date", "end_date"] - } - - url = ( - f"https://sdmx.oecd.org/public/rest/data/OECD.SDD.TPS,DSD_PRICES@DF_PRICES_ALL,1.0/" - f"{country}.{frequency}.{methodology}.CPI.{units}.{expenditure}.N." - ) try: - data = helpers.get_possibly_cached_data( - url, function="economy_cpi", query_dict=query_dict + result = qb.fetch_data( + dataflow="DF_PRICES_ALL", + start_date=str(query.start_date) if query.start_date else None, + end_date=str(query.end_date) if query.end_date else None, + _skip_validation=True, + REF_AREA=country_str, + FREQ=freq_code, + METHODOLOGY=methodology, + MEASURE="CPI", + EXPENDITURE=expenditure_code, + TRANSFORMATION=transform_code, ) - except HTTPError as exc: + except Exception as exc: raise OpenBBError("No data found for the given query.") from exc - url_query = f"METHODOLOGY=='{methodology}' & UNIT_MEASURE=='{units}' & FREQ=='{frequency}'" - if country != "all": - if "+" in country: - _countries = country.split("+") - country_conditions = " or ".join( - [f"REF_AREA=='{c}'" for c in _countries] - ) - url_query += f" & ({country_conditions})" - else: - url_query = url_query + f" & REF_AREA=='{country}'" - url_query = ( - url_query + f" & EXPENDITURE=='{expenditure}'" - if query.expenditure != "all" - else url_query - ) - # Filter down - data = ( - data.query(url_query) - .reset_index(drop=True)[["REF_AREA", "TIME_PERIOD", "VALUE", "EXPENDITURE"]] - .rename( - columns={ - "REF_AREA": "country", - "TIME_PERIOD": "date", - "VALUE": "value", - "EXPENDITURE": "expenditure", - } - ) - ) - data["country"] = data["country"].map(CODE_TO_COUNTRY_CPI) - data["expenditure"] = data["expenditure"].map(expenditure_dict_rev) - data["date"] = data["date"].apply(helpers.oecd_date_to_python_date) - data = data[ - (data["date"] <= query.end_date) & (data["date"] >= query.start_date) - ] - # Normalize the percent value. 
- if query.transform in ("yoy", "period"): - data["value"] = data["value"].astype(float) / 100 - - return data.fillna("N/A").replace("N/A", None).to_dict(orient="records") + records = result["data"] + + if not records: + raise OpenBBError("No data found for the given query.") + + return records @staticmethod def transform_data( query: OECDCPIQueryParams, data: list[dict], **kwargs: Any ) -> list[OECDCPIData]: """Transform the data from the OECD endpoint.""" - return [ - OECDCPIData.model_validate(d) for d in sorted(data, key=lambda x: x["date"]) - ] + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.helpers import oecd_date_to_python_date + + is_pct = query.transform in ("yoy", "period") + unit_label = _UNIT_LABELS.get(query.transform, query.transform) + unit_mult = 100 if is_pct else 1 + methodology = "HICP" if query.harmonized else "CPI" + output: list[OECDCPIData] = [] + + for row in data: + d = oecd_date_to_python_date(row.get("TIME_PERIOD", "")) + + if d is None: + continue + + if query.start_date and d < query.start_date: + continue + + if query.end_date and d > query.end_date: + continue + + value = row.get("OBS_VALUE") + + if value is None or value == "": + continue + + value = float(value) + + if is_pct: + value = value / 100 + + exp_code = row.get("EXPENDITURE", "_T") + country_code = row.get("REF_AREA", "") + freq_label = row.get("FREQ_label", "") + measure_label = row.get("MEASURE_label", "Consumer price index") + exp_label = row.get( + "EXPENDITURE_label", expenditure_dict_rev.get(exp_code, exp_code) + ) + transform_label = row.get("TRANSFORMATION_label", unit_label) + # Build a descriptive title + # "Monthly Consumer price index - Total - Growth rate, over 1 year" + title_parts = [f"{freq_label} {measure_label} ({methodology})", exp_label] + + if transform_label and transform_label.lower() not in ("not applicable",): + title_parts.append(transform_label) + + title = " - ".join(title_parts) + # Build compound series_id from all dimension values. 
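+            # e.g. "USA.M.N.CPI._T.GY" for monthly, non-harmonized,
+            # all-items US CPI year-over-year growth.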
+ series_id = ".".join( + [ + row.get("REF_AREA", ""), + row.get("FREQ", ""), + row.get("METHODOLOGY", "N"), + row.get("MEASURE", "CPI"), + row.get("EXPENDITURE", "_T"), + row.get("TRANSFORMATION", "_Z"), + ] + ) + output.append( + OECDCPIData( + date=d, + country=row.get("REF_AREA_label", country_code), + country_code=country_code, + value=value, + unit=unit_label, + unit_multiplier=unit_mult, + series_id=series_id, + expenditure=exp_label, + title=title, + order=_EXPENDITURE_ORDER.get(exp_code), + ) + ) + + return sorted( + output, + key=lambda x: (x.date, x.country, x.order if x.order is not None else 99), + ) diff --git a/openbb_platform/providers/oecd/openbb_oecd/models/country_interest_rates.py b/openbb_platform/providers/oecd/openbb_oecd/models/country_interest_rates.py index 5f720195b80..4a5d0a2ca68 100644 --- a/openbb_platform/providers/oecd/openbb_oecd/models/country_interest_rates.py +++ b/openbb_platform/providers/oecd/openbb_oecd/models/country_interest_rates.py @@ -4,7 +4,6 @@ from datetime import date from typing import Any, Literal -from warnings import warn from openbb_core.app.model.abstract.error import OpenBBError from openbb_core.provider.abstract.fetcher import Fetcher @@ -13,11 +12,9 @@ CountryInterestRatesQueryParams, ) from openbb_core.provider.utils.errors import EmptyDataError -from openbb_oecd.utils.constants import CODE_TO_COUNTRY_IR, COUNTRY_TO_CODE_IR +from openbb_oecd.utils.constants import KEI_COUNTRIES from pydantic import Field, field_validator -COUNTRIES = list(CODE_TO_COUNTRY_IR.values()) + ["all"] - DURATION_DICT = { "immediate": "IRSTCI", "short": "IR3TIB", @@ -31,7 +28,7 @@ class OecdCountryInterestRatesQueryParams(CountryInterestRatesQueryParams): __json_schema_extra__ = { "country": { "multiple_items_allowed": True, - "choices": COUNTRIES, + "choices": list(KEI_COUNTRIES) + ["all"], }, "frequency": { "multiple_items_allowed": False, @@ -56,26 +53,7 @@ class OecdCountryInterestRatesQueryParams(CountryInterestRatesQueryParams): @classmethod def validate_country(cls, c): """Validate country.""" - # pylint: disable=import-outside-toplevel - from openbb_core.provider.utils.helpers import check_item - - result: list = [] - values = c.replace(" ", "_").split(",") - for v in values: - if v.upper() in CODE_TO_COUNTRY_IR: - result.append(CODE_TO_COUNTRY_IR.get(v.upper())) - continue - try: - check_item(v.lower(), COUNTRIES) - except Exception as e: - if len(values) == 1: - raise e from e - warn(f"Invalid country: {v}. Skipping...") - continue - result.append(v.lower()) - if result: - return ",".join(result) - raise OpenBBError(f"No valid country found. 
-> {values}") + return c.replace(" ", "_").strip().lower() class OecdCountryInterestRatesData(CountryInterestRatesData): @@ -112,52 +90,39 @@ def extract_data( ) -> list[dict]: """Return the raw data from the OECD endpoint.""" # pylint: disable=import-outside-toplevel - from io import StringIO # noqa - from openbb_oecd.utils.helpers import oecd_date_to_python_date - from pandas import read_csv - from openbb_core.provider.utils.helpers import make_request - - frequency = query.frequency[0].upper() - - def country_string(input_str: str): - if input_str == "all": - return "" - _countries = input_str.split(",") - return "+".join([COUNTRY_TO_CODE_IR[country] for country in _countries]) - - country = country_string(query.country) if query.country else "" - start_date = query.start_date.strftime("%Y-%m") if query.start_date else "" - end_date = query.end_date.strftime("%Y-%m") if query.end_date else "" - - url = ( - "https://sdmx.oecd.org/public/rest/data/OECD.SDD.STES,DSD_KEI@DF_KEI,4.0" - + f"/{country}.{frequency}.{DURATION_DICT[query.duration]}....?" - + f"startPeriod={start_date}&endPeriod={end_date}" - + "&dimensionAtObservation=TIME_PERIOD&detail=dataonly" - ) - headers = {"Accept": "application/vnd.sdmx.data+csv; charset=utf-8"} - response = make_request(url, headers=headers, timeout=20) - if response.status_code != 200: - raise Exception(f"Error with the OECD request: {response.status_code}") - df = read_csv(StringIO(response.text)).get( - ["REF_AREA", "TIME_PERIOD", "OBS_VALUE"] - ) - if df.empty: + from openbb_oecd.utils.query_builder import OecdQueryBuilder + + qb = OecdQueryBuilder() + freq_code = query.frequency[0].upper() + measure = DURATION_DICT.get(query.duration, "IR3TIB") + countries = qb.metadata.resolve_country_codes("DF_KEI", query.country) + country_str = "+".join(countries) if countries else "" + + try: + result = qb.fetch_data( + dataflow="DF_KEI", + start_date=( + query.start_date.strftime("%Y-%m") if query.start_date else None + ), + end_date=query.end_date.strftime("%Y-%m") if query.end_date else None, + _skip_validation=True, + REF_AREA=country_str, + FREQ=freq_code, + MEASURE=measure, + UNIT_MEASURE="PA", + ACTIVITY="_Z", + ADJUSTMENT="_Z", + TRANSFORMATION="_Z", + ) + except Exception as exc: + raise OpenBBError(f"Error fetching OECD data: {exc}") from exc + + records = result["data"] + + if not records: raise EmptyDataError() - df = df.rename( - columns={"REF_AREA": "country", "TIME_PERIOD": "date", "OBS_VALUE": "value"} - ) - df.country = [CODE_TO_COUNTRY_IR.get(d, d) for d in df.country] - df.date = df.date.apply(oecd_date_to_python_date) - df.value = df.value.astype(float) / 100 - df = ( - df.query("value.notnull()") - .set_index(["date", "country"]) - .sort_index() - .reset_index() - ) - - return df.to_dict("records") + + return records @staticmethod def transform_data( @@ -166,4 +131,28 @@ def transform_data( **kwargs: Any, ) -> list[OecdCountryInterestRatesData]: """Transform the data from the OECD endpoint.""" - return [OecdCountryInterestRatesData.model_validate(d) for d in data] + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.helpers import oecd_date_to_python_date + + output: list[OecdCountryInterestRatesData] = [] + + for row in data: + d = oecd_date_to_python_date(row.get("TIME_PERIOD", "")) + + if d is None: + continue + + value = row.get("OBS_VALUE") + + if value is None or value == "": + continue + + output.append( + OecdCountryInterestRatesData( + date=d, + country=row.get("REF_AREA_label", row.get("REF_AREA", "")), + 
value=float(value) / 100, + ) + ) + + return sorted(output, key=lambda x: (x.date, x.country or "")) diff --git a/openbb_platform/providers/oecd/openbb_oecd/models/economic_indicators.py b/openbb_platform/providers/oecd/openbb_oecd/models/economic_indicators.py new file mode 100644 index 00000000000..a4515c1e803 --- /dev/null +++ b/openbb_platform/providers/oecd/openbb_oecd/models/economic_indicators.py @@ -0,0 +1,825 @@ +"""OECD Economic Indicators Model — generic fetcher for ALL OECD dataflows.""" + +# pylint: disable=unused-argument,too-many-branches,protected-access,too-many-instance-attributes,too-many-statements,too-many-locals,too-many-return-statements + +from typing import Any + +from openbb_core.app.model.abstract.error import OpenBBError +from openbb_core.app.service.system_service import SystemService +from openbb_core.provider.abstract.annotated_result import AnnotatedResult +from openbb_core.provider.abstract.fetcher import Fetcher +from openbb_core.provider.standard_models.economic_indicators import ( + EconomicIndicatorsData, + EconomicIndicatorsQueryParams, +) +from openbb_core.provider.utils.errors import EmptyDataError +from pydantic import ConfigDict, Field, PrivateAttr, field_validator, model_validator + +api_prefix = SystemService().system_settings.api_settings.prefix + + +class OecdEconomicIndicatorsQueryParams(EconomicIndicatorsQueryParams): + """OECD Economic Indicators Query.""" + + __json_schema_extra__ = { + "symbol": { + "multiple_items_allowed": True, + "x-widget_config": { + "multiSelect": False, + "multiple": False, + "type": "text", + }, + }, + "country": { + "multiple_items_allowed": True, + "x-widget_config": { + "type": "endpoint", + "multiSelect": False, + "optionsEndpoint": f"{api_prefix}/oecd_utils/indicator_choices", + "optionsParams": { + "symbol": "$symbol", + "country": "true", + "dimension_values": "$dimension_values", + }, + "style": {"popupWidth": 500}, + }, + }, + "frequency": { + "x-widget_config": { + "type": "endpoint", + "optionsEndpoint": f"{api_prefix}/oecd_utils/indicator_choices", + "optionsParams": { + "symbol": "$symbol", + "country": "$country", + "frequency": "true", + "dimension_values": "$dimension_values", + }, + }, + }, + "transform": { + "x-widget_config": { + "type": "endpoint", + "optionsEndpoint": f"{api_prefix}/oecd_utils/indicator_choices", + "optionsParams": { + "symbol": "$symbol", + "country": "$country", + "frequency": "$frequency", + "transform": "true", + "dimension_values": "$dimension_values", + }, + "style": {"popupWidth": 500}, + }, + }, + "dimension_values": { + "multiple_items_allowed": True, + "x-widget_config": { + "type": "text", + "value": None, + "multiple": True, + "multiSelect": False, + }, + }, + "limit": { + "x-widget_config": { + "label": "Limit", + "type": "number", + }, + }, + } + + symbol: str = Field( + description=( + "OECD indicator symbol(s). Format: 'DATAFLOW::INDICATOR' or " + "'DATAFLOW::TABLE_ID'. Multiple symbols from the same dataflow " + "can be comma-separated." + ), + ) + country: str | None = Field( + default=None, + description="Country name or ISO code. Comma-separated for multiples. 'all' for all.", + ) + frequency: str | None = Field( + default=None, + description="Frequency: 'annual'/'yearly', 'quarterly'/'quarter', 'monthly'/'month', or SDMX code (A/Q/M).", + ) + transform: str | None = Field( + default=None, + description="Transformation code (dataflow-specific, e.g. 
'GY' for growth rate).", + ) + dimension_values: list[str] | str | None = Field( + default=None, + description=( + "Additional dimension constraints. Format: 'DIM_ID:VALUE'. e.g. ['SECTOR:S1', 'UNIT_MEASURE:USD_PPP']" + ), + ) + limit: int | None = Field( + default=None, + description="Maximum number of most recent observations per series.", + ) + pivot: bool = Field( + default=False, + description="If True, pivot dates to columns for presentation-table output.", + ) + + # Internal parsed state. + _is_table: bool = PrivateAttr(default=False) + _dataflow: str | None = PrivateAttr(default=None) + _table_id: str | None = PrivateAttr(default=None) + _indicator_codes: list[str] = PrivateAttr(default_factory=list) + _indicators_by_dataflow: dict = PrivateAttr(default_factory=dict) + + @field_validator("country", mode="before", check_fields=False) + @classmethod + def validate_country(cls, c): + """Normalize country input.""" + if c is None: + return c + return c.replace(" ", "_").strip().lower() + + @field_validator("dimension_values", mode="before") + @classmethod + def validate_dimension_values(cls, v): + """Accept a bare string or list; split comma-joined items into a list.""" + if v is None: + return v + if isinstance(v, str): + return [item.strip() for item in v.split(",") if item.strip()] or None + return list(v) + + @model_validator(mode="after") + def parse_and_validate_symbols(self): + """Parse the symbol string into dataflow + indicator codes or table ID.""" + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.metadata import OecdMetadata + + symbol = self.symbol + + if not symbol: + raise ValueError("Symbol is required.") + + parts = [ + s.strip() for s in symbol.split(",") if s.strip() # pylint: disable=E1101 + ] + dataflows: set[str] = set() + identifiers: list[str] = [] + + for part in parts: + if "::" in part: + df, identifier = part.split("::", 1) + dataflows.add(df.strip()) + identifiers.append(identifier.strip()) + else: + identifiers.append(part.strip()) + + if len(dataflows) > 1: + raise ValueError( + f"All symbols must be from the same dataflow. Got: {dataflows}" + ) + + if not dataflows and not identifiers: + raise ValueError("No valid symbols provided.") + + dataflow = dataflows.pop() if dataflows else None + self._dataflow = dataflow + + if not dataflow: + self._indicator_codes = identifiers + return self + + # Detect table mode: check if identifier is a TABLE_IDENTIFIER value. + metadata = OecdMetadata() + + if len(identifiers) == 1: + hierarchies = metadata.get_dataflow_hierarchies(dataflow) + table_ids = {h["id"] for h in hierarchies} + if identifiers[0] in table_ids: + self._is_table = True + self._table_id = identifiers[0] + return self + + # Indicator mode. 
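+        # Anything that is not a TABLE_IDENTIFIER falls through to plain
+        # indicator codes, e.g. "DF_PRICES_ALL::CPI" parses to dataflow
+        # "DF_PRICES_ALL" with the single indicator code "CPI".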
+ self._is_table = False + self._indicator_codes = identifiers + self._indicators_by_dataflow = {dataflow: identifiers} + + return self + + +class OecdEconomicIndicatorsData(EconomicIndicatorsData): + """OECD Economic Indicators Data.""" + + __alias_dict__ = { + "title": "Indicator", + "country": "Country", + "symbol_root": "parent_code", + } + + model_config = ConfigDict( + extra="allow", + json_schema_extra={ + "x-widget_config": { + "$.name": "OECD Indicators", + "$.refetchInterval": False, + "$.data": { + "table": { + "columnsDefs": [ + { + "field": "title", + "headerName": "Title", + "renderFn": "hoverCard", + "renderFnParams": { + "hoverCard": { + "cellField": "description", + "markdown": "{description}", + } + }, + }, + { + "field": "description", + "hide": True, + }, + { + "field": "symbol", + "pinned": False, + }, + ] + } + }, + }, + }, + ) + + unit: str | None = Field(default=None, description="Unit of measurement.") + unit_multiplier: int | float | None = Field( + default=None, description="Unit multiplier (power of 10)." + ) + scale: str | None = Field( + default=None, description="Scale description (e.g. Thousands, Millions)." + ) + order: int | float | None = Field( + default=None, description="Sort order within the table hierarchy." + ) + level: int | None = Field( + default=None, description="Indentation level in the table hierarchy." + ) + title: str | None = Field( + default=None, + description="Human-readable title of the series.", + alias="Indicator", + ) + description: str | None = Field(default=None, description="Indicator description.") + country_code: str | None = Field(default=None, description="ISO country code.") + + @field_validator( + "scale", + "unit", + "title", + "description", + "value", + "unit_multiplier", + "order", + "level", + mode="before", + ) + @classmethod + def nan_to_none(cls, v): + """Convert NaN float values to None for optional fields.""" + # pylint: disable=import-outside-toplevel + from math import isnan + + if not v: + return None + if isinstance(v, float) and isnan(v): + return None + if isinstance(v, str) and v.strip().lower() == "nan": + return None + return v + + @model_validator(mode="before") + @classmethod + def _sanitize_extra_nan(cls, values): + """Replace NaN in extra/dynamic fields so JSON serialization doesn't break.""" + # pylint: disable=import-outside-toplevel + from math import isnan + + if isinstance(values, dict): + for k, v in values.items(): + if isinstance(v, float) and isnan(v): + values[k] = None + return values + + +class OecdEconomicIndicatorsFetcher( + Fetcher[OecdEconomicIndicatorsQueryParams, list[OecdEconomicIndicatorsData]] +): + """OECD Economic Indicators Fetcher.""" + + @staticmethod + def transform_query( + params: dict[str, Any], + ) -> OecdEconomicIndicatorsQueryParams: + """Transform the query.""" + return OecdEconomicIndicatorsQueryParams(**params) + + @staticmethod + def extract_data( # noqa: PLR0912 + query: OecdEconomicIndicatorsQueryParams, + credentials: dict[str, str] | None, + **kwargs: Any, + ) -> dict: + """Fetch data from OECD for the given indicators or table.""" + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.helpers import ( + detect_indicator_dimensions, + detect_transform_dimension, + ) + from openbb_oecd.utils.query_builder import OecdQueryBuilder + from openbb_oecd.utils.table_builder import OecdTableBuilder + + qb = OecdQueryBuilder() + dataflow = query._dataflow # noqa: SLF001 + + if not dataflow: + raise OpenBBError( + "Could not determine dataflow from symbol. 
Use format 'DATAFLOW::INDICATOR'." + ) + + # Parse dimension_values into extra_dimensions dict. + extra_dimensions: dict[str, str] = {} + if query.dimension_values: + for dv in query.dimension_values: + if not dv or not isinstance(dv, str): + continue + for pair in (p.strip() for p in dv.split(",") if p.strip()): + if ":" in pair: + dim_id, dim_value = pair.split(":", 1) + extra_dimensions[dim_id.strip().upper()] = ( + dim_value.strip().upper() + ) + + # Resolve country codes. + countries_str = "" + if query.country: + countries = qb.metadata.resolve_country_codes(dataflow, query.country) + if countries: + countries_str = "+".join(countries) + + # Map frequency to SDMX code. + freq_map = { + "annual": "A", + "annually": "A", + "yearly": "A", + "year": "A", + "quarter": "Q", + "quarterly": "Q", + "monthly": "M", + "month": "M", + } + frequency = ( + freq_map.get(str(query.frequency).lower(), query.frequency.upper()) # type: ignore[union-attr] + if query.frequency + else None + ) + + start_date = str(query.start_date) if query.start_date else None + end_date = str(query.end_date) if query.end_date else None + + if query._is_table: + params: dict[str, Any] = {} + + if countries_str: + params["REF_AREA"] = countries_str + + if frequency: + params["FREQ"] = frequency + + # Apply user-specified dimension filters. + if extra_dimensions: + params.update(extra_dimensions) + + # Handle transform/unit for table mode. + if query.transform: + _apply_transform( + query.transform, dataflow, params, detect_transform_dimension + ) + + builder = OecdTableBuilder() + + try: + result = builder.get_table( + dataflow=dataflow, + table_id=query._table_id, # noqa: SLF001 + start_date=start_date, + end_date=end_date, + limit=query.limit, + **params, + ) + except (ValueError, OpenBBError) as exc: + raise OpenBBError(str(exc)) from exc + + return { + "mode": "table", + "data": result.get("data", []), + "table_metadata": result.get("table_metadata", {}), + "structure": result.get("structure", {}), + "series_metadata": result.get("series_metadata", {}), + } + + # ---- INDICATOR MODE ---- + params = {} + if countries_str: + params["REF_AREA"] = countries_str + if frequency: + params["FREQ"] = frequency + + # Apply user-specified dimension filters. + if extra_dimensions: + dsd_dims = qb.metadata.get_dimension_order(dataflow) + dsd_dim_map = {d.upper(): d for d in dsd_dims} + invalid_keys: list[str] = [] + for dim_upper, dim_val in extra_dimensions.items(): + canonical = dsd_dim_map.get(dim_upper) + if canonical is None: + invalid_keys.append(dim_upper) + else: + params[canonical] = dim_val + if invalid_keys: + raise OpenBBError( + f"Invalid dimension(s) for dataflow '{dataflow}': {invalid_keys}. Available dimensions: {dsd_dims}" + ) + + # Handle transform via detect_transform_dimension. + if query.transform: + _apply_transform( + query.transform, dataflow, params, detect_transform_dimension + ) + + # Compound-code decomposition via detect_indicator_dimensions. + indicator_codes = query._indicator_codes # noqa: SLF001 + if indicator_codes: + dimension_codes = detect_indicator_dimensions( + dataflow, indicator_codes, qb.metadata + ) + for dim_id, codes in dimension_codes.items(): + params[dim_id] = "+".join(codes) + + # Build content dimension order for symbol construction. 
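+        # "Content" dimensions are whatever remains after dropping the
+        # geography/time/unit axes below; transform_data later joins their
+        # codes with "_" to build each row's symbol suffix.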
+ _SKIP_DIMS = { + "REF_AREA", + "COUNTERPART_AREA", + "JURISDICTION", + "COUNTRY", + "AREA", + "FREQ", + "FREQUENCY", + "TIME_PERIOD", + "UNIT_MEASURE", + "UNIT_MULT", + "TRANSFORMATION", + "ADJUSTMENT", + "DECIMALS", + "CURRENCY", + "PRICE_BASE", + "TABLE_IDENTIFIER", + "REF_YEAR_PRICE", + "CONF_STATUS", + "OBS_STATUS", + "BASE_PER", + "BASE_REF_AREA", + "CURRENCY_DENOM", + } + try: + all_dims = qb.metadata.get_dimension_order(dataflow) + content_dims = [d for d in all_dims if d.upper() not in _SKIP_DIMS] + except Exception: + content_dims = [] + + try: + result = qb.fetch_data( + dataflow=dataflow, + start_date=start_date, + end_date=end_date, + limit=query.limit, + _skip_validation=False, + **params, + ) + except Exception as exc: + raise OpenBBError(f"OECD data fetch failed: {exc}") from exc + + records = result.get("data", []) + + if not records: + raise EmptyDataError() + + return { + "mode": "indicator", + "data": records, + "metadata": result.get("metadata", {}), + "content_dims": content_dims, + } + + @staticmethod + def transform_data( # noqa: PLR0912 + query: OecdEconomicIndicatorsQueryParams, + data: dict, + **kwargs: Any, + ) -> AnnotatedResult[list[OecdEconomicIndicatorsData]]: + """Transform the raw data into the output model.""" + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.helpers import oecd_date_to_python_date + + mode = data.get("mode", "indicator") + row_data = data.get("data", []) + + if not row_data: + raise EmptyDataError("No data returned for the given query parameters.") + + result: list[dict] = [] + metadata: dict = {} + dataflow = query._dataflow or "" # noqa: SLF001 + + if mode == "table": + metadata = { + "table": data.get("table_metadata", {}), + "series": data.get("series_metadata", {}), + } + # Build order→code map from the hierarchy structure. + structure = data.get("structure", {}) + order_to_code: dict[int | float, str] = {} + for entry in structure.get("indicators", []): + order_val = entry.get("order") + if order_val is not None: + order_to_code[order_val] = entry.get("code", "") + + # When a single country is queried, REF_AREA is a fixed + # dimension (not on each row). Pull it from table_metadata. 
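+            # e.g. fixed_dimensions may look like
+            # {"REF_AREA": {"code": "USA", "label": "United States"}}.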
+ fixed_dims = metadata.get("table", {}).get("fixed_dimensions", {}) + fixed_country = "" + fixed_country_code = "" + for dim_key in ("REF_AREA", "COUNTRY", "AREA"): + if dim_key in fixed_dims: + fixed_country = fixed_dims[dim_key].get("label", "") + fixed_country_code = fixed_dims[dim_key].get("code", "") + break + + for row in row_data: + time_str = row.get("time_period", "") + parsed_date = oecd_date_to_python_date(time_str) if time_str else None + + if query.start_date and parsed_date and parsed_date < query.start_date: + continue + if query.end_date and parsed_date and parsed_date > query.end_date: + continue + + ind_code = row.get("code", "") or order_to_code.get( + row.get("order"), "" + ) + country = ( + row.get("ref_area", "") or row.get("country", "") or fixed_country + ) + country_code = fixed_country_code + + new_row: dict[str, Any] = { + "date": parsed_date, + "symbol": f"{dataflow}::{ind_code}" if ind_code else "", + "country": country, + "country_code": country_code, + "value": row.get("value"), + "unit": row.get("unit_measure"), + "unit_multiplier": row.get("unit_mult"), + "scale": row.get("scale"), + "order": row.get("order"), + "level": row.get("level"), + "symbol_root": row.get("parent_code"), + "title": row.get("label", ""), + "description": row.get("description"), + } + result.append(new_row) + + else: + # Indicator mode. + metadata = data.get("metadata", {}) + content_dims = data.get("content_dims", []) + + _SKIP_TITLE_DIMS = { + "REF_AREA", + "FREQ", + "TIME_PERIOD", + "UNIT_MEASURE", + "UNIT_MULT", + "BASE_REF_AREA", + "DECIMALS", + "CURRENCY", + "TRANSFORMATION", + "ADJUSTMENT", + "PRICE_BASE", + "TABLE_IDENTIFIER", + "REF_YEAR_PRICE", + "CONF_STATUS", + "OBS_STATUS", + "BASE_PER", + "CURRENCY_DENOM", + } + _SKIP_TITLE_VALUES = { + "not applicable", + "total economy", + "not specified", + "no breakdown", + "all activities", + "total", + "non transformed data", + } + + for row in row_data: + time_str = row.get("TIME_PERIOD", "") + parsed_date = oecd_date_to_python_date(time_str) + if parsed_date is None: + continue + + if query.start_date and parsed_date < query.start_date: + continue + if query.end_date and parsed_date > query.end_date: + continue + + val = row.get("OBS_VALUE") + if val is None or val == "": + continue + try: + if str(val).lower() == "nan": + continue + val = float(val) if not isinstance(val, (int, float)) else val + except (ValueError, TypeError): + continue + + # Build compound-code symbol from content dims in DSD order. + code_parts = [row.get(d, "") for d in content_dims if row.get(d)] + compound_code = "_".join(code_parts) + + # Build title from indicator + content dimension labels. + title_parts: list[str] = [] + for k, v in row.items(): + if not k.endswith("_label") or not v: + continue + dim_id = k[:-6] + if dim_id in _SKIP_TITLE_DIMS: + continue + sv = str(v).strip() + raw_code = row.get(dim_id, "") + if sv.lower() not in _SKIP_TITLE_VALUES and raw_code not in ( + "_Z", + "_T", + "_X", + ): + title_parts.append(sv) + + # Sanitize scale/unit. 
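+                # Placeholder labels such as "nan" or "Units" carry no
+                # information, so normalize them to None.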
+                scale_val = row.get("UNIT_MULT_label", "")
+                if scale_val and str(scale_val).lower() in ("nan", "units"):
+                    scale_val = None
+
+                unit_val = row.get("UNIT_MEASURE_label", row.get("UNIT_MEASURE"))
+                if unit_val and str(unit_val).lower() == "nan":
+                    unit_val = None
+
+                new_row = {
+                    "date": parsed_date,
+                    "symbol": (f"{dataflow}::{compound_code}" if compound_code else ""),
+                    "country": row.get("REF_AREA_label", row.get("REF_AREA", "")),
+                    "country_code": row.get("REF_AREA", ""),
+                    "value": val,
+                    "unit": unit_val,
+                    "title": " - ".join(title_parts) if title_parts else compound_code,
+                }
+
+                result.append(new_row)
+
+        if not result:
+            raise EmptyDataError(
+                "No data remaining after applying date filters. Try adjusting start_date and end_date parameters."
+            )
+
+        result.sort(
+            key=lambda x: (
+                x["order"] if x.get("order") is not None else 9999,
+                # Compare dates as ISO strings so dateless rows sort first
+                # instead of raising TypeError when compared to empty strings.
+                str(x["date"]) if x.get("date") else "",
+                x["country"] or "",
+            )
+        )
+
+        to_exclude = ["is_category_header"]
+
+        # Non-pivot mode: return flat list.
+        if not query.pivot:
+            new_data: list[OecdEconomicIndicatorsData] = []
+            for row in result:
+                if not row.get("date") and not row.get("is_category_header"):
+                    continue
+                for field in to_exclude:
+                    row.pop(field, None)
+                new_data.append(OecdEconomicIndicatorsData.model_validate(row))
+
+            return AnnotatedResult(result=new_data, metadata=metadata)
+
+        # Pivot mode.
+        from pandas import DataFrame
+
+        df = DataFrame(result)
+        if df.empty:
+            raise EmptyDataError("No data for pivot.")
+
+        # Determine pivot shape based on cardinality of countries/symbols.
+        unique_countries = df["country"].nunique() if "country" in df.columns else 0
+        unique_symbols = df["symbol"].nunique() if "symbol" in df.columns else 0
+
+        if unique_countries <= 1:
+            # One country: rows=title, columns=date.
+            pivot_index = ["title"]
+        elif unique_symbols <= 1:
+            # Multiple countries, one symbol: rows=country, columns=date.
+            pivot_index = ["country"]
+        else:
+            # Multiple countries, multiple symbols: rows=title+country, columns=date.
+            pivot_index = ["title", "country"]
+
+        pivot_index = [c for c in pivot_index if c in df.columns]
+
+        try:
+            pivoted = df.pivot_table(
+                index=pivot_index,
+                columns="date",
+                values="value",
+                observed=True,
+            )
+            pivoted.columns = [str(c) for c in pivoted.columns]
+            pivoted = pivoted.reset_index()
+        except Exception:
+            # Fallback: return unpivoted.
+            new_data = [
+                OecdEconomicIndicatorsData.model_validate(
+                    {k: v for k, v in r.items() if k not in to_exclude}
+                )
+                for r in result
+                if r.get("date")
+            ]
+            return AnnotatedResult(result=new_data, metadata=metadata)
+
+        pivot_records = pivoted.where(pivoted.notna(), other=None).to_dict(
+            orient="records"
+        )
+        return AnnotatedResult(
+            result=[
+                OecdEconomicIndicatorsData.model_validate(r) for r in pivot_records
+            ],
+            metadata=metadata,
+        )
+
+
+def _apply_transform(
+    transform: str,
+    dataflow: str,
+    params: dict[str, Any],
+    detect_fn: Any,
+) -> None:
+    """Resolve and apply a transform/unit dimension value to *params*.
+
+    Mutates *params* in place. Raises ``OpenBBError`` if the transform
+    value cannot be resolved for the dataflow.
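+
+    Illustrative sketch, assuming the dataflow exposes a TRANSFORMATION
+    dimension whose lookup maps ``"yoy"`` to the SDMX code ``"GY"``::
+
+        params: dict[str, Any] = {}
+        _apply_transform("yoy", "DF_PRICES_ALL", params, detect_transform_dimension)
+        assert params == {"TRANSFORMATION": "GY"}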
+ """ + transform_val = transform.strip().lower() + transform_dim, unit_dim, transform_lookup, unit_lookup = detect_fn(dataflow) + applied = False + + if transform_dim: + if transform_val in ("all", "*"): + params[transform_dim] = "*" + applied = True + elif transform_val in transform_lookup: + params[transform_dim] = transform_lookup[transform_val] + applied = True + + if not applied and unit_dim: + if transform_val in ("all", "*"): + params[unit_dim] = "*" + applied = True + elif transform_val in unit_lookup: + params[unit_dim] = unit_lookup[transform_val] + applied = True + + if not applied: + if not transform_dim and not unit_dim: + raise OpenBBError( + f"Dataflow '{dataflow}' does not support transform/unit parameter." + ) + available: list[str] = [] + if transform_lookup: + available.extend( + sorted(set(transform_lookup.keys()) - set(transform_lookup.values())) + ) + if unit_lookup: + available.extend( + sorted(set(unit_lookup.keys()) - set(unit_lookup.values())) + ) + raise OpenBBError( + f"Invalid transform value '{transform}' for dataflow '{dataflow}'. " + f"Available options: {', '.join(available) if available else 'none'}" + ) diff --git a/openbb_platform/providers/oecd/openbb_oecd/models/gdp_forecast.py b/openbb_platform/providers/oecd/openbb_oecd/models/gdp_forecast.py index 819ee7091c0..2ef83c46b61 100644 --- a/openbb_platform/providers/oecd/openbb_oecd/models/gdp_forecast.py +++ b/openbb_platform/providers/oecd/openbb_oecd/models/gdp_forecast.py @@ -13,50 +13,16 @@ GdpForecastQueryParams, ) from openbb_core.provider.utils.errors import EmptyDataError -from openbb_oecd.utils.constants import ( - CODE_TO_COUNTRY_GDP_FORECAST, - COUNTRY_TO_CODE_GDP_FORECAST, -) +from openbb_oecd.utils.constants import GDP_FORECAST_COUNTRIES from pydantic import Field -COUNTRIES = list(COUNTRY_TO_CODE_GDP_FORECAST) + ["all"] - -COUNTRIES_QUARTER = [ - "australia", - "austria", - "belgium", - "canada", - "chile", - "colombia", - "costa_rica", - "czechia", - "denmark", - "estonia", - "finland", - "france", - "germany", - "greece", - "iceland", - "ireland", - "israel", - "italy", - "japan", - "korea", - "lithuania", - "luxembourg", - "netherlands", - "new_zealand", - "norway", - "poland", - "portugal", - "slovak_republic", - "spain", - "sweden", - "switzerland", - "turkey", - "united_kingdom", - "united_states", -] +_MEASURE_MAP = { + "current_prices": "GDP_USD", + "volume": "GDPV_USD", + "capita": "GDPVD_CAP", + "growth": "GDPV_ANNPCT", + "deflator": "PGDP", +} class OECDGdpForecastQueryParams(GdpForecastQueryParams): @@ -64,17 +30,12 @@ class OECDGdpForecastQueryParams(GdpForecastQueryParams): The OECD Economic Outlook presents the OECD's analysis of the major global economic trends and prospects for the next two years. - The Outlook puts forward a consistent set of projections for output, employment, government spending, - prices and current balances based on a review of each member country - and of the induced effect on each of them on international developments. 
- - https://www.oecd.org/economic-outlook/ """ __json_schema_extra__ = { "country": { "multiple_items_allowed": True, - "choices": COUNTRIES, + "choices": list(GDP_FORECAST_COUNTRIES) + ["all"], }, } @@ -85,6 +46,7 @@ class OECDGdpForecastQueryParams(GdpForecastQueryParams): frequency: Literal["annual", "quarter"] = Field( default="annual", description="Frequency of the data, default is annual.", + json_schema_extra={"choices": ["annual", "quarter"]}, ) units: Literal["current_prices", "volume", "capita", "growth", "deflator"] = Field( default="volume", @@ -111,35 +73,8 @@ def transform_query(params: dict[str, Any]) -> OECDGdpForecastQueryParams: """Transform the query.""" transformed_params = params.copy() countries = transformed_params.get("country") - new_countries: list = [] - freq = transformed_params.get("frequency") if not countries: - new_countries.append("all") - if countries: - countries = ( - countries.split(",") if isinstance(countries, str) else countries - ) - if "all" in countries: - new_countries = ["all"] - else: - for country in countries: - if freq == "quarter": - if country.lower() in COUNTRIES_QUARTER: - new_countries.append(country.lower()) - else: - warn(f"{country} is not available for quarterly data.") - else: # noqa - if country.lower() in COUNTRIES: - new_countries.append(country.lower()) - else: - warn(f"{country} is not available for annual data.") - - if not new_countries: - raise OpenBBError( - "No valid countries were found for the supplied parameters." - ) - - transformed_params["country"] = ",".join(new_countries) + transformed_params["country"] = "all" if not transformed_params.get("start_date"): transformed_params["start_date"] = datetime( @@ -154,99 +89,78 @@ def transform_query(params: dict[str, Any]) -> OECDGdpForecastQueryParams: return OECDGdpForecastQueryParams(**transformed_params) @staticmethod - async def aextract_data( + def extract_data( query: OECDGdpForecastQueryParams, credentials: dict[str, str] | None, **kwargs: Any, ) -> list[dict]: """Return the raw data from the OECD endpoint.""" # pylint: disable=import-outside-toplevel - from io import StringIO # noqa - from openbb_oecd.utils.helpers import oecd_date_to_python_date - from pandas import read_csv - from openbb_core.provider.utils.helpers import amake_request - - freq = "Q" if query.frequency == "quarter" else "A" + from openbb_oecd.utils.query_builder import OecdQueryBuilder - measure_dict = { - "current_prices": "GDP_USD", # This gives questionable results. - "volume": "GDPV_USD", - "capita": "GDPVD_CAP", - "growth": "GDPV_ANNPCT", - "deflator": "PGDP", - } - measure = measure_dict[query.units] # type: ignore + qb = OecdQueryBuilder() + freq_code = "Q" if query.frequency == "quarter" else "A" + measure = _MEASURE_MAP.get(query.units, "GDPV_USD") - if query.units == "capita" and freq == "Q": + if query.units == "capita" and freq_code == "Q": warn( "Capita data is not available for quarterly data, using annual data instead." 
) - freq = "A" - - def country_string(input_str: str): - """Convert the list of countries to an abbreviated string.""" - if input_str == "all": - return "" - _countries = input_str.split(",") - - return "+".join( - [ - COUNTRY_TO_CODE_GDP_FORECAST[country.lower()] - for country in _countries - ] + freq_code = "A" + + countries = qb.metadata.resolve_country_codes("DF_EO", query.country) + country_str = "+".join(countries) if countries else "" + + try: + result = qb.fetch_data( + dataflow="DF_EO", + start_date=str(query.start_date) if query.start_date else None, + end_date=str(query.end_date) if query.end_date else None, + _skip_validation=True, + REF_AREA=country_str, + MEASURE=measure, + FREQ=freq_code, ) + except Exception as exc: + raise OpenBBError(f"Error fetching OECD data: {exc}") from exc - country = country_string(query.country) - - url = ( - "https://sdmx.oecd.org/public/rest/data/OECD.ECO.MAD,DSD_EO@DF_EO,1.1" - + f"/{country}.{measure}.{freq}?" - + f"startPeriod={query.start_date}&endPeriod={query.end_date}" - + "&dimensionAtObservation=TIME_PERIOD&detail=dataonly&format=csvfile" - ) - - async def response_callback(response, _): - """Response callback.""" - if response.status != 200: - raise OpenBBError(f"Error with the OECD request: {response.status}") - return await response.text() - - headers = {"Accept": "application/vnd.sdmx.data+csv; charset=utf-8"} - response = await amake_request( - url, timeout=30, headers=headers, response_callback=response_callback - ) - df = read_csv(StringIO(response)).get(["REF_AREA", "TIME_PERIOD", "OBS_VALUE"]) # type: ignore - if df.empty: # type: ignore + records = result["data"] + if not records: raise EmptyDataError("No data was found.") - df = df.rename(columns={"REF_AREA": "country", "TIME_PERIOD": "date", "OBS_VALUE": "value"}) # type: ignore - df.country = [ - ( - CODE_TO_COUNTRY_GDP_FORECAST.get(d, d) - .replace("_", " ") - .replace("asia", "Dynamic Asian Economies") - .title() - ) - for d in df.country - ] - df.date = df.date.apply(oecd_date_to_python_date) - df = df[df["value"].notnull()] - - if query.units != "growth": - df["value"] = df.value.astype("int64") - df = df[df["value"] > 0] - - if query.units == "growth": - df["value"] = df.value.astype("float64") / 100 - - df = df[df["value"] > 0] - df = df.sort_values(by=["date", "value"], ascending=[True, False]) - - return df.to_dict(orient="records") + return records @staticmethod def transform_data( query: OECDGdpForecastQueryParams, data: list[dict], **kwargs: Any ) -> list[OECDGdpForecastData]: """Transform the data from the OECD endpoint.""" - return [OECDGdpForecastData.model_validate(d) for d in data] + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.helpers import oecd_date_to_python_date + + is_growth = query.units == "growth" + is_deflator = query.units == "deflator" + output: list[OECDGdpForecastData] = [] + for row in data: + d = oecd_date_to_python_date(row.get("TIME_PERIOD", "")) + if d is None: + continue + value = row.get("OBS_VALUE") + if value is None or value == "": + continue + value = float(value) + if is_growth: + value = value / 100 + elif not is_deflator: + value = int(value) + if value <= 0: + continue + output.append( + OECDGdpForecastData( + date=d, + country=row.get("REF_AREA_label", row.get("REF_AREA", "")), + value=value, + ) + ) + + return sorted(output, key=lambda x: (x.date, -(x.value or 0))) diff --git a/openbb_platform/providers/oecd/openbb_oecd/models/gdp_nominal.py b/openbb_platform/providers/oecd/openbb_oecd/models/gdp_nominal.py 
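
Review note: with the forecast fetcher above now routed through `OecdQueryBuilder`, the pipeline can be smoke-tested end to end through the public accessor. A minimal sketch, assuming the standard `obb.economy.gdp.forecast` routing is unchanged by this PR (the accessor path and the `to_df` call are ordinary OpenBB usage, not part of this diff):

```python
# Minimal smoke test for OECDGdpForecastFetcher via the public accessor.
# Parameter values are taken from the model definitions in this diff.
from openbb import obb

res = obb.economy.gdp.forecast(
    provider="oecd",
    country="united_states,japan",  # comma-joined, as transform_query expects
    frequency="annual",
    units="growth",  # growth is rescaled to a decimal (value / 100) in transform_data
)
print(res.to_df().head())
```
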
index 59be94c01a3..bf3f6882b2e 100644 --- a/openbb_platform/providers/oecd/openbb_oecd/models/gdp_nominal.py +++ b/openbb_platform/providers/oecd/openbb_oecd/models/gdp_nominal.py @@ -4,7 +4,6 @@ from datetime import date from typing import Any, Literal -from warnings import warn from openbb_core.app.model.abstract.error import OpenBBError from openbb_core.provider.abstract.fetcher import Fetcher @@ -14,28 +13,29 @@ ) from openbb_core.provider.utils.descriptions import QUERY_DESCRIPTIONS from openbb_core.provider.utils.errors import EmptyDataError -from openbb_oecd.utils.constants import CODE_TO_COUNTRY_GDP, COUNTRY_TO_CODE_GDP +from openbb_oecd.utils.constants import GDP_REAL_COUNTRIES from pydantic import Field, field_validator -COUNTRIES = list(COUNTRY_TO_CODE_GDP) + ["all"] +# Map units param to data-flow suffix and price-base code. +_UNIT_DATAFLOW = { + "level": "USD", + "index": "INDICES", + "capita": "CAPITA", +} class OECDGdpNominalQueryParams(GdpNominalQueryParams): """OECD Nominal GDP Query. + Notes + ----- Source: https://www.oecd.org/en/data/datasets/gdp-and-non-financial-accounts.html - - This table presents Gross Domestic Product (GDP) and its main components according to the expenditure approach. - Data is presented in US dollars. In the expenditure approach, the components of GDP are: - final consumption expenditure of households and non-profit institutions serving households (NPISH) - plus final consumption expenditure of General Government plus gross fixed capital formation (or investment) - plus net trade (exports minus imports). """ __json_schema_extra__ = { "country": { "multiple_items_allowed": True, - "choices": COUNTRIES, + "choices": list(GDP_REAL_COUNTRIES) + ["all"], } } @@ -65,26 +65,7 @@ class OECDGdpNominalQueryParams(GdpNominalQueryParams): @classmethod def validate_country(cls, c): """Validate country.""" - # pylint: disable=import-outside-toplevel - from openbb_core.provider.utils.helpers import check_item - - result: list = [] - values = c.replace(" ", "_").split(",") - for v in values: - if v.upper() in CODE_TO_COUNTRY_GDP: - result.append(CODE_TO_COUNTRY_GDP.get(v.upper())) - continue - try: - check_item(v.lower(), COUNTRIES) - except Exception as e: - if len(values) == 1: - raise e from e - warn(f"Invalid country: {v}. Skipping...") - continue - result.append(v.lower()) - if result: - return ",".join(result) - raise OpenBBError(f"No valid country found. 
-> {values}") + return c.replace(" ", "_").strip().lower() class OECDGdpNominalData(GdpNominalData): @@ -114,81 +95,50 @@ def transform_query(params: dict[str, Any]) -> OECDGdpNominalQueryParams: return OECDGdpNominalQueryParams(**transformed_params) @staticmethod - async def aextract_data( + def extract_data( query: OECDGdpNominalQueryParams, credentials: dict[str, str] | None, **kwargs: Any, ) -> list[dict]: """Return the raw data from the OECD endpoint.""" # pylint: disable=import-outside-toplevel - from io import StringIO # noqa - from openbb_oecd.utils.helpers import oecd_date_to_python_date - from numpy import nan - from pandas import read_csv - from openbb_core.provider.utils.helpers import amake_request - - if query.units == "index": - unit = "INDICES" - elif query.units == "capita": - unit = "CAPITA" - else: - unit = "USD" - - frequency = "Q" if query.frequency == "quarter" else "A" - price_base = "V" if query.price_base == "current_prices" else "LR" + from openbb_oecd.utils.query_builder import OecdQueryBuilder - if unit == "INDICES" and price_base == "V": - price_base = "DR" + qb = OecdQueryBuilder() + freq_code = "Q" if query.frequency == "quarter" else "A" - def country_string(input_str: str): - """Convert the list of countries to an abbreviated string.""" - if input_str == "all": - return "" - _countries = input_str.split(",") - - return "+".join([COUNTRY_TO_CODE_GDP[country] for country in _countries]) - - country = country_string(query.country) if query.country else "" - - url = ( - f"https://sdmx.oecd.org/public/rest/data/OECD.SDD.NAD,DSD_NAMAIN1@DF_QNA_EXPENDITURE_{unit},1.1" - + f"/{frequency}..{country}.S1..B1GQ.....{price_base}..?" - + f"&startPeriod={query.start_date}&endPeriod={query.end_date}" - + "&dimensionAtObservation=TIME_PERIOD&detail=dataonly&format=csvfile" - ) - if query.units == "capita": - url = url.replace("B1GQ", "B1GQ_POP") - - async def response_callback(response, _): - """Response callback.""" - if response.status != 200: - raise OpenBBError(f"Error with the OECD request: {response.status}") - return await response.text() - - response = await amake_request( - url, timeout=30, response_callback=response_callback - ) - - df = read_csv(StringIO(response)).get(["REF_AREA", "TIME_PERIOD", "OBS_VALUE"]) # type: ignore - if df.empty: # type: ignore - raise EmptyDataError() - df = df.rename(columns={"REF_AREA": "country", "TIME_PERIOD": "date", "OBS_VALUE": "value"}) # type: ignore + unit_suffix = _UNIT_DATAFLOW.get(query.units, "USD") + dataflow = f"DF_QNA_EXPENDITURE_{unit_suffix}" - def apply_map(x): - """Apply the country map.""" - v = CODE_TO_COUNTRY_GDP.get(x, x) - v = v.replace("_", " ").title() - return v + price_base = "V" if query.price_base == "current_prices" else "LR" + if query.units == "index" and price_base == "V": + price_base = "DR" - df["country"] = df["country"].apply(apply_map).str.replace("Oecd", "OECD") - df["date"] = df["date"].apply(oecd_date_to_python_date) - df = df[(df["date"] <= query.end_date) & (df["date"] >= query.start_date)] - if query.units == "level": - df["value"] = (df["value"].astype(float) * 1_000_000).astype("int64") + countries = qb.metadata.resolve_country_codes(dataflow, query.country) + country_str = "+".join(countries) if countries else "" + + transaction = "B1GQ_POP" if query.units == "capita" else "B1GQ" + + try: + result = qb.fetch_data( + dataflow=dataflow, + start_date=str(query.start_date) if query.start_date else None, + end_date=str(query.end_date) if query.end_date else None, + _skip_validation=True, + 
FREQ=freq_code, + REF_AREA=country_str, + SECTOR="S1", + TRANSACTION=transaction, + PRICE_BASE=price_base, + ) + except Exception as exc: + raise OpenBBError(f"Error fetching OECD data: {exc}") from exc - df = df.sort_values(by=["date", "value"], ascending=[True, False]) + records = result["data"] + if not records: + raise EmptyDataError() - return df.replace({nan: None}).to_dict(orient="records") + return records @staticmethod def transform_data( @@ -197,4 +147,31 @@ def transform_data( **kwargs: Any, ) -> list[OECDGdpNominalData]: """Transform the data from the OECD endpoint.""" - return [OECDGdpNominalData.model_validate(d) for d in data] + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.helpers import oecd_date_to_python_date + + is_level = query.units == "level" + output: list[OECDGdpNominalData] = [] + for row in data: + d = oecd_date_to_python_date(row.get("TIME_PERIOD", "")) + if d is None: + continue + if query.start_date and d < query.start_date: + continue + if query.end_date and d > query.end_date: + continue + value = row.get("OBS_VALUE") + if value is None or value == "": + continue + value = float(value) + if is_level: + value = int(value * 1_000_000) + output.append( + OECDGdpNominalData( + date=d, + country=row.get("REF_AREA_label", row.get("REF_AREA", "")), + value=value, + ) + ) + + return sorted(output, key=lambda x: (x.date, -(x.value or 0))) diff --git a/openbb_platform/providers/oecd/openbb_oecd/models/gdp_real.py b/openbb_platform/providers/oecd/openbb_oecd/models/gdp_real.py index d3601f89066..cea2d04ff3d 100644 --- a/openbb_platform/providers/oecd/openbb_oecd/models/gdp_real.py +++ b/openbb_platform/providers/oecd/openbb_oecd/models/gdp_real.py @@ -4,7 +4,6 @@ from datetime import date from typing import Any, Literal -from warnings import warn from openbb_core.app.model.abstract.error import OpenBBError from openbb_core.provider.abstract.fetcher import Fetcher @@ -14,28 +13,22 @@ ) from openbb_core.provider.utils.descriptions import QUERY_DESCRIPTIONS from openbb_core.provider.utils.errors import EmptyDataError -from openbb_oecd.utils.constants import CODE_TO_COUNTRY_GDP, COUNTRY_TO_CODE_GDP +from openbb_oecd.utils.constants import GDP_REAL_COUNTRIES from pydantic import Field, field_validator -COUNTRIES = list(COUNTRY_TO_CODE_GDP) + ["all"] - class OECDGdpRealQueryParams(GdpRealQueryParams): """OECD Real GDP Query. + Notes + ----- Source: https://www.oecd.org/en/data/datasets/gdp-and-non-financial-accounts.html - - This table presents Gross Domestic Product (GDP) and its main components according to the expenditure approach. - Data is presented in US dollars. In the expenditure approach, the components of GDP are: - final consumption expenditure of households and non-profit institutions serving households (NPISH) - plus final consumption expenditure of General Government plus gross fixed capital formation (or investment) - plus net trade (exports minus imports). 
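
Review note: the rewritten `transform_data` here, in `gdp_forecast.py`, and in the models below all repeat the same TIME_PERIOD/OBS_VALUE loop, differing only in how the value is scaled. A shared helper is one possible follow-up; the sketch below is hypothetical (the `rows_from_sdmx` name is invented for illustration), not part of this diff:

```python
# Hypothetical consolidation of the per-model loops in this PR; only the
# value scaling differs between models, so it is passed in as a callable.
from collections.abc import Callable
from datetime import date

from openbb_oecd.utils.helpers import oecd_date_to_python_date


def rows_from_sdmx(
    data: list[dict],
    scale: Callable[[float], float | int],
    start: date | None = None,
    end: date | None = None,
) -> list[dict]:
    """Parse raw SDMX records into date/country/value rows."""
    out: list[dict] = []
    for row in data:
        d = oecd_date_to_python_date(row.get("TIME_PERIOD", ""))
        if d is None or (start and d < start) or (end and d > end):
            continue
        value = row.get("OBS_VALUE")
        if value in (None, ""):
            continue
        out.append(
            {
                "date": d,
                "country": row.get("REF_AREA_label", row.get("REF_AREA", "")),
                "value": scale(float(value)),
            }
        )
    # Ordering used by the GDP models: by date, largest value first.
    return sorted(out, key=lambda x: (x["date"], -x["value"]))
```
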
""" __json_schema_extra__ = { "country": { "multiple_items_allowed": True, - "choices": COUNTRIES, + "choices": list(GDP_REAL_COUNTRIES) + ["all"], } } @@ -54,26 +47,7 @@ class OECDGdpRealQueryParams(GdpRealQueryParams): @classmethod def validate_country(cls, c): """Validate country.""" - # pylint: disable=import-outside-toplevel - from openbb_core.provider.utils.helpers import check_item - - result: list = [] - values = c.replace(" ", "_").split(",") - for v in values: - if v.upper() in CODE_TO_COUNTRY_GDP: - result.append(CODE_TO_COUNTRY_GDP.get(v.upper())) - continue - try: - check_item(v.lower(), COUNTRIES) - except Exception as e: - if len(values) == 1: - raise e from e - warn(f"Invalid country: {v}. Skipping...") - continue - result.append(v.lower()) - if result: - return ",".join(result) - raise OpenBBError(f"No valid country found. -> {values}") + return c.replace(" ", "_").strip().lower() class OECDGdpRealData(GdpRealData): @@ -101,67 +75,45 @@ def transform_query(params: dict[str, Any]) -> OECDGdpRealQueryParams: return OECDGdpRealQueryParams(**transformed_params) @staticmethod - async def aextract_data( + def extract_data( query: OECDGdpRealQueryParams, credentials: dict[str, str] | None, **kwargs: Any, ) -> list[dict]: """Return the raw data from the OECD endpoint.""" # pylint: disable=import-outside-toplevel - from io import StringIO # noqa - from openbb_oecd.utils.helpers import oecd_date_to_python_date - from numpy import nan - from pandas import read_csv - from openbb_core.provider.utils.helpers import amake_request - - frequency = "Q" if query.frequency == "quarter" else "A" - - def country_string(input_str: str): - """Convert the list of countries to an abbreviated string.""" - if input_str == "all": - return "" - _countries = input_str.split(",") - - return "+".join([COUNTRY_TO_CODE_GDP[country] for country in _countries]) - - country = country_string(query.country) if query.country else "" - - url = ( - "https://sdmx.oecd.org/public/rest/data/OECD.SDD.NAD,DSD_NAMAIN1@DF_QNA,1.1" - + f"/{frequency}..{country}.S1..B1GQ._Z...USD_PPP.LR.LA.T0102?" 
- + f"&startPeriod={query.start_date}&endPeriod={query.end_date}" - + "&dimensionAtObservation=TIME_PERIOD&detail=dataonly&format=csvfile" - ) - - async def response_callback(response, _): - """Response callback.""" - if response.status != 200: - raise OpenBBError(f"Error with the OECD request: {response.status}") - return await response.text() - - response = await amake_request( - url, timeout=30, response_callback=response_callback - ) + from openbb_oecd.utils.query_builder import OecdQueryBuilder + + qb = OecdQueryBuilder() + freq_code = "Q" if query.frequency == "quarter" else "A" + + countries = qb.metadata.resolve_country_codes("DF_QNA", query.country) + country_str = "+".join(countries) if countries else "" + + try: + result = qb.fetch_data( + dataflow="DF_QNA", + start_date=str(query.start_date) if query.start_date else None, + end_date=str(query.end_date) if query.end_date else None, + _skip_validation=True, + FREQ=freq_code, + REF_AREA=country_str, + SECTOR="S1", + TRANSACTION="B1GQ", + INSTR_ASSET="_Z", + UNIT_MEASURE="USD_PPP", + PRICE_BASE="LR", + TRANSFORMATION="LA", + TABLE_IDENTIFIER="T0102", + ) + except Exception as exc: + raise OpenBBError(f"Error fetching OECD data: {exc}") from exc - df = read_csv(StringIO(response)).get(["REF_AREA", "TIME_PERIOD", "OBS_VALUE"]) # type: ignore - if df.empty: # type: ignore + records = result["data"] + if not records: raise EmptyDataError() - df = df.rename(columns={"REF_AREA": "country", "TIME_PERIOD": "date", "OBS_VALUE": "value"}) # type: ignore - - def apply_map(x): - """Apply the country map.""" - v = CODE_TO_COUNTRY_GDP.get(x, x) - v = v.replace("_", " ").title() - return v - - df["country"] = df["country"].apply(apply_map).str.replace("Oecd", "OECD") - df["date"] = df["date"].apply(oecd_date_to_python_date) - df = df[(df["date"] <= query.end_date) & (df["date"] >= query.start_date)] - df["value"] = (df["value"].astype(float) * 1_000_000).astype("int64") - - df = df.sort_values(by=["date", "value"], ascending=[True, False]) - return df.replace({nan: None}).to_dict(orient="records") + return records @staticmethod def transform_data( @@ -170,4 +122,27 @@ def transform_data( **kwargs: Any, ) -> list[OECDGdpRealData]: """Transform the data from the OECD endpoint.""" - return [OECDGdpRealData.model_validate(d) for d in data] + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.helpers import oecd_date_to_python_date + + output: list[OECDGdpRealData] = [] + for row in data: + d = oecd_date_to_python_date(row.get("TIME_PERIOD", "")) + if d is None: + continue + if query.start_date and d < query.start_date: + continue + if query.end_date and d > query.end_date: + continue + value = row.get("OBS_VALUE") + if value is None or value == "": + continue + output.append( + OECDGdpRealData( + date=d, + country=row.get("REF_AREA_label", row.get("REF_AREA", "")), + value=int(float(value) * 1_000_000), + ) + ) + + return sorted(output, key=lambda x: (x.date, -(x.value or 0))) diff --git a/openbb_platform/providers/oecd/openbb_oecd/models/house_price_index.py b/openbb_platform/providers/oecd/openbb_oecd/models/house_price_index.py index e19827ebfb7..7046243f33c 100644 --- a/openbb_platform/providers/oecd/openbb_oecd/models/house_price_index.py +++ b/openbb_platform/providers/oecd/openbb_oecd/models/house_price_index.py @@ -14,33 +14,25 @@ ) from openbb_core.provider.utils.descriptions import QUERY_DESCRIPTIONS from openbb_core.provider.utils.errors import EmptyDataError -from openbb_core.provider.utils.helpers import check_item -from 
openbb_oecd.utils.constants import ( - CODE_TO_COUNTRY_RGDP, - COUNTRY_TO_CODE_RGDP, -) +from openbb_oecd.utils.constants import RHPI_COUNTRIES from pydantic import Field, field_validator -countries = tuple(CODE_TO_COUNTRY_RGDP.values()) + ("all",) -CountriesList = list(countries) # type: ignore -frequency_dict = { - "monthly": "M", - "quarter": "Q", - "annual": "A", -} -transform_dict = {"yoy": "PA", "period": "PC", "index": "IX"} +FREQUENCY_MAP = {"monthly": "M", "quarter": "Q", "annual": "A"} +TRANSFORM_MAP = {"yoy": "PA", "period": "PC", "index": "IX"} class OECDHousePriceIndexQueryParams(HousePriceIndexQueryParams): """OECD House Price Index Query. + Notes + ----- Source: https://data-explorer.oecd.org/?lc=en """ __json_schema_extra__ = { "country": { "multiple_items_allowed": True, - "choices": CountriesList, + "choices": list(RHPI_COUNTRIES) + ["all"], } } @@ -53,23 +45,7 @@ class OECDHousePriceIndexQueryParams(HousePriceIndexQueryParams): @classmethod def validate_country(cls, c): """Validate country.""" - result: list = [] - values = c.replace(" ", "_").split(",") - for v in values: - if v.upper() in CODE_TO_COUNTRY_RGDP: - result.append(CODE_TO_COUNTRY_RGDP.get(v.upper())) - continue - try: - check_item(v.lower(), CountriesList) - except Exception as e: - if len(values) == 1: - raise e from e - warn(f"Invalid country: {v}. Skipping...") - continue - result.append(v.lower()) - if result: - return ",".join(result) - raise OpenBBError(f"No valid country found. -> {values}") + return c.replace(" ", "_").strip().lower() class OECDHousePriceIndexData(HousePriceIndexData): @@ -106,62 +82,91 @@ def extract_data( ) -> list[dict]: """Return the raw data from the OECD endpoint.""" # pylint: disable=import-outside-toplevel - from io import StringIO # noqa - from openbb_oecd.utils.helpers import oecd_date_to_python_date # noqa - from openbb_core.provider.utils.helpers import make_request # noqa - from pandas import read_csv # noqa - - frequency = frequency_dict.get(query.frequency, "Q") - transform = transform_dict.get(query.transform, "PA") - - def country_string(input_str: str): - if input_str == "all": - return "" - _countries = input_str.split(",") - return "+".join([COUNTRY_TO_CODE_RGDP[country] for country in _countries]) - - country = country_string(query.country) if query.country else "" - start_date = query.start_date.strftime("%Y-%m") if query.start_date else "" - end_date = query.end_date.strftime("%Y-%m") if query.end_date else "" - url = ( - "https://sdmx.oecd.org/public/rest/data/OECD.SDD.TPS,DSD_RHPI_TARGET@DF_RHPI_TARGET,1.0/" - + f"COU.{country}.{frequency}.RHPI.{transform}....?" - + f"startPeriod={start_date}&endPeriod={end_date}" - + "&dimensionAtObservation=TIME_PERIOD&detail=dataonly" - ) - headers = {"Accept": "application/vnd.sdmx.data+csv; charset=utf-8"} - response = make_request(url, headers=headers, timeout=20) - if response.status_code == 404 and frequency == "M": - warn("No monthly data found. 
Switching to quarterly data.") - response = make_request( - url.replace(".M.RHPI.", ".Q.RHPI."), headers=headers - ) - if response.status_code != 200: - raise OpenBBError( - f"Error with the OECD request (HTTP {response.status_code}): `{response.text}`" + from openbb_oecd.utils.query_builder import OecdQueryBuilder + + qb = OecdQueryBuilder() + freq_code = FREQUENCY_MAP.get(query.frequency, "Q") + transform = TRANSFORM_MAP.get(query.transform, "PA") + + countries = qb.metadata.resolve_country_codes("DF_RHPI_TARGET", query.country) + country_str = "+".join(countries) if countries else "" + + try: + result = qb.fetch_data( + dataflow="DF_RHPI_TARGET", + start_date=( + query.start_date.strftime("%Y-%m") if query.start_date else None + ), + end_date=query.end_date.strftime("%Y-%m") if query.end_date else None, + _skip_validation=True, + REF_AREA_TYPE="COU", + REF_AREA=country_str, + FREQ=freq_code, + MEASURE="RHPI", + UNIT_MEASURE=transform, ) - df = read_csv(StringIO(response.text)).get( - ["REF_AREA", "TIME_PERIOD", "OBS_VALUE"] - ) - if df.empty: + except Exception as exc: + # Fallback from monthly to quarterly if fetch fails + if freq_code == "M": + warn("No monthly data found. Switching to quarterly data.") + try: + result = qb.fetch_data( + dataflow="DF_RHPI_TARGET", + start_date=( + query.start_date.strftime("%Y-%m") + if query.start_date + else None + ), + end_date=( + query.end_date.strftime("%Y-%m") if query.end_date else None + ), + _skip_validation=True, + REF_AREA_TYPE="COU", + REF_AREA=country_str, + FREQ="Q", + MEASURE="RHPI", + UNIT_MEASURE=transform, + ) + except Exception as exc2: + raise OpenBBError(f"Error fetching OECD data: {exc2}") from exc2 + else: + raise OpenBBError(f"Error fetching OECD data: {exc}") from exc + + records = result["data"] + if not records: raise EmptyDataError() - df = df.rename( - columns={"REF_AREA": "country", "TIME_PERIOD": "date", "OBS_VALUE": "value"} - ) - df.country = df.country.map(CODE_TO_COUNTRY_RGDP) - df.date = df.date.apply(oecd_date_to_python_date) - df = ( - df.query("value.notnull()") - .set_index(["date", "country"]) - .sort_index() - .reset_index() - ) - - return df.to_dict("records") + + return records @staticmethod def transform_data( query: OECDHousePriceIndexQueryParams, data: list[dict], **kwargs: Any ) -> list[OECDHousePriceIndexData]: """Transform the data from the OECD endpoint.""" - return [OECDHousePriceIndexData.model_validate(d) for d in data] + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.helpers import oecd_date_to_python_date + + output: list[OECDHousePriceIndexData] = [] + for row in data: + d = oecd_date_to_python_date(row.get("TIME_PERIOD", "")) + + if d is None: + continue + + value = row.get("OBS_VALUE") + + if value is None or value == "": + continue + + if query.transform and query.transform != "index": + value = float(value) / 100.0 + + output.append( + OECDHousePriceIndexData( + date=d, + country=row.get("REF_AREA_label", row.get("REF_AREA", "")), + value=float(value), + ) + ) + + return sorted(output, key=lambda x: (x.date, x.country or "")) diff --git a/openbb_platform/providers/oecd/openbb_oecd/models/share_price_index.py b/openbb_platform/providers/oecd/openbb_oecd/models/share_price_index.py index d4f07619bb0..ad52c7fdd81 100644 --- a/openbb_platform/providers/oecd/openbb_oecd/models/share_price_index.py +++ b/openbb_platform/providers/oecd/openbb_oecd/models/share_price_index.py @@ -4,7 +4,6 @@ from datetime import date from typing import Any -from warnings import warn from 
openbb_core.app.model.abstract.error import OpenBBError from openbb_core.provider.abstract.fetcher import Fetcher @@ -12,22 +11,10 @@ SharePriceIndexData, SharePriceIndexQueryParams, ) -from openbb_core.provider.utils.descriptions import QUERY_DESCRIPTIONS -from openbb_core.provider.utils.errors import EmptyDataError -from openbb_core.provider.utils.helpers import check_item -from openbb_oecd.utils.constants import ( - CODE_TO_COUNTRY_RGDP, - COUNTRY_TO_CODE_RGDP, -) -from pydantic import Field, field_validator +from openbb_oecd.utils.constants import FINMARK_COUNTRIES +from pydantic import field_validator -countries = tuple(CODE_TO_COUNTRY_RGDP.values()) + ("all",) -CountriesList = sorted(list(countries)) # type: ignore -frequency_dict = { - "monthly": "M", - "quarter": "Q", - "annual": "A", -} +FREQUENCY_MAP = {"monthly": "M", "quarter": "Q", "annual": "A"} class OECDSharePriceIndexQueryParams(SharePriceIndexQueryParams): @@ -39,36 +26,15 @@ class OECDSharePriceIndexQueryParams(SharePriceIndexQueryParams): __json_schema_extra__ = { "country": { "multiple_items_allowed": True, - "choices": CountriesList, + "choices": list(FINMARK_COUNTRIES) + ["all"], } } - country: str = Field( - description=QUERY_DESCRIPTIONS.get("country", ""), - default="united_states", - ) - @field_validator("country", mode="before", check_fields=False) @classmethod def validate_country(cls, c): """Validate country.""" - result: list = [] - values = c.replace(" ", "_").split(",") - for v in values: - if v.upper() in CODE_TO_COUNTRY_RGDP: - result.append(CODE_TO_COUNTRY_RGDP.get(v.upper())) - continue - try: - check_item(v.lower(), CountriesList) - except Exception as e: - if len(values) == 1: - raise e from e - warn(f"Invalid country: {v}. Skipping...") - continue - result.append(v.lower()) - if result: - return ",".join(result) - raise OpenBBError(f"No valid country found. -> {values}") + return c.replace(" ", "_").strip().lower() class OECDSharePriceIndexData(SharePriceIndexData): @@ -84,14 +50,17 @@ class OECDSharePriceIndexFetcher( def transform_query(params: dict[str, Any]) -> OECDSharePriceIndexQueryParams: """Transform the query.""" transformed_params = params.copy() + if transformed_params.get("start_date") is None: transformed_params["start_date"] = ( date(2000, 1, 1) if transformed_params.get("country") == "all" else date(1958, 1, 1) ) + if transformed_params.get("end_date") is None: transformed_params["end_date"] = date(date.today().year, 12, 31) + if transformed_params.get("country") is None: transformed_params["country"] = "united_states" @@ -105,54 +74,64 @@ def extract_data( ) -> list[dict]: """Return the raw data from the OECD endpoint.""" # pylint: disable=import-outside-toplevel - from io import StringIO # noqa - from openbb_core.provider.utils.helpers import make_request # noqa - from openbb_oecd.utils.helpers import oecd_date_to_python_date # noqa - from pandas import read_csv # noqa - - frequency = frequency_dict.get(query.frequency) - - def country_string(input_str: str): - if input_str == "all": - return "" - _countries = input_str.split(",") - return "+".join([COUNTRY_TO_CODE_RGDP[country] for country in _countries]) - - country = country_string(query.country) - start_date = query.start_date.strftime("%Y-%m") if query.start_date else "" - end_date = query.end_date.strftime("%Y-%m") if query.end_date else "" - url = ( - "https://sdmx.oecd.org/public/rest/data/OECD.SDD.STES,DSD_STES@DF_FINMARK,4.0/" - + f"{country}.{frequency}.SHARE......?" 
- + f"startPeriod={start_date}&endPeriod={end_date}" - + "&dimensionAtObservation=TIME_PERIOD&detail=dataonly" - ) - headers = {"Accept": "application/vnd.sdmx.data+csv; charset=utf-8"} - response = make_request(url, headers=headers, timeout=20) - if response.status_code != 200: - raise Exception(f"Error: {response.status_code}") - df = read_csv(StringIO(response.text)).get( - ["REF_AREA", "TIME_PERIOD", "OBS_VALUE"] - ) - if df.empty: - raise EmptyDataError() - df = df.rename( - columns={"REF_AREA": "country", "TIME_PERIOD": "date", "OBS_VALUE": "value"} - ) - df = ( - df.query("value.notnull()") - .set_index(["date", "country"]) - .sort_index() - .reset_index() - ) - df.country = df.country.map(CODE_TO_COUNTRY_RGDP) - df.date = df.date.apply(oecd_date_to_python_date) - - return df.to_dict("records") + from openbb_oecd.utils.query_builder import OecdQueryBuilder + + qb = OecdQueryBuilder() + freq_code = FREQUENCY_MAP.get(query.frequency, "M") + + countries = qb.metadata.resolve_country_codes("DF_FINMARK", query.country) + country_str = "+".join(countries) if countries else "" + + try: + result = qb.fetch_data( + dataflow="DF_FINMARK", + start_date=( + query.start_date.strftime("%Y-%m") if query.start_date else None + ), + end_date=query.end_date.strftime("%Y-%m") if query.end_date else None, + _skip_validation=True, + REF_AREA=country_str, + FREQ=freq_code, + MEASURE="SHARE", + ) + except Exception as exc: + raise OpenBBError(f"Error fetching OECD data: {exc}") from exc + + records = result["data"] + + if not records: + raise OpenBBError( + "OECD returned no data rows for the given query parameters." + ) + + return records @staticmethod def transform_data( query: OECDSharePriceIndexQueryParams, data: list[dict], **kwargs: Any ) -> list[OECDSharePriceIndexData]: """Transform the data from the OECD endpoint.""" - return [OECDSharePriceIndexData.model_validate(d) for d in data] + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.helpers import oecd_date_to_python_date + + output: list[OECDSharePriceIndexData] = [] + for row in data: + d = oecd_date_to_python_date(row.get("TIME_PERIOD", "")) + + if d is None: + continue + + value = row.get("OBS_VALUE") + + if value is None or value == "": + continue + + output.append( + OECDSharePriceIndexData( + date=d, + country=row.get("REF_AREA_label", row.get("REF_AREA", "")), + value=float(value), + ) + ) + + return sorted(output, key=lambda x: (x.date, x.country or "")) diff --git a/openbb_platform/providers/oecd/openbb_oecd/models/unemployment.py b/openbb_platform/providers/oecd/openbb_oecd/models/unemployment.py index 33472c015a2..d8acd7b5088 100644 --- a/openbb_platform/providers/oecd/openbb_oecd/models/unemployment.py +++ b/openbb_platform/providers/oecd/openbb_oecd/models/unemployment.py @@ -4,7 +4,6 @@ from datetime import date from typing import Any, Literal -from warnings import warn from openbb_core.app.model.abstract.error import OpenBBError from openbb_core.provider.abstract.fetcher import Fetcher @@ -14,37 +13,29 @@ ) from openbb_core.provider.utils.descriptions import QUERY_DESCRIPTIONS from openbb_core.provider.utils.errors import EmptyDataError -from openbb_core.provider.utils.helpers import check_item -from openbb_oecd.utils.constants import ( - CODE_TO_COUNTRY_UNEMPLOYMENT, - COUNTRY_TO_CODE_UNEMPLOYMENT, -) +from openbb_oecd.utils.constants import UNEMPLOYMENT_COUNTRIES from pydantic import Field, field_validator -countries = tuple(CODE_TO_COUNTRY_UNEMPLOYMENT.values()) + ("all",) -CountriesList = 
sorted(list(countries)) # type: ignore -AGES = [ - "total", - "15-24", - "25+", -] -AgesLiteral = Literal[ - "total", - "15-24", - "25+", -] +AGES = ["total", "15-24", "25+"] +AgesLiteral = Literal["total", "15-24", "25+"] + +_SEX_MAP = {"total": "_T", "male": "M", "female": "F"} +_AGE_MAP = {"total": "Y_GE15", "15-24": "Y15T24", "25+": "Y_GE25"} +_FREQ_MAP = {"annual": "A", "quarter": "Q", "monthly": "M"} class OECDUnemploymentQueryParams(UnemploymentQueryParams): """OECD Unemployment Query. + Notes + ----- Source: https://data-explorer.oecd.org/?lc=en """ __json_schema_extra__ = { "country": { "multiple_items_allowed": True, - "choices": CountriesList, + "choices": list(UNEMPLOYMENT_COUNTRIES) + ["all"], }, } @@ -71,23 +62,7 @@ class OECDUnemploymentQueryParams(UnemploymentQueryParams): @classmethod def validate_country(cls, c): """Validate country.""" - result: list = [] - values = c.replace(" ", "_").split(",") - for v in values: - if v.upper() in CODE_TO_COUNTRY_UNEMPLOYMENT: - result.append(CODE_TO_COUNTRY_UNEMPLOYMENT.get(v.upper())) - continue - try: - check_item(v.lower(), CountriesList) - except Exception as e: - if len(values) == 1: - raise e from e - warn(f"Invalid country: {v}. Skipping...") - continue - result.append(v.lower()) - if result: - return ",".join(result) - raise OpenBBError(f"No valid country found. -> {values}") + return c.replace(" ", "_").strip().lower() class OECDUnemploymentData(UnemploymentData): @@ -122,68 +97,66 @@ def extract_data( ) -> list[dict]: """Return the raw data from the OECD endpoint.""" # pylint: disable=import-outside-toplevel - from io import StringIO # noqa - from openbb_core.provider.utils.helpers import make_request # noqa - from openbb_oecd.utils import helpers # noqa - from pandas import read_csv # noqa - - sex = {"total": "_T", "male": "M", "female": "F"}[query.sex] - frequency = query.frequency[0].upper() - age = { - "total": "Y_GE15", - "15-24": "Y15T24", - "25+": "Y_GE25", - }[query.age] - seasonal_adjustment = "Y" if query.seasonal_adjustment else "N" - - def country_string(input_str: str): - if input_str == "all": - return "" - _countries = input_str.split(",") - return "+".join( - [COUNTRY_TO_CODE_UNEMPLOYMENT[country] for country in _countries] + from openbb_oecd.utils.query_builder import OecdQueryBuilder + + qb = OecdQueryBuilder() + sex = _SEX_MAP.get(query.sex, "_T") + age = _AGE_MAP.get(query.age, "Y_GE15") + freq_code = _FREQ_MAP.get(query.frequency, query.frequency[0].upper()) + adj = "Y" if query.seasonal_adjustment else "N" + + countries = qb.metadata.resolve_country_codes("DF_IALFS_UNE_M", query.country) + country_str = "+".join(countries) if countries else "" + + try: + result = qb.fetch_data( + dataflow="DF_IALFS_UNE_M", + start_date=( + query.start_date.strftime("%Y-%m") if query.start_date else None + ), + end_date=query.end_date.strftime("%Y-%m") if query.end_date else None, + _skip_validation=True, + REF_AREA=country_str, + ADJUSTMENT=adj, + SEX=sex, + AGE=age, + FREQ=freq_code, ) + except Exception as exc: + raise OpenBBError(f"Error fetching OECD data: {exc}") from exc - country = country_string(query.country) - start_date = query.start_date.strftime("%Y-%m") if query.start_date else "" - end_date = query.end_date.strftime("%Y-%m") if query.end_date else "" - url = ( - "https://sdmx.oecd.org/public/rest/data/OECD.SDD.TPS,DSD_LFS@DF_IALFS_UNE_M,1.0/" - + f"{country}..._Z.{seasonal_adjustment}.{sex}.{age}..{frequency}" - + f"?startPeriod={start_date}&endPeriod={end_date}" - + 
"&dimensionAtObservation=TIME_PERIOD&detail=dataonly" - ) - headers = {"Accept": "application/vnd.sdmx.data+csv; charset=utf-8"} - response = make_request(url, headers=headers, timeout=20) - if response.status_code != 200: - raise OpenBBError(f"Error: {response.status_code} -> {response.text}") - df = read_csv(StringIO(response.text)).get( - ["REF_AREA", "TIME_PERIOD", "OBS_VALUE"] - ) - if df.empty: + records = result["data"] + if not records: raise EmptyDataError() - df = df.rename( - columns={"REF_AREA": "country", "TIME_PERIOD": "date", "OBS_VALUE": "value"} - ) - df["value"] = df["value"].astype(float) / 100 - df["country"] = df["country"].map(CODE_TO_COUNTRY_UNEMPLOYMENT) - df["date"] = df["date"].apply(helpers.oecd_date_to_python_date) - df = ( - df.query("value.notnull()") - .set_index(["date", "country"]) - .sort_index() - .reset_index() - ) - df = df[(df["date"] <= query.end_date) & (df["date"] >= query.start_date)] - - # in column "country" if NaN replace with "all" - df["country"] = df["country"].fillna("all") - - return df.to_dict(orient="records") + + return records @staticmethod def transform_data( query: OECDUnemploymentQueryParams, data: list[dict], **kwargs: Any ) -> list[OECDUnemploymentData]: """Transform the data from the OECD endpoint.""" - return [OECDUnemploymentData.model_validate(d) for d in data] + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.helpers import oecd_date_to_python_date + + output: list[OECDUnemploymentData] = [] + for row in data: + d = oecd_date_to_python_date(row.get("TIME_PERIOD", "")) + if d is None: + continue + if query.start_date and d < query.start_date: + continue + if query.end_date and d > query.end_date: + continue + value = row.get("OBS_VALUE") + if value is None or value == "": + continue + output.append( + OECDUnemploymentData( + date=d, + country=row.get("REF_AREA_label", row.get("REF_AREA", "all")), + value=float(value) / 100, + ) + ) + + return sorted(output, key=lambda x: (x.date, x.country or "")) diff --git a/openbb_platform/providers/oecd/openbb_oecd/oecd_router.py b/openbb_platform/providers/oecd/openbb_oecd/oecd_router.py new file mode 100644 index 00000000000..23688a62285 --- /dev/null +++ b/openbb_platform/providers/oecd/openbb_oecd/oecd_router.py @@ -0,0 +1,1989 @@ +"""OECD Utilities Router.""" + +# pylint: disable=unused-argument,protected-access,too-many-return-statements,too-many-branches,too-many-positional-arguments,too-many-locals,too-many-statements,too-many-lines,too-many-arguments + +from typing import Annotated, Any, Literal + +from fastapi import Query +from openbb_core.app.model.example import APIEx, PythonEx +from openbb_core.app.model.obbject import OBBject +from openbb_core.app.router import Router +from openbb_core.app.service.system_service import SystemService +from openbb_oecd.utils.metadata import OECDMetadataDependency + +router = Router(prefix="", description="Utilities for OECD provider.") +api_prefix = SystemService().system_settings.api_settings.prefix +# Dimension IDs typically representing the country/reference area. +_COUNTRY_DIMS = ("REF_AREA", "COUNTERPART_AREA", "JURISDICTION", "COUNTRY", "AREA") +# Dimension IDs typically representing observation frequency. +_FREQ_DIMS = ("FREQ", "FREQUENCY") +# Dimension IDs typically representing a data transformation. +_TRANSFORM_DIMS = ("TRANSFORMATION", "UNIT_MEASURE", "ADJUSTMENT") + + +def _parse_annotation(text: str) -> dict[str, str]: + """Parse a comma-separated ``DIM=VALUE`` annotation string. 
+ + Works for both NOT_DISPLAYED and DEFAULT annotations. + """ + result: dict[str, str] = {} + if not text: + return result + for part in text.split(","): + p = part.strip() + if not p: + continue + if "=" in p: + dim, val = p.split("=", 1) + val = val.strip() + if val.startswith("(") and val.endswith(")"): + val = val[1:-1] + result[dim.strip()] = val + else: + result[p] = "" + return result + + +def _parse_not_displayed(annotations: dict[str, str]) -> dict[str, str]: + """Parse NOT_DISPLAYED annotation into ``{dim_id: value}``.""" + return _parse_annotation(annotations.get("NOT_DISPLAYED", "")) + + +def _parse_defaults(annotations: dict[str, str]) -> dict[str, str]: + """Parse DEFAULT annotation into ``{dim_id: value}``.""" + return _parse_annotation(annotations.get("DEFAULT", "")) + + +@router.command( + methods=["GET"], + widget_config={"exclude": True}, + examples=[ + APIEx( + description="Get OECD topic choices for UI dropdowns.", + parameters={}, + ) + ], +) +async def list_topic_choices(metadata: OECDMetadataDependency) -> list[dict[str, str]]: + """Return [{label, value}] for every OECD topic (for dropdowns).""" + # pylint: disable=import-outside-toplevel + from collections import Counter + + topics = metadata.list_topics() + tm = metadata.table_map() + topic_counts = Counter(r.get("topic_id", "") for r in tm) + result = [] + for t in topics: + count = topic_counts.get(t["id"], 0) + if count > 0: + result.append( + { + "label": f"{t['name']} ({count})", + "value": t["id"], + } + ) + return sorted(result, key=lambda x: x["label"]) + + +@router.command( + methods=["GET"], + widget_config={"exclude": True}, + examples=[ + APIEx( + description="Get subtopic choices for a given topic.", + parameters={"topic": "ECO"}, + ) + ], +) +async def list_subtopic_choices( + metadata: OECDMetadataDependency, + topic: Annotated[ + str | None, + Query( + title="Topic", + description="Topic ID to get subtopics for (e.g. 'ECO').", + ), + ] = None, +) -> list[dict[str, str]]: + """Return [{label, value}] for subtopics within a given topic (for dropdowns).""" + # pylint: disable=import-outside-toplevel + from collections import Counter + + if not topic: + return [] + + t_upper = topic.upper() + topics = metadata.list_topics() + target = None + + for t in topics: + if t["id"].upper() == t_upper: + target = t + break + + if not target: + return [] + + tm = metadata.table_map() + sub_counts = Counter( + r.get("subtopic_id", "") for r in tm if r.get("topic_id", "").upper() == t_upper + ) + result = [] + + for s in target.get("subtopics", []): + count = sub_counts.get(s["id"], 0) + if count > 0: + result.append( + { + "label": f"{s['name']} ({count})", + "value": s["id"], + } + ) + + return sorted(result, key=lambda x: x["label"]) + + +@router.command( + methods=["GET"], + widget_config={ + "name": "OECD Dataflows", + "description": "All available OECD dataflows, optionally filtered by topic.", + "params": [ + { + "paramName": "topic", + "label": "Topic", + "value": None, + "description": "Filter by topic. 
Leave blank to show all.", + "type": "endpoint", + "optionsEndpoint": f"{api_prefix}/oecd_utils/list_topic_choices", + "style": {"popupWidth": 500}, + "optional": True, + }, + { + "paramName": "subtopic", + "label": "Subtopic", + "value": None, + "description": "Filter by subtopic (requires a topic to be selected).", + "type": "endpoint", + "optionsEndpoint": f"{api_prefix}/oecd_utils/list_subtopic_choices", + "optionsParams": {"topic": "$topic"}, + "style": {"popupWidth": 500}, + "optional": True, + }, + ], + "gridData": {"w": 20, "h": 15}, + "refetchInterval": False, + "source": ["OECD"], + "category": "OECD Utilities", + "subCategory": "Metadata", + }, + examples=[ + APIEx( + description="List all OECD dataflows.", + parameters={}, + ), + APIEx( + description="Filter dataflows by topic.", + parameters={"topic": "HEA"}, + ), + PythonEx( + description="List all OECD dataflows.", + code=[ + "dataflows = obb.oecd.utils.list_dataflows()", + "print(dataflows.results)", + ], + ), + ], +) +async def list_dataflows( + metadata: OECDMetadataDependency, + topic: Annotated[ + str | None, + Query( + title="Topic", + description=( + "Filter dataflows by topic ID" + " (e.g. 'ECO', 'HEA', 'ENV')." + " Use list_topics() to see all available topics." + ), + ), + ] = None, + subtopic: Annotated[ + str | None, + Query( + title="Subtopic", + description="Filter dataflows by subtopic ID within the selected topic.", + ), + ] = None, +) -> OBBject: + """List all available OECD dataflows, optionally filtered by topic and subtopic.""" + dataflows = metadata.list_dataflows(topic=topic or None) + + if subtopic: + needle = subtopic.upper() + dataflows = [d for d in dataflows if d.get("subtopic", "").upper() == needle] + + rows = [] + + for entry in dataflows: + full_id = entry["value"] + short_id = full_id.split("@")[-1] if "@" in full_id else full_id + rows.append( + { + "dataflow_id": short_id, + "name": entry.get("label", short_id), + "topic": entry.get("topic_name", ""), + "subtopic": entry.get("subtopic_name", ""), + } + ) + + return OBBject(results=rows, provider="oecd") + + +@router.command( + methods=["GET"], + widget_config={"exclude": True}, + examples=[ + APIEx( + description="Get OECD dataflow choices for UI dropdowns.", + parameters={}, + ) + ], +) +async def list_dataflow_choices( + metadata: OECDMetadataDependency, +) -> list[dict[str, str]]: + """Return [{label, value}] for every OECD dataflow (for dropdowns).""" + dataflows = metadata.list_dataflows() + + return sorted( + [{"label": e.get("label", e["value"]), "value": e["value"]} for e in dataflows], + key=lambda x: x["label"], + ) + + +@router.command( + methods=["GET"], + widget_config={ + "name": "OECD Topics", + "description": "All OECD topic categories with dataflow counts.", + "params": [ + { + "paramName": "query", + "label": "Search", + "value": None, + "description": "Filter by topic or subtopic name.", + "optional": True, + }, + ], + "gridData": {"w": 30, "h": 20}, + "refetchInterval": False, + "source": ["OECD"], + "category": "OECD Utilities", + "subCategory": "Metadata", + }, + examples=[ + APIEx(description="List all OECD topics.", parameters={}), + APIEx(description="Search topics.", parameters={"query": "health"}), + PythonEx( + description="Browse OECD topics and subtopics.", + code=[ + "topics = obb.oecd.utils.list_topics()", + "print(topics.results)", + ], + ), + ], +) +async def list_topics( + metadata: OECDMetadataDependency, + query: Annotated[ + str | None, + Query( + title="Search", + description="Filter rows by topic or 
subtopic name.", + ), + ] = None, +) -> OBBject: + """List all OECD topic categories with dataflow counts.""" + topics = metadata.list_topics() + + rows = [] + + for t in topics: + if not t["dataflow_count"]: + continue + subs = t.get("subtopics", []) + + if subs: + for s in subs: + if not s["dataflow_count"]: + continue + rows.append( + { + "topic_id": t["id"], + "topic": t["name"], + "subtopic_id": s["id"], + "subtopic": s["name"], + "dataflows": s["dataflow_count"], + } + ) + else: + rows.append( + { + "topic_id": t["id"], + "topic": t["name"], + "subtopic_id": "", + "subtopic": "", + "dataflows": t["dataflow_count"], + } + ) + + if query: + needle = query.lower() + rows = [ + r + for r in rows + if needle in r["topic"].lower() or needle in r["subtopic"].lower() + ] + + return OBBject(results=rows, provider="oecd") + + +@router.command( + methods=["GET"], + widget_config={ + "name": "OECD Dataflow Parameters", + "type": "markdown", + "params": [ + { + "paramName": "dataflow_id", + "label": "Dataflow", + "value": "DF_PRICES_ALL", + "description": "The OECD dataflow to inspect.", + "type": "endpoint", + "optionsEndpoint": f"{api_prefix}/oecd_utils/list_dataflow_choices", + "style": {"popupWidth": 700}, + }, + { + "paramName": "output_format", + "value": "markdown", + "show": False, + }, + ], + "data": {"dataKey": "results"}, + "source": ["OECD"], + "category": "OECD Utilities", + "subCategory": "Metadata", + }, + examples=[ + APIEx( + description="Get parameters for the 'DF_PRICES_ALL' dataflow in markdown.", + parameters={"dataflow_id": "DF_PRICES_ALL", "output_format": "markdown"}, + ), + APIEx( + description="Get parameters for the 'DF_CLI' dataflow as JSON.", + parameters={"dataflow_id": "DF_CLI", "output_format": "json"}, + ), + PythonEx( + description="Inspect dimensions of the 'DF_QNA' dataflow.", + code=[ + "params = obb.oecd.utils.get_dataflow_parameters('DF_QNA')", + "print(params.results)", + ], + ), + ], +) +async def get_dataflow_parameters( + metadata: OECDMetadataDependency, + dataflow_id: Annotated[ + str, + Query( + title="Dataflow", + description="The OECD dataflow ID. Use list_dataflows() to see available dataflows.", + ), + ], + output_format: Literal["json", "markdown"] = "json", +) -> OBBject: + """Dataflow parameters and possible dimension values. + + Returns an OBBject with either a JSON dict or markdown string under results. + """ + parameters = metadata.get_constrained_values(dataflow_id) + + if output_format == "json": + return OBBject(results=parameters) + + sections: list[str] = [] + + for dim_id, options in parameters.items(): + inner = "\n".join( + f"| {opt['value']} | {opt.get('label', '')} |" for opt in options + ) + table = f"| Code | Label |\n|---|---|\n{inner}" + sections.append( + f"
<details>\n<summary>{dim_id}"
+            f" ({len(options)} values)</summary>"
+            f"\n\n{table}\n\n</details>
" + ) + + return OBBject(results="\n\n".join(sections), provider="oecd") + + +@router.command( + methods=["GET"], + widget_config={ + "name": "OECD Tables", + "description": "Searchable map of all OECD tables, optionally filtered by topic.", + "params": [ + { + "paramName": "topic", + "label": "Topic", + "value": None, + "description": "Filter by topic. Leave blank to show all.", + "type": "endpoint", + "optionsEndpoint": f"{api_prefix}/oecd_utils/list_topic_choices", + "style": {"popupWidth": 700}, + "optional": True, + }, + { + "paramName": "subtopic", + "label": "Subtopic", + "value": None, + "description": "Filter by subtopic (requires a topic to be selected).", + "type": "endpoint", + "optionsEndpoint": f"{api_prefix}/oecd_utils/list_subtopic_choices", + "optionsParams": {"topic": "$topic"}, + "style": {"popupWidth": 500}, + "optional": True, + }, + { + "paramName": "query", + "label": "Search", + "value": None, + "description": "Keyword search across name, topic, and dataflow ID.", + "optional": True, + }, + { + "paramName": "dataflow_id", + "label": "Dataflow ID", + "value": None, + "description": "Filter by exact or partial dataflow ID (e.g. 'DF_PRICES_ALL').", + "optional": True, + }, + ], + "gridData": {"w": 40, "h": 20}, + "refetchInterval": False, + "source": ["OECD"], + "category": "OECD Utilities", + "subCategory": "Metadata", + }, + examples=[ + APIEx(description="List all OECD tables.", parameters={}), + APIEx(description="Search for GDP tables.", parameters={"query": "GDP"}), + APIEx(description="Filter by topic.", parameters={"topic": "HEA"}), + APIEx( + description="Find a specific table by dataflow ID.", + parameters={"dataflow_id": "DF_PRICES_ALL"}, + ), + PythonEx( + description="Search OECD tables.", + code=[ + "tables = obb.oecd.utils.list_tables(query='prices')", + "print(tables.results)", + ], + ), + ], +) +async def list_tables( + metadata: OECDMetadataDependency, + query: Annotated[ + str | None, + Query( + title="Search", + description=( + "Keyword search. Space-separated terms" + " are AND-ed; use | for OR within a word." + ), + ), + ] = None, + topic: Annotated[ + str | None, + Query( + title="Topic", + description=( + "Filter by topic ID (e.g. 'ECO', 'HEA')." + " Use list_topics() to see all topics." + ), + ), + ] = None, + subtopic: Annotated[ + str | None, + Query( + title="Subtopic", + description="Filter by subtopic ID within the selected topic.", + ), + ] = None, + dataflow_id: Annotated[ + str | None, + Query( + title="Dataflow ID", + description="Filter by exact or partial dataflow (table) ID (e.g. 'DF_PRICES_ALL').", + ), + ] = None, +) -> OBBject: + """List all OECD tables with keyword search, topic, subtopic, and dataflow ID filtering.""" + rows = metadata.list_tables( + query=query, topic=topic or None, subtopic=subtopic or None + ) + + if dataflow_id: + needle = dataflow_id.upper() + rows = [ + r + for r in rows + if needle in r["table_id"].upper() or needle in r["dataflow_id"].upper() + ] + + return OBBject(results=rows, provider="oecd") + + +@router.command( + methods=["GET"], + widget_config={ + "name": "OECD Table Detail", + "description": ( + "Full dimension breakdown for a single" + " OECD table, including indicator hierarchy." 
+ ), + "type": "markdown", + "params": [ + { + "paramName": "table_id", + "label": "Table", + "value": "DF_PRICES_ALL", + "description": "The OECD table (dataflow) to inspect.", + "type": "endpoint", + "optionsEndpoint": f"{api_prefix}/oecd_utils/list_table_choices", + "style": {"popupWidth": 950}, + }, + ], + "data": {"dataKey": "results"}, + "gridData": {"w": 40, "h": 25}, + "refetchInterval": False, + "source": ["OECD"], + "category": "OECD Utilities", + "subCategory": "Metadata", + }, + examples=[ + APIEx( + description="Get full detail for the DF_PRICES_ALL table.", + parameters={"table_id": "DF_PRICES_ALL"}, + ), + APIEx( + description="Inspect a national accounts table.", + parameters={"table_id": "DF_T725R_Q"}, + ), + PythonEx( + description="Describe a table.", + code=[ + "detail = obb.oecd.utils.get_table_detail(table_id='DF_QNA')", + "print(detail.results)", + ], + ), + ], +) +async def get_table_detail( + metadata: OECDMetadataDependency, + table_id: Annotated[ + str, + Query( + title="Table", + description="The OECD dataflow (table) ID. Use list_tables() to find IDs.", + ), + ], +) -> OBBject: + """Full dimension and indicator breakdown for a single OECD table. + + Returns a markdown document with: + - Table name and description + - Each dimension as a collapsible section with allowed values + - Table groups (TABLE_IDENTIFIER), if present + - Indicator count and hierarchy summary + """ + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.metadata import _TABLE_GROUP_CANDIDATES + + detail = metadata.describe_dataflow(table_id) + short_id = detail.get("short_id", table_id) + + # Look up topic/subtopic/path from the table map for context + table_row: dict = {} + + for row in metadata.table_map(): + if row["short_id"] == short_id or row["dataflow_id"] == detail.get( + "dataflow_id" + ): + table_row = row + break + + lines: list[str] = [] + + # Header + lines.append(f"# {detail.get('name', table_id)}") + lines.append(f"**Dataflow ID:** `{short_id}`") + + path = table_row.get("path", "") + + if path: + lines.append(f"\n**Category:** {path}") + + desc = detail.get("description", "") + + if desc: + lines.append(f"\n{desc}") + + lines.append("") + indicator_dim = detail.get("indicator_dimension") + # Table groups (TABLE_IDENTIFIER) — show as the primary content with full descriptions + table_groups = detail.get("table_groups", []) + + if table_groups: + lines.append("## Tables\n") + for g in table_groups: + label = g.get("label", g["value"]) + description = g.get("description", "") + lines.append(f"### {g['value']} — {label}") + # Show description only if it adds something beyond the label + if description and description != label: + lines.append(f"\n{description}\n") + else: + lines.append("") + + # Identify the table-grouping dimension (if any) to skip in the dimensions section + + dimensions = detail.get("dimensions", []) + _table_group_dim: str | None = None + + if table_groups: + for _cand in _TABLE_GROUP_CANDIDATES: + if any(d.get("id") == _cand for d in dimensions): + _table_group_dim = _cand + break + + # Dimensions — collapsible sections with code/label/description tables + if dimensions: + lines.append("## Dimensions\n") + for dim in dimensions: + dim_id = dim["id"] + + if dim_id == _table_group_dim: + continue # already shown above as table groups + + concept_name = dim.get("name", dim_id) + values = dim.get("values", []) + constrained = dim.get("constrained_codes", len(values)) + tag = " *(indicator dimension)*" if dim_id == indicator_dim else "" + 
summary_label = f"`{dim_id}`" + + if concept_name and concept_name != dim_id: + summary_label += f" — {concept_name}" + + summary_label += f" ({constrained} values){tag}" + lines.append(f"
<details>\n<summary>{summary_label}</summary>\n")
+
+            if values:
+                lines.append("| Code | Label | Description |")
+                lines.append("|---|---|---|")
+                for v in values:
+                    code = v.get("value", "")
+                    lbl = v.get("label", "")
+                    vdesc = " ".join(v.get("description", "").split())
+                    desc_col = vdesc if vdesc and vdesc != lbl else ""
+                    lines.append(f"| {code} | {lbl} | {desc_col} |")
+
+            lines.append("\n</details>
\n") + + # Indicator tree + ind_count = detail.get("indicator_count", 0) + + if ind_count: + lines.append( + f"## Indicators\n\n**{ind_count}** indicators in dimension `{indicator_dim}`.\n" + ) + tree = detail.get("indicator_tree", []) + + def _render_tree(nodes: list[dict], depth: int = 0) -> None: + for node in nodes: + indent = " " * depth + code = node.get("code", "") + name = node.get("label", code) + lines.append(f"{indent}- **{code}** — {name}") + for child in node.get("children", []): + _render_tree([child], depth + 1) + + if tree: + lines.append("
<details>\n<summary>Indicator tree</summary>\n")
+            _render_tree(tree)
+            lines.append("\n</details>
\n") + + return OBBject(results="\n".join(lines), provider="oecd") + + +@router.command( + methods=["GET"], + widget_config={"exclude": True}, + examples=[ + APIEx( + description="Get table choices for dropdowns.", + parameters={}, + ) + ], +) +async def list_table_choices( + metadata: OECDMetadataDependency, + topic: Annotated[ + str | None, + Query( + title="Topic", + description="Filter choices by topic ID (e.g. 'ECO'). Leave blank for all.", + ), + ] = None, +) -> list[dict]: + """Return [{label, value}] for OECD tables (for dropdowns), optionally filtered by topic.""" + tables = metadata.list_tables(topic=topic or None) + seen: set[str] = set() + choices = [] + + for t in sorted(tables, key=lambda x: x["name"]): + if t["table_id"] not in seen: + seen.add(t["table_id"]) + choices.append( + { + "label": t["name"], + "value": t["table_id"], + "extraInfo": {"description": t["table_id"]}, + } + ) + + return choices + + +@router.command( + methods=["GET"], + widget_config={"exclude": True}, + include_in_schema=False, + examples=[ + APIEx( + description="Get country choices for the DF_CLI::LI symbol.", + parameters={"symbol": "DF_CLI::LI", "country": "true"}, + ), + APIEx( + description="Get frequency choices after selecting a country.", + parameters={"symbol": "DF_CLI::LI", "country": "AUS", "frequency": "true"}, + ), + ], +) +async def indicator_choices( # noqa: PLR0911,PLR0912 + metadata: OECDMetadataDependency, + symbol: str | None = None, + country: str | None = None, + frequency: str | None = None, + transform: str | None = None, + dimension_values: list[str] | None = None, +) -> list[dict[str, str]]: + """Progressive dimension choices for the OECD Economic Indicators widget. + + Called by the OpenBB Workspace UI in a stepped fashion: + + 1. Provide symbol → returns country choices. + 2. Add country → returns frequency choices. + 3. Add frequency → returns transform choices (when the dataflow has one). + + Each step passes "true" as the value of the parameter being resolved. + + Parameters + ---------- + symbol : str | None + Dataflow and indicator in DATAFLOW::INDICATOR format. + Multiple comma-separated symbols from the same dataflow are allowed. + country : str | None + Pass "true" to request country choices; otherwise the selected + country code(s) used to narrow downstream options. + frequency : str | None + Pass "true" to request frequency choices. + transform : str | None + Pass "true" to request transformation choices. + dimension_values : list[str] | None + Already-selected extra dimension filters in DIM_ID:VALUE format, + used to narrow choices. + + Returns + ------- + list[dict[str, str]] + [{label, value}] for the requested dimension. + """ + # pylint: disable=import-outside-toplevel + from urllib.parse import unquote + + if not symbol: + return [] + + symbol = unquote(symbol) + symbols = [s.strip() for s in symbol.split(",") if s.strip()] + + if not symbols: + return [] + + # Parse dataflow + indicator codes from the (possibly comma-joined) symbol. 
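+    # For example, symbol "DF_CLI::LI" yields dataflow "DF_CLI" with
+    # indicator code "LI"; a bare "DF_CLI" selects the dataflow without
+    # recording an indicator code.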
+ dataflows_seen: set[str] = set() + indicator_codes: list[str] = [] + + for sym in symbols: + if "::" in sym: + df_part, ind_part = sym.split("::", 1) + dataflows_seen.add(df_part.strip()) + if ind_part.strip(): + indicator_codes.append(ind_part.strip()) + else: + dataflows_seen.add(sym.strip()) + + dataflow_id = next(iter(dataflows_seen), None) + + if not dataflow_id: + return [] + + try: + dim_order = metadata.get_dimension_order(dataflow_id) + # get_constrained_values uses embedded DSD constraints; no live call needed + # as long as the dataflow structure is already cached. + constrained = metadata.get_constrained_values(dataflow_id) + params = metadata.get_dataflow_parameters(dataflow_id) + except (ValueError, KeyError, AttributeError): + return [] + + # Identify each special dimension (take first matching dim in DSD order). + country_dim = next((d for d in dim_order if d in _COUNTRY_DIMS), None) + freq_dim = next((d for d in dim_order if d in _FREQ_DIMS), None) + transform_dim = next((d for d in dim_order if d in _TRANSFORM_DIMS), None) + full_id = metadata._resolve_dataflow_id(dataflow_id) + nd_pins = _parse_not_displayed( + metadata.dataflows.get(full_id, {}).get("annotations", {}) + ) + + def _to_choices(dim_id: str) -> list[dict[str, str]]: + """Convert constrained or full options for dim_id to label/value pairs.""" + entries = constrained.get(dim_id) or params.get(dim_id, []) + return [ + { + "label": e.get("label", e.get("value", "")), + "value": str(e.get("value", "")), + } + for e in entries + if e.get("value") is not None + ] + + # Dispatch: each step passes "true" as the value of the dim being resolved. + requesting = ( + "country" + if country == "true" + else ( + "frequency" + if frequency == "true" + else "transform" if transform == "true" else None + ) + ) + + if requesting == "country": + if not country_dim: + return [] + choices = sorted(_to_choices(country_dim), key=lambda x: x["label"]) + choices.insert(0, {"label": "All Countries", "value": "*"}) + return choices + + if requesting == "frequency": + if not freq_dim or freq_dim in nd_pins: + return [] + return _to_choices(freq_dim) + + if requesting == "transform": + if not transform_dim or transform_dim in nd_pins: + return [] + choices = _to_choices(transform_dim) + if choices: + choices.insert(0, {"label": "All", "value": "*"}) + return choices + + return [] + + +@router.command( + methods=["GET"], + widget_config={"exclude": True}, + examples=[ + APIEx( + description="Get the topic choices (step 0).", + parameters={}, + ), + APIEx( + description="Get subtopic choices for the 'ECO' topic (step 1).", + parameters={"topic": "ECO"}, + ), + APIEx( + description="Get table choices for topic and subtopic (step 2).", + parameters={"topic": "ECO", "subtopic": "ECO.EO"}, + ), + APIEx( + description="Get country choices for a specific table (step 3).", + parameters={ + "topic": "ECO", + "subtopic": "ECO.EO", + "table": "DF_QNA::T0101", + }, + ), + APIEx( + description="Get frequency choices (step 4).", + parameters={ + "topic": "ECO", + "subtopic": "ECO.EO", + "table": "DF_QNA::T0101", + "country": "USA", + }, + ), + ], +) +async def presentation_table_choices( # noqa: PLR0911,PLR0912 + metadata: OECDMetadataDependency, + topic: str | None = None, + subtopic: str | None = None, + table: str | None = None, + country: str | None = None, + frequency: str | None = None, +) -> list[dict[str, str]]: + """Get presentation table choices for OECD data retrieval. + + Progressive cascading selector using the OECD metadata taxonomy. 
+    All data is discovered dynamically from the OECD SDMX metadata —
+    topics, subtopics, dataflows, and their TABLE_IDENTIFIER dimension
+    values are resolved at runtime.
+
+    Parameters
+    ----------
+    topic : str | None
+        OECD topic code (e.g. 'ECO'). Omit to list all topics.
+    subtopic : str | None
+        OECD subtopic code (e.g. 'ECO.EO'). Enter a topic to see choices.
+        Pure UI convenience — not required for API / Python usage.
+    table : str | None
+        Dataflow::table symbol (e.g. 'DF_QNA::T0101'). Enter a subtopic to see choices.
+    country : str | None
+        Country code. Enter a topic and table to see choices.
+    frequency : str | None
+        Frequency code. Enter topic, table, and country to see choices.
+
+    Returns
+    -------
+    list[dict[str, str]]
+        [{label, value}] choices for the current cascading step.
+    """
+    # pylint: disable=import-outside-toplevel
+    from collections import (
+        Counter,
+        defaultdict as _ddict,
+    )
+
+    from openbb_oecd.utils.progressive_helper import OecdParamsBuilder
+
+    topic = topic if topic and topic.strip() else None
+    subtopic = subtopic if subtopic and subtopic.strip() else None
+    table = table if table and table.strip() else None
+    country = country if country and country.strip() else None
+    frequency = frequency if frequency and frequency.strip() else None
+
+    # Step 0: No params → return topic choices from taxonomy.
+    if topic is None:
+        topics = metadata.list_topics()
+        tm = metadata.table_map()
+        topic_counts = Counter(r.get("topic_id", "") for r in tm)
+
+        return sorted(
+            [
+                {
+                    "label": f"{t['name']} ({topic_counts.get(t['id'], 0)})",
+                    "value": t["id"],
+                }
+                for t in topics
+                if topic_counts.get(t["id"], 0) > 0
+            ],
+            key=lambda x: x["label"],
+        )
+
+    # Step 1: topic selected → return subtopic choices.
+    if topic is not None and subtopic is None:
+        topics = metadata.list_topics()
+        t_upper = topic.upper()
+        tm = metadata.table_map()
+        sub_counts = Counter(
+            r.get("subtopic_id", "")
+            for r in tm
+            if r.get("topic_id", "").upper() == t_upper
+        )
+        for t in topics:
+            if t["id"].upper() == t_upper:
+                subtopics = t.get("subtopics", [])
+                choices = sorted(
+                    [
+                        {
+                            "label": f"{s['name']} ({sub_counts.get(s['id'], 0)} tables)",
+                            "value": s["id"],
+                        }
+                        for s in subtopics
+                        if sub_counts.get(s["id"], 0) > 0
+                    ],
+                    key=lambda x: x["label"],
+                )
+                # A single subtopic is still returned as a choice so the
+                # UI can auto-select it.
+                return choices
+
+        return [{"label": "No subtopics found for this topic", "value": ""}]
+
+    # Step 2: subtopic selected → return table choices.
+    if table is None and topic is not None:
+        dataflows = metadata.list_dataflows(topic=topic)
+        sub_upper = subtopic.upper()
+        dataflows = [
+            df
+            for df in dataflows
+            if sub_upper in [s.upper() for s in df.get("all_subtopics", [])]
+            or df.get("subtopic", "").upper() == sub_upper
+        ]
+
+        section_map = metadata._detect_section_families()
+        # Remove section children — they'll be reintroduced via their
+        # parent root below.
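+        # (section_map maps child dataflow IDs to their parent; children of
+        # non-production parents are re-added by the expansion loop below.)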
+ dataflows = [df for df in dataflows if df["value"] not in section_map] + _children_of: dict[str, list[str]] = _ddict(list) + + for _child, _parent in section_map.items(): + _children_of[_parent].append(_child) + + _expanded: list[dict] = [] + + for df in dataflows: + annots = metadata.dataflows.get(df["value"], {}).get("annotations", {}) + if ( + annots.get("NonProductionDataflow") == "true" + and df["value"] in _children_of + ): + for child_id in _children_of[df["value"]]: + child_info = metadata.dataflows.get(child_id, {}) + _expanded.append( + { + **df, + "label": child_info.get("name", df["label"]), + "value": child_id, + } + ) + else: + _expanded.append(df) + + dataflows = _expanded + country_family_map = metadata._detect_country_families() + dataflows = [ + df + for df in dataflows + if df["value"] not in country_family_map + or country_family_map[df["value"]]["representative"] == df["value"] + ] + + # Collect candidates: for each TABLE_IDENTIFIER value, track + # which dataflow has the most indicators. Dataflows that are + # just pre-filtered slices (fewer indicators) are dropped so + # only the richest dataflow per table is shown. Keyed by + # (dsd_prefix, table_id) so that table IDs from different DSDs + # (e.g. T0101 in DSD_NAMAIN10 vs DSD_NAMAIN1) don't collide. + _best_for_table: dict[tuple[str, str], tuple[str, str, str, int]] = {} + _no_group: list[dict[str, str]] = [] + # Minimum average indicators per table group. Dataflows where + # groups are just granular API slices (e.g. SUT developer tables + # with 100+ TABLE_IDENTIFIER values and ~1 indicator each) are + # not useful as presentation tables. + min_indicators_per_group = 3 + + for df in dataflows: + full_id = df["value"] + info = metadata.dataflows.get(full_id, {}) + short_id = info.get("short_id", full_id.split("@")[-1]) + # Only expand table groups when the DSD structure is already + # cached in memory. Calling get_table_groups / get_indicator_tree + # on an uncached dataflow triggers _ensure_structure → network + # fetch + LZMA cache write per dataflow, which is far too slow + # for a dropdown endpoint that may iterate dozens of dataflows. + structure_cached = full_id in metadata.datastructures + groups = metadata.get_table_groups(short_id) if structure_cached else [] + + if groups: + tree = metadata.get_indicator_tree(short_id) + + def _count(nodes: list) -> int: + c = 0 + for n in nodes: + c += 1 + c += _count(n.get("children", [])) # pylint: disable=W0640 + return c + + n_indicators = _count(tree) + + # When there are too many groups relative to indicators, + # each group is a tiny slice — not a real presentation + # table. Offer the dataflow as a single flat entry. 
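+                # e.g. 120 TABLE_IDENTIFIER groups over ~150 indicators
+                # averages ~1.25 per group, well under
+                # min_indicators_per_group (3).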
+ n_groups = len(groups) + + if n_groups > 1 and n_indicators / n_groups < min_indicators_per_group: + _no_group.append( + { + "label": df["label"], + "value": short_id, + } + ) + continue + + for g in groups: + tid = g["value"] + dsd_prefix = full_id.split("@")[0] if "@" in full_id else full_id + key = (dsd_prefix, tid) + prev = _best_for_table.get(key) + + if prev is None or n_indicators > prev[3]: + _best_for_table[key] = ( + short_id, + df["label"], + g["label"], + n_indicators, + ) + else: + _no_group.append( + { + "label": df["label"], + "value": short_id, + } + ) + + results: list[dict[str, str]] = list(_no_group) + + for (_, tid), (sid, df_label, tbl_label, _) in _best_for_table.items(): + results.append( + { + "label": f"{df_label}: {tbl_label}", + "value": f"{sid}::{tid}", + } + ) + + return sorted(results, key=lambda x: x["label"]) + + # From here, table is a symbol like "DF_QNA::T0101" or "DF_PRICES_ALL". + parts = table.split("::", 1) + dataflow_id = parts[0] + full_id = metadata._resolve_dataflow_id(dataflow_id) + annotations = metadata.dataflows.get(full_id, {}).get("annotations", {}) + nd_pins = _parse_not_displayed(annotations) + defaults = _parse_defaults(annotations) + _TABLE_GROUP_DIMS = {"TABLE_IDENTIFIER", "CHAPTER"} + # Collect single-value NOT_DISPLAYED pins for real DSD dimensions. + _tmp_pb = OecdParamsBuilder(dataflow_id=dataflow_id) + pb_dims = _tmp_pb.get_dimensions_in_order() + pb_dim_set = set(pb_dims) + nd_avail_pins: dict[str, str] = {} + + for dim_id, val in nd_pins.items(): + if ( + dim_id in pb_dim_set + and dim_id not in _TABLE_GROUP_DIMS + and val + and "+" not in val + ): + nd_avail_pins[dim_id] = val + + # Build progressive helper with known pins applied in DSD order. + country_dim = next((d for d in pb_dims if d in _COUNTRY_DIMS), None) + freq_dim = next((d for d in pb_dims if d in _FREQ_DIMS), None) + + def _build_pb( + pin_country: str | None = None, + pin_freq: str | None = None, + ) -> OecdParamsBuilder: + _pb = OecdParamsBuilder(dataflow_id=dataflow_id) + + for _dim in _pb.get_dimensions_in_order(): + if _dim == country_dim and pin_country: + _pb.set_dimension((_dim, pin_country.replace(",", "+"))) + elif _dim == freq_dim and pin_freq is not None: + _pb.set_dimension((_dim, pin_freq)) + elif _dim in nd_avail_pins: + _pb.set_dimension((_dim, nd_avail_pins[_dim])) + + return _pb + + def _mark_default( + options: list[dict[str, str]], dim_id: str + ) -> list[dict[str, str]]: + """Tag the option matching the DEFAULT annotation for *dim_id*.""" + default_val = defaults.get(dim_id) + + if not default_val: + return options + for opt in options: + if opt["value"] == default_val: + opt["default"] = "true" + break + + return options + + # Step 3: table selected → return country choices (availability-filtered). + if not country: + if not country_dim: + return [{"label": "Select a Table", "value": ""}] + pb = _build_pb() + options = pb.get_options_for_dimension(country_dim) + return _mark_default(options, country_dim) + + # Step 4: country selected → return frequency choices. 
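+    # A frequency pinned via NOT_DISPLAYED is returned as its single
+    # labelled value; otherwise options come from the availability-filtered
+    # builder with the selected country pinned.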
+ if frequency is None: + if not freq_dim: + return [{"label": "N/A (no frequency dimension)", "value": "_NA"}] + if freq_dim in nd_pins: + val = nd_pins[freq_dim] + if val: + labels = metadata.get_codelist_for_dimension(full_id, freq_dim) + return [{"label": labels.get(val, val), "value": val}] + return [] + pb = _build_pb(pin_country=country) + options = pb.get_options_for_dimension(freq_dim) + return _mark_default(options, freq_dim) + + return [] + + +@router.command( + methods=["GET"], + widget_config={"exclude": True}, + include_in_schema=False, + examples=[ + APIEx( + description="Get unit_measure choices.", + parameters={ + "table": "DF_QNA::T0101", + "country": "USA", + "frequency": "Q", + "dimension": "unit_measure", + }, + ), + APIEx( + description="Get adjustment choices.", + parameters={ + "table": "DF_QNA::T0101", + "country": "USA", + "frequency": "Q", + "dimension": "adjustment", + }, + ), + ], +) +async def presentation_table_dim_choices( + metadata: OECDMetadataDependency, + table: str, + dimension: str, + country: str | None = None, + frequency: str | None = None, +) -> list[dict[str, str]]: + """Return available values for a single dimension (unit, adjustment, transform). + + Independent of the other dimension selections — each dropdown queries + this endpoint separately so they don’t block each other. + + Parameters + ---------- + table : str + Dataflow::table symbol (e.g. 'DF_QNA::T0101'). + country : str + Country code(s). + dimension : str + Which dimension to query: 'unit_measure', 'adjustment', or 'transformation'. + frequency : str | None + Frequency code (optional — auto-resolved when only one exists). + + Returns + ------- + list[dict[str, str]] + [{label, value}] choices. + """ + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.progressive_helper import OecdParamsBuilder + + dim_map: dict[str, str] = { + "unit_measure": "UNIT_MEASURE", + "adjustment": "ADJUSTMENT", + "transformation": "TRANSFORMATION", + "counterpart_area": "COUNTERPART_AREA", + "counterpart": "COUNTERPART_AREA", + "service": "SERVICE", + "product": "PRODUCT", + "sex": "SEX", + "age": "AGE", + "sector": "SECTOR", + "education_lev": "EDUCATION_LEV", + } + _table: str | None = table if table and table.strip() else None + country = country if country and country.strip() else None + frequency = frequency if frequency and frequency.strip() else None + + if not _table: + return [] + + target_dim = dim_map.get(dimension.lower(), dimension.upper()) + + parts = _table.split("::", 1) + dataflow_id = parts[0] + full_id = metadata._resolve_dataflow_id(dataflow_id) + annotations = metadata.dataflows.get(full_id, {}).get("annotations", {}) + nd_pins = _parse_not_displayed(annotations) + defaults = _parse_defaults(annotations) + + if target_dim in nd_pins: + return [] + + _TABLE_GROUP_DIMS = {"TABLE_IDENTIFIER", "CHAPTER"} + pb = OecdParamsBuilder(dataflow_id=dataflow_id) + dims_in_order = pb.get_dimensions_in_order() + dims_in_order_set = set(dims_in_order) + + if target_dim not in dims_in_order_set: + return [] + + country_dim = next((d for d in dims_in_order if d in _COUNTRY_DIMS), None) + freq_dim = next((d for d in dims_in_order if d in _FREQ_DIMS), None) + + # Pin known dimensions in DSD order to avoid clearing downstream pins. 
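+    # Setting a dimension can clear pins downstream of it, so values are
+    # applied walking dims_in_order from the first dimension onward.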
+ for dim_id in dims_in_order: + if dim_id == country_dim and country: + pb.set_dimension((dim_id, str(country).replace(",", "+"))) + elif dim_id == freq_dim and frequency is not None: + pb.set_dimension((dim_id, frequency)) + elif ( + dim_id in nd_pins + and dim_id not in _TABLE_GROUP_DIMS + and nd_pins[dim_id] + and "+" not in nd_pins[dim_id] + ): + pb.set_dimension((dim_id, nd_pins[dim_id])) + + if frequency is None and freq_dim and freq_dim in dims_in_order_set: + freq_options = pb.get_options_for_dimension(freq_dim) + + if len(freq_options) == 1: + pb.set_dimension((freq_dim, freq_options[0]["value"])) + + if target_dim not in dims_in_order_set: + return [] + + not_applicable = {"not applicable", "not available", "n/a"} + options = pb.get_options_for_dimension(target_dim) + options = [o for o in options if o.get("label", "").lower() not in not_applicable] + + if not options: + return [] + + default_val = defaults.get(target_dim) + + if default_val: + for opt in options: + if opt["value"] == default_val: + opt["default"] = True # type: ignore[assignment] + break + + if len(options) == 1: + return options + + options.insert(0, {"label": "All", "value": "all"}) + options.insert(0, {"label": "Auto", "value": "auto"}) + + return options + + +@router.command( + methods=["GET"], + widget_config={ + "params": [ + { + "paramName": "topic", + "label": "Topic", + "value": None, + "type": "endpoint", + "optionsEndpoint": f"{api_prefix}/oecd_utils/presentation_table_choices", + "description": "The OECD topic.", + }, + { + "paramName": "subtopic", + "label": "Subtopic", + "value": None, + "type": "endpoint", + "optionsEndpoint": f"{api_prefix}/oecd_utils/presentation_table_choices", + "optionsParams": { + "topic": "$topic", + }, + "style": {"popupWidth": 500}, + "description": "Filter by subtopic. UI convenience only.", + "show": True, + }, + { + "paramName": "table", + "label": "Table", + "type": "endpoint", + "value": None, + "optionsEndpoint": f"{api_prefix}/oecd_utils/presentation_table_choices", + "optionsParams": { + "topic": "$topic", + "subtopic": "$subtopic", + }, + "style": {"popupWidth": 950}, + "description": "The OECD presentation table (DATAFLOW::TABLE_ID).", + }, + { + "paramName": "country", + "label": "Country", + "description": "Country or region for the table.", + "type": "endpoint", + "multiSelect": True, + "optionsEndpoint": f"{api_prefix}/oecd_utils/presentation_table_choices", + "optionsParams": { + "topic": "$topic", + "subtopic": "$subtopic", + "table": "$table", + }, + }, + { + "paramName": "counterpart", + "label": "Counterpart", + "type": "endpoint", + "multiSelect": True, + "optionsEndpoint": f"{api_prefix}/oecd_utils/presentation_table_dim_choices", + "optionsParams": { + "table": "$table", + "country": "$country", + "frequency": "$frequency", + "dimension": "counterpart_area", + }, + "style": {"popupWidth": 400}, + "description": ( + "Counterpart area for bilateral data." + " Leave blank for auto-selection (World)." 
+ ), + "optional": True, + }, + { + "paramName": "frequency", + "label": "Frequency", + "type": "endpoint", + "optionsEndpoint": f"{api_prefix}/oecd_utils/presentation_table_choices", + "optionsParams": { + "topic": "$topic", + "subtopic": "$subtopic", + "table": "$table", + "country": "$country", + }, + "description": "The data frequency.", + }, + { + "paramName": "unit_measure", + "label": "Unit Measure", + "type": "endpoint", + "value": None, + "optionsEndpoint": f"{api_prefix}/oecd_utils/presentation_table_dim_choices", + "optionsParams": { + "table": "$table", + "country": "$country", + "frequency": "$frequency", + "dimension": "unit_measure", + }, + "style": {"popupWidth": 400}, + "description": "Unit of measure. Leave blank for auto-selection.", + }, + { + "paramName": "adjustment", + "label": "Adjustment", + "type": "endpoint", + "value": None, + "optionsEndpoint": f"{api_prefix}/oecd_utils/presentation_table_dim_choices", + "optionsParams": { + "table": "$table", + "country": "$country", + "frequency": "$frequency", + "dimension": "adjustment", + }, + "description": "Seasonal adjustment type. Leave blank for auto-selection.", + "optional": True, + }, + { + "paramName": "transformation", + "label": "Transformation", + "type": "endpoint", + "value": None, + "optionsEndpoint": f"{api_prefix}/oecd_utils/presentation_table_dim_choices", + "optionsParams": { + "table": "$table", + "country": "$country", + "frequency": "$frequency", + "dimension": "transformation", + }, + "description": "Data transformation. Leave blank for auto-selection.", + "optional": True, + }, + { + "paramName": "dimension_values", + "label": "Dimension Values", + "type": "text", + "value": None, + "description": "Dimension selection for filtering. Format: 'DIM_ID1:VAL1+VAL2.' " + + "See the Metadata tab for available dimensions and values.", + "multiple": True, + "multiSelect": False, + }, + { + "paramName": "start_date", + "label": "Start Date", + "type": "date", + "value": None, + "description": "Earliest date to include (ISO format). Filters out obsolete data.", + "optional": True, + }, + { + "paramName": "end_date", + "label": "End Date", + "type": "date", + "value": None, + "description": "Latest date to include (ISO format).", + "optional": True, + }, + { + "paramName": "limit", + "label": "Limit", + "value": 5, + "description": "Most recent N records to retrieve per series.", + "type": "number", + }, + ], + "runButton": True, + "refetchInterval": False, + "name": "OECD Presentation Table", + "description": "Presentation tables from the OECD database.", + "source": ["OECD"], + "category": "OECD Utilities", + "subCategory": "Presentation Tables", + }, + examples=[ + APIEx( + description="Get quarterly GDP output table for the United States.", + parameters={ + "topic": "ECO", + "table": "DF_QNA::T0101", + "country": "USA", + "frequency": "Q", + "limit": 4, + }, + ) + ], +) +async def presentation_table( # noqa: PLR0912 + topic: Annotated[ + str | None, + Query( + title="Topic", + description="The OECD topic code." + + " UI navigation aid — not required when table is provided directly.", + ), + ] = None, + subtopic: Annotated[ + str | None, + Query( + title="Subtopic", + description="The OECD subtopic code (e.g. 'ECO.EO')." + + " UI navigation aid — not required when table is provided directly.", + ), + ] = None, + table: Annotated[ + str | None, + Query( + title="Table", + description="The OECD presentation table key." + + " Accepts 'DATAFLOW::TABLE_ID' or bare 'DATAFLOW_ID'." 
+ + " See presentation_table_choices() for options.", + ), + ] = None, + country: Annotated[ + str | None, + Query( + title="Country", + description=( + "Country code to filter the data." + " Enter multiple codes by joining on '+'." + " See presentation_table_choices() for options." + ), + ), + ] = None, + counterpart: Annotated[ + str | None, + Query( + title="Counterpart", + description="Counterpart area code for bilateral data (e.g. 'W' for World, 'USA')." + + " When omitted, auto-selected (defaults to World aggregate).", + ), + ] = None, + frequency: Annotated[ + str | None, + Query( + title="Frequency", + description="The data frequency. See presentation_table_choices() for options." + + " Typical values are 'A' (annual), 'Q' (quarter), 'M' (month).", + ), + ] = None, + unit_measure: Annotated[ + str | None, + Query( + title="Unit of Measure", + description="Unit of measure code." + + " E.g. 'XDC' (national currency), 'USD_EXC' (US dollars)," + + " 'PS' (persons), 'PT_B1GQ' (% of GDP)." + + " When omitted, auto-selected via availability.", + ), + ] = None, + adjustment: Annotated[ + str | None, + Query( + title="Adjustment", + description="Seasonal adjustment code." + + " 'Y' = seasonally adjusted, 'N' = not adjusted." + + " When omitted, prefers 'Y' where available, falls back to 'N'.", + ), + ] = None, + transformation: Annotated[ + str | None, + Query( + title="Transformation", + description="Data transformation code." + + " E.g. 'N' (none), 'GY' (growth year-on-year), 'IX' (index)." + + " When omitted, auto-selected via availability.", + ), + ] = None, + dimension_values: Annotated[ + list[str] | str | None, + Query( + title="Dimension Values", + description="Dimension selection for filtering. Format: 'DIM_ID1:VAL1+VAL2.'", + ), + ] = None, + start_date: Annotated[ + str | None, + Query( + title="Start Date", + description="Earliest date to include (ISO format, e.g. '2020-01-01')." + + " Filters out obsolete rows that stopped reporting before this date.", + ), + ] = None, + end_date: Annotated[ + str | None, + Query( + title="End Date", + description="Latest date to include (ISO format, e.g. '2025-12-31').", + ), + ] = None, + limit: Annotated[ + int, + Query( + title="Limit", + description="Maximum number of records to retrieve per series.", + ), + ] = 1, +) -> Any: + """Get a formatted presentation table from the OECD database.""" + # pylint: disable=import-outside-toplevel,too-many-branches + import re as _re_mod + + from openbb_core.app.model.abstract.error import OpenBBError + from openbb_oecd.utils.helpers import oecd_date_to_python_date + from openbb_oecd.utils.table_builder import OecdTableBuilder + from pandas import DataFrame + + if table is None: + raise OpenBBError( + ValueError( + "Please select a topic, subtopic, and table from the dropdown menus." + ) + ) + if country is None: + raise OpenBBError( + ValueError("Please select a country and frequency from the dropdown menus.") + ) + + if frequency is not None and frequency.strip().upper() == "_NA": + frequency = None + + # Parse dimension_values into kwargs for the table builder. + extra_dims: dict[str, str] = {} + + # Explicit dimension parameters — handle special UI values. 
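+    # "auto" (or blank) leaves the dimension unset so the builder resolves
+    # it; "all" becomes the wildcard "*"; anything else passes through
+    # upper-cased.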
+ for _dim_id, _dim_val in [ + ("COUNTERPART_AREA", counterpart), + ("UNIT_MEASURE", unit_measure), + ("ADJUSTMENT", adjustment), + ("TRANSFORMATION", transformation), + ]: + if _dim_val is not None and _dim_val.strip().lower() not in ("", "auto"): + if _dim_val.strip().lower() == "all": + extra_dims[_dim_id] = "*" + else: + extra_dims[_dim_id] = _dim_val.strip().upper() + + if dimension_values: + dv_list = ( + [dimension_values] + if isinstance(dimension_values, str) + else dimension_values + ) + for dv in dv_list: + if not dv or not isinstance(dv, str): + continue + for pair in (p.strip() for p in dv.split(",") if p.strip()): + if ":" in pair: + dim_id, dim_val = pair.split(":", 1) + extra_dims[dim_id.strip().upper()] = dim_val.strip().upper() + + # table can be "DF_QNA::T0101" or bare "DF_FDI_FLOW_AGGR" + builder = OecdTableBuilder() + try: + result = builder.get_table( + table_id=table, # handles both "DF::TBL" and bare "DF" formats + country=country, + frequency=frequency, + start_date=start_date, + end_date=end_date, + limit=limit, + **extra_dims, # type: ignore[arg-type] + ) + except (ValueError, OpenBBError) as exc: + raise OpenBBError(str(exc)) from exc + + data_rows = result.get("data", []) + + if not data_rows: + raise OpenBBError(ValueError("No data returned for the given parameters.")) + + # Build output rows matching the expected format. + table_meta = result.get("table_metadata", {}) + fixed_dims = table_meta.get("fixed_dimensions", {}) + # Build a subtitle describing units, currency, etc. + # Multiplier is excluded because values are already expanded. + skip_labels = {"not applicable", "not available", "n/a", "_z", ""} + subtitle_parts: list[str] = [] + unit = table_meta.get("unit_measure", "") + currency = table_meta.get("currency", "") + price_base = table_meta.get("price_base", "") + + if unit and unit.lower() not in skip_labels: + subtitle_parts.append(unit) + + if ( + currency + and currency.lower() not in skip_labels + and currency.lower() != unit.lower() + ): + subtitle_parts.append(currency) + + if price_base and price_base.lower() not in skip_labels: + subtitle_parts.append(price_base) + table_subtitle = ", ".join(subtitle_parts) + fixed_country = "" + + for dim_key in ("REF_AREA", "COUNTRY", "AREA"): + if dim_key in fixed_dims: + fixed_country = fixed_dims[dim_key].get("label", "") + break + + results_json: list[dict] = [] + # Collect per-row unit metadata to detect whether units vary. + unit_keys = ("unit_measure", "currency_denom", "currency", "price_base") + _row_units: list[str] = [] + _row_unit_parts: list[list[str]] = [] + + for row in data_rows: + time_str = row.get("time_period", "") + parsed_date = oecd_date_to_python_date(time_str) if time_str else None + country_val = row.get("ref_area", "") or row.get("country", "") or fixed_country + # Build per-row unit description from available metadata. 
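+        # Values are joined with ", ", skipping N/A-style labels and
+        # consecutive duplicates (e.g. a currency label repeating the unit).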
+ _parts: list[str] = [] + + for _uk in unit_keys: + _uv = row.get(_uk, "") + if ( + _uv + and str(_uv).lower() not in skip_labels + and (not _parts or str(_uv).lower() != _parts[-1].lower()) + ): + _parts.append(str(_uv)) + + _row_unit = ", ".join(_parts) + _row_units.append(_row_unit) + _row_unit_parts.append(_parts) + results_json.append( + { + "title": row.get("label", ""), + "country": country_val, + "date": str(parsed_date) if parsed_date else time_str, + "value": row.get("value"), + "order": row.get("order"), + "level": row.get("level", 0), + "_acct_sort": row.get("_acct_sort", 0), + "_child_order": row.get("_child_order", 0), + "_sub_order": row.get("_sub_order", 0), + "_compound_order": row.get("_compound_order", 0), + "code": row.get("code", ""), + "is_header": row.get("is_category_header", False), + "_unit_desc": _row_unit, + } + ) + + # Determine if units vary across rows (excluding headers with no value). + _unique_units = { + r["_unit_desc"] + for r in results_json + if r.get("value") is not None and r["_unit_desc"] + } + _units_vary = len(_unique_units) > 1 + + df = DataFrame(results_json) + # Pivot: one row per indicator + accounting entry, dates as columns. + _pivot_index = [ + "title", + "country", + "order", + "level", + "_acct_sort", + "_child_order", + "_sub_order", + "_compound_order", + "code", + "is_header", + "_unit_desc", + ] + if "date" in df.columns and "title" in df.columns: + try: + pivot_df = df.pivot_table( + index=_pivot_index, + columns="date", + values="value", + aggfunc="first", + ).reset_index() + + pivot_df = pivot_df.sort_values( + ["order", "_acct_sort", "_child_order", "_sub_order", "_compound_order"] + ) + pivot_df.columns.name = None + df = pivot_df + except (KeyError, ValueError, TypeError): + pass + + # When units vary per row, append the unit description to the title. + if _units_vary and "_unit_desc" in df.columns: + + def _append_unit(row_data): + title = str(row_data.get("title", "")) + udesc = str(row_data.get("_unit_desc", "")) + if udesc and row_data.get("is_header") is not True: + return f"{title} ({udesc})" + return title + + df["title"] = df.apply(_append_unit, axis=1) + + # Apply hierarchy indentation to titles. + if "level" in df.columns: + + def _indent_title(row_data): + lvl = int(row_data.get("level", 0) or 0) + title = str(row_data.get("title", "")) + if lvl > 0: + return ">\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0" * lvl + title + return title + + df["title"] = df.apply(_indent_title, axis=1) + + # Drop helper columns that shouldn't appear in the table. + df = df.drop( + columns=[ + "order", + "level", + "_acct_sort", + "_child_order", + "_sub_order", + "_compound_order", + "code", + "is_header", + "_unit_desc", + ], + errors="ignore", + ) + + # Drop country column when only one value (already in subtitle). + if "country" in df.columns and df["country"].nunique() <= 1: + df = df.drop(columns=["country"]) + + # Reorder columns: title first, then date columns latest-first. + def _is_date_col(c): + return bool(_re_mod.match(r"\d{4}-\d{2}-\d{2}$", str(c))) + + _fixed_cols = [c for c in df.columns if not _is_date_col(c)] + _date_cols = sorted( + [c for c in df.columns if _is_date_col(c)], + reverse=True, + ) + df = df[_fixed_cols + _date_cols] + records = df.to_dict(orient="records") + # Prepend a title row with null values for all data columns. 
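+    # The first row carries the subtitle (unit, currency, price base) in the
+    # title column, with None in every data column.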
+ title_row = {"title": table_subtitle} + + for col in df.columns: + if col != "title": + title_row[col] = None # type: ignore + + return [title_row] + records + + +async def get_oecd_utils_apps_json() -> list[dict[str, Any]]: + """Get the OECD apps.json file. + + This endpoint serves the apps.json file containing OpenBB Workspace app configurations + related to OECD data and utilities. + + Returns + ------- + list[dict[str, Any]] + A list of OpenBB Workspace app configurations. + """ + # pylint: disable=import-outside-toplevel + import json + from pathlib import Path + + apps_file = Path(__file__).parent / "apps.json" + + try: + with apps_file.open("r", encoding="utf-8") as f: + return json.load(f) + except (FileNotFoundError, json.JSONDecodeError, OSError): + return [] + + +router._api_router.add_api_route( + path="/apps.json", + endpoint=get_oecd_utils_apps_json, + methods=["GET"], + include_in_schema=False, +) diff --git a/openbb_platform/providers/oecd/openbb_oecd/py.typed b/openbb_platform/providers/oecd/openbb_oecd/py.typed new file mode 100644 index 00000000000..e69de29bb2d diff --git a/openbb_platform/providers/oecd/openbb_oecd/utils/constants.py b/openbb_platform/providers/oecd/openbb_oecd/utils/constants.py index cc101c88200..f377516a3e6 100644 --- a/openbb_platform/providers/oecd/openbb_oecd/utils/constants.py +++ b/openbb_platform/providers/oecd/openbb_oecd/utils/constants.py @@ -1,647 +1,569 @@ -"""Constants for the OECD provider.""" +"""Constants for the OECD provider. -COUNTRY_TO_CODE_GDP = { - "oecd": "OECD", - "oecd_26": "OECD26", - "oecd_europe": "OECDE", - "g7": "G7", - "g20": "G20", - "euro_area": "EA20", - "european_union_27": "EU27_2020", - "european_union_15": "EU15", - "nafta": "USMCA", - "argentina": "ARG", - "australia": "AUS", - "austria": "AUT", - "belgium": "BEL", - "bulgaria": "BGR", - "brazil": "BRA", - "canada": "CAN", - "chile": "CHL", - "colombia": "COL", - "costa_rica": "CRI", - "croatia": "HRV", - "czech_republic": "CZE", - "denmark": "DNK", - "estonia": "EST", - "finland": "FIN", - "france": "FRA", - "germany": "DEU", - "greece": "GRC", - "hungary": "HUN", - "iceland": "ISL", - "india": "IND", - "indonesia": "IDN", - "ireland": "IRL", - "israel": "ISR", - "italy": "ITA", - "japan": "JPN", - "korea": "KOR", - "latvia": "LVA", - "lithuania": "LTU", - "luxembourg": "LUX", - "mexico": "MEX", - "netherlands": "NLD", - "new_zealand": "NZL", - "norway": "NOR", - "poland": "POL", - "portugal": "PRT", - "romania": "ROU", - "russia": "RUS", - "saudi_arabia": "SAU", - "slovak_republic": "SVK", - "slovenia": "SVN", - "south_africa": "ZAF", - "spain": "ESP", - "sweden": "SWE", - "switzerland": "CHE", - "turkey": "TUR", - "united_kingdom": "GBR", - "united_states": "USA", -} - -CODE_TO_COUNTRY_GDP = {v: k for k, v in COUNTRY_TO_CODE_GDP.items()} +Country resolution is handled dynamically by +OecdMetadata.resolve_country_codes() using the REF_AREA codelist +from each dataflow's DSD. 
+""" -COUNTRY_TO_CODE_RGDP = { - "G20": "G-20", - "G7": "G-7", - "argentina": "ARG", - "australia": "AUS", - "austria": "AUT", - "belgium": "BEL", - "brazil": "BRA", - "bulgaria": "BGR", - "canada": "CAN", - "chile": "CHL", - "china": "CHN", - "colombia": "COL", - "costa_rica": "CRI", - "croatia": "HRV", - "czech_republic": "CZE", - "denmark": "DNK", - "estonia": "EST", - "euro_area_20": "EA20", - "euro_area_19": "EA19", - "europe": "OECDE", - "european_union_27": "EU27_2020", - "finland": "FIN", - "france": "FRA", - "germany": "DEU", - "greece": "GRC", - "hungary": "HUN", - "iceland": "ISL", - "india": "IND", - "indonesia": "IDN", - "ireland": "IRL", - "israel": "ISR", - "italy": "ITA", - "japan": "JPN", - "korea": "KOR", - "latvia": "LVA", - "lithuania": "LTU", - "luxembourg": "LUX", - "mexico": "MEX", - "netherlands": "NLD", - "new_zealand": "NZL", - "norway": "NOR", - "oecd_total": "OECD", - "poland": "POL", - "portugal": "PRT", - "romania": "ROU", - "russia": "RUS", - "saudi_arabia": "SAU", - "slovak_republic": "SVK", - "slovenia": "SVN", - "south_africa": "ZAF", - "spain": "ESP", - "sweden": "SWE", - "switzerland": "CHE", - "turkey": "TUR", - "united_kingdom": "GBR", - "united_states": "USA", +# Frequency mapping: user-friendly name -> SDMX code +FREQUENCY_MAP: dict[str, str] = { + "annual": "A", + "yearly": "A", + "quarter": "Q", + "quarterly": "Q", + "month": "M", + "monthly": "M", + "week": "W", + "weekly": "W", + "day": "D", + "daily": "D", } -CODE_TO_COUNTRY_RGDP = {v: k for k, v in COUNTRY_TO_CODE_RGDP.items()} - -COUNTRY_TO_CODE_GDP_FORECAST = { - "argentina": "ARG", - "asia": "DAE", - "australia": "AUS", - "austria": "AUT", - "belgium": "BEL", - "brazil": "BRA", - "bulgaria": "BGR", - "canada": "CAN", - "chile": "CHL", - "china": "CHN", - "colombia": "COL", - "costa_rica": "CRI", - "croatia": "HRV", - "czech_republic": "CZE", - "denmark": "DNK", - "estonia": "EST", - "finland": "FIN", - "france": "FRA", - "germany": "DEU", - "greece": "GRC", - "hungary": "HUN", - "iceland": "ISL", - "india": "IND", - "indonesia": "IDN", - "ireland": "IRL", - "israel": "ISR", - "italy": "ITA", - "japan": "JPN", - "korea": "KOR", - "latvia": "LVA", - "lithuania": "LTU", - "luxembourg": "LUX", - "mexico": "MEX", - "netherlands": "NLD", - "new_zealand": "NZL", - "norway": "NOR", - "peru": "PER", - "poland": "POL", - "portugal": "PRT", - "romania": "ROU", - "russia": "RUS", - "slovak_republic": "SVK", - "slovenia": "SVN", - "south_africa": "ZAF", - "spain": "ESP", - "sweden": "SWE", - "switzerland": "CHE", - "turkey": "TUR", - "united_kingdom": "GBR", - "united_states": "USA", - "other_major_oil_producers": "OIL_O", - "rest_of_the_world": "WXD", - "world": "W", +FREQUENCY_MAP_REV: dict[str, str] = { + v: k + for k, v in FREQUENCY_MAP.items() + if k == v.lower() or k in ("annual", "quarterly", "monthly", "weekly", "daily") } -CODE_TO_COUNTRY_GDP_FORECAST = {v: k for k, v in COUNTRY_TO_CODE_GDP_FORECAST.items()} - -COUNTRY_TO_CODE_CPI = { - "G20": "G20", - "G7": "G7", - "argentina": "ARG", - "australia": "AUS", - "austria": "AUT", - "belgium": "BEL", - "brazil": "BRA", - "canada": "CAN", - "chile": "CHL", - "china": "CHN", - "colombia": "COL", - "costa_rica": "CRI", - "czech_republic": "CZE", - "denmark": "DNK", - "estonia": "EST", - "euro_area_20": "EA20", - "europe": "OECDE", - "european_union_27": "EU27_2020", - "finland": "FIN", - "france": "FRA", - "germany": "DEU", - "greece": "GRC", - "hungary": "HUN", - "iceland": "ISL", - "india": "IND", - "indonesia": "IDN", - "ireland": "IRL", - "israel": 
"ISR", - "italy": "ITA", - "japan": "JPN", - "korea": "KOR", - "latvia": "LVA", - "lithuania": "LTU", - "luxembourg": "LUX", - "mexico": "MEX", - "netherlands": "NLD", - "new_zealand": "NZL", - "norway": "NOR", - "oecd_total": "OECD", - "poland": "POL", - "portugal": "PRT", - "russia": "RUS", - "saudi_arabia": "SAU", - "slovak_republic": "SVK", - "slovenia": "SVN", - "south_africa": "ZAF", - "spain": "ESP", - "sweden": "SWE", - "switzerland": "CHE", - "turkey": "TUR", - "united_kingdom": "GBR", - "united_states": "USA", +# Canonical frequency labels for reverse lookup (code -> label) +FREQUENCY_LABELS: dict[str, str] = { + "A": "annual", + "Q": "quarterly", + "M": "monthly", + "W": "weekly", + "D": "daily", } -CODE_TO_COUNTRY_CPI = {v: k for k, v in COUNTRY_TO_CODE_CPI.items()} -COUNTRY_TO_CODE_BALANCE = { - "australia": "AUS", - "austria": "AUT", - "belgium": "BEL", - "brazil": "BRA", - "canada": "CAN", - "chile": "CHL", - "colombia": "COL", - "costa_rica": "CRI", - "czech_republic": "CZE", - "denmark": "DNK", - "estonia": "EST", - "euro_area": "EA", - "european_union": "EU", - "finland": "FIN", - "france": "FRA", - "germany": "DEU", - "greece": "GRC", - "hungary": "HUN", - "iceland": "ISL", - "indonesia": "IDN", - "ireland": "IRL", - "israel": "ISR", - "italy": "ITA", - "japan": "JPN", - "korea": "KOR", - "latvia": "LVA", - "lithuania": "LTU", - "luxembourg": "LUX", - "mexico": "MEX", - "netherlands": "NLD", - "new_zealand": "NZL", - "norway": "NOR", - "poland": "POL", - "portugal": "PRT", - "russia": "RUS", - "slovak_republic": "SVK", - "slovenia": "SVN", - "south_africa": "ZAF", - "spain": "ESP", - "sweden": "SWE", - "switzerland": "CHE", - "turkey": "TUR", - "united_kingdom": "GBR", - "united_states": "USA", +# Well-known dataflow aliases for user convenience. +# Keys are short aliases; values are the dataflow short IDs. 
+DATAFLOW_ALIASES: dict[str, str] = { + "CPI": "DF_PRICES_ALL", + "CLI": "DF_CLI", + "KEI": "DF_KEI", + "GDP": "DF_QNA", + "GDP_NOMINAL": "DF_QNA_EXPENDITURE_CAPITA", + "GDP_FORECAST": "DF_EO", + "UNEMPLOYMENT": "DF_IALFS_UNE_M", + "HPI": "DF_RHPI_TARGET", + "SHARES": "DF_FINMARK", + "BOP": "DF_BOP", + "NAAG": "DF_NAAG_I", } -COUNTRY_TO_CODE_REVENUE = { - "australia": "AUS", - "austria": "AUT", - "belgium": "BEL", - "brazil": "BRA", - "canada": "CAN", - "chile": "CHL", - "colombia": "COL", - "costa_rica": "CRI", - "czech_republic": "CZE", - "denmark": "DNK", - "estonia": "EST", - "euro_area": "EA", - "european_union": "EU", - "finland": "FIN", - "france": "FRA", - "germany": "DEU", - "greece": "GRC", - "hungary": "HUN", - "iceland": "ISL", - "indonesia": "IDN", - "ireland": "IRL", - "israel": "ISR", - "italy": "ITA", - "japan": "JPN", - "korea": "KOR", - "latvia": "LVA", - "lithuania": "LTU", - "luxembourg": "LUX", - "mexico": "MEX", - "netherlands": "NLD", - "new_zealand": "NZL", - "norway": "NOR", - "oecd_average": "OAVG", - "oecd_europe": "OEU", - "oecd_total": "OECD", - "poland": "POL", - "portugal": "PRT", - "romania": "ROU", - "russia": "RUS", - "slovak_republic": "SVK", - "slovenia": "SVN", - "spain": "ESP", - "sweden": "SWE", - "switzerland": "CHE", - "turkey": "TUR", - "united_kingdom": "GBR", - "united_states": "USA", -} -COUNTRY_TO_CODE_SPENDING = { - "australia": "AUS", - "austria": "AUT", - "belgium": "BEL", - "brazil": "BRA", - "canada": "CAN", - "chile": "CHL", - "colombia": "COL", - "costa_rica": "CRI", - "czech_republic": "CZE", - "denmark": "DNK", - "estonia": "EST", - "finland": "FIN", - "france": "FRA", - "germany": "DEU", - "greece": "GRC", - "hungary": "HUN", - "iceland": "ISL", - "indonesia": "IDN", - "ireland": "IRL", - "israel": "ISR", - "italy": "ITA", - "japan": "JPN", - "korea": "KOR", - "latvia": "LVA", - "lithuania": "LTU", - "luxembourg": "LUX", - "mexico": "MEX", - "netherlands": "NLD", - "new_zealand": "NZL", - "norway": "NOR", - "oecd_average": "OAVG", - "oecd_europe": "OEU", - "oecd_total": "OECD", - "poland": "POL", - "portugal": "PRT", - "romania": "ROU", - "russia": "RUS", - "slovak_republic": "SVK", - "slovenia": "SVN", - "spain": "ESP", - "sweden": "SWE", - "switzerland": "CHE", - "turkey": "TUR", - "united_kingdom": "GBR", - "united_states": "USA", -} +# --- Constrained country lists per dataflow --- +# Sourced from OECD SDMX availability constraints (fetch_availability). +# These are the actual REF_AREA values that have data in each dataflow, +# NOT the full codelist (which has 500+ entries including non-existent ones). -COUNTRY_TO_CODE_DEBT = { - "australia": "AUS", - "austria": "AUT", - "belgium": "BEL", - "brazil": "BRA", - "canada": "CAN", - "chile": "CHL", - "colombia": "COL", - "czech_republic": "CZE", - "denmark": "DNK", - "estonia": "EST", - "finland": "FIN", - "france": "FRA", - "germany": "DEU", - "greece": "GRC", - "hungary": "HUN", - "iceland": "ISL", - "ireland": "IRL", - "israel": "ISR", - "italy": "ITA", - "japan": "JPN", - "korea": "KOR", - "latvia": "LVA", - "lithuania": "LTU", - "luxembourg": "LUX", - "mexico": "MEX", - "netherlands": "NLD", - "new_zealand": "NZL", - "norway": "NOR", - "oecd_average": "OAVG", - "oecd_total": "OECD", - "poland": "POL", - "portugal": "PRT", - "romania": "ROU", - "russia": "RUS", - "slovak_republic": "SVK", - "slovenia": "SVN", - "spain": "ESP", - "sweden": "SWE", - "switzerland": "CHE", - "turkey": "TUR", - "united_kingdom": "GBR", - "united_states": "USA", -} +CPI_COUNTRIES: tuple[str, ...] 
= ( + "argentina", + "australia", + "austria", + "belgium", + "brazil", + "bulgaria", + "canada", + "chile", + "china", + "colombia", + "costa_rica", + "croatia", + "czechia", + "denmark", + "estonia", + "eu27", + "euro_area_20_countries", + "finland", + "france", + "g20", + "g7", + "germany", + "greece", + "hungary", + "iceland", + "india", + "indonesia", + "ireland", + "israel", + "italy", + "japan", + "korea", + "latvia", + "lithuania", + "luxembourg", + "mexico", + "netherlands", + "new_zealand", + "norway", + "oecd", + "oecd_europe", + "poland", + "portugal", + "romania", + "russia", + "saudi_arabia", + "slovak_republic", + "slovenia", + "south_africa", + "spain", + "sweden", + "switzerland", + "turkiye", + "united_kingdom", + "united_states", +) -COUNTRY_TO_CODE_TRUST = { - "australia": "AUS", - "austria": "AUT", - "belgium": "BEL", - "brazil": "BRA", - "canada": "CAN", - "chile": "CHL", - "colombia": "COL", - "costa_rica": "CRI", - "czech_republic": "CZE", - "denmark": "DNK", - "estonia": "EST", - "finland": "FIN", - "france": "FRA", - "germany": "DEU", - "greece": "GRC", - "hungary": "HUN", - "iceland": "ISL", - "ireland": "IRL", - "israel": "ISR", - "italy": "ITA", - "japan": "JPN", - "korea": "KOR", - "latvia": "LVA", - "lithuania": "LTU", - "luxembourg": "LUX", - "mexico": "MEX", - "netherlands": "NLD", - "new_zealand": "NZL", - "norway": "NOR", - "poland": "POL", - "portugal": "PRT", - "russia": "RUS", - "slovak_republic": "SVK", - "slovenia": "SVN", - "south_africa": "ZAF", - "spain": "ESP", - "sweden": "SWE", - "switzerland": "CHE", - "turkey": "TUR", - "united_kingdom": "GBR", - "united_states": "USA", -} +GDP_REAL_COUNTRIES: tuple[str, ...] = ( + "argentina", + "australia", + "austria", + "belgium", + "brazil", + "bulgaria", + "canada", + "chile", + "china", + "colombia", + "costa_rica", + "croatia", + "czechia", + "denmark", + "estonia", + "eu15", + "eu27", + "euro_area_20_countries", + "finland", + "france", + "g20", + "g7", + "germany", + "greece", + "hungary", + "iceland", + "india", + "indonesia", + "ireland", + "israel", + "italy", + "japan", + "korea", + "latvia", + "lithuania", + "luxembourg", + "mexico", + "netherlands", + "new_zealand", + "norway", + "oecd", + "oecd_26_countries", + "oecd_europe", + "poland", + "portugal", + "romania", + "russia", + "saudi_arabia", + "slovak_republic", + "slovenia", + "south_africa", + "spain", + "sweden", + "switzerland", + "turkiye", + "united_kingdom", + "united_states", + "usmca", +) -COUNTRY_TO_CODE_UNEMPLOYMENT = { - "colombia": "COL", - "new_zealand": "NZL", - "united_kingdom": "GBR", - "italy": "ITA", - "luxembourg": "LUX", - "sweden": "SWE", - "oecd": "OECD", - "south_africa": "ZAF", - "denmark": "DNK", - "canada": "CAN", - "switzerland": "CHE", - "slovakia": "SVK", - "hungary": "HUN", - "portugal": "PRT", - "spain": "ESP", - "france": "FRA", - "czech_republic": "CZE", - "costa_rica": "CRI", - "japan": "JPN", - "slovenia": "SVN", - "russia": "RUS", - "austria": "AUT", - "latvia": "LVA", - "netherlands": "NLD", - "israel": "ISR", - "iceland": "ISL", - "united_states": "USA", - "ireland": "IRL", - "mexico": "MEX", - "germany": "DEU", - "greece": "GRC", - "turkey": "TUR", - "australia": "AUS", - "poland": "POL", - "korea": "KOR", - "chile": "CHL", - "finland": "FIN", - "european_union27_2020": "EU27_2020", - "norway": "NOR", - "lithuania": "LTU", - "euro_area20": "EA20", - "estonia": "EST", - "belgium": "BEL", - "g7": "G7", -} +GDP_FORECAST_COUNTRIES: tuple[str, ...] 
= ( + "argentina", + "australia", + "austria", + "belgium", + "brazil", + "bulgaria", + "canada", + "chile", + "china", + "colombia", + "costa_rica", + "croatia", + "czechia", + "dae", + "denmark", + "estonia", + "eu22_oecd", + "euro_area_17_countries", + "finland", + "former_ussr", + "france", + "germany", + "greece", + "hungary", + "iceland", + "india", + "indonesia", + "ireland", + "israel", + "italy", + "japan", + "korea", + "latvia", + "lithuania", + "luxembourg", + "mexico", + "nafta", + "netherlands", + "new_zealand", + "non_oecd_economies", + "norway", + "oecd", + "oecd_asia_oceania", + "opec", + "other_countries", + "other_major_oil", + "other_oil_producers", + "peru", + "poland", + "portugal", + "rest_of_world", + "romania", + "russia", + "slovak_republic", + "slovenia", + "south_africa", + "spain", + "sweden", + "switzerland", + "thailand", + "turkiye", + "united_kingdom", + "united_states", + "world", +) -CODE_TO_COUNTRY_UNEMPLOYMENT = {v: k for k, v in COUNTRY_TO_CODE_UNEMPLOYMENT.items()} +KEI_COUNTRIES: tuple[str, ...] = ( + "argentina", + "australia", + "austria", + "belgium", + "brazil", + "canada", + "chile", + "china", + "colombia", + "costa_rica", + "croatia", + "czechia", + "denmark", + "estonia", + "eu27", + "euro_area", + "euro_area_19_countries", + "euro_area_20_countries", + "finland", + "france", + "g20", + "g7", + "germany", + "greece", + "hungary", + "iceland", + "india", + "indonesia", + "ireland", + "israel", + "italy", + "japan", + "korea", + "latvia", + "lithuania", + "luxembourg", + "mexico", + "netherlands", + "new_zealand", + "norway", + "oecd", + "oecd_europe", + "poland", + "portugal", + "russia", + "saudi_arabia", + "slovak_republic", + "slovenia", + "south_africa", + "spain", + "sweden", + "switzerland", + "turkiye", + "united_kingdom", + "united_states", +) -COUNTRY_TO_CODE_CLI = { - "united_states": "USA", - "united_kingdom": "GBR", - "japan": "JPN", - "mexico": "MEX", - "indonesia": "IDN", - "australia": "AUS", - "brazil": "BRA", - "canada": "CAN", - "italy": "ITA", - "germany": "DEU", - "turkey": "TUR", - "france": "FRA", - "south_africa": "ZAF", - "south_korea": "KOR", - "spain": "ESP", - "india": "IND", - "china": "CHN", - "g7": "G7", - "g20": "G20", -} +CLI_COUNTRIES: tuple[str, ...] = ( + "asia5", + "australia", + "austria", + "belgium", + "brazil", + "bulgaria", + "canada", + "chile", + "china", + "colombia", + "costa_rica", + "croatia", + "czechia", + "denmark", + "estonia", + "eu27", + "euro_area_19_countries", + "euro_area_20_countries", + "europe4", + "finland", + "france", + "g20", + "g7", + "germany", + "greece", + "hungary", + "india", + "indonesia", + "ireland", + "israel", + "italy", + "japan", + "korea", + "latvia", + "lithuania", + "luxembourg", + "mexico", + "nafta", + "netherlands", + "new_zealand", + "norway", + "oecd", + "oecd_europe", + "oecd_non_euro_area", + "poland", + "portugal", + "romania", + "russia", + "slovak_republic", + "slovenia", + "south_africa", + "spain", + "sweden", + "switzerland", + "turkiye", + "united_kingdom", + "united_states", +) -CODE_TO_COUNTRY_CLI = {v: k for k, v in COUNTRY_TO_CODE_CLI.items()} +UNEMPLOYMENT_COUNTRIES: tuple[str, ...] 
= ( + "australia", + "austria", + "belgium", + "bulgaria", + "canada", + "chile", + "colombia", + "costa_rica", + "croatia", + "czechia", + "denmark", + "estonia", + "eu27", + "euro_area", + "finland", + "france", + "g7", + "germany", + "greece", + "hungary", + "iceland", + "ireland", + "israel", + "italy", + "japan", + "korea", + "latvia", + "lithuania", + "luxembourg", + "mexico", + "netherlands", + "new_zealand", + "norway", + "oecd", + "poland", + "portugal", + "romania", + "slovak_republic", + "slovenia", + "spain", + "sweden", + "switzerland", + "turkiye", + "united_kingdom", + "united_states", +) -COUNTRY_TO_CODE_IR = { - "belgium": "BEL", - "bulgaria": "BGR", - "brazil": "BRA", - "ireland": "IRL", - "mexico": "MEX", - "indonesia": "IDN", - "new_zealand": "NZL", - "japan": "JPN", - "united_kingdom": "GBR", - "france": "FRA", - "chile": "CHL", - "canada": "CAN", - "netherlands": "NLD", - "united_states": "USA", - "south_korea": "KOR", - "norway": "NOR", - "austria": "AUT", - "south_africa": "ZAF", - "denmark": "DNK", - "switzerland": "CHE", - "hungary": "HUN", - "luxembourg": "LUX", - "australia": "AUS", - "germany": "DEU", - "sweden": "SWE", - "iceland": "ISL", - "turkey": "TUR", - "greece": "GRC", - "israel": "ISR", - "czech_republic": "CZE", - "latvia": "LVA", - "slovenia": "SVN", - "poland": "POL", - "estonia": "EST", - "lithuania": "LTU", - "portugal": "PRT", - "costa_rica": "CRI", - "slovakia": "SVK", - "finland": "FIN", - "spain": "ESP", - "romania": "ROU", - "russia": "RUS", - "euro_area19": "EA19", - "colombia": "COL", - "italy": "ITA", - "india": "IND", - "china": "CHN", - "croatia": "HRV", -} +RHPI_COUNTRIES: tuple[str, ...] = ( + "australia", + "austria", + "belgium", + "brazil", + "bulgaria", + "canada", + "chile", + "china", + "colombia", + "croatia", + "czechia", + "denmark", + "estonia", + "eu_27", + "euro_area_20_countries", + "finland", + "france", + "germany", + "greece", + "hungary", + "iceland", + "india", + "indonesia", + "ireland", + "israel", + "italy", + "japan", + "korea", + "latvia", + "lithuania", + "luxembourg", + "mexico", + "netherlands", + "new_zealand", + "norway", + "poland", + "portugal", + "russia", + "saudi_arabia", + "slovak_republic", + "slovenia", + "south_africa", + "spain", + "sweden", + "switzerland", + "turkiye", + "united_kingdom", + "united_states", +) -CODE_TO_COUNTRY_IR = {v: k for k, v in COUNTRY_TO_CODE_IR.items()} +BOP_COUNTRIES: tuple[str, ...] 
= ( + "argentina", + "australia", + "austria", + "belgium", + "brazil", + "canada", + "chile", + "china", + "colombia", + "costa_rica", + "czechia", + "denmark", + "estonia", + "euro_area_20_countries", + "eu27", + "finland", + "france", + "g20", + "g7", + "germany", + "greece", + "hungary", + "iceland", + "india", + "indonesia", + "ireland", + "israel", + "italy", + "japan", + "korea", + "latvia", + "lithuania", + "luxembourg", + "mexico", + "netherlands", + "new_zealand", + "norway", + "oecd", + "poland", + "portugal", + "russia", + "saudi_arabia", + "slovak_republic", + "slovenia", + "south_africa", + "spain", + "sweden", + "switzerland", + "turkiye", + "united_kingdom", + "united_states", +) -COUNTRY_TO_CODE_SHARES = { - "slovenia": "SVN", - "russia": "RUS", - "latvia": "LVA", - "korea": "KOR", - "brazil": "BRA", - "france": "FRA", - "sweden": "SWE", - "luxembourg": "LUX", - "belgium": "BEL", - "china": "CHN", - "finland": "FIN", - "euro_area19": "EA19", - "japan": "JPN", - "hungary": "HUN", - "australia": "AUS", - "switzerland": "CHE", - "portugal": "PRT", - "estonia": "EST", - "canada": "CAN", - "slovak_republic": "SVK", - "turkey": "TUR", - "croatia": "HRV", - "denmark": "DNK", - "italy": "ITA", - "india": "IND", - "south_africa": "ZAF", - "czech_republic": "CZE", - "new_zealand": "NZL", - "netherlands": "NLD", - "iceland": "ISL", - "germany": "DEU", - "indonesia": "IDN", - "ireland": "IRL", - "united_states": "USA", - "chile": "CHL", - "lithuania": "LTU", - "greece": "GRC", - "united_kingdom": "GBR", - "colombia": "COL", - "norway": "NOR", - "spain": "ESP", - "israel": "ISR", - "poland": "POL", - "austria": "AUT", - "mexico": "MEX", -} -CODE_TO_COUNTRY_SHARES = {v: k for k, v in COUNTRY_TO_CODE_SHARES.items()} +FINMARK_COUNTRIES: tuple[str, ...] = ( + "argentina", + "australia", + "austria", + "belgium", + "brazil", + "bulgaria", + "canada", + "chile", + "china", + "colombia", + "costa_rica", + "croatia", + "czechia", + "denmark", + "estonia", + "euro_area_19_countries", + "euro_area_20_countries", + "finland", + "france", + "germany", + "greece", + "hungary", + "iceland", + "india", + "indonesia", + "ireland", + "israel", + "italy", + "japan", + "korea", + "latvia", + "lithuania", + "luxembourg", + "mexico", + "netherlands", + "new_zealand", + "norway", + "poland", + "portugal", + "romania", + "russia", + "saudi_arabia", + "sdr", + "slovak_republic", + "slovenia", + "south_africa", + "spain", + "sweden", + "switzerland", + "turkiye", + "united_kingdom", + "united_states", +) diff --git a/openbb_platform/providers/oecd/openbb_oecd/utils/generate_cache.py b/openbb_platform/providers/oecd/openbb_oecd/utils/generate_cache.py new file mode 100644 index 00000000000..00e2f6e10c8 --- /dev/null +++ b/openbb_platform/providers/oecd/openbb_oecd/utils/generate_cache.py @@ -0,0 +1,1307 @@ +#!/usr/bin/env python +"""Generate the shipped oecd_cache.json.xz baseline cache. + +Run from the oecd provider root: + + python generate_cache.py + +Uses **bulk** SDMX v2 endpoints to fetch everything in ~4 API calls: + + 1. /structure/dataflow — all dataflow IDs/names (~1400+) + 2. /structure/datastructure — all DSDs (dimensions, codelist refs) + 3. /structure/codelist — all codelists (code->label mappings) + 3b. /structure/hierarchicalcodelist — nested hierarchies (parent-child) + 4. 
/structure/categoryscheme + /structure/categorisation — taxonomy + +Then joins dataflows->DSDs->codelists in memory, derives parameters and +indicators for every dataflow, and writes the result to +openbb_oecd/assets/oecd_cache.json.xz. + +This file ships with the package so users have a complete metadata map +with zero API calls at runtime. +""" + +# pylint: disable=C0302,R0914,R0917 +# flake8: noqa: T201 + +from __future__ import annotations + +import json +import lzma +import re +import time +from collections import defaultdict +from pathlib import Path + +import requests + +ASSETS_DIR = Path(__file__).resolve().parent.parent / "assets" +CACHE_FILE = ASSETS_DIR / "oecd_cache.json.xz" +BASE_URL = "https://sdmx.oecd.org/public/rest/v2" +STRUCTURE_ACCEPT = "application/vnd.sdmx.structure+json; version=1.0; charset=utf-8" +_CL_URN_RE = re.compile(r"Codelist=([^:]+):([^(]+)\(([^)]+)\)") +_DSD_URN_RE = re.compile(r"DataStructure=([^:]+):([^(]+)\(([^)]+)\)") +_CATEGORISATION_DF_RE = re.compile(r"Dataflow=([^:]+):([^(]+)\(([^)]+)\)") +_CATEGORISATION_CAT_RE = re.compile(r"OECDCS1\([^)]+\)\.(.+)") +_session = requests.Session() +_session.headers["Accept"] = STRUCTURE_ACCEPT + + +def _get(url: str, retries: int = 5, backoff: float = 3.0) -> dict: + """GET *url* and return parsed JSON. Retries on transient failures.""" + for attempt in range(retries): + try: + resp = _session.get(url, timeout=300) + if resp.status_code == 429: + wait = max(15, backoff * (attempt + 1) * 5) + time.sleep(wait) + continue + resp.raise_for_status() + return resp.json() + except (requests.RequestException, json.JSONDecodeError): + if attempt == retries - 1: + raise + wait = backoff * (attempt + 1) + time.sleep(wait) + raise requests.RequestException(f"failed after {retries} attempts: {url}") + + +def _extract_codelist_id(urn: str) -> str: + """Extract the fully-qualified codelist key from a URN. + + Returns ``agency:id(version)`` so that codelists from different + agencies (e.g. ``OECD.SDD.STES:CL_MEASURE(1.0)`` vs + ``OECD.WISE.WDP:CL_MEASURE(1.1)``) never collide. 
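+
+    For example, an illustrative URN ending in
+    ``Codelist=OECD.SDD.STES:CL_MEASURE(1.0)`` yields
+    ``"OECD.SDD.STES:CL_MEASURE(1.0)"``.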
+ """ + m = _CL_URN_RE.search(urn) + if m: + return f"{m.group(1)}:{m.group(2)}({m.group(3)})" + if ":" in urn: + return urn.rsplit(":", 1)[-1].split("(", maxsplit=1)[0] + return urn + + +# --------------------------------------------------------------------------- +# Step 1 -- Fetch all dataflows +# --------------------------------------------------------------------------- + + +def fetch_dataflows() -> tuple[dict[str, dict], dict[str, str]]: + """Return (dataflows dict keyed by full_id, short_id_map).""" + raw = _get(f"{BASE_URL}/structure/dataflow") + dataflows: dict[str, dict] = {} + short_id_map: dict[str, str] = {} + + for df in raw.get("data", raw).get("dataflows", []): + full_id = df.get("id", "") + agency_id = df.get("agencyID", "") + version = df.get("version", "") + names = df.get("names", {}) + name = (names.get("en", "") if isinstance(names, dict) else "") or df.get( + "name", full_id + ) + struct_urn = df.get("structure", "") + short_id = full_id.split("@")[-1] if "@" in full_id else full_id + dsd_key = "" + m = _DSD_URN_RE.search(struct_urn) + + if m: + dsd_key = f"{m.group(1)}:{m.group(2)}({m.group(3)})" + + annotations: dict[str, str] = {} + for ann in df.get("annotations", []): + ann_type = ann.get("type", "") + if ann_type: + ann_text = ann.get("title", "") or ann.get("text", "") + if isinstance(ann_text, dict): + ann_text = ann_text.get("en", next(iter(ann_text.values()), "")) + annotations[ann_type] = str(ann_text) if ann_text else "" + + dataflows[full_id] = { + "short_id": short_id, + "agency_id": agency_id, + "version": version, + "name": name, + "_dsd_key": dsd_key, + "annotations": annotations, + } + short_id_map[short_id] = full_id + + return dataflows, short_id_map + + +# --------------------------------------------------------------------------- +# Step 2 -- Fetch all DSDs (bulk) +# --------------------------------------------------------------------------- + + +def fetch_all_dsds() -> dict[str, dict]: + """Fetch all DSDs in one call. 
Returns {agency:id(ver): parsed_dsd}.""" + raw = _get(f"{BASE_URL}/structure/datastructure") + result: dict[str, dict] = {} + + for dsd in raw.get("data", raw).get("dataStructures", []): + dsd_id = dsd.get("id", "") + dsd_agency = dsd.get("agencyID", "") + dsd_version = dsd.get("version", "") + key = f"{dsd_agency}:{dsd_id}({dsd_version})" + + dims: list[dict] = [] + components = dsd.get("dataStructureComponents", {}) + for dim in components.get("dimensionList", {}).get("dimensions", []): + dim_id = dim.get("id", "") + position = dim.get("position", len(dims)) + local_repr = dim.get("localRepresentation", {}) + enum_urn = local_repr.get("enumeration", "") + cl_id = _extract_codelist_id(enum_urn) if enum_urn else "" + names = dim.get("names", {}) + dim_name = ( + names.get("en", "") if isinstance(names, dict) else "" + ) or dim.get("name", dim_id) + dims.append( + { + "id": dim_id, + "position": position, + "codelist_id": cl_id, + "name": dim_name, + } + ) + dims.sort(key=lambda d: d["position"]) + + time_dims = components.get("dimensionList", {}).get("timeDimensions", []) + result[key] = { + "dimensions": dims, + "has_time_dimension": bool(time_dims), + } + + return result + + +# --------------------------------------------------------------------------- +# Step 3 -- Fetch all codelists (bulk) +# --------------------------------------------------------------------------- + + +def fetch_all_codelists() -> tuple[ + dict[str, dict[str, str]], + dict[str, dict[str, str]], + dict[str, dict[str, str]], + dict[str, dict[str, str]], + list[dict], +]: + """Fetch all codelists in one call, with hierarchical codelists. + + Uses ``?references=hierarchicalcodelist`` so the single bulk call + returns both the flat codelists **and** any associated hierarchical + codelists (HCLs), avoiding per-agency HCL fetches. + + Returns + ------- + tuple + (codelists_by_id, codelist_descriptions_by_id, codelist_parents_by_id, + codelist_comp_rules_by_id, raw_hcls). + Keys are fully-qualified ``agency:id(version)`` strings so that + codelists from different agencies (e.g. multiple ``CL_MEASURE`` + variants) never collide. + codelist_descriptions_by_id maps {cl_key: {code: description}}. + codelist_parents_by_id maps {cl_key: {code: parent_code}} — only + for codelists whose codes have a ``parent`` field. + codelist_comp_rules_by_id maps {cl_key: {code: comp_rule_string}} — + extracted from ``COMP_RULE`` annotations on codes. + raw_hcls is the list of hierarchicalCodelist dicts from the response. 
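+
+    Usage sketch (mirrors the unpacking in ``main()`` below)::
+
+        by_id, descs, parents, comp_rules, raw_hcls = fetch_all_codelists()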
+ """ + raw = _get(f"{BASE_URL}/structure/codelist?references=hierarchicalcodelist") + by_id: dict[str, dict[str, str]] = {} + descs_by_id: dict[str, dict[str, str]] = {} + parents_by_id: dict[str, dict[str, str]] = {} + comp_rules_by_id: dict[str, dict[str, str]] = {} + + for cl in raw.get("data", raw).get("codelists", []): + bare_id = cl.get("id", "") + agency = cl.get("agencyID", "") + version = cl.get("version", "") + cl_id = f"{agency}:{bare_id}({version})" if agency and version else bare_id + + codes: dict[str, str] = {} + descs: dict[str, str] = {} + parents: dict[str, str] = {} + comp_rules: dict[str, str] = {} + + for code in cl.get("codes", []): + code_id = code.get("id", "") + names = code.get("names", {}) + label = ( + names.get("en", "") if isinstance(names, dict) else "" + ) or code.get("name", code_id) + codes[code_id] = label + + d = code.get("descriptions", {}) + desc = (d.get("en", "") if isinstance(d, dict) else "") or code.get( + "description", "" + ) + descs[code_id] = desc or label + + parent = code.get("parent") + if parent: + parents[code_id] = parent + + # Extract COMP_RULE annotation — defines the composition of + # aggregate codes (e.g. "CP045+CP0722" for Energy). + for ann in code.get("annotations", []): + if ann.get("type") == "COMP_RULE": + rule = ann.get("title", "") + if rule: + comp_rules[code_id] = rule + break + + if cl_id: + by_id[cl_id] = codes + descs_by_id[cl_id] = descs + if parents: + parents_by_id[cl_id] = parents + if comp_rules: + comp_rules_by_id[cl_id] = comp_rules + + raw_hcls: list[dict] = raw.get("data", raw).get("hierarchicalCodelists", []) + print(f" {len(raw_hcls)} hierarchical codelists included via ?references") + + return by_id, descs_by_id, parents_by_id, comp_rules_by_id, raw_hcls + + +# --------------------------------------------------------------------------- +# Step 3b -- Infer orphan parents from COMP_RULE annotations +# --------------------------------------------------------------------------- + + +def infer_orphan_parents( + parents_by_id: dict[str, dict[str, str]], + comp_rules_by_id: dict[str, dict[str, str]], + codelists_by_id: dict[str, dict[str, str]], +) -> None: + """Infer parents for orphan codes using COMP_RULE annotations. + + Mutates *parents_by_id* in place. For each codelist that has both + parent data and COMP_RULE annotations, this resolves orphan codes + (codes without an explicit ``parent``) by computing the closest + common ancestor of the COMP_RULE component codes. + + For example, if ``CP041T043`` has COMP_RULE ``CP041+CP042+CP043`` and + all three components have ``parent=CP04``, then ``CP041T043`` is + inferred to be a child of ``CP04``. + """ + for cl_id, comp_rules in comp_rules_by_id.items(): + parents = parents_by_id.get(cl_id) + if parents is None: + # Codelist doesn't have any parent info — COMP_RULE alone + # can't tell us the hierarchy. + continue + codelist_codes = codelists_by_id.get(cl_id, {}) + if not codelist_codes: + continue + + for code, rule in comp_rules.items(): + if code in parents: + # Already has an explicit parent. + continue + + # Parse COMP_RULE: "CP045+CP0722" → ["CP045", "CP0722"] + components = [c.strip() for c in rule.split("+") if c.strip()] + if not components: + continue + + # Find closest common ancestor of all component codes. 
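+            # Illustrative (from the docstring above): COMP_RULE
+            # "CP041+CP042+CP043" whose parts all have parent "CP04"
+            # attaches the orphan code under "CP04".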
+ ancestor = _closest_common_ancestor(components, parents) + if ancestor and ancestor in codelist_codes: + parents[code] = ancestor + + # Second pass: codes that are referenced as parent by other codes + # but don't themselves have a parent → they're root nodes. We don't + # assign them a synthetic parent — they're genuinely top-level. + + +# --------------------------------------------------------------------------- +# Step 3c -- Fetch hierarchical codelists (HCLs) +# --------------------------------------------------------------------------- + +_HCL_CODE_URN_RE = re.compile(r"Code=([^:]+):([^(]+)\(([^)]+)\)\.(.+)") + + +def _extract_hcl_parents( + hierarchical_codes: list[dict], + parent_id: str | None = None, + result: dict[str, str] | None = None, +) -> dict[str, str]: + """Recursively extract parent-child relationships from nested HCL codes.""" + if result is None: + result = {} + for hc in hierarchical_codes: + code_id = hc.get("codeID", hc.get("id", "")) + if parent_id and code_id not in result: + result[code_id] = parent_id + children = hc.get("hierarchicalCodes", []) + if children: + _extract_hcl_parents(children, code_id, result) + return result + + +def parse_hierarchical_codelists( + raw_hcls: list[dict], + referenced_cl_ids: set[str], +) -> dict[str, dict[str, str]]: + """Parse pre-fetched hierarchical codelists into parent-child maps. + + The HCL data is returned by the bulk codelist fetch when using + ``?references=hierarchicalcodelist`` — no extra API calls needed. + + Parameters + ---------- + raw_hcls + The list of hierarchicalCodelist dicts from the bulk response. + referenced_cl_ids + The set of fully-qualified codelist keys (``agency:id(ver)``) + that are actually used by dataflow dimensions. + + Returns + ------- + dict + ``{cl_key: {code: parent_code}}`` for each codelist that has + HCL hierarchy data. + """ + if not raw_hcls: + return {} + + # Map "agency:bare_id(" prefix → actual fully-qualified key. + _prefix_to_key: dict[str, str] = {} + for key in referenced_cl_ids: + m = _CL_URN_RE.search(f"Codelist={key}") + if m: + _prefix_to_key[f"{m.group(1)}:{m.group(2)}("] = key + + # Parse HCLs: group extracted parents by base codelist. + result: dict[str, dict[str, str]] = {} + for hcl in raw_hcls: + for h in hcl.get("hierarchies", []): + top_codes = h.get("hierarchicalCodes", []) + if not top_codes: + continue + # Identify target codelist from the first code's URN. + urn = top_codes[0].get("code", "") + m = _HCL_CODE_URN_RE.search(urn) + if not m: + continue + hcl_agency = m.group(1) + hcl_cl = m.group(2) + # Map to actual key via version-agnostic prefix. + prefix = f"{hcl_agency}:{hcl_cl}(" + actual_key = _prefix_to_key.get(prefix) + if not actual_key: + continue + parents = _extract_hcl_parents(top_codes) + if parents: + result.setdefault(actual_key, {}).update(parents) + + return result + + +def merge_hcl_parents( + codelist_parents: dict[str, dict[str, str]], + hcl_parents: dict[str, dict[str, str]], + codelists_by_id: dict[str, dict[str, str]], +) -> int: + """Merge HCL-derived parents into *codelist_parents* for orphan codes. + + Only fills in parents for codes that: + 1. Don't already have a parent (from the flat codelist or COMP_RULE). + 2. Both child and parent exist in the flat codelist. + + Mutates *codelist_parents* in place. Returns the number of parents added. 
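+
+    Usage sketch (as called from ``main()``)::
+
+        n_added = merge_hcl_parents(
+            codelist_parents, hcl_parents, codelists_by_id
+        )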
+ """ + added = 0 + for cl_key, hcl_map in hcl_parents.items(): + existing = codelist_parents.get(cl_key, {}) + flat_codes = set(codelists_by_id.get(cl_key, {})) + if not flat_codes: + continue + for code, parent in hcl_map.items(): + if code in existing: + continue # already has a parent + if code in flat_codes and parent in flat_codes: + if cl_key not in codelist_parents: + codelist_parents[cl_key] = {} + codelist_parents[cl_key][code] = parent + added += 1 + return added + + +def _closest_common_ancestor( + codes: list[str], + parents: dict[str, str], +) -> str | None: + """Compute the closest common ancestor of *codes* in the parent tree. + + Returns ``None`` if no common ancestor can be determined (e.g. codes + are from different root branches or not found in the parent map). + """ + + def _ancestors(code: str) -> list[str]: + """Return ancestor chain from immediate parent to root.""" + chain: list[str] = [] + visited: set[str] = set() + current = code + while current in parents: + p = parents[current] + if p in visited: + break # cycle guard + visited.add(p) + chain.append(p) + current = p + return chain + + # Build ancestor chains for each component. + chains: list[list[str]] = [] + for code in codes: + if code not in parents: + # Component doesn't have a parent — it's a root code. + # The common ancestor must include this root. + return None + chain = _ancestors(code) + if not chain: + return None + chains.append(chain) + + if not chains: + return None + + # Walk the first chain from nearest ancestor outward; the closest + # common ancestor is the first one that appears in ALL other chains. + ancestor_sets = [set(c) for c in chains[1:]] + for ancestor in chains[0]: + if all(ancestor in s for s in ancestor_sets): + return ancestor + + return None + + +# --------------------------------------------------------------------------- +# Step 3c -- Fetch external DSDs for dataflows not in the bulk result +# --------------------------------------------------------------------------- + +_STRUCTURE_ACCEPT = "application/vnd.sdmx.structure+json; version=1.0; charset=utf-8" + + +def fetch_external_dsds( + dataflows: dict[str, dict], + datastructures: dict[str, dict], + codelists_by_id: dict[str, dict[str, str]], + codelist_descriptions: dict[str, dict[str, str]], + codelist_parents: dict[str, dict[str, str]], + codelist_comp_rules: dict[str, dict[str, str]], +) -> dict[str, dict]: + """Fetch DSDs for dataflows whose structures are external references. + + Some dataflows (e.g. DF_BTIGE, DF_CRS) have their DSD hosted on a + different OECD subdomain (sti-public instead of public). The bulk + ``/structure/datastructure`` call doesn't include these. + + This function: + 1. Identifies dataflows that have no entry in *datastructures* + 2. Fetches each dataflow's structure endpoint individually + 3. Follows ``isExternalReference`` links to get the real DSD + 4. Merges the external codelists into the provided dicts + + Returns the additional DSDs dict to be merged with the main dsds. 
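+
+    Usage sketch (as wired in ``main()``)::
+
+        ext_dsds = fetch_external_dsds(
+            dataflows,
+            datastructures,
+            codelists_by_id,
+            codelist_descriptions,
+            codelist_parents,
+            codelist_comp_rules,
+        )
+        dsds.update(ext_dsds)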
+ """ + missing = { + full_id: df_meta + for full_id, df_meta in dataflows.items() + if full_id not in datastructures + } + + if not missing: + return {} + + print(f" Fetching external DSDs for {len(missing)} dataflows...") + ext_dsds: dict[str, dict] = {} + + for full_id, df_meta in missing.items(): + agency = df_meta.get("agency_id", "") + version = df_meta.get("version", "") + url = ( + f"{BASE_URL}/structure/dataflow/{agency}/{full_id}/{version}" + "?references=all&detail=referencepartial" + ) + try: + raw = _get(url) + except Exception: # noqa: BLE001 + print(f" WARN: failed to fetch structure for {full_id}") + continue + + raw_data = raw.get("data", raw) + raw_dsd_list = raw_data.get("dataStructures", []) + + if not raw_dsd_list: + for df in raw_data.get("dataflows", []): + if not df.get("isExternalReference"): + continue + for link in df.get("links", []): + href = link.get("href", "") + if not href: + continue + ext_url = f"{href}?references=all&detail=referencepartial" + try: + ext_raw = _get(ext_url) + ext_data = ext_raw.get("data", ext_raw) + raw_dsd_list = ext_data.get("dataStructures", []) + if raw_dsd_list: + raw_data = ext_data + break + except Exception: # noqa: S112 + continue + if raw_dsd_list: + break + + if not raw_dsd_list: + print(f" WARN: no DSD found for {full_id} (even via external refs)") + continue + + for dsd in raw_dsd_list: + dsd_id = dsd.get("id", "") + dsd_agency = dsd.get("agencyID", "") + dsd_version = dsd.get("version", "") + key = f"{dsd_agency}:{dsd_id}({dsd_version})" + + dims: list[dict] = [] + components = dsd.get("dataStructureComponents", {}) + for dim in components.get("dimensionList", {}).get("dimensions", []): + dim_id = dim.get("id", "") + position = dim.get("position", len(dims)) + local_repr = dim.get("localRepresentation", {}) + enum_urn = local_repr.get("enumeration", "") + cl_id = _extract_codelist_id(enum_urn) if enum_urn else "" + names = dim.get("names", {}) + dim_name = ( + names.get("en", "") if isinstance(names, dict) else "" + ) or dim.get("name", dim_id) + dims.append( + { + "id": dim_id, + "position": position, + "codelist_id": cl_id, + "name": dim_name, + } + ) + dims.sort(key=lambda d: d["position"]) + + time_dims = components.get("dimensionList", {}).get("timeDimensions", []) + ext_dsds[key] = { + "dimensions": dims, + "has_time_dimension": bool(time_dims), + } + break + + for cl in raw_data.get("codelists", []): + bare_id = cl.get("id", "") + cl_agency = cl.get("agencyID", "") + cl_version = cl.get("version", "") + cl_id = ( + f"{cl_agency}:{bare_id}({cl_version})" + if cl_agency and cl_version + else bare_id + ) + + if cl_id in codelists_by_id: + continue + + codes: dict[str, str] = {} + descs: dict[str, str] = {} + parents: dict[str, str] = {} + comp_rules: dict[str, str] = {} + + for code in cl.get("codes", []): + code_id = code.get("id", "") + code_names = code.get("names", {}) + label = ( + code_names.get("en", "") if isinstance(code_names, dict) else "" + ) or code.get("name", code_id) + codes[code_id] = label + + d = code.get("descriptions", {}) + desc = (d.get("en", "") if isinstance(d, dict) else "") or code.get( + "description", "" + ) + if desc and desc != label: + descs[code_id] = desc + + parent = code.get("parent", "") + if parent: + parents[code_id] = parent + + for ann in code.get("annotations", []): + if ann.get("id") == "COMP_RULE" or ann.get("type") == "COMP_RULE": + comp_rules[code_id] = ann.get("value", ann.get("text", "")) + + codelists_by_id[cl_id] = codes + if descs: + codelist_descriptions[cl_id] = 
descs + if parents: + codelist_parents[cl_id] = parents + if comp_rules: + codelist_comp_rules[cl_id] = comp_rules + + print(f" {full_id}: {len(raw_dsd_list)} DSD(s)") + + return ext_dsds + + +# --------------------------------------------------------------------------- +# Step 4 -- Join: map each dataflow to its DSD +# --------------------------------------------------------------------------- + + +def join_dataflows_to_structures( + dataflows: dict[str, dict], + dsds: dict[str, dict], +) -> dict[str, dict]: + """Build {full_id: dsd} for every dataflow that has a DSD.""" + datastructures: dict[str, dict] = {} + unmatched = 0 + + # Build a version-agnostic index so we can fall back when the version + # embedded in a dataflow's structure URN differs from the version that + # the bulk /structure/datastructure endpoint returned. + # Maps "agency:dsd_id" → first matching full key "agency:dsd_id(ver)". + _base_to_key: dict[str, str] = {} + for k in dsds: + base = k.rsplit("(", 1)[0] # strip "(version)" suffix + _base_to_key.setdefault(base, k) + + for full_id, df_meta in dataflows.items(): + dsd_key = df_meta.get("_dsd_key", "") + dsd = dsds.get(dsd_key) + + if not dsd and dsd_key: + # Version mismatch fallback: strip version and match by agency:id. + base = dsd_key.rsplit("(", 1)[0] + fallback = _base_to_key.get(base) + if fallback: + dsd = dsds.get(fallback) + + if not dsd: + # Last resort: infer DSD id from the "@"-prefixed full_id + # (e.g. "DSD_STES@DF_CLI" → dsd_id "DSD_STES") and the + # dataflow's own agency_id / version metadata. + agency_id = df_meta.get("agency_id", "") + version = df_meta.get("version", "") + if "@" in full_id and agency_id: + dsd_prefix = full_id.split("@", 1)[0] + # Try exact inferred key first, then version-agnostic. + inferred_key = f"{agency_id}:{dsd_prefix}({version})" + dsd = dsds.get(inferred_key) + if not dsd: + inferred_base = f"{agency_id}:{dsd_prefix}" + fallback = _base_to_key.get(inferred_base) + if fallback: + dsd = dsds.get(fallback) + + if not dsd: + unmatched += 1 + continue + + # Copy so multiple dataflows sharing a DSD don't collide. + datastructures[full_id] = { + "dimensions": [dict(d) for d in dsd["dimensions"]], + "has_time_dimension": dsd.get("has_time_dimension", True), + } + + return datastructures + + +# --------------------------------------------------------------------------- +# Step 6 -- Derive parameters and indicators +# --------------------------------------------------------------------------- + +_INDICATOR_DIMENSION_CANDIDATES = ( + "MEASURE", + "INDICATOR", + "SUBJECT", + "TRANSACTION", + "ACTIVITY", + "PRODUCT", + "SERIES", + "ITEM", + "ACCOUNTING_ENTRY", + "SECTOR", +) + +_COUNTRY_DIMENSION_CANDIDATES = ( + "REF_AREA", + "COUNTERPART_AREA", + "JURISDICTION", + "COUNTRY", + "AREA", +) + +# Dimensions that are NEVER indicators — they describe how data is +# measured, adjusted, or transformed, not what is being measured. 
+_NON_INDICATOR_DIMENSIONS = ( + "FREQ", + "ADJUSTMENT", + "TRANSFORMATION", + "UNIT_MEASURE", + "UNIT_MULT", + "CURRENCY_DENOM", + "CURRENCY", + "VALUATION", + "PRICE_BASE", + "CONSOLIDATION", + "MATURITY", + "METHODOLOGY", + "TABLE_IDENTIFIER", + "TIME_PERIOD", + "COUNTERPART_AREA", + "DEBT_BREAKDOWN", +) + + +def derive_parameters( + datastructures: dict[str, dict], + codelists_by_id: dict[str, dict[str, str]], +) -> dict[str, dict[str, list[dict]]]: + """Build {full_id: {dim_id: [{label, value}, ...]}}.""" + result: dict[str, dict[str, list[dict]]] = {} + + for full_id, dsd in datastructures.items(): + params: dict[str, list[dict]] = {} + + for dim in dsd.get("dimensions", []): + dim_id = dim["id"] + + if dim_id == "TIME_PERIOD": + continue + + cl_id = dim.get("codelist_id", "") + codes = codelists_by_id.get(cl_id, {}) if cl_id else {} + params[dim_id] = [ + {"label": label, "value": code} for code, label in sorted(codes.items()) + ] + + result[full_id] = params + + return result + + +def derive_indicators( + datastructures: dict[str, dict], + dataflow_parameters: dict[str, dict[str, list[dict]]], + codelist_descriptions: dict[str, dict[str, str]], + codelist_parents: dict[str, dict[str, str]] | None = None, +) -> dict[str, dict]: + """Build {full_id: {dims: [{dim_id, codes: [...]}]}}. + + Every content dimension (not country, not freq, not metadata) gets + its codes enumerated so each individual code is a searchable + indicator that produces time series. + """ + if codelist_parents is None: + codelist_parents = {} + _skip = ( + set(_COUNTRY_DIMENSION_CANDIDATES) + | set(_NON_INDICATOR_DIMENSIONS) + | {"FREQ", "TIME_PERIOD"} + ) + result: dict[str, dict] = {} + + for full_id, dsd in datastructures.items(): + params = dataflow_parameters.get(full_id, {}) + dims_sorted = sorted( + dsd.get("dimensions", []), key=lambda d: d.get("position", 0) + ) + + # Identify ALL content dimensions. 
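+        # e.g. MEASURE or ACTIVITY qualifies here, while REF_AREA, FREQ,
+        # and UNIT_MEASURE were already excluded via _skip.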
+ content_dims: list[str] = [] + for d in dims_sorted: + d_id = d["id"] + if d_id not in _skip and d_id in params and params[d_id]: + content_dims.append(d_id) + + if not content_dims: + result[full_id] = {"dims": []} + continue + + dim_codelist_map: dict[str, str] = {} + for d in dsd.get("dimensions", []): + dim_codelist_map[d["id"]] = d.get("codelist_id", "") + + dims_list: list[dict] = [] + seen_codes: set[str] = set() + + for dim_id in content_dims: + cl_id = dim_codelist_map.get(dim_id, "") + descriptions = codelist_descriptions.get(cl_id, {}) + parents = codelist_parents.get(cl_id, {}) + codes: list[dict] = [] + + for entry in params[dim_id]: + code = entry["value"] + if code in seen_codes: + continue + seen_codes.add(code) + desc = descriptions.get(code, entry["label"]) + item: dict = {"indicator": code, "label": entry["label"]} + if desc != entry["label"]: + item["description"] = desc + if code in parents: + item["parent"] = parents[code] + codes.append(item) + + if codes: + dims_list.append({"dim_id": dim_id, "codes": codes}) + + result[full_id] = {"dims": dims_list} + + return result + + +# --------------------------------------------------------------------------- +# Step 7 -- Fetch topic taxonomy +# --------------------------------------------------------------------------- + + +def _parse_category_tree( + categories: list[dict], + prefix: str = "", +) -> tuple[list[dict], dict[str, str]]: + """Recursively parse a category scheme into a tree and flat name map.""" + tree: list[dict] = [] + names: dict[str, str] = {} + for cat in categories: + cid = cat.get("id", "") + cnames = cat.get("names", {}) + name = (cnames.get("en", "") if isinstance(cnames, dict) else "") or cat.get( + "name", cid + ) + path = f"{prefix}.{cid}" if prefix else cid + names[path] = name + children, child_names = _parse_category_tree(cat.get("categories", []), path) + names.update(child_names) + tree.append( + { + "id": cid, + "name": name, + "path": path, + "children": children, + } + ) + return tree, names + + +def fetch_taxonomy( + dataflows: dict[str, dict], +) -> tuple[list[dict], dict[str, str], dict[str, list[str]], dict[str, list[str]]]: + """Return (taxonomy_tree, category_names, df_to_categories, category_to_dfs).""" + + cs_raw = _get(f"{BASE_URL}/structure/categoryscheme/OECD/OECDCS1") + schemes = cs_raw.get("data", cs_raw).get("categorySchemes", []) + if not schemes: + return [], {}, {}, {} + + tree, category_names = _parse_category_tree(schemes[0].get("categories", [])) + + cat_raw = _get(f"{BASE_URL}/structure/categorisation") + raw_cats = cat_raw.get("data", cat_raw).get("categorisations", []) + + seen: dict[tuple[str, str], str] = {} + for entry in raw_cats: + src = entry.get("source", "") + tgt = entry.get("target", "") + m_df = _CATEGORISATION_DF_RE.search(src) + m_cat = _CATEGORISATION_CAT_RE.search(tgt) + if not m_df or not m_cat: + continue + agency = m_df.group(1) + dsd_df = m_df.group(2) + version = m_df.group(3) + cat_path = m_cat.group(1) + full_ext = f"{agency}:{dsd_df}" + key = (full_ext, cat_path) + if version >= seen.get(key, ""): + seen[key] = version + + df_to_cats: dict[str, list[str]] = defaultdict(list) + cat_to_dfs: dict[str, list[str]] = defaultdict(list) + for (ext_id, cat_path), _ in seen.items(): + dsd_df = ext_id.split(":", 1)[-1] if ":" in ext_id else ext_id + if dsd_df not in dataflows: + continue + if cat_path not in df_to_cats[dsd_df]: + df_to_cats[dsd_df].append(cat_path) + if dsd_df not in cat_to_dfs[cat_path]: + cat_to_dfs[cat_path].append(dsd_df) + + return 
tree, category_names, dict(df_to_cats), dict(cat_to_dfs) + + +# --------------------------------------------------------------------------- +# Step 8 -- Fetch content constraints (batch) +# --------------------------------------------------------------------------- + +_CC_DF_URN_RE = re.compile(r"Dataflow=([^:]+):([^(]+)\(([^)]+)\)") + + +def fetch_all_constraints( + dataflows: dict[str, dict], +) -> dict[str, dict[str, list[str]]]: + """Fetch all content constraints for agencies that own dataflows. + + Uses the bulk ``/structure/contentconstraint/{agency}`` endpoint so + that we need only one API call per agency instead of one per dataflow. + + Returns + ------- + dict + ``{dataflow_full_id: {dim_id: [value, ...]}}`` + """ + # Collect distinct agencies that own at least one dataflow. + agencies: set[str] = set() + for df_meta in dataflows.values(): + aid = df_meta.get("agency_id", "") + if aid: + agencies.add(aid) + + result: dict[str, dict[str, list[str]]] = {} + agencies_sorted = sorted(agencies) + + for i, agency in enumerate(agencies_sorted, 1): + url = f"{BASE_URL}/structure/contentconstraint/{agency}" + raw = None + for attempt in range(6): + try: + resp = _session.get(url, timeout=120) + if resp.status_code == 404: + break + if resp.status_code == 429: + wait = 20 * (attempt + 1) + print( + f" [{i}/{len(agencies_sorted)}] {agency}: 429 — waiting {wait}s" + ) + time.sleep(wait) + continue + if resp.status_code != 200: + break + raw = resp.json() + # Pace after successful response to stay under rate limit. + time.sleep(3) + break + except Exception: # noqa: S112, BLE001 + if attempt < 5: + time.sleep(3 * (attempt + 1)) + if raw is None: + continue + print(f" [{i}/{len(agencies_sorted)}] {agency}") + + for cc in raw.get("data", raw).get("contentConstraints", []): + # Identify the attached dataflow. + attachment = cc.get("constraintAttachment", {}) + df_urns = attachment.get("dataflows", []) + if not df_urns: + continue + m = _CC_DF_URN_RE.search(df_urns[0]) + if not m: + continue + df_full_id = m.group(2) # e.g. "DSD_NAMAIN10@DF_TABLE1" + if df_full_id not in dataflows: + continue + + # Parse cube regions. + for region in cc.get("cubeRegions", []): + dim_constraints: dict[str, list[str]] = {} + for kv in region.get("keyValues", []): + dim_id = kv.get("id", "") + vals = kv.get("values", []) + if dim_id and vals: + dim_constraints[dim_id] = sorted(vals) + if dim_constraints: + existing = result.get(df_full_id, {}) + for dim_id, vals in dim_constraints.items(): + prev = set(existing.get(dim_id, [])) + existing[dim_id] = sorted(prev | set(vals)) + result[df_full_id] = existing + + return result + + +# --------------------------------------------------------------------------- +# Step 9 -- Build table map (TABLE_IDENTIFIER → metadata) +# --------------------------------------------------------------------------- + + +def build_table_map( + datastructures: dict[str, dict], + dataflow_constraints: dict[str, dict[str, list[str]]], + codelists_by_id: dict[str, dict[str, str]], + codelist_descriptions: dict[str, dict[str, str]], +) -> dict[str, dict]: + """Build a table map: ``{table_id: {name, description, dataflows}}``. + + A "table" is an OECD National Accounts TABLE_IDENTIFIER code. The map + connects each table ID to its human-readable name and the list of + dataflows that serve data for that table. + + For composite/grouping codes whose names reference other table IDs + (e.g. 
``"Tables 0101, 0102 and 0103"``), dataflows are propagated + from the referenced sub-tables so that the composite code also has a + non-empty dataflow list. + + Returns + ------- + dict + ``{table_id: {"name": str, "description": str, "dataflows": [full_id, ...]}}`` + """ + # 1. Identify which codelists are used as TABLE_IDENTIFIER. + table_codelist_ids: set[str] = set() + for dsd in datastructures.values(): + for dim in dsd.get("dimensions", []): + if dim.get("id") == "TABLE_IDENTIFIER": + cl_id = dim.get("codelist_id", "") + if cl_id: + table_codelist_ids.add(cl_id) + + # 2. Collect all table IDs with names/descriptions from those codelists. + table_map: dict[str, dict] = {} + for cl_id in table_codelist_ids: + codes = codelists_by_id.get(cl_id, {}) + descs = codelist_descriptions.get(cl_id, {}) + for code, label in codes.items(): + if code not in table_map: + desc = descs.get(code, label) + table_map[code] = { + "name": label, + "description": desc if desc != label else "", + "dataflows": [], + } + + # 3. Map table IDs to dataflows using constraints. + for df_full_id, constraints in dataflow_constraints.items(): + table_ids = constraints.get("TABLE_IDENTIFIER", []) + for tid in table_ids: + if tid in table_map and df_full_id not in table_map[tid]["dataflows"]: + table_map[tid]["dataflows"].append(df_full_id) + + # 4. Propagate dataflows to composite / grouping codes. + # Parse names like "Tables 0101, 0102 and 0103" to find sub-table IDs, + # then inherit their dataflows. + _SUBTABLE_RE = re.compile(r"\b(\d{4})\b") + for tid, entry in table_map.items(): + if entry["dataflows"]: + continue # already directly mapped + name = entry["name"] + # Look for 4-digit codes in the name that reference other table IDs. + matches = _SUBTABLE_RE.findall(name) + sub_ids = [f"T{m}" for m in matches if f"T{m}" in table_map and f"T{m}" != tid] + if sub_ids: + inherited: set[str] = set() + for sub_id in sub_ids: + inherited.update(table_map[sub_id]["dataflows"]) + if inherited: + entry["dataflows"] = sorted(inherited) + entry["sub_tables"] = sub_ids + + # Sort dataflow lists for deterministic output. + for entry in table_map.values(): + entry["dataflows"].sort() + + return table_map + + +def main() -> None: + """Generate the shipped oecd_cache.json.xz file.""" + t0 = time.time() + print("Generating OECD cache... this will take a few minutes...") + + # 1. Dataflows + dataflows, short_id_map = fetch_dataflows() + + # 2. All DSDs + dsds = fetch_all_dsds() + + # 3. All codelists (includes hierarchical codelists via ?references) + ( + codelists_by_id, + codelist_descriptions, + codelist_parents, + codelist_comp_rules, + raw_hcls, + ) = fetch_all_codelists() + + # 4. Join: map every dataflow to its DSD + datastructures = join_dataflows_to_structures(dataflows, dsds) + + # 4a. Fetch external DSDs for dataflows not in the bulk result. + ext_dsds = fetch_external_dsds( + dataflows, + datastructures, + codelists_by_id, + codelist_descriptions, + codelist_parents, + codelist_comp_rules, + ) + if ext_dsds: + dsds.update(ext_dsds) + ext_structures = join_dataflows_to_structures(dataflows, ext_dsds) + datastructures.update(ext_structures) + print(f" External DSDs resolved: {len(ext_structures)} dataflows") + + # 4b. Remap dimension codelist_id to match actual keys in the cache. + # DSDs may reference v1.0 but the bulk codelist fetch returned v1.2. + # Build a prefix→actual_key lookup and update dimensions in place. 
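+    # Illustrative: a dimension pointing at "OECD.SDD.STES:CL_MEASURE(1.0)"
+    # would be rewritten to the fetched "OECD.SDD.STES:CL_MEASURE(1.2)" key
+    # (the versions here are hypothetical).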
+    _cl_prefix_map: dict[str, str] = {}  # "OECD.SDD.STES:CL_MEASURE(" → actual key
+
+    for key in codelists_by_id:
+        m = _CL_URN_RE.search(f"Codelist={key}")  # reuse existing regex
+        if m:
+            prefix = f"{m.group(1)}:{m.group(2)}("
+            _cl_prefix_map[prefix] = key
+
+    remapped = 0
+
+    for dsd in datastructures.values():
+        for dim in dsd.get("dimensions", []):
+            cl_id = dim.get("codelist_id", "")
+            if cl_id and cl_id not in codelists_by_id:
+                # Try prefix match (same agency:id, different version).
+                m = _CL_URN_RE.search(f"Codelist={cl_id}")
+                if m:
+                    prefix = f"{m.group(1)}:{m.group(2)}("
+                    actual = _cl_prefix_map.get(prefix)
+                    if actual:
+                        dim["codelist_id"] = actual
+                        remapped += 1
+
+    # 5. Filter codelists to only those referenced by dimensions
+    referenced_cl_ids: set[str] = set()
+
+    for dsd in datastructures.values():
+        for dim in dsd.get("dimensions", []):
+            cl_id = dim.get("codelist_id", "")
+            if cl_id:
+                referenced_cl_ids.add(cl_id)
+
+    codelists_by_id = {
+        k: v for k, v in codelists_by_id.items() if k in referenced_cl_ids
+    }
+    codelist_parents = {
+        k: v for k, v in codelist_parents.items() if k in referenced_cl_ids
+    }
+    codelist_comp_rules = {
+        k: v for k, v in codelist_comp_rules.items() if k in referenced_cl_ids
+    }
+
+    # 5b. Infer orphan parents using COMP_RULE annotations.
+    infer_orphan_parents(codelist_parents, codelist_comp_rules, codelists_by_id)
+
+    # 5c. Parse hierarchical codelists (already fetched with codelists).
+    print(" Parsing hierarchical codelists...")
+    hcl_parents = parse_hierarchical_codelists(raw_hcls, referenced_cl_ids)
+    print(f" HCL hierarchies for {len(hcl_parents)} codelists")
+    n_hcl = merge_hcl_parents(codelist_parents, hcl_parents, codelists_by_id)
+    print(f" HCL parents merged: {n_hcl}")
+
+    # Only keep descriptions that differ from the label (saves ~90% of space).
+    codelist_descriptions_trimmed: dict[str, dict[str, str]] = {}
+
+    for cl_id in referenced_cl_ids:
+        descs = codelist_descriptions.get(cl_id, {})
+        labels = codelists_by_id.get(cl_id, {})
+        differing = {
+            code: desc for code, desc in descs.items() if desc != labels.get(code, "")
+        }
+
+        if differing:
+            codelist_descriptions_trimmed[cl_id] = differing
+
+    codelist_descriptions = codelist_descriptions_trimmed
+
+    # 6a. Derive parameters
+    print(" Deriving parameters...")
+    dataflow_parameters = derive_parameters(datastructures, codelists_by_id)
+
+    # 6b. Derive indicators
+    print(" Deriving indicators...")
+    dataflow_indicators = derive_indicators(
+        datastructures,
+        dataflow_parameters,
+        codelist_descriptions,
+        codelist_parents,
+    )
+
+    # 7. Taxonomy
+    print(" Fetching taxonomy...")
+    taxonomy_tree, category_names, df_to_categories, category_to_dfs = fetch_taxonomy(
+        dataflows
+    )
+
+    # 8. Content constraints (batch fetch per agency).
+    print(" Fetching content constraints...")
+    dataflow_constraints = fetch_all_constraints(dataflows)
+
+    # 9. Table map: TABLE_IDENTIFIER → {name, description, dataflows}.
+    print(" Building table map...")
+    table_map = build_table_map(
+        datastructures,
+        dataflow_constraints,
+        codelists_by_id,
+        codelist_descriptions,
+    )
+
+    # 10. Strip temporary keys from dataflows before persisting.
+    for df in dataflows.values():
+        df.pop("_dsd_key", None)
+
+    # 11. Write cache
+    print(" Compressing and writing cache...")
+    # Note: dataflow_parameters is NOT persisted — it's 18+ MB because it
+    # duplicates codelist codes across every dataflow.
It's trivially + # derivable at runtime from datastructures + codelists (just look up the + # codelist for each dimension). dataflow_indicators IS persisted because + # it's the non-trivial derived data that search_indicators() needs. + blob = { + "dataflows": dataflows, + "datastructures": datastructures, + "codelists": codelists_by_id, + "codelist_parents": codelist_parents, + "codelist_descriptions": codelist_descriptions, + "codelist_comp_rules": codelist_comp_rules, + "dataflow_constraints": dataflow_constraints, + "dataflow_indicators": dataflow_indicators, + "table_map": table_map, + "short_id_map": short_id_map, + "taxonomy_tree": taxonomy_tree, + "df_to_categories": df_to_categories, + "category_to_dfs": category_to_dfs, + "category_names": category_names, + } + + ASSETS_DIR.mkdir(parents=True, exist_ok=True) + + with lzma.open(CACHE_FILE, "wb", format=lzma.FORMAT_XZ, preset=6) as fh: + fh.write(json.dumps(blob, separators=(",", ":")).encode()) + + size_mb = CACHE_FILE.stat().st_size / (1024 * 1024) + elapsed = time.time() - t0 + n_indicators = sum( + sum(len(d.get("codes", [])) for d in v.get("dims", [])) + for v in dataflow_indicators.values() + ) + n_tables = len(table_map) + n_constraints = len(dataflow_constraints) + print( + f"Wrote {CACHE_FILE} ({size_mb:.1f} MB, " + f"{len(dataflows)} dataflows, {n_indicators} indicators, " + f"{n_constraints} constrained dataflows, {n_tables} table IDs) " + f"in {elapsed:.0f}s" + ) + + +if __name__ == "__main__": + main() diff --git a/openbb_platform/providers/oecd/openbb_oecd/utils/helpers.py b/openbb_platform/providers/oecd/openbb_oecd/utils/helpers.py index cfbad7d8ff4..2576461a2d8 100644 --- a/openbb_platform/providers/oecd/openbb_oecd/utils/helpers.py +++ b/openbb_platform/providers/oecd/openbb_oecd/utils/helpers.py @@ -1,262 +1,568 @@ -"""OECD helper functions.""" +"""OECD helper functions. -import ssl -from datetime import date -from io import StringIO -from pathlib import Path -from typing import Any, Literal - -import requests -import urllib3 -from defusedxml.ElementTree import fromstring -from openbb_core.app.model.abstract.error import OpenBBError -from openbb_core.app.utils import get_user_cache_directory -from openbb_core.provider import helpers -from pandas import DataFrame, read_csv, read_parquet, to_datetime - -cache = get_user_cache_directory() + "/oecd" -# Create the cache directory if it does not exist -Path(cache).mkdir(parents=True, exist_ok=True) - -# OECD does not play well with newer python. This code block from stackoverflow helps -# to create a custom session: - - -class CustomHttpAdapter(requests.adapters.HTTPAdapter): # type: ignore - """Transport adapter" that allows us to use custom ssl_context.""" - - def __init__(self, ssl_context=None, **kwargs): - """Initialize the adapter with a custom ssl_context.""" - self.ssl_context = ssl_context - super().__init__(**kwargs) - - # pylint: disable=arguments-differ - def init_poolmanager(self, connections, maxsize, block=False): # type: ignore - """Initialize the poolmanager with a custom ssl_context.""" - self.poolmanager = urllib3.poolmanager.PoolManager( # pylint: disable=attribute-defined-outside-init - num_pools=connections, - maxsize=maxsize, - block=block, - ssl_context=self.ssl_context, - ) - - -# pylint: enable=arguments-differ +Compatibility layer that re-exports utilities from the new infrastructure +(query_builder and metadata). All data fetching goes through +OecdQueryBuilder.fetch_data(). 
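+
+Example (sketch; the exact date returned depends on parse_time_period)::
+
+    from openbb_oecd.utils.helpers import oecd_date_to_python_date
+
+    d = oecd_date_to_python_date("2024-Q3")  # a datetime.date, or None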
+""" +# pylint: disable=R0916, W0212 -def get_legacy_session(): - """Create a custom session.""" - ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH) - ctx.options |= 0x4 # OP_LEGACY_SERVER_CONNECT - session = requests.Session() - session.mount("https://", CustomHttpAdapter(ctx)) - return session +from __future__ import annotations +import re +import unicodedata +from collections import defaultdict +from datetime import date +from typing import TYPE_CHECKING, Any -def fetch_data(url: str, csv_kwargs: dict | None = None, **kwargs: Any) -> DataFrame: - """Create a session and fetch data from the OECD API.""" - session = get_legacy_session() - response = helpers.make_request(url, session=session, **kwargs) - if csv_kwargs is None: - csv_kwargs = {} - # Pass any additional arguments to read_csv. This will likely need to be skiplines - # or a delimiter. - data = read_csv(StringIO(response.text), **csv_kwargs) - return data - +from openbb_core.app.model.abstract.error import OpenBBError +from openbb_oecd.utils.query_builder import parse_time_period -### The functions below are for using the new oecd data-explorer instead of the stats.oecd +if TYPE_CHECKING: + from openbb_oecd.utils.metadata import OecdMetadata -def oecd_xml_to_df(xml_string: str) -> DataFrame: - """Parse the OECD XML and return a dataframe. +def oecd_date_to_python_date(input_date: str | int) -> date | None: + """Convert an SDMX time-period string to a Python date. Parameters ---------- - xml_string : str - A string containing the OECD XML data. + input_date : str | int + SDMX time-period string (e.g. "2024", "2024-Q3"). Returns ------- - DataFrame - A Pandas DataFrame containing the parsed data from the XML string. + date | None + Parsed date, or None when *input_date* is empty or + cannot be parsed. """ - root = fromstring(xml_string) - - namespaces = { - "message": "http://www.sdmx.org/resources/sdmxml/schemas/v2_1/message", - "generic": "http://www.sdmx.org/resources/sdmxml/schemas/v2_1/data/generic", + raw = str(input_date).strip() if input_date else "" + if not raw: + return None + s = parse_time_period(raw) + try: + return date.fromisoformat(s) + except (ValueError, TypeError): + try: + return date.fromisoformat(s[:10]) + except (ValueError, TypeError): + return None + + +# Cleanup mapping for verbose/ugly OECD labels -> concise names. +# Applied after ASCII normalization. +_LABEL_OVERRIDES: dict[str, str] = { + "china_people_s_republic_of": "china", + "czech_republic": "czechia", + "korea_republic_of": "korea", + "european_union_27_countries_from_01_02_2020": "eu27", + "european_union_15_countries": "eu15", + "european_union_22_countries_in_oecd": "eu22_oecd", + "euro_area_evolving_composition": "euro_area", + "oecd_excluding_the_euro_area": "oecd_non_euro_area", + "major_five_asia_economies": "asia5", + "major_four_european_countries": "europe4", + "dynamic_asian_economies": "dae", + "other_major_oil_producers": "other_major_oil", + "rest_of_the_world": "rest_of_world", +} + + +def normalize_country_label(label: str) -> str: + """Normalize a country label to ASCII lower_snake_case. 
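+
+    Accents are stripped, punctuation collapses to underscores, and
+    verbose OECD labels are shortened via ``_LABEL_OVERRIDES``.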
+
+    Examples
+    --------
+    >>> normalize_country_label("United States")
+    'united_states'
+    >>> normalize_country_label("China (People's Republic of)")
+    'china'
+    """
+    # Decompose Unicode chars, then drop combining marks (accents)
+    s = unicodedata.normalize("NFKD", label)
+    s = "".join(c for c in s if not unicodedata.combining(c))
+    s = s.strip().lower()
+    # Keep only ASCII letters, digits, and whitespace
+    s = re.sub(r"[^a-z0-9\s]", " ", s)
+    s = re.sub(r"\s+", "_", s).strip("_")
+    return _LABEL_OVERRIDES.get(s, s)
+
+
+def parse_search_query(query: str) -> list[list[str]]:
+    """Parse a search query into AND-groups of OR-terms.
+
+    Supports + (AND) and | (OR) operators and double-quoted phrases.
+    Whitespace between terms means AND; ``|`` ORs the next term into
+    the current group.
+
+    Examples
+    --------
+    >>> parse_search_query('gdp "per capita" | income')
+    [['gdp'], ['per capita', 'income']]
+    >>> parse_search_query('consumer + price')
+    [['consumer'], ['price']]
+    """
+    tokens: list[str] = []
+    for match in re.finditer(r'"([^"]+)"|(\S+)', query):
+        tokens.append(match.group(1) or match.group(2))
+
+    groups: list[list[str]] = []
+    or_next = False
+    for token in tokens:
+        low = token.lower()
+        if low == "+":
+            or_next = False  # explicit AND separator
+        elif low == "|":
+            or_next = True  # OR the next term into the current group
+        elif or_next and groups:
+            groups[-1].append(low)
+            or_next = False
+        else:
+            groups.append([low])
+            or_next = False
+
+    return [g for g in groups if g]
+
+
+# Dimensions to exclude from compound-code matching — they are handled
+# separately via dedicated parameters (country, frequency).
+_EXCLUDE_DIMS = frozenset(
+    {
+        "FREQUENCY",
+        "FREQ",
+        "TIME_PERIOD",
+        "REF_AREA",
+        "COUNTERPART_AREA",
+        "JURISDICTION",
+        "COUNTRY",
+        "AREA",
+    }
+)
+
+
+def _parse_compound_code(
+    code: str,
+    code_to_dimension: dict[str, str],
+) -> tuple[list[tuple[str, str]], list[str]]:
+    """Parse a compound code like ``CPI_CP01_N`` into dimension matches.
+
+    Uses greedy matching to find the longest combination first.
+
+    Returns
+    -------
+    tuple
+        ``(matched_parts, unmatched_parts)`` where *matched_parts* is a
+        list of ``(dimension_id, matched_code)`` tuples.
+    """
+    parts = code.split("_")
+    matched_parts: list[tuple[str, str]] = []
+    unmatched_parts: list[str] = []
+
+    i = 0
+    while i < len(parts):
+        matched = False
+        # Try longest possible combination first (greedy).
+        for j in range(len(parts), i, -1):
+            combined = "_".join(parts[i:j])
+            if combined in code_to_dimension:
+                dim_id = code_to_dimension[combined]
+                # Don't match the same dimension twice.
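+                    # e.g. for "CPI_CP01_N" (the docstring example), "CPI",
+                    # "CP01", and "N" must each land on distinct dimensions.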
+ if not any(m[0] == dim_id for m in matched_parts): + matched_parts.append((dim_id, combined)) + i = j + matched = True + break + if not matched: + unmatched_parts.append(parts[i]) + i += 1 + + return matched_parts, unmatched_parts + + +def _build_dimension_lookups( + dataflow: str, + metadata: OecdMetadata | None = None, +) -> tuple[dict[str, str], dict[str, set[str]], list[str]]: + """Build lookups for mapping codes to dimensions. + Returns + ------- + tuple + ``(code_to_dimension, codes_by_dimension, dimension_order)`` -def check_cache_exists_and_valid(cache_str: str, cache_method: str = "csv") -> bool: - """Check if the cache exists and is valid. + - *code_to_dimension*: maps any valid code to its dimension ID + - *codes_by_dimension*: maps dimension ID to its set of valid codes + - *dimension_order*: list of dimension IDs in DSD position order + (excluding country/frequency/time dimensions) + """ + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.metadata import OecdMetadata + + if metadata is None: + metadata = OecdMetadata() + + code_to_dimension: dict[str, str] = {} + codes_by_dimension: dict[str, set[str]] = defaultdict(set) + dimension_order: list[str] = [] + + try: + all_params = metadata.get_dataflow_parameters(dataflow) + + for dim_id, values in all_params.items(): + if dim_id.upper() in _EXCLUDE_DIMS: + continue + for v in values: + code = v.get("value") + if code: + codes_by_dimension[dim_id].add(code) + if code not in code_to_dimension: + code_to_dimension[code] = dim_id + except Exception: # noqa: BLE001, S110 + pass + + # Build dimension order from DSD (excluding country/freq/time/transform). + try: + full_id = metadata._resolve_dataflow_id(dataflow) + metadata._ensure_structure(full_id) + dsd = metadata.datastructures.get(full_id, {}) + + trailing_dims = { + "FREQUENCY", + "FREQ", + "TIME_PERIOD", + "TRANSFORMATION", + "UNIT_MEASURE", + "ADJUSTMENT", + } + for dim in sorted(dsd.get("dimensions", []), key=lambda d: d["position"]): + dim_id = dim.get("id", "") + if ( + dim_id + and dim_id.upper() not in trailing_dims + and dim_id.upper() not in _EXCLUDE_DIMS + and "TRANSFORM" not in dim_id.upper() + ): + dimension_order.append(dim_id) + except Exception: # noqa: BLE001, S110 + pass + + return code_to_dimension, dict(codes_by_dimension), dimension_order + + +def detect_indicator_dimensions( + dataflow: str, + indicator_codes: list[str], + metadata: OecdMetadata | None = None, +) -> dict[str, list[str]]: + """Detect which dimension(s) each indicator code belongs to. + + Supports compound codes that span multiple dimensions, decomposing + them via greedy matching against all DSD dimension codelists. Parameters ---------- - cache_str : str - The base cache to check for. - cache_method : str, optional - The method used for caching (default is 'csv'). + dataflow : str + OECD dataflow ID. + indicator_codes : list[str] + One or more indicator codes (may be compound, e.g. ``"CPI_CP01_N"``). + metadata : OecdMetadata, optional + Metadata singleton. Created lazily if not provided. Returns ------- - bool - True if the cache exists and is valid for the current day, False otherwise. + dict[str, list[str]] + ``{dimension_id: [code, ...]}``. + + Raises + ------ + OpenBBError + If any indicator code cannot be resolved for the dataflow. """ - # TODO: add setting to disable cache for tests - - if cache_method not in ["csv", "parquet"]: - raise NotImplementedError("Currently only working with parquet or csv") - # First check that the cache exists. 
This will be a parquet/csv and a timestamp - cache_path = f"{cache_str}.{cache_method}" - time_cache_path = f"{cache_str}.timestamp" - if Path(cache_path).exists() and Path(time_cache_path).exists(): - # Now check that the cache is valid. I am going to check that we write to a file the date the cache was made - # Read the timestamp - with open(time_cache_path) as f: - cached_date = f.read().strip() - # TODO: More robust caching logic - return cached_date == str(date.today()) - return False - - -def write_to_cache(cache_str: str, data: DataFrame, cache_method: str) -> None: - """Write data to the cache. + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.metadata import OecdMetadata + + if metadata is None: + metadata = OecdMetadata() + + dimension_codes: dict[str, list[str]] = defaultdict(list) + + try: + code_to_dimension, codes_by_dimension, dimension_order = ( + _build_dimension_lookups(dataflow, metadata) + ) + + # Determine a reasonable primary dimension for wildcards. + primary_dim = _guess_primary_dimension(codes_by_dimension) + + invalid_codes: list[tuple[str, list[str]]] = [] + for code in indicator_codes: + if code == "*": + dimension_codes[primary_dim].append(code) + elif code in code_to_dimension: + dim_id = code_to_dimension[code] + dimension_codes[dim_id].append(code) + else: + # Try compound-code decomposition. + matched_parts, unmatched = _parse_compound_code(code, code_to_dimension) + if matched_parts and not unmatched: + for dim_id, code_part in matched_parts: + if code_part not in dimension_codes[dim_id]: + dimension_codes[dim_id].append(code_part) + else: + invalid_codes.append((code, unmatched)) + + if invalid_codes: + _raise_invalid_codes_error( + dataflow, + invalid_codes, + code_to_dimension, + codes_by_dimension, + dimension_order, + ) + + except OpenBBError: + raise + except Exception: # noqa: BLE001 + # Fallback: put all codes in a generic INDICATOR dimension. + dimension_codes["INDICATOR"] = indicator_codes + + return dict(dimension_codes) + + +def _guess_primary_dimension( + codes_by_dimension: dict[str, set[str]], +) -> str: + """Return the most likely primary indicator dimension name.""" + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.metadata import _INDICATOR_DIMENSION_CANDIDATES + + for candidate in _INDICATOR_DIMENSION_CANDIDATES: + if candidate in codes_by_dimension: + return candidate + # Fallback: first dimension with codes, or generic "INDICATOR". 
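+    # Illustrative (dimension ID hypothetical): {"TOPIC": {...}} with no
+    # candidate hit falls through to "TOPIC".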
+ if codes_by_dimension: + return next(iter(codes_by_dimension)) + return "INDICATOR" + + +def _raise_invalid_codes_error( + dataflow: str, + invalid_codes: list[tuple[str, list[str]]], + code_to_dimension: dict[str, str], + codes_by_dimension: dict[str, set[str]], + dimension_order: list[str], +) -> None: + """Build and raise a detailed error for invalid indicator codes.""" + country_dims = {"COUNTRY", "REF_AREA"} + error_parts: list[str] = [] + + for code, unmatched in invalid_codes: + if not unmatched: + error_parts.append(f"'{code}'") + continue + + parts = code.split("_") + segments: list[tuple[str, str | None]] = [] + + i = 0 + while i < len(parts): + found = False + for j in range(len(parts), i, -1): + combined = "_".join(parts[i:j]) + if combined in code_to_dimension: + segments.append((combined, code_to_dimension[combined])) + i = j + found = True + break + if not found: + segments.append((parts[i], None)) + i += 1 + + has_country = any(dim_id in country_dims for _, dim_id in segments if dim_id) + effective_order = ( + dimension_order + if has_country + else [d for d in dimension_order if d not in country_dims] + ) + + first_matched_idx: Any = None + first_matched_pos: Any = None + for idx, (_, dim_id) in enumerate(segments): + if dim_id and dim_id in effective_order: + first_matched_idx = idx + first_matched_pos = effective_order.index(dim_id) + break + + segment_errors: list[str] = [] + for idx, (seg, dim_id) in enumerate(segments): + if dim_id is not None: + continue + if first_matched_idx is not None and first_matched_pos is not None: + expected_pos = first_matched_pos - (first_matched_idx - idx) + else: + expected_pos = idx + + if 0 <= expected_pos < len(effective_order): + expected_dim = effective_order[expected_pos] + sample = sorted(codes_by_dimension.get(expected_dim, set()))[:5] + segment_errors.append( + f"'{seg}' is invalid for {expected_dim} (valid: {', '.join(sample)})" + ) + else: + segment_errors.append(f"'{seg}' is unrecognized") + + error_parts.append(f"'{code}': {'; '.join(segment_errors)}") + + raise OpenBBError( + f"Invalid indicator code(s) for dataflow '{dataflow}': " + f"{'; '.join(error_parts)}. " + f"Use `obb.economy.available_indicators(provider='oecd', dataflows='{dataflow}')` " + f"to see all valid codes." + ) + + +def detect_transform_dimension( + dataflow: str, + metadata: OecdMetadata | None = None, +) -> tuple[str | None, str | None, dict[str, str], dict[str, str]]: + """Detect transformation and unit dimensions for a dataflow. + + Dynamically finds dimensions containing ``TRANSFORM`` or ``UNIT`` in + their names and builds a user-friendly lookup mapping. Parameters ---------- - cache_str : str - The cache key to write - data : DataFrame - The DataFrame to be cached. - cache_method : str - The method used for caching the data. + dataflow : str + OECD dataflow ID. + metadata : OecdMetadata, optional + Metadata singleton. - Raises - ------ - NotImplementedError - If the cache_method is not 'parquet'. + Returns + ------- + tuple + ``(transform_dim, unit_dim, transform_lookup, unit_lookup)`` + where lookups map friendly names to SDMX codes. 
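+
+    Example (sketch; dataflow ID hypothetical)::
+
+        tdim, udim, tmap, umap = detect_transform_dimension("DF_CPI")
+        yoy_code = tmap.get("yoy")  # SDMX code for year-over-year change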
""" - if cache_method == "parquet": - cache_path = f"{cache_str}.parquet" - data.to_parquet(cache_path, engine="pyarrow") - # Write the current date to a file called cache/function.timestamp - with open(f"{cache_str}.timestamp", "w") as f: - f.write(str(date.today())) - elif cache_method == "csv": - cache_path = f"{cache_str}.csv" - data.to_csv(cache_path) - # Write the current date to a file called cache/function.timestamp - with open(f"{cache_str}.timestamp", "w") as f: - f.write(str(date.today())) - else: - raise NotImplementedError - - -def query_dict_to_path(query_dict: dict) -> str: - """Convert the query dict into something usable for writing file.""" - items = sorted(query_dict.items()) - key_parts = [f"{key}_{value}" for key, value in items] - return "-".join(key_parts).replace("/", "_").replace(" ", "_") - - -def get_possibly_cached_data( - url: str, - function: str | None = None, - query_dict: dict | None = None, - cache_method: Literal["csv", "parquet"] = "csv", - skip_cache: bool = False, -) -> DataFrame: - """Retrieve data from a given URL or from the cache if available and valid. + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.metadata import OecdMetadata + + if metadata is None: + metadata = OecdMetadata() + + transform_dim: str | None = None + unit_dim: str | None = None + transform_lookup: dict[str, str] = {} + unit_lookup: dict[str, str] = {} + + try: + params = metadata.get_dataflow_parameters(dataflow) + + for dim, values in params.items(): + dim_upper = dim.upper() + + if "TRANSFORM" in dim_upper or dim_upper == "ADJUSTMENT": + transform_dim = dim + for v in values: + code = v.get("value", "") + label = v.get("label", "").lower() + + is_simple = ( + not code.startswith("SRP_") + and not code.startswith("WGT") + and not code.startswith("SA_") + ) + + if ( + label == "index" + or ( + "index" in label + and "change" not in label + and "percent" not in label + ) + ) and ("index" not in transform_lookup or is_simple): + transform_lookup["index"] = code + + if ( + "year-over-year" in label + or "yoy" in label + or "year ago" in label + ) and ("yoy" not in transform_lookup or is_simple): + transform_lookup["yoy"] = code + + if ( + "period-over-period" in label + or ( + "period" in label + and "change" in label + and "year" not in label + ) + ) and ("period" not in transform_lookup or is_simple): + transform_lookup["period"] = code + + if ("percent of gdp" in label or "% of gdp" in label) and ( + "percent_gdp" not in transform_lookup or is_simple + ): + transform_lookup["percent_gdp"] = code + + if ("domestic currency" in label or label == "currency") and ( + "currency" not in transform_lookup or is_simple + ): + transform_lookup["currency"] = code + + # Direct code access (case-insensitive). 
+                    transform_lookup[code.lower()] = code
+
+            elif dim_upper in ("UNIT_MEASURE", "UNIT"):
+                unit_dim = dim
+                for v in values:
+                    code = v.get("value", "")
+                    label = v.get("label", "").lower()
+
+                    if "us dollar" in label or label == "usd":
+                        unit_lookup["usd"] = code
+                    if "euro" in label or label == "eur":
+                        unit_lookup["eur"] = code
+                    if "index" in label:
+                        unit_lookup["index"] = code
+                    if "local" in label or "national" in label or "domestic" in label:
+                        unit_lookup["local"] = code
+                    if "percent" in label or "%" in label:
+                        unit_lookup["percent"] = code
+
+                    unit_lookup[code.lower()] = code
+
+    except (KeyError, ValueError):
+        pass
+
+    return transform_dim, unit_dim, transform_lookup, unit_lookup
+
+
+def resolve_country_code(
+    country: str,
+    metadata: OecdMetadata | None = None,
+    dataflow: str | None = None,
+) -> str:
+    """Resolve a single country name or code to an ISO country code.
 
     Parameters
     ----------
-    url : str
-        The URL from which to fetch the data if it's not available in the cache.
-    function : Optional[str], optional
-        The name of the function for which data is being fetched or cached.
-    query_dict : Optional[dict], optional
-        A dictionary containing the query parameters for the function.
-    cache_method : str, optional
-        The method used for caching the data (default is 'csv').
+    country : str
+        Country name or code (e.g. ``"Japan"``, ``"JPN"``).
+    metadata : OecdMetadata, optional
+        Metadata singleton.
+    dataflow : str, optional
+        Dataflow whose country codelist to search. If omitted, returns
+        the upper-cased input.
 
     Returns
    -------
-    DataFrame
-        A Pandas DataFrame containing the fetched or cached data.
+    str
+        Resolved country code, or upper-cased input if resolution fails.
     """
-    base_cache = (
-        f"{cache}/{function}_{query_dict_to_path(query_dict if query_dict else {})}"
-    )
-    use_cache = check_cache_exists_and_valid(
-        cache_str=base_cache, cache_method=cache_method
-    )
-    if use_cache and not skip_cache:
-        cache_path = f"{base_cache}.{cache_method}"
-        if cache_method == "csv":
-            data = read_csv(cache_path)
-        elif cache_method == "parquet":
-            data = read_parquet(cache_path, engine="pyarrow")
-    else:
-        data = parse_url(url)
-        if not skip_cache:
-            write_to_cache(cache_str=base_cache, data=data, cache_method=cache_method)
-    return data
-
-
-def oecd_date_to_python_date(input_date: str | int) -> date:
-    """Date formatter helper."""
-    input_date = str(input_date)
-    if "Q" in input_date:
-        return to_datetime(input_date).to_period("Q").start_time.date()
-    if len(input_date) == 4:
-        return date(int(input_date), 1, 1)
-    if len(input_date) == 7:
-        return to_datetime(input_date).to_period("M").start_time.date()
-    raise OpenBBError("Date not in expected format")
+    # pylint: disable=import-outside-toplevel
+    from openbb_oecd.utils.metadata import OecdMetadata
+
+    if metadata is None:
+        metadata = OecdMetadata()
+
+    if not dataflow:
+        return country.upper().strip()
+
+    try:
+        resolved = metadata.resolve_country_codes(dataflow, country)
+        return resolved[0] if resolved else country.upper().strip()
+    except Exception:  # noqa: BLE001
+        return country.upper().strip()
diff --git a/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/__init__.py b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/__init__.py
new file mode 100644
index 00000000000..698c2b97c65
--- /dev/null
+++ b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/__init__.py
@@ -0,0 +1,57 @@
+"""OECD SDMX metadata package.
+ +Re-exports all public symbols so existing imports continue to work: + + from openbb_oecd.utils.metadata import OecdMetadata, _TABLE_GROUP_CANDIDATES, ... +""" + +from openbb_oecd.utils.metadata._constants import ( + _COUNTRY_DIMENSION_CANDIDATES, + _DATA_ACCEPT_CSV, + _DATA_ACCEPT_CSV_LABELS, + _INDICATOR_DIMENSION_CANDIDATES, + _NON_INDICATOR_DIMENSIONS, + _SHIPPED_CACHE_DIR, + _SHIPPED_CACHE_FILE, + _STRUCTURE_ACCEPT, + _TABLE_GROUP_CANDIDATES, + BASE_URL, +) +from openbb_oecd.utils.metadata._core import OecdMetadata, OECDMetadataDependency +from openbb_oecd.utils.metadata._helpers import ( + _build_code_tree, + _extract_codelist_id_from_urn, + _extract_concept_id_from_urn, + _get_user_cache_file, + _make_request, + _matches_query, + _normalize_label, + _parse_sdmx_json_codelists, + _parse_search_query, + _term_matches, +) + +__all__ = [ + "BASE_URL", + "OecdMetadata", + "OECDMetadataDependency", + "_COUNTRY_DIMENSION_CANDIDATES", + "_DATA_ACCEPT_CSV", + "_DATA_ACCEPT_CSV_LABELS", + "_INDICATOR_DIMENSION_CANDIDATES", + "_NON_INDICATOR_DIMENSIONS", + "_SHIPPED_CACHE_DIR", + "_SHIPPED_CACHE_FILE", + "_STRUCTURE_ACCEPT", + "_TABLE_GROUP_CANDIDATES", + "_build_code_tree", + "_extract_codelist_id_from_urn", + "_extract_concept_id_from_urn", + "_get_user_cache_file", + "_make_request", + "_matches_query", + "_normalize_label", + "_parse_sdmx_json_codelists", + "_parse_search_query", + "_term_matches", +] diff --git a/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_cache_mixin.py b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_cache_mixin.py new file mode 100644 index 00000000000..cc7f60d68e7 --- /dev/null +++ b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_cache_mixin.py @@ -0,0 +1,234 @@ +"""Cache management mixin for OecdMetadata.""" + +# pylint: disable=R0902 +import gzip +import json +import lzma +import warnings +from pathlib import Path + +from openbb_oecd.utils.metadata._constants import _SHIPPED_CACHE_FILE +from openbb_oecd.utils.metadata._helpers import _get_user_cache_file +from openbb_oecd.utils.metadata._typing import _MixinBase + +_XZ_MAGIC = b"\xfd7zXZ\x00" + + +class CacheMixin(_MixinBase): # pylint: disable=abstract-method + """Methods for reading, writing, and applying metadata cache blobs.""" + + @staticmethod + def _read_cache_file(path: Path) -> dict | None: + """Read and return a cache blob, or *None* on any failure.""" + try: + if not path.exists(): + return None + raw = path.read_bytes() + data = ( + lzma.decompress(raw) if raw[:6] == _XZ_MAGIC else gzip.decompress(raw) + ) + return json.loads(data) + except Exception: # noqa: BLE001 + return None + + def _apply_blob(self, blob: dict) -> None: + """Merge a cache *blob* into the current metadata stores.""" + self.dataflows.update(blob.get("dataflows", {})) + self.datastructures.update(blob.get("datastructures", {})) + self.codelists.update(blob.get("codelists", {})) + self._codelist_descriptions.update(blob.get("codelist_descriptions", {})) + self._codelist_parents.update(blob.get("codelist_parents", {})) + self._codelist_comp_rules.update(blob.get("codelist_comp_rules", {})) + self._infer_orphan_parents() + self._dataflow_constraints.update(blob.get("dataflow_constraints", {})) + self._table_map.update(blob.get("table_map", {})) + for k, v in blob.get("dataflow_parameters", {}).items(): + if v: + self._dataflow_parameters_cache[k] = v + raw_indicators = blob.get("dataflow_indicators", {}) + + for full_id, val in raw_indicators.items(): + if isinstance(val, dict) and "dims" in val: 
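+                # Compact "dims" cache format: codes are grouped per
+                # dimension; expand each block into flat indicator entries,
+                # de-duplicating codes across dimensions.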
+ df_meta = self.dataflows.get(full_id, {}) + short_id = df_meta.get( + "short_id", full_id.split("@")[-1] if "@" in full_id else full_id + ) + df_name = df_meta.get("name", short_id) + expanded: list[dict] = [] + seen: set[str] = set() + + for dim_block in val["dims"]: + dim_id = dim_block.get("dim_id", "") + for c in dim_block.get("codes", []): + code = c["indicator"] + if code in seen: + continue + seen.add(code) + entry = { + "dataflow_id": short_id, + "dataflow_name": df_name, + "dimension_id": dim_id, + "indicator": code, + "label": c["label"], + "description": c.get("description", c["label"]), + "symbol": f"{short_id}::{code}", + } + if "parent" in c: + entry["parent"] = c["parent"] + expanded.append(entry) + + self._dataflow_indicators_cache[full_id] = expanded + elif isinstance(val, dict) and "codes" in val: + df_meta = self.dataflows.get(full_id, {}) + short_id = df_meta.get( + "short_id", full_id.split("@")[-1] if "@" in full_id else full_id + ) + df_name = df_meta.get("name", short_id) + dim_id = val.get("dim_id", "") + expanded = [] + + for c in val["codes"]: + entry = { + "dataflow_id": short_id, + "dataflow_name": df_name, + "dimension_id": dim_id, + "indicator": c["indicator"], + "label": c["label"], + "description": c.get("description", c["label"]), + "symbol": f"{short_id}::{c['indicator']}", + } + + if "parent" in c: + entry["parent"] = c["parent"] + + expanded.append(entry) + + self._dataflow_indicators_cache[full_id] = expanded + else: + self._dataflow_indicators_cache[full_id] = val # type: ignore + + self._short_id_map.update(blob.get("short_id_map", {})) + tax = blob.get("taxonomy_tree", []) + + if tax: + self._taxonomy_tree = tax + self._df_to_categories.update(blob.get("df_to_categories", {})) + self._category_to_dfs.update(blob.get("category_to_dfs", {})) + self._category_names.update(blob.get("category_names", {})) + self._taxonomy_loaded = True + + def _infer_orphan_parents(self) -> None: + """Infer parent for orphan codes using COMP_RULE annotations.""" + for cl_id, comp_rules in self._codelist_comp_rules.items(): + parents = self._codelist_parents.get(cl_id) + if parents is None: + continue + for code, rule in comp_rules.items(): + if code in parents: + continue + components = [c.strip() for c in rule.split("+") if c.strip()] + if not components: + continue + ancestor = self._closest_common_ancestor(components, parents) + if ancestor is not None: + parents[code] = ancestor + + @staticmethod + def _closest_common_ancestor( + codes: list[str], parents: dict[str, str] + ) -> str | None: + """Return the nearest ancestor shared by all *codes*, or ``None``.""" + if not codes: + return None + + def _chain(code: str) -> list[str]: + chain: list[str] = [] + visited: set[str] = set() + cur = parents.get(code) + while cur and cur not in visited: + chain.append(cur) + visited.add(cur) + cur = parents.get(cur) + return chain + + ancestor_sets: list[set[str]] = [] + ordered_chains: list[list[str]] = [] + for c in codes: + ch = _chain(c) + ancestor_sets.append(set(ch)) + ordered_chains.append(ch) + + if not ancestor_sets: + return None + + common = ancestor_sets[0] + for s in ancestor_sets[1:]: + common = common & s + + if not common: + return None + + best: str | None = None + best_depth = float("inf") + for ch in ordered_chains: + for depth, ancestor in enumerate(ch): + if ancestor in common and depth < best_depth: + best_depth = depth + best = ancestor + return best + + def _load_from_cache(self) -> bool: + """Load metadata from the shipped cache, then layer user cache 
on top.""" + loaded = False + shipped = self._read_cache_file(_SHIPPED_CACHE_FILE) + if shipped: + self._apply_blob(shipped) + loaded = True + user = self._read_cache_file(_get_user_cache_file()) + if user: + self._apply_blob(user) + loaded = True + if loaded: + if not self._short_id_map and self.dataflows: + self._rebuild_short_id_map() + if self.dataflows: + self._full_catalogue_loaded = True + else: + warnings.warn( + "No OECD metadata cache found; will fetch from API.", + stacklevel=2, + ) + return loaded + + def _save_cache(self) -> None: + """Persist current metadata to the user-writable cache.""" + if not self._cache_dirty: # type: ignore[has-type] + return + try: + cache_file = _get_user_cache_file() + cache_file.parent.mkdir(parents=True, exist_ok=True) + blob = { + "dataflows": self.dataflows, + "datastructures": self.datastructures, + "codelists": self.codelists, + "codelist_descriptions": self._codelist_descriptions, + "codelist_parents": self._codelist_parents, + "codelist_comp_rules": self._codelist_comp_rules, + "dataflow_constraints": self._dataflow_constraints, + "table_map": self._table_map, + "dataflow_parameters": self._dataflow_parameters_cache, + "dataflow_indicators": self._dataflow_indicators_cache, + "short_id_map": self._short_id_map, + "taxonomy_tree": self._taxonomy_tree, + "df_to_categories": self._df_to_categories, + "category_to_dfs": self._category_to_dfs, + "category_names": self._category_names, + } + raw = json.dumps(blob, separators=(",", ":")).encode() + cache_file.write_bytes(gzip.compress(raw, compresslevel=1)) + self._cache_dirty = False + except Exception: # noqa: BLE001 + warnings.warn( + "Failed to persist OECD metadata cache.", + stacklevel=2, + ) diff --git a/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_constants.py b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_constants.py new file mode 100644 index 00000000000..59e7d890eb3 --- /dev/null +++ b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_constants.py @@ -0,0 +1,54 @@ +"""Module-level constants for OECD SDMX metadata.""" + +from pathlib import Path + +BASE_URL = "https://sdmx.oecd.org/public/rest/v2" +_STRUCTURE_ACCEPT = "application/vnd.sdmx.structure+json; version=1.0; charset=utf-8" +_DATA_ACCEPT_CSV = "application/vnd.sdmx.data+csv; charset=utf-8" +_DATA_ACCEPT_CSV_LABELS = "application/vnd.sdmx.data+csv; charset=utf-8; labels=both" +_SHIPPED_CACHE_DIR = Path(__file__).resolve().parent.parent.parent / "assets" +_SHIPPED_CACHE_FILE = _SHIPPED_CACHE_DIR / "oecd_cache.json.xz" + +_INDICATOR_DIMENSION_CANDIDATES = ( + "MEASURE", + "INDICATOR", + "SUBJECT", + "TRANSACTION", + "ACTIVITY", + "PRODUCT", + "SERIES", + "ITEM", + "ACCOUNTING_ENTRY", + "SECTOR", +) +_COUNTRY_DIMENSION_CANDIDATES = ( + "REF_AREA", + "COUNTERPART_AREA", + "JURISDICTION", + "COUNTRY", + "AREA", +) +_TABLE_GROUP_CANDIDATES = ( + "TABLE_IDENTIFIER", + "CHAPTER", +) +_NON_INDICATOR_DIMENSIONS = frozenset( + { + "FREQ", + "ADJUSTMENT", + "TRANSFORMATION", + "UNIT_MEASURE", + "UNIT_MULT", + "CURRENCY_DENOM", + "CURRENCY", + "VALUATION", + "PRICE_BASE", + "CONSOLIDATION", + "MATURITY", + "METHODOLOGY", + "TABLE_IDENTIFIER", + "TIME_PERIOD", + "COUNTERPART_AREA", + "DEBT_BREAKDOWN", + } +) diff --git a/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_core.py b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_core.py new file mode 100644 index 00000000000..0f901e96f6b --- /dev/null +++ b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_core.py @@ -0,0 +1,101 
@@
+"""Core OecdMetadata singleton class assembled from mixins."""
+
+# pylint: disable=R0902
+
+import threading
+from typing import Annotated
+
+from fastapi import Depends
+from openbb_oecd.utils.metadata._cache_mixin import CacheMixin
+from openbb_oecd.utils.metadata._indicator_mixin import IndicatorMixin
+from openbb_oecd.utils.metadata._loader_mixin import LoaderMixin
+from openbb_oecd.utils.metadata._public_api_mixin import PublicApiMixin
+from openbb_oecd.utils.metadata._query_mixin import QueryMixin
+from openbb_oecd.utils.metadata._search_mixin import SearchMixin
+
+
+class OecdMetadata(
+    CacheMixin,
+    LoaderMixin,
+    PublicApiMixin,
+    IndicatorMixin,
+    SearchMixin,
+    QueryMixin,
+):
+    """Thread-safe singleton that lazily loads and caches OECD SDMX metadata.
+
+    Public API
+    ----------
+    list_dataflows(topic=None) → list[dict]
+    list_topics() → list[dict]
+    list_dataflows_by_topic() → list[dict]
+    get_dataflow_info(dataflow_id) → dict
+    get_dataflow_parameters(dataflow_id) → dict[str, list[dict]]
+    resolve_country_codes(dataflow_id, input) → list[str]
+    get_codelist_for_dimension(df_id, dim_id) → dict[str, str]
+    get_indicators_in(dataflow_id) → list[dict]
+    search_indicators(query, dataflows, …) → list[dict]
+    get_dimension_order(dataflow_id) → list[str]
+
+    All public methods are safe to call from any thread.
+    """
+
+    _instance: "OecdMetadata | None" = None
+    _lock = threading.Lock()
+    _codelist_lock = threading.Lock()
+    _initialized: bool = False
+    _search_index: list[tuple[str, dict]] | None = None
+
+    def __new__(cls) -> "OecdMetadata":
+        """Ensure only one instance of OecdMetadata exists."""
+        if cls._instance is None:
+            with cls._lock:
+                if cls._instance is None:
+                    inst = object.__new__(cls)
+                    cls._instance = inst
+        return cls._instance  # type: ignore[return-value]
+
+    def __init__(self) -> None:
+        """Initialize the OecdMetadata class."""
+        if self._initialized:
+            return
+
+        with self._lock:
+            if self._initialized:
+                return
+
+            self.dataflows: dict[str, dict] = {}
+            self.datastructures: dict[str, dict] = {}
+            self.codelists: dict[str, dict[str, str]] = {}
+            self._short_id_map: dict[str, str] = {}
+            self._codelist_descriptions: dict[str, dict[str, str]] = {}
+            self._codelist_parents: dict[str, dict[str, str]] = {}
+            self._codelist_comp_rules: dict[str, dict[str, str]] = {}
+            self._dataflow_constraints: dict[str, dict[str, list[str]]] = {}
+            self._dataflow_parameters_cache: dict[str, dict] = {}
+            self._dataflow_indicators_cache: dict[str, list] = {}
+            self._availability_cache: dict[str, dict[str, list[str]]] = {}
+            self._indicator_dim_cache: dict[str, str | None] = {}
+            self._table_map: dict[str, dict] = {}
+            self._full_catalogue_loaded: bool = False
+            self._taxonomy_tree: list[dict] = []
+            self._df_to_categories: dict[str, list[str]] = {}
+            self._category_to_dfs: dict[str, list[str]] = {}
+            self._category_names: dict[str, str] = {}
+            self._taxonomy_loaded: bool = False
+            self._cache_dirty: bool = False
+            self._load_from_cache()
+            self.__class__._initialized = True
+
+    def __call__(self) -> "OecdMetadata":
+        """Return the singleton instance (supports use as a FastAPI dependency)."""
+        return self
+
+    @classmethod
+    def _reset(cls) -> None:
+        """Destroy the singleton (for testing only)."""
+        with cls._lock:
+            cls._instance = None
+            cls._initialized = False
+
+
+OECDMetadataDependency = Annotated[OecdMetadata, Depends(OecdMetadata)]
diff --git a/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_helpers.py b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_helpers.py
new file mode 100644
index 00000000000..c4b6ae0a3a3
--- /dev/null
+++ b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_helpers.py
@@ -0,0 +1,239 @@
+"""Standalone helper functions for OECD SDMX metadata."""
+
+import re
+from pathlib import Path
+from typing import Any
+
+
+def _get_user_cache_file() -> Path:
+    """Resolve the user-writable cache path via OpenBB core settings."""
+    # pylint: disable=import-outside-toplevel
+    try:
+        from openbb_core.app.utils import get_user_cache_directory
+
+        return Path(get_user_cache_directory()) / "oecd_cache.json.gz"
+    except Exception:  # noqa: BLE001
+        return Path.home() / ".openbb_platform" / "cache" / "oecd_cache.json.gz"
+
+
+def _make_request(url: str, headers: dict | None = None, timeout: int = 30) -> Any:
+    """Make an HTTP GET request, raising immediately on error status (including 429)."""
+    # pylint: disable=import-outside-toplevel
+    from openbb_core.provider.utils.helpers import make_request
+
+    resp = make_request(url, headers=headers, timeout=timeout)
+    resp.raise_for_status()
+
+    return resp
+
+
+def _normalize_label(label: str) -> str:
+    """Normalise a country / concept label to lower_snake_case."""
+    label = re.sub(r"\s*\(.*?\)\s*", "", label)
+    label = label.split(",")[0]
+    label = label.strip().lower().replace("-", "_").replace(" ", "_")
+    label = re.sub(r"_+", "_", label)
+
+    return label.strip("_")
+
+
+def _build_code_tree(
+    codes: dict[str, str],
+    parents: dict[str, str],
+    descriptions: dict[str, str],
+) -> list[dict]:
+    """Build a tree from a flat mapping of codes -> labels using parent refs.
+
+    Parameters
+    ----------
+    codes : dict[str, str]
+        {code: label} for every code that should appear in the tree.
+    parents : dict[str, str]
+        {code: parent_code} for codes that have a parent.
+    descriptions : dict[str, str]
+        {code: description} for extended descriptions.
+
+    Returns
+    -------
+    list[dict]
+        Tree of {'code', 'label', 'description', 'children': [...]} dicts.
+    """
+    nodes: dict[str, dict] = {}
+
+    for code, label in codes.items():
+        nodes[code] = {
+            "code": code,
+            "label": label,
+            "description": descriptions.get(code, label),
+            "children": [],
+        }
+
+    roots: list[dict] = []
+
+    for code in list(nodes):
+        parent = parents.get(code)
+
+        if parent and parent in nodes:
+            nodes[parent]["children"].append(nodes[code])
+        else:
+            roots.append(nodes[code])
+
+    def _sort(items: list[dict]) -> None:
+        items.sort(key=lambda n: n["label"])
+
+        for item in items:
+            if item["children"]:
+                _sort(item["children"])
+
+    _sort(roots)
+
+    return roots
+
+
+def _parse_sdmx_json_codelists(
+    raw: dict,
+) -> tuple[dict[str, dict[str, str]], dict[str, dict[str, str]]]:
+    """Extract codelists and parent hierarchies from an SDMX-JSON structure response.
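+
+    Accepts either the full SDMX-JSON response or its ``data`` member. A
+    minimal, illustrative payload (not a complete OECD response):
+
+    >>> cls_, parents = _parse_sdmx_json_codelists(
+    ...     {"data": {"codelists": [{"id": "CL_FREQ", "agencyID": "OECD",
+    ...      "version": "2.1", "codes": [{"id": "A", "names": {"en": "Annual"}}]}]}}
+    ... )
+    >>> cls_
+    {'OECD:CL_FREQ(2.1)': {'A': 'Annual'}}
+    >>> parents
+    {}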
+ + Returns + ------- + tuple[dict[str, dict[str, str]], dict[str, dict[str, str]]] + (codelists, codelist_parents) where: + - codelists: {cl_id: {code: label}} + - codelist_parents: {cl_id: {code: parent_code}} (only codes that have a parent) + """ + codelists: dict[str, dict[str, str]] = {} + codelist_parents: dict[str, dict[str, str]] = {} + raw_cls = raw.get("data", raw).get("codelists", []) + + for cl in raw_cls: + bare_id = cl.get("id", "") + agency = cl.get("agencyID", "") + version = cl.get("version", "") + cl_id = f"{agency}:{bare_id}({version})" if agency and version else bare_id + codes: dict[str, str] = {} + parents: dict[str, str] = {} + + for code in cl.get("codes", []): + code_id = code.get("id", "") + names = code.get("names", {}) + code_label = ( + names.get("en", "") if isinstance(names, dict) else "" + ) or code.get("name", code_id) + codes[code_id] = code_label + parent = code.get("parent", "") + + if parent: + parents[code_id] = parent + + if cl_id: + codelists[cl_id] = codes + + if parents: + codelist_parents[cl_id] = parents + + return codelists, codelist_parents + + +def _extract_codelist_id_from_urn(urn: str) -> str: + """Extract the fully-qualified codelist key from an SDMX URN string. + + Parameters + ---------- + urn : str + SDMX URN, e.g.:: + + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=OECD.SDD.TPS:CL_REF_AREA(3.0) + urn:sdmx:org.sdmx.infomodel.codelist.Codelist=OECD:CL_FREQ(2.1) + + Returns + ------- + str + The fully-qualified key, e.g. + ``"OECD.SDD.TPS:CL_REF_AREA(3.0)"`` or ``"OECD:CL_FREQ(2.1)"``. + """ + match = re.search(r"=([^=]+:[^(]+\([^)]+\))", urn) + if match: + return match.group(1) + match2 = re.search(r":([^:(]+)\(", urn) + + if match2: + return match2.group(1) + + if ":" in urn: + return urn.rsplit(":", 1)[-1].split("(", maxsplit=1)[0] + + return urn + + +def _extract_concept_id_from_urn(urn: str) -> str: + """Extract the concept ID from an SDMX concept identity URN. + + Parameters + ---------- + urn : str + SDMX concept URN, e.g.:: + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=OECD:CS_COMMON(2.0).FREQ + + Returns + ------- + str + The concept ID, e.g. "FREQ". + """ + if "." in urn: + return urn.rsplit(".", 1)[-1] + + return urn + + +def _parse_search_query(query: str) -> list[list[str]]: + """Parse a search query into OR-groups of AND-terms. + + Semicolon ; separates phrases (OR at phrase level). + Space within a phrase is implicit AND. + Pipe | within a term is OR at term level. + + Returns + ------- + list[list[str]] + [[term1, term2], [term3]] where outer list is OR, + inner lists are AND. + """ + if not query: + return [] + + phrases = [p.strip() for p in query.split(";") if p.strip()] + result: list[list[str]] = [] + + for phrase in phrases: + words = re.sub(r"\s*\|\s*", "|", phrase) + words = words.replace("+", " ") + + terms = [t.strip().lower() for t in words.split() if t.strip()] + + if terms: + result.append(terms) + + return result + + +def _matches_query(text: str, phrases: list[list[str]]) -> bool: + """Check if *text* matches any phrase (OR) where all terms match (AND). + + Within a single term, | is an OR: "gdp|gross" matches if + *either* "gdp" or "gross" is in *text*. 
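+
+    Illustrative doctests for the matching rules above:
+
+    >>> _matches_query("real gdp growth", [["gdp", "growth"]])
+    True
+    >>> _matches_query("unemployment rate", [["gdp|gross"]])
+    False
+    >>> _matches_query("anything", [])
+    True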
+ """ + if not phrases: + return True + + return any( + all(_term_matches(text, term) for term in and_terms) for and_terms in phrases + ) + + +def _term_matches(text: str, term: str) -> bool: + """Check if *term* matches *text*, supporting | as intra-term OR.""" + alternatives = [t.strip() for t in term.split("|") if t.strip()] + + return any(alt in text for alt in alternatives) diff --git a/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_indicator_mixin.py b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_indicator_mixin.py new file mode 100644 index 00000000000..30a243bab29 --- /dev/null +++ b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_indicator_mixin.py @@ -0,0 +1,466 @@ +"""Indicator enumeration, codelist lookup, and country resolution mixin.""" + +# pylint: disable=R0914 +import re + +from openbb_core.app.model.abstract.error import OpenBBError +from openbb_oecd.utils.metadata._constants import ( + _COUNTRY_DIMENSION_CANDIDATES, + _INDICATOR_DIMENSION_CANDIDATES, + _NON_INDICATOR_DIMENSIONS, +) +from openbb_oecd.utils.metadata._helpers import ( + _build_code_tree, + _normalize_label, + _parse_sdmx_json_codelists, +) +from openbb_oecd.utils.metadata._typing import _MixinBase + + +class IndicatorMixin(_MixinBase): # pylint: disable=abstract-method + """Indicator enumeration, codelist lookup, and country resolution.""" + + def _get_indicator_dim(self, full_id: str) -> str | None: + """Return the indicator dimension for *full_id* using cached data only.""" + if full_id in self._indicator_dim_cache: + return self._indicator_dim_cache[full_id] + + dsd = self.datastructures.get(full_id, {}) + dim_ids = {d["id"] for d in dsd.get("dimensions", [])} + + _indicator_set = set(_INDICATOR_DIMENSION_CANDIDATES) + layout_row = ( + self.dataflows.get(full_id, {}).get("annotations", {}).get("LAYOUT_ROW", "") + ) + if layout_row: + for lr_dim in (d.strip() for d in layout_row.split(",") if d.strip()): + if lr_dim in dim_ids and lr_dim in _indicator_set: + self._indicator_dim_cache[full_id] = lr_dim + return lr_dim + + for candidate in _INDICATOR_DIMENSION_CANDIDATES: + if candidate in dim_ids: + self._indicator_dim_cache[full_id] = candidate + return candidate + + _skip = ( + set(_COUNTRY_DIMENSION_CANDIDATES) + | _NON_INDICATOR_DIMENSIONS + | {"FREQ", "TIME_PERIOD"} + ) + for d in sorted(dsd.get("dimensions", []), key=lambda x: x.get("position", 0)): + if d["id"] not in _skip: + self._indicator_dim_cache[full_id] = d["id"] + return d["id"] + + self._indicator_dim_cache[full_id] = None + return None + + def _find_indicator_dimension( + self, dataflow_id: str, indicator_code: str | None = None + ) -> str | None: + """Find the indicator dimension ID for *dataflow_id*.""" + full_id = self._resolve_dataflow_id(dataflow_id) + self._ensure_structure(full_id) + params = self.get_dataflow_parameters(full_id) + + for candidate in _INDICATOR_DIMENSION_CANDIDATES: + if candidate in params and params[candidate]: + if indicator_code: + codes = {e["value"] for e in params[candidate]} + if indicator_code in codes: + return candidate + else: + return candidate + + for d in self.get_dimension_order(full_id): + if ( + d not in _COUNTRY_DIMENSION_CANDIDATES + and d not in _NON_INDICATOR_DIMENSIONS + and d in params + and params[d] + ): + if indicator_code: + codes = {e["value"] for e in params[d]} + if indicator_code in codes: + return d + else: + return d + return None + + def get_codelist_for_dimension( + self, dataflow_id: str, dim_id: str + ) -> dict[str, str]: + """Return {code: 
label} for a specific dimension in a dataflow.""" + full_id = self._resolve_dataflow_id(dataflow_id) + self._ensure_structure(full_id) + dsd = self.datastructures.get(full_id, {}) + + for dim in dsd.get("dimensions", []): + if dim["id"] == dim_id: + cl_id = dim.get("codelist_id", "") + + if cl_id: + cl = dict(self._get_codelist(cl_id, dataflow_id)) + constraints = self._dataflow_constraints.get(full_id, {}) + constrained_codes = constraints.get(dim_id, []) + if constrained_codes and any( + c not in cl for c in constrained_codes + ): + self._ensure_structure(full_id, force=True) + cl = dict(self._get_codelist(cl_id, dataflow_id)) + return cl + + return {} + + return {} + + def resolve_country_codes(self, dataflow_id: str, country_input: str) -> list[str]: + """Resolve user-supplied country string to a list of ISO codes.""" + if not country_input or country_input.strip().lower() in ("all", "*"): + return [] + + country_cl = self._get_country_codelist(dataflow_id) + + if not country_cl: + return [c.strip().upper() for c in country_input.split(",") if c.strip()] + + code_lookup: dict[str, str] = {} + + for code, label in country_cl.items(): + code_lookup[code.upper()] = code + code_lookup[code.lower()] = code + code_lookup[_normalize_label(label)] = code + + resolved: list[str] = [] + parts = [p.strip() for p in country_input.split(",") if p.strip()] + + for part in parts: + key = part.strip() + match = ( + code_lookup.get(key) + or code_lookup.get(key.upper()) + or code_lookup.get(key.lower()) + or code_lookup.get(_normalize_label(key)) + ) + + if match: + resolved.append(match) + else: + available = sorted(country_cl.keys()) + sample = available[:20] + raise OpenBBError( + f"Invalid country '{part}' for dataflow '{dataflow_id}'. " + f"Available codes ({len(available)} total): " + f"{', '.join(sample)}" + (" ..." 
if len(available) > 20 else "") + ) + + return resolved + + def _get_country_codelist(self, dataflow_id: str) -> dict[str, str]: + """Return the country/ref-area codelist for *dataflow_id*, or {}.""" + full_id = self._resolve_dataflow_id(dataflow_id) + if full_id in self.datastructures: + dsd = self.datastructures[full_id] + for dim in dsd.get("dimensions", []): + if dim["id"] in _COUNTRY_DIMENSION_CANDIDATES: + cl_id = dim.get("codelist_id", "") + if cl_id: + return dict(self._get_codelist(cl_id, None)) + with self._codelist_lock: + for key, codes in self.codelists.items(): + if ":CL_AREA(" in key and codes: + return dict(codes) + self._ensure_structure(full_id) + dsd = self.datastructures.get(full_id, {}) + + for dim in dsd.get("dimensions", []): + if dim["id"] in _COUNTRY_DIMENSION_CANDIDATES: + cl_id = dim.get("codelist_id", "") + if cl_id: + return dict(self._get_codelist(cl_id, dataflow_id)) + + return {} + + def _filter_indicators_by_constraints( + self, + dataflow_id: str, + indicators: list[dict], + ) -> list[dict]: + """Filter cached indicator list against embedded constraints.""" + full_id = self._resolve_dataflow_id(dataflow_id) + constraints = self._dataflow_constraints.get(full_id, {}) + if not constraints or not indicators: + return indicators + + filtered: list[dict] = [] + for ind in indicators: + dim_id = ind.get("dimension_id", "") + if ( + not dim_id + or dim_id not in constraints + or ind.get("indicator") in set(constraints[dim_id]) + ): + filtered.append(ind) + return filtered + + def get_indicators_in(self, dataflow_id: str) -> list[dict]: + """Enumerate all series-producing codes across ALL content dimensions.""" + if dataflow_id in self._dataflow_indicators_cache: + return self._filter_indicators_by_constraints( + dataflow_id, self._dataflow_indicators_cache[dataflow_id] + ) + + full_id = self._resolve_dataflow_id(dataflow_id) + + if full_id in self._dataflow_indicators_cache: + return self._filter_indicators_by_constraints( + full_id, self._dataflow_indicators_cache[full_id] + ) + + self._ensure_structure(full_id) + params = self.get_dataflow_parameters(full_id) + df_meta = self.dataflows.get(full_id, {}) + df_name = df_meta.get("name", dataflow_id) + short_df_id = ( + dataflow_id.rsplit("@", 1)[-1] if "@" in dataflow_id else dataflow_id + ) + + _skip = ( + set(_COUNTRY_DIMENSION_CANDIDATES) + | _NON_INDICATOR_DIMENSIONS + | {"FREQ", "TIME_PERIOD"} + ) + + ind_dim = self._get_indicator_dim(full_id) + content_dims: list[str] = [] + if ind_dim and ind_dim in params and params[ind_dim]: + content_dims = [ind_dim] + else: + for d in self.get_dimension_order(full_id): + if d not in _skip and d in params and params[d]: + content_dims.append(d) + + if not content_dims: + self._dataflow_indicators_cache[full_id] = [] + return [] + + constraints = self._dataflow_constraints.get(full_id, {}) + avail_cache: dict[str, set[str] | None] = {} + for dim_id in content_dims: + if dim_id in constraints: + avail_cache[dim_id] = set(constraints[dim_id]) + else: + avail_cache[dim_id] = None + + if all(v is None for v in avail_cache.values()): + try: + avail = self.fetch_availability(full_id) + for dim_id in content_dims: + codes = avail.get(dim_id) + if codes is not None: + avail_cache[dim_id] = set(codes) + except Exception: # noqa: BLE001, S110 + pass + + dsd = self.datastructures.get(full_id, {}) + dim_codelist_map: dict[str, str] = {} + for dim in dsd.get("dimensions", []): + dim_codelist_map[dim["id"]] = dim.get("codelist_id", "") + + indicators: list[dict] = [] + seen_codes: 
set[str] = set() + + for dim_id in content_dims: + cl_id = dim_codelist_map.get(dim_id, "") + descriptions = self._codelist_descriptions.get(cl_id, {}) + parents = self._codelist_parents.get(cl_id, {}) + available_codes = avail_cache.get(dim_id) + + for entry in params[dim_id]: + code = entry["value"] + + if available_codes is not None and code not in available_codes: + continue + if code in seen_codes: + continue + seen_codes.add(code) + + ind: dict = { + "dataflow_id": short_df_id, + "dataflow_name": df_name, + "dimension_id": dim_id, + "indicator": code, + "label": entry["label"], + "description": descriptions.get(code, entry["label"]), + "symbol": f"{short_df_id}::{code}", + } + + if code in parents: + ind["parent"] = parents[code] + + indicators.append(ind) + + self._dataflow_indicators_cache[full_id] = indicators + + return indicators + + def get_indicator_dataflows(self, indicator_code: str) -> list[str]: + """Return all dataflow IDs that contain *indicator_code*.""" + result: list[str] = [] + + for full_id, inds in self._dataflow_indicators_cache.items(): + for ind in inds: + if ind.get("indicator") == indicator_code: + result.append(ind.get("dataflow_id", full_id)) + break + + return result + + def get_codelist_hierarchy(self, codelist_id: str) -> dict[str, str]: + """Return {code: parent_code} for a codelist with parent hierarchy.""" + return dict(self._codelist_parents.get(codelist_id, {})) + + def get_indicator_tree(self, dataflow_id: str) -> list[dict]: + """Return indicators for *dataflow_id* as a hierarchical tree.""" + full_id = self._resolve_dataflow_id(dataflow_id) + self._ensure_structure(full_id) + params = self.get_dataflow_parameters(full_id) + dim_id = self._find_indicator_dimension(full_id) + + if not dim_id: + return [] + + dsd = self.datastructures.get(full_id, {}) + cl_id = "" + + for dim in dsd.get("dimensions", []): + if dim["id"] == dim_id: + cl_id = dim.get("codelist_id", "") + break + + parents = self._codelist_parents.get(cl_id, {}) + descriptions = self._codelist_descriptions.get(cl_id, {}) + + available_codes = {e["value"]: e["label"] for e in params.get(dim_id, [])} + + constraints = self._dataflow_constraints.get(full_id, {}) + + if dim_id in constraints: + constrained = set(constraints[dim_id]) + available_codes = { + k: v for k, v in available_codes.items() if k in constrained + } + + if not available_codes: + return [] + + return _build_code_tree(available_codes, parents, descriptions) + + _CL_KEY_RE = re.compile(r"^(.+):(.+)\((.+)\)$") + + def _find_codelist_by_prefix(self, codelist_id: str) -> dict[str, str] | None: + """Find a codelist by id, merging all versions with the same bare ID.""" + m = self._CL_KEY_RE.match(codelist_id) + + if not m: + return None + + agency = m.group(1) + bare_id = m.group(2) + prefixes = [f"{agency}:{bare_id}("] + parts = agency.split(".") + + while len(parts) > 1: + parts = parts[:-1] + prefixes.append(f"{'.'.join(parts)}:{bare_id}(") + + merged: dict[str, str] = {} + for prefix in prefixes: + for key, codes in self.codelists.items(): + if key.startswith(prefix) and codes: + for code, label in codes.items(): + if code not in merged: + merged[code] = label + + return merged or None + + def _get_codelist( + self, codelist_id: str, _dataflow_id: str | None = None + ) -> dict[str, str]: + """Return {code: label} for *codelist_id*, fetching if needed.""" + with self._codelist_lock: + exact = self.codelists.get(codelist_id) + prefix_match = self._find_codelist_by_prefix(codelist_id) + + if exact and prefix_match and 
prefix_match is not exact: + if len(prefix_match) > len(exact): + merged = dict(prefix_match) + merged.update(exact) + return merged + return exact + if exact: + return exact + if prefix_match: + return prefix_match + + return self._fetch_single_codelist(codelist_id, _dataflow_id) + + def _fetch_single_codelist( + self, codelist_id: str, _dataflow_id: str | None = None + ) -> dict[str, str]: + """Fetch a single codelist from the OECD structure API.""" + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.metadata._constants import ( + _STRUCTURE_ACCEPT, + BASE_URL, + ) + from openbb_oecd.utils.metadata._helpers import ( + _make_request, + ) + + _cl_key_re = re.compile(r"^([^:]+):([^(]+)\(([^)]+)\)$") + m = _cl_key_re.match(codelist_id) + if m: + agency = m.group(1) + bare_id = m.group(2) + version = m.group(3) + else: + bare_id = codelist_id + version = "" + agency = "all" + + if _dataflow_id: + resolved = ( + self._resolve_dataflow_id(_dataflow_id) + if "@" not in _dataflow_id + else _dataflow_id + ) + df_meta = self.dataflows.get(resolved, {}) + + if df_meta.get("agency_id"): + agency = df_meta["agency_id"] + + version_part = f"/{version}" if version else "" + url = f"{BASE_URL}/structure/codelist/{agency}/{bare_id}{version_part}?references=none" + try: + resp = _make_request(url, headers={"Accept": _STRUCTURE_ACCEPT}, timeout=15) + raw = resp.json() + parsed, parsed_parents = _parse_sdmx_json_codelists(raw) + + with self._codelist_lock: + for cl_id, codes in parsed.items(): + if cl_id in self.codelists: + self.codelists[cl_id].update(codes) + else: + self.codelists[cl_id] = codes + + for cl_id, parents in parsed_parents.items(): + if cl_id in self._codelist_parents: + self._codelist_parents[cl_id].update(parents) + else: + self._codelist_parents[cl_id] = parents + self._cache_dirty = True + return self.codelists.get(codelist_id, {}) + except Exception: # noqa: BLE001 + return {} diff --git a/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_loader_mixin.py b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_loader_mixin.py new file mode 100644 index 00000000000..f907a01ccc4 --- /dev/null +++ b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_loader_mixin.py @@ -0,0 +1,524 @@ +"""Dataflow, structure, and taxonomy loading mixin for OecdMetadata.""" + +# pylint: disable=R0914 +import json +import re +import warnings + +from openbb_core.app.model.abstract.error import OpenBBError +from openbb_oecd.utils.metadata._constants import ( + _STRUCTURE_ACCEPT, + BASE_URL, +) +from openbb_oecd.utils.metadata._helpers import ( + _extract_codelist_id_from_urn, + _extract_concept_id_from_urn, + _make_request, + _parse_sdmx_json_codelists, +) +from openbb_oecd.utils.metadata._typing import _MixinBase + + +class LoaderMixin(_MixinBase): # pylint: disable=abstract-method + """Methods for lazy-loading dataflows, structures, taxonomy from SDMX API.""" + + _full_catalogue_loaded: bool + _taxonomy_loaded: bool + + def _ensure_dataflows(self) -> None: + """Lazy-load the full dataflow catalogue if not yet populated.""" + if self._full_catalogue_loaded: + _first = next(iter(self.dataflows.values()), None) + if self.dataflows and _first is not None and not _first.get("annotations"): + self._backfill_annotations() + return + + url = f"{BASE_URL}/structure/dataflow" + resp = _make_request(url, headers={"Accept": _STRUCTURE_ACCEPT}) + + try: + raw = resp.json() + except (json.JSONDecodeError, AttributeError) as exc: + raise OpenBBError( + f"Failed to parse OECD dataflow 
catalogue from {url}" + ) from exc + + raw_dfs = raw.get("data", raw).get("dataflows", []) + + for df in raw_dfs: + full_id = df.get("id", "") + agency_id = df.get("agencyID", "") + version = df.get("version", "") + names = df.get("names", {}) + name = (names.get("en", "") if isinstance(names, dict) else "") or df.get( + "name", full_id + ) + descriptions = df.get("descriptions", {}) + description = ( + descriptions.get("en", "") if isinstance(descriptions, dict) else "" + ) or df.get("description", "") + struct_ref = df.get("structure", "") + short_id = full_id.split("@")[-1] if "@" in full_id else full_id + annotations: dict[str, str] = {} + for ann in df.get("annotations", []): + ann_type = ann.get("type", "") + if ann_type: + ann_text = ann.get("title", "") or ann.get("text", "") + if isinstance(ann_text, dict): + ann_text = ann_text.get("en", next(iter(ann_text.values()), "")) + annotations[ann_type] = str(ann_text) if ann_text else "" + self.dataflows[full_id] = { + "id": full_id, + "short_id": short_id, + "agency_id": agency_id, + "version": version, + "name": name, + "description": description, + "structure_ref": struct_ref, + "annotations": annotations, + } + self._short_id_map[short_id] = full_id + + if self.dataflows: + self._full_catalogue_loaded = True + self._cache_dirty = True + self._save_cache() + + def _backfill_annotations(self) -> None: + """Fetch dataflow catalogue and merge annotations into existing entries.""" + url = f"{BASE_URL}/structure/dataflow" + try: + resp = _make_request(url, headers={"Accept": _STRUCTURE_ACCEPT}) + raw = resp.json() + except Exception: # noqa: BLE001 + return + + for df in raw.get("data", raw).get("dataflows", []): + full_id = df.get("id", "") + if full_id not in self.dataflows: + continue + annotations: dict[str, str] = {} + for ann in df.get("annotations", []): + ann_type = ann.get("type", "") + if ann_type: + ann_text = ann.get("title", "") or ann.get("text", "") + if isinstance(ann_text, dict): + ann_text = ann_text.get("en", next(iter(ann_text.values()), "")) + annotations[ann_type] = str(ann_text) if ann_text else "" + if annotations: + self.dataflows[full_id]["annotations"] = annotations + + self._cache_dirty = True + self._save_cache() + + def _rebuild_short_id_map(self) -> None: + """Rebuild _short_id_map from dataflows.""" + self._short_id_map.clear() + + for full_id, meta in self.dataflows.items(): + short_id = meta.get("short_id", full_id.split("@")[-1]) + self._short_id_map[short_id] = full_id + + _CATEGORISATION_DF_RE = re.compile(r"Dataflow=([^:]+):([^(]+)\(([^)]+)\)") + _CATEGORISATION_CAT_RE = re.compile(r"OECDCS1\([^)]+\)\.(.+)") + + def _ensure_taxonomy(self) -> None: + """Lazy-load the OECD topic taxonomy (category scheme + categorisations).""" + if self._taxonomy_loaded: + return + + self._ensure_dataflows() + cs_url = f"{BASE_URL}/structure/categoryscheme/OECD/OECDCS1" + + try: + resp = _make_request( + cs_url, headers={"Accept": _STRUCTURE_ACCEPT}, timeout=30 + ) + cs_raw = resp.json() + except Exception as exc: + warnings.warn( + f"Failed to fetch OECD category scheme: {exc}", + stacklevel=2, + ) + self._taxonomy_loaded = True + return + + schemes = cs_raw.get("data", cs_raw).get("categorySchemes", []) + + if not schemes: + self._taxonomy_loaded = True + return + + tree, names = self._parse_category_tree(schemes[0].get("categories", [])) + self._taxonomy_tree = tree + self._category_names = names + cat_url = f"{BASE_URL}/structure/categorisation" + + try: + resp2 = _make_request( + cat_url, headers={"Accept": 
_STRUCTURE_ACCEPT}, timeout=30 + ) + cat_raw = resp2.json() + except Exception as exc: + warnings.warn( + f"Failed to fetch OECD categorisations: {exc}", + stacklevel=2, + ) + self._taxonomy_loaded = True + return + + raw_cats = cat_raw.get("data", cat_raw).get("categorisations", []) + self._parse_categorisations(raw_cats) + self._taxonomy_loaded = True + self._save_cache() + + @staticmethod + def _parse_category_tree( + categories: list[dict], + prefix: str = "", + ) -> tuple[list[dict], dict[str, str]]: + """Recursively parse the category scheme into a tree + flat name map.""" + tree: list[dict] = [] + names: dict[str, str] = {} + + for cat in categories: + cid = cat.get("id", "") + cnames = cat.get("names", {}) + name = ( + cnames.get("en", "") if isinstance(cnames, dict) else "" + ) or cat.get("name", cid) + path = f"{prefix}.{cid}" if prefix else cid + names[path] = name + subcats = cat.get("categories", []) + children, child_names = LoaderMixin._parse_category_tree(subcats, path) + names.update(child_names) + tree.append( + { + "id": cid, + "name": name, + "path": path, + "children": children, + } + ) + + return tree, names + + def _parse_categorisations(self, raw_cats: list[dict]) -> None: + """Parse categorisation records into df<->category mappings.""" + # pylint: disable=import-outside-toplevel + from collections import defaultdict + + df_re = self._CATEGORISATION_DF_RE + cat_re = self._CATEGORISATION_CAT_RE + + seen: dict[tuple[str, str], str] = {} + + for entry in raw_cats: + src = entry.get("source", "") + tgt = entry.get("target", "") + + m_df = df_re.search(src) + m_cat = cat_re.search(tgt) + + if not m_df or not m_cat: + continue + + agency = m_df.group(1) + dsd_df = m_df.group(2) + version = m_df.group(3) + cat_path = m_cat.group(1) + + full_id = f"{agency}:{dsd_df}" + key = (full_id, cat_path) + prev_ver = seen.get(key, "") + + if version >= prev_ver: + seen[key] = version + + df_to_cats: dict[str, list[str]] = defaultdict(list) + cat_to_dfs: dict[str, list[str]] = defaultdict(list) + + for (ext_id, cat_path), _ver in seen.items(): + dsd_df = ext_id.split(":", 1)[-1] if ":" in ext_id else ext_id + + if dsd_df not in self.dataflows: + continue + + if cat_path not in df_to_cats[dsd_df]: + df_to_cats[dsd_df].append(cat_path) + + if dsd_df not in cat_to_dfs[cat_path]: + cat_to_dfs[cat_path].append(dsd_df) + + self._df_to_categories = dict(df_to_cats) + self._category_to_dfs = dict(cat_to_dfs) + + def _resolve_dataflow_id(self, dataflow_id: str) -> str: + """Resolve a short or full dataflow id to the canonical full id.""" + if dataflow_id in self.dataflows: + return dataflow_id + + full_id = self._short_id_map.get(dataflow_id) + + if full_id: + return full_id + + self._ensure_dataflows() + + if dataflow_id in self.dataflows: + return dataflow_id + + full_id = self._short_id_map.get(dataflow_id) + + if full_id: + return full_id + + raise OpenBBError( + f"Unknown OECD dataflow: '{dataflow_id}'. Use list_dataflows() to see available dataflows." 
+        )
+
+    def _ensure_description(self, full_id: str) -> None:
+        """Fetch and cache the narrative description for a single dataflow."""
+        if not hasattr(self, "_description_fetched"):
+            self._description_fetched: set = set()  # pylint: disable=W0201
+
+        if full_id in self._description_fetched:
+            return
+        if self.dataflows.get(full_id, {}).get("description"):
+            self._description_fetched.add(full_id)
+            return
+
+        df_meta = self.dataflows.get(full_id, {})
+        agency = df_meta.get("agency_id", "")
+        version = df_meta.get("version", "")
+
+        if not agency or not version:
+            self._description_fetched.add(full_id)
+            return
+
+        try:
+            url = f"{BASE_URL}/structure/dataflow/{agency}/{full_id}/{version}"
+            resp = _make_request(url, headers={"Accept": _STRUCTURE_ACCEPT})
+            raw_dfs = resp.json().get("data", {}).get("dataflows", [])
+
+            for df in raw_dfs:
+                desc_raw = df.get("descriptions", {})
+                desc = (
+                    desc_raw.get("en", "") if isinstance(desc_raw, dict) else ""
+                ) or df.get("description", "")
+
+                if desc:
+                    desc_clean = re.sub(r"<[^>]+>", "", desc)
+                    desc_clean = (
+                        desc_clean.replace("&nbsp;", " ")
+                        .replace("&amp;", "&")
+                        .replace("&lt;", "<")
+                        .replace("&gt;", ">")
+                    )
+                    desc_clean = re.sub(r"[ \t]+", " ", desc_clean).strip()
+                    self.dataflows.setdefault(full_id, {})["description"] = desc_clean
+                    break
+        except Exception:  # noqa: S110
+            pass
+        self._description_fetched.add(full_id)
+
+    def _fetch_external_dsd(
+        self, raw_data: dict, full_id: str  # pylint: disable=unused-argument
+    ) -> tuple[list[dict], dict]:
+        """Follow the external link for dataflows whose DSD isn't on the main API."""
+        for df in raw_data.get("dataflows", []):
+            if not df.get("isExternalReference"):
+                continue
+            for link in df.get("links", []):
+                href = link.get("href", "")
+                if not href:
+                    continue
+                ext_url = f"{href}?references=all&detail=referencepartial"
+                try:
+                    ext_resp = _make_request(
+                        ext_url, headers={"Accept": _STRUCTURE_ACCEPT}, timeout=30
+                    )
+                    ext_raw = ext_resp.json()
+                    ext_data = ext_raw.get("data", ext_raw)
+                    ext_dsds = ext_data.get("dataStructures", [])
+                    if ext_dsds:
+                        return ext_dsds, ext_data
+                except Exception:  # noqa: S112
+                    continue
+        return [], raw_data
+
+    def _ensure_structure(self, dataflow_id: str, *, force: bool = False) -> None:
+        """Lazy-load the DSD, codelists and concept schemes for *dataflow_id*."""
+        full_id = self._resolve_dataflow_id(dataflow_id)
+
+        if full_id in self.datastructures and not force:
+            return
+
+        df_meta = self.dataflows[full_id]
+        agency = df_meta["agency_id"]
+        version = df_meta["version"]
+        url = f"{BASE_URL}/structure/dataflow/{agency}/{full_id}/{version}?references=all&detail=referencepartial"
+        resp = _make_request(url, headers={"Accept": _STRUCTURE_ACCEPT})
+
+        try:
+            raw = resp.json()
+        except (json.JSONDecodeError, AttributeError) as exc:
+            raise OpenBBError(
+                f"Failed to parse OECD structure for {full_id} from {url}"
+            ) from exc
+
+        raw_data = raw.get("data", raw)
+        raw_dsds = raw_data.get("dataStructures", [])
+
+        if not raw_dsds:
+            raw_dsds, raw_data = self._fetch_external_dsd(raw_data, full_id)
+            self._dataflow_parameters_cache.pop(full_id, None)
+
+        for dsd in raw_dsds:
+            dsd_id = dsd.get("id", "")
+            dsd_agency = dsd.get("agencyID", "")
+            dsd_version = dsd.get("version", "")
+            dimensions = self._parse_dimension_list(dsd)
+            attributes = self._parse_attribute_list(dsd)
+            components = dsd.get("dataStructureComponents", {})
+            time_dims = components.get("dimensionList", {}).get("timeDimensions", [])
+            has_time_dimension = bool(time_dims)
+            self.datastructures[full_id] = {
+                
"dsd_id": dsd_id, + "agency_id": dsd_agency, + "version": dsd_version, + "dimensions": dimensions, + "attributes": attributes, + "has_time_dimension": has_time_dimension, + } + break + + raw_dfs = raw_data.get("dataflows", []) + + for df in raw_dfs: + desc_raw = df.get("descriptions", {}) + desc = ( + desc_raw.get("en", "") if isinstance(desc_raw, dict) else "" + ) or df.get("description", "") + + if desc and desc != self.dataflows.get(full_id, {}).get("description", ""): + desc_clean = re.sub(r"<[^>]+>", "", desc) + desc_clean = re.sub(r"[ \t]+", " ", desc_clean).strip() + self.dataflows.setdefault(full_id, {})["description"] = desc_clean + break + + parsed_cls, parsed_parents = _parse_sdmx_json_codelists({"data": raw_data}) + + with self._codelist_lock: + for cl_id, codes in parsed_cls.items(): + if cl_id in self.codelists: + self.codelists[cl_id].update(codes) + else: + self.codelists[cl_id] = codes + + for cl_id, parents in parsed_parents.items(): + if cl_id in self._codelist_parents: + self._codelist_parents[cl_id].update(parents) + else: + self._codelist_parents[cl_id] = parents + + raw_cls = raw_data.get("codelists", []) + + for cl in raw_cls: + bare_id = cl.get("id", "") + agency = cl.get("agencyID", "") + version = cl.get("version", "") + cl_id = ( + f"{agency}:{bare_id}({version})" if agency and version else bare_id + ) + descs: dict[str, str] = {} + + for code in cl.get("codes", []): + code_id = code.get("id", "") + d = code.get("descriptions", {}) + desc = (d.get("en", "") if isinstance(d, dict) else "") or code.get( + "description", "" + ) + descs[code_id] = desc or self.codelists.get(cl_id, {}).get( + code_id, "" + ) + + if cl_id: + existing_descs = self._codelist_descriptions.get(cl_id, {}) + existing_descs.update(descs) + self._codelist_descriptions[cl_id] = existing_descs + + raw_constraints = raw_data.get("contentConstraints", []) + + if raw_constraints: + constraints: dict[str, list[str]] = {} + + for cc in raw_constraints: + for region in cc.get("cubeRegions", []): + for kv in region.get("keyValues", []): + dim_id = kv.get("id", "") + vals = kv.get("values", []) + + if dim_id and vals: + if dim_id in constraints: + existing = set(constraints[dim_id]) + constraints[dim_id] = sorted(existing | set(vals)) + else: + constraints[dim_id] = sorted(vals) + if constraints: + self._dataflow_constraints[full_id] = constraints + + self._cache_dirty = True + + @staticmethod + def _parse_dimension_list(dsd: dict) -> list[dict]: + """Return ordered dimension descriptors from a DSD JSON object.""" + dims: list[dict] = [] + components = dsd.get("dataStructureComponents", {}) + dim_list = components.get("dimensionList", {}).get("dimensions", []) + + for dim in dim_list: + dim_id = dim.get("id", "") + position = dim.get("position", len(dims)) + local_repr = dim.get("localRepresentation", {}) + enum_ref = local_repr.get("enumeration", "") + cl_id = _extract_codelist_id_from_urn(enum_ref) if enum_ref else "" + concept_identity = dim.get("conceptIdentity", "") + concept_id = ( + _extract_concept_id_from_urn(concept_identity) + if concept_identity + else dim_id + ) + names = dim.get("names", {}) + name = (names.get("en", "") if isinstance(names, dict) else "") or dim.get( + "name", dim_id + ) + dims.append( + { + "id": dim_id, + "position": position, + "codelist_id": cl_id, + "concept_id": concept_id, + "name": name, + } + ) + + dims.sort(key=lambda d: d["position"]) + + return dims + + @staticmethod + def _parse_attribute_list(dsd: dict) -> list[dict]: + """Return attribute descriptors from 
a DSD JSON object.""" + attrs: list[dict] = [] + components = dsd.get("dataStructureComponents", {}) + attr_list = components.get("attributeList", {}).get("attributes", []) + + for attr in attr_list: + attr_id = attr.get("id", "") + local_repr = attr.get("localRepresentation", {}) + enum_ref = local_repr.get("enumeration", "") + cl_id = _extract_codelist_id_from_urn(enum_ref) if enum_ref else "" + attrs.append({"id": attr_id, "codelist_id": cl_id}) + + return attrs diff --git a/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_public_api_mixin.py b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_public_api_mixin.py new file mode 100644 index 00000000000..df464ffd65e --- /dev/null +++ b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_public_api_mixin.py @@ -0,0 +1,667 @@ +"""Public API mixin: listing, table map, dimension info for OecdMetadata.""" + +# pylint: disable=R0914 +import re + +from openbb_oecd.utils.metadata._constants import ( + _TABLE_GROUP_CANDIDATES, +) +from openbb_oecd.utils.metadata._typing import _MixinBase + + +class PublicApiMixin(_MixinBase): # pylint: disable=abstract-method + """Public dataflow listing, table map, dimension info methods.""" + + def list_dataflows(self, topic: str | None = None) -> list[dict]: + """Return OECD dataflows as [{label, value, topic, subtopic, all_subtopics}, ...].""" + self._ensure_dataflows() + self._ensure_taxonomy() + + topic_upper = topic.upper() if topic else "" + + result: list[dict] = [] + for full_id, v in self.dataflows.items(): + cats = self._df_to_categories.get(full_id, []) + + primary_topic = "" + primary_subtopic = "" + all_subtopics: list[str] = [] + + if cats: + for cat_path in cats: + parts = cat_path.split(".") + t = parts[0] if parts else "" + s = parts[1] if len(parts) > 1 else "" + if not primary_topic: + primary_topic = t + if topic_upper: + if t == topic_upper and s: + all_subtopics.append(s) + if not primary_subtopic: + primary_subtopic = s + else: + if s and not primary_subtopic: + primary_subtopic = s + if s: + all_subtopics.append(s) + + if topic_upper and not any(c.split(".")[0] == topic_upper for c in cats): + continue + + all_subtopics = sorted(set(all_subtopics)) + + result.append( + { + "label": v["name"], + "value": full_id, + "topic": topic_upper if topic_upper else primary_topic, + "topic_name": self._category_names.get( + topic_upper or primary_topic, "" + ), + "subtopic": primary_subtopic, + "subtopic_name": ( + self._category_names.get( + f"{topic_upper or primary_topic}.{primary_subtopic}", "" + ) + if primary_subtopic + else "" + ), + "all_subtopics": all_subtopics, + } + ) + + return sorted(result, key=lambda d: d["value"]) + + def list_topics(self) -> list[dict]: + """Return the OECD topic taxonomy as a hierarchical tree.""" + self._ensure_taxonomy() + + def _annotate(node: dict) -> dict: + path = node["path"] + direct = len(self._category_to_dfs.get(path, [])) + children = [_annotate(c) for c in node.get("children", [])] + children = [c for c in children if c["dataflow_count"] > 0] + child_total = sum(c["dataflow_count"] for c in children) + + return { + "id": node["id"], + "name": node["name"], + "dataflow_count": direct + child_total, + "subtopics": children, + } + + return [_annotate(t) for t in self._taxonomy_tree] + + def list_dataflows_by_topic(self) -> list[dict]: + """Return all dataflows organised by topic -> subtopic hierarchy.""" + self._ensure_dataflows() + self._ensure_taxonomy() + + def _df_entry(full_id: str) -> dict: + v = self.dataflows.get(full_id, 
{}) + + return {"label": v.get("name", full_id), "value": full_id} + + def _build(node: dict) -> dict: + path = node["path"] + dfs = [ + _df_entry(fid) for fid in sorted(self._category_to_dfs.get(path, [])) + ] + children = [_build(c) for c in node.get("children", [])] + children = [c for c in children if c["dataflows"] or c["subtopics"]] + + return { + "id": node["id"], + "name": node["name"], + "dataflows": dfs, + "subtopics": children, + } + + return [_build(t) for t in self._taxonomy_tree] + + def get_dataflow_info(self, dataflow_id: str) -> dict: + """Return metadata dict for a single dataflow.""" + full_id = self._resolve_dataflow_id(dataflow_id) + return self.dataflows[full_id] + + _COUNTRY_SUFFIX_RE = re.compile(r"^_?([A-Z]{2,3})$") + + def _detect_country_families(self) -> dict[str, dict]: + """Detect dataflow families that are per-country splits of the same table.""" + # pylint: disable=import-outside-toplevel + from collections import defaultdict + + dsd_groups: dict[str, list[str]] = defaultdict(list) + + for full_id in self.dataflows: + dsd = full_id.split("@")[0] if "@" in full_id else full_id + dsd_groups[dsd].append(full_id) + + family_map: dict[str, dict] = {} + + for dsd, fids in dsd_groups.items(): + if len(fids) < 5: + continue + + shorts = { + fid: self.dataflows[fid].get("short_id", fid.split("@")[-1]) + for fid in fids + } + prefix = min(shorts.values(), key=len) + + for sid in shorts.values(): + while prefix and not sid.startswith(prefix): + prefix = prefix[:-1] + + if len(prefix) < 4: + continue + + suffixes = {fid: sid[len(prefix) :] for fid, sid in shorts.items()} + country_members = { + fid + for fid, sfx in suffixes.items() + if self._COUNTRY_SUFFIX_RE.match(sfx) + } + + if len(country_members) / len(fids) < 0.7: + continue + + representative = None + + for fid, sfx in suffixes.items(): + if sfx.upper() in ("ALL", "_ALL"): + representative = fid + break + + if not representative: + for fid, sfx in suffixes.items(): + if sfx == "": + representative = fid + break + + if not representative: + non_country = [fid for fid in fids if fid not in country_members] + representative = ( + non_country[0] + if non_country + else min(fids, key=lambda f: shorts[f]) # pylint: disable=W0640 + ) + + rep_name = self.dataflows[representative].get("name", "") + info = { + "dsd": dsd, + "representative": representative, + "rep_short_id": shorts.get( + representative, + representative.split("@")[-1], + ), + "family_name": rep_name, + "member_count": len(fids), + "members": sorted(fids), + } + + for fid in fids: + family_map[fid] = info + + return family_map + + def _detect_section_families(self) -> dict[str, str]: + """Map section dataflows to their root within the same DSD family. + + Returns a dict ``{section_full_id: root_full_id}`` for every + dataflow whose short_id is a strict extension of another + dataflow in the same DSD (e.g. ``DF_TABLE1_EXPENDITURE`` is a + section of ``DF_TABLE1``). Root dataflows are **not** included + as keys — only subordinate sections. 
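+
+    For example, with the short IDs above, the returned mapping would
+    look like (full IDs are hypothetical)::
+
+        {"OECD.XYZ@DF_TABLE1_EXPENDITURE": "OECD.XYZ@DF_TABLE1"}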
+ """ + from collections import defaultdict # pylint: disable=import-outside-toplevel + + dsd_groups: dict[str, list[str]] = defaultdict(list) + for full_id in self.dataflows: + dsd = full_id.split("@")[0] if "@" in full_id else full_id + dsd_groups[dsd].append(full_id) + + section_map: dict[str, str] = {} + + for fids in dsd_groups.values(): + if len(fids) < 2: + continue + + shorts = { + fid: self.dataflows[fid].get("short_id", fid.split("@")[-1]) + for fid in fids + } + sorted_fids = sorted( + fids, + key=lambda f, s=shorts: (len(s[f]), s[f]), # type: ignore[misc] + ) + + roots: list[str] = [] + for fid in sorted_fids: + sid = shorts[fid] + parent = None + for r in roots: + if sid.startswith(shorts[r] + "_"): + parent = r + break + if parent is None: + roots.append(fid) + else: + section_map[fid] = parent + + return section_map + + def table_map(self, *, include_empty: bool = False) -> list[dict]: + """Return a flat, navigable map of every OECD presentation table.""" + self._ensure_dataflows() + self._ensure_taxonomy() + family_map = self._detect_country_families() + section_map = self._detect_section_families() + + emitted: set[tuple[str, str]] = set() + rows: list[dict] = [] + + def _make_row( # pylint: disable=too-many-positional-arguments + crumb: list[str], + id_crumb: list[str], + table_name: str, + dataflow_id: str, + short_id: str, + countries: int, + ) -> dict: + return { + "topic": crumb[0] if crumb else "", + "topic_id": id_crumb[0] if id_crumb else "", + "subtopic": crumb[1] if len(crumb) > 1 else "", + "subtopic_id": id_crumb[1] if len(id_crumb) > 1 else "", + "sub_subtopic": (" > ".join(crumb[2:]) if len(crumb) > 2 else ""), + "path": " > ".join(crumb), + "table": table_name, + "dataflow_id": dataflow_id, + "short_id": short_id, + "countries": countries, + } + + def _walk( + nodes: list[dict], + breadcrumb: list[str], + id_breadcrumb: list[str], + ) -> None: + for node in nodes: + crumb = breadcrumb + [node["name"]] + ids = id_breadcrumb + [node["id"]] + cat_path = node["path"] + + for fid in sorted(self._category_to_dfs.get(cat_path, [])): + entry = self.dataflows.get(fid) + + if not entry: + continue + + if fid in section_map: + continue + + family = family_map.get(fid) + + if family: + rep = family["representative"] + key = (cat_path, rep) + + if key in emitted: + continue + + emitted.add(key) + rows.append( + _make_row( + crumb, + ids, + family["family_name"], + rep, + family["rep_short_id"], + family["member_count"], + ) + ) + else: + rows.append( + _make_row( + crumb, + ids, + entry.get("name", fid), + fid, + entry.get("short_id", fid.split("@")[-1]), + 0, + ) + ) + + _walk(node.get("children", []), crumb, ids) + + _walk(self._taxonomy_tree, [], []) + + if include_empty: + categorised = set(self._df_to_categories.keys()) + + for fid, entry in sorted(self.dataflows.items()): + if ( + fid not in categorised + and fid not in family_map + and fid not in section_map + ): + rows.append( + { + "topic": "(Uncategorised)", + "topic_id": "", + "subtopic": "", + "subtopic_id": "", + "sub_subtopic": "", + "path": "(Uncategorised)", + "table": entry.get("name", fid), + "dataflow_id": fid, + "short_id": entry.get("short_id", fid.split("@")[-1]), + "countries": 0, + } + ) + + rows.sort(key=lambda r: (r["path"], r["table"])) + + return rows + + def find_tables(self, query: str) -> list[dict]: + """Search the table map by keyword.""" + full_map = self.table_map() + tokens = [t.lower() for t in query.strip().split() if t.strip()] + + if not tokens: + return full_map + + def 
_tok(token: str, text: str) -> bool: + return any(alt in text for alt in token.split("|")) + + matched: dict[str, dict] = {} + + for row in full_map: + text = " ".join( + [ + row["topic"], + row["subtopic"], + row["sub_subtopic"], + row["path"], + row["table"], + row["dataflow_id"], + row["short_id"], + ] + ).lower() + + if all(_tok(t, text) for t in tokens): + fid = row["dataflow_id"] + prev = matched.get(fid) + + if prev is None or len(row["path"]) > len(prev["path"]): + matched[fid] = row + + results = sorted(matched.values(), key=lambda r: (r["path"], r["table"])) + + return results + + def print_table_map( + self, + query: str | None = None, + *, + topic: str | None = None, + ) -> str: + """Return a human-readable string of the table map.""" + # pylint: disable=import-outside-toplevel + from collections import OrderedDict + + rows = self.find_tables(query) if query else self.table_map() + + if topic: + t = topic.lower() + rows = [ + r for r in rows if t in r["topic"].lower() or t in r["path"].lower() + ] + + if not rows: + return "(no matching tables)" + + groups: OrderedDict[str, list[dict]] = OrderedDict() + + for row in rows: + groups.setdefault(row["path"], []).append(row) + + lines: list[str] = [] + current_topic = "" + + for group_rows in groups.values(): + top = group_rows[0]["topic"] + + if top != current_topic: + if current_topic: + lines.append("") + + lines.append(f"{'=' * 60}") + lines.append(f" {top}") + lines.append(f"{'=' * 60}") + current_topic = top + + sub_parts = [group_rows[0]["subtopic"]] + + if group_rows[0]["sub_subtopic"]: + sub_parts.append(group_rows[0]["sub_subtopic"]) + + sub_label = " > ".join(p for p in sub_parts if p) + + if sub_label: + lines.append(f" [{sub_label}]") + + for row in group_rows: + ccount = row.get("countries", 0) + suffix = f" ({ccount} countries)" if ccount else "" + lines.append(f" {row['table']:<60s} {row['short_id']}{suffix}") + + return "\n".join(lines) + + def get_dimension_order(self, dataflow_id: str) -> list[str]: + """Return the DSD-defined dimension IDs in position order.""" + full_id = self._resolve_dataflow_id(dataflow_id) + self._ensure_structure(full_id) + dsd = self.datastructures.get(full_id, {}) + + return [d["id"] for d in dsd.get("dimensions", []) if d["id"] != "TIME_PERIOD"] + + def get_dataflow_parameters(self, dataflow_id: str) -> dict[str, list[dict]]: + """Return queryable parameters for *dataflow_id*.""" + if dataflow_id in self._dataflow_parameters_cache: + return self._dataflow_parameters_cache[dataflow_id] + + full_id = self._resolve_dataflow_id(dataflow_id) + if full_id in self._dataflow_parameters_cache: + return self._dataflow_parameters_cache[full_id] + + self._ensure_structure(full_id) + dsd = self.datastructures.get(full_id, {}) + params: dict[str, list[dict]] = {} + + for dim in dsd.get("dimensions", []): + dim_id = dim["id"] + + if dim_id == "TIME_PERIOD": + continue + + cl_id = dim.get("codelist_id", "") + + if cl_id: + cl = self._get_codelist(cl_id, dataflow_id) + params[dim_id] = [ + {"label": label, "value": code} + for code, label in sorted(cl.items()) + ] + else: + params[dim_id] = [] + + if params: + self._dataflow_parameters_cache[dataflow_id] = params + self._dataflow_parameters_cache[full_id] = params + + return params + + def get_dimension_info(self, dataflow_id: str) -> list[dict]: + """Return rich metadata for every dimension in a dataflow.""" + full_id = self._resolve_dataflow_id(dataflow_id) + self._ensure_structure(full_id) + dsd = self.datastructures.get(full_id, {}) + constraints = 
self._dataflow_constraints.get(full_id, {}) + params = self.get_dataflow_parameters(full_id) + + result: list[dict] = [] + + for dim in dsd.get("dimensions", []): + dim_id = dim["id"] + + if dim_id == "TIME_PERIOD": + continue + + cl_id = dim.get("codelist_id", "") + codelist = self._get_codelist(cl_id, dataflow_id) if cl_id else {} + cl_size = len(codelist) + parents = self._codelist_parents.get(cl_id, {}) + + if not parents: + m_pref = self._CL_KEY_RE.match(cl_id) if cl_id else None + + if m_pref: + prefix = f"{m_pref.group(1)}:{m_pref.group(2)}(" + for pk, pv in self._codelist_parents.items(): + if pk.startswith(prefix) and pv: + parents = pv + break + + descriptions = self._codelist_descriptions.get(cl_id, {}) + + if not descriptions: + m_pref = self._CL_KEY_RE.match(cl_id) if cl_id else None + + if m_pref: + prefix = f"{m_pref.group(1)}:{m_pref.group(2)}(" + + for dk, dv in self._codelist_descriptions.items(): + if dk.startswith(prefix) and dv: + descriptions = dv + break + + entries = params.get(dim_id, []) + + if dim_id in constraints: + allowed = set(constraints[dim_id]) + constrained_entries = [e for e in entries if e["value"] in allowed] + + if not constrained_entries and allowed: + constrained_entries = [ + {"value": code, "label": codelist.get(code, code)} + for code in sorted(allowed) + ] + else: + constrained_entries = entries + + values = [] + + for e in constrained_entries: + v: dict = { + "value": e["value"], + "label": e["label"], + "description": descriptions.get(e["value"], e["label"]), + } + + if e["value"] in parents: + v["parent"] = parents[e["value"]] + + values.append(v) + + result.append( + { + "id": dim_id, + "position": dim["position"], + "name": dim.get("name", dim_id), + "codelist_id": cl_id, + "total_codes": cl_size, + "constrained_codes": len(constrained_entries), + "has_hierarchy": bool(parents), + "values": values, + } + ) + + return result + + def get_table_groups(self, dataflow_id: str) -> list[dict]: + """Return table groups within a dataflow.""" + full_id = self._resolve_dataflow_id(dataflow_id) + self._ensure_structure(full_id) + dsd = self.datastructures.get(full_id, {}) + table_dim = None + + for candidate in _TABLE_GROUP_CANDIDATES: + for dim in dsd.get("dimensions", []): + if dim["id"] == candidate: + table_dim = dim + break + if table_dim is not None: + break + + if table_dim is None: + return [] + + dim_id = table_dim["id"] + cl_id = table_dim.get("codelist_id", "") + params = self.get_dataflow_parameters(full_id) + entries = params.get(dim_id, []) + constraints = self._dataflow_constraints.get(full_id, {}) + + if dim_id in constraints: + allowed = set(constraints[dim_id]) + entries = [e for e in entries if e["value"] in allowed] + + descriptions = self._codelist_descriptions.get(cl_id, {}) + + return [ + { + "value": e["value"], + "label": e["label"], + "description": descriptions.get(e["value"], e["label"]), + } + for e in entries + ] + + def get_constrained_values(self, dataflow_id: str) -> dict[str, list[dict]]: + """Return dimension values filtered by embedded content constraints.""" + full_id = self._resolve_dataflow_id(dataflow_id) + self._ensure_structure(full_id) + constraints = self._dataflow_constraints.get(full_id, {}) + params = self.get_dataflow_parameters(full_id) + + result: dict[str, list[dict]] = {} + dsd = self.datastructures.get(full_id, {}) + + for dim in dsd.get("dimensions", []): + dim_id = dim["id"] + + if dim_id == "TIME_PERIOD": + continue + + cl_id = dim.get("codelist_id", "") + descriptions = 
self._codelist_descriptions.get(cl_id, {}) + entries = params.get(dim_id, []) + + if dim_id in constraints: + allowed = set(constraints[dim_id]) + entries = [e for e in entries if e["value"] in allowed] + + result[dim_id] = [ + { + "value": e["value"], + "label": e["label"], + "description": descriptions.get(e["value"], e["label"]), + } + for e in entries + ] + + self._save_cache() + return result diff --git a/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_query_mixin.py b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_query_mixin.py new file mode 100644 index 00000000000..64e6a4b2a1b --- /dev/null +++ b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_query_mixin.py @@ -0,0 +1,326 @@ +"""Dataflow triplet resolution, URL building, dimension classification, and query construction mixin.""" + +from openbb_oecd.utils.metadata._constants import ( + _COUNTRY_DIMENSION_CANDIDATES, + _STRUCTURE_ACCEPT, + BASE_URL, +) +from openbb_oecd.utils.metadata._helpers import _make_request +from openbb_oecd.utils.metadata._typing import _MixinBase + + +class QueryMixin(_MixinBase): # pylint: disable=abstract-method + """Dataflow triplet resolution, URL building, dimension filter, availability.""" + + _SELECTOR_MAX = 50 + + def resolve_dataflow_triplet(self, dataflow_id: str) -> tuple[str, str, str]: + """Resolve a dataflow id to (agency, full_id, version) for v2 URLs.""" + full_id = self._resolve_dataflow_id(dataflow_id) + info = self.dataflows[full_id] + + return info["agency_id"], full_id, info["version"] + + _LASTN_BLOCKED_DATAFLOWS = frozenset( + { + "DF_BATIS", + "DF_BIMTS_HS2017_2D", + "DF_BIMTS_CPA_2_1", + "DF_EXTREME_TEMP_DDOWN", + "DF_POP_AGE_DDOWN", + "DF_SDBS_ISIC4", + "DF_STES_REVISIONS", + "DF_TIM_2023", + "DF_TIM_2021", + "DF_TIMBC_2023", + "DF_UOE_FIN_INDIC_SOURCE_NATURE", + "EXT_TEMP_P", + } + ) + + _LASTN_BLOCKED_AGENCIES = frozenset( + { + "OECD.STI", + "OECD.DCD", + } + ) + + def build_data_url( # pylint: disable=too-many-positional-arguments + self, + dataflow_id: str, + dimension_filter: str = "*", + last_n: int | None = None, + first_n: int | None = None, + detail: str = "dataonly", + ) -> str: + """Build a fully-qualified SDMX v2 data query URL.""" + agency, full_id, version = self.resolve_dataflow_triplet(dataflow_id) + url_id = full_id.replace("@", "%40") + path = ( + f"{BASE_URL}/data/dataflow/{agency}/{url_id}/{version}/{dimension_filter}" + ) + qp: list[str] = [] + + resolved = self._resolve_dataflow_id(dataflow_id) + self._ensure_structure(resolved) + dsd = self.datastructures.get(resolved, {}) + if dsd.get("has_time_dimension", True): + qp.append("dimensionAtObservation=TIME_PERIOD") + + short_id = full_id.split("@")[-1] if "@" in full_id else full_id + agency_prefix = ".".join(agency.split(".")[:2]) + _lastn_ok = ( + short_id not in self._LASTN_BLOCKED_DATAFLOWS + and agency_prefix not in self._LASTN_BLOCKED_AGENCIES + ) + + if last_n is not None and _lastn_ok: + qp.append(f"lastNObservations={last_n}") + + if first_n is not None and _lastn_ok: + qp.append(f"firstNObservations={first_n}") + + qp.append(f"detail={detail}") + + return f"{path}?{'&'.join(qp)}" + + def build_dimension_filter(self, dataflow_id: str, **dimension_values: str) -> str: + """Build the dot-separated dimension filter string for v2.""" + full_id = self._resolve_dataflow_id(dataflow_id) + self._ensure_structure(full_id) + dsd = self.datastructures.get(full_id, {}) + + all_dims = [ + d["id"] + for d in sorted(dsd.get("dimensions", []), key=lambda d: d["position"]) + ] + + 
parts: list[str] = [] + for dim_id in all_dims: + val = dimension_values.get(dim_id, "*") + parts.append(val if val else "*") + return ".".join(parts) + + def classify_dimensions(self, dataflow_id: str) -> dict[str, list[dict]]: + """Classify all dimensions of *dataflow_id* into functional roles.""" + full_id = self._resolve_dataflow_id(dataflow_id) + self._ensure_structure(full_id) + dsd = self.datastructures.get(full_id, {}) + classified: dict[str, list[dict]] = { + "country": [], + "freq": [], + "fixed": [], + "selector": [], + "axis": [], + } + + for dim in dsd.get("dimensions", []): + dim_id = dim["id"] + + if dim_id == "TIME_PERIOD": + continue + + cl_id = dim.get("codelist_id", "") + cl_size = 0 + cl_values: dict[str, str] = {} + + if cl_id and cl_id in self.codelists: + cl_size = len(self.codelists[cl_id]) + cl_values = dict(self.codelists[cl_id]) + + entry = { + "id": dim_id, + "position": dim["position"], + "name": dim.get("name", dim_id), + "codelist_id": dim.get("codelist_id", ""), + "codelist_size": cl_size, + "values": cl_values, + } + + if dim_id in _COUNTRY_DIMENSION_CANDIDATES: + entry["role"] = "country" + classified["country"].append(entry) + elif dim_id == "FREQ": + entry["role"] = "freq" + classified["freq"].append(entry) + elif cl_size <= 1: + entry["role"] = "fixed" + classified["fixed"].append(entry) + elif cl_size <= self._SELECTOR_MAX: + entry["role"] = "selector" + classified["selector"].append(entry) + else: + entry["role"] = "axis" + classified["axis"].append(entry) + + return classified + + def get_table_parameters(self, dataflow_id: str) -> dict[str, dict]: + """Return the queryable dimensions for building table queries.""" + classified = self.classify_dimensions(dataflow_id) + params: dict[str, dict] = {} + + for role, dims in classified.items(): + for dim in dims: + default = "*" + + if role == "fixed": + default = next(iter(dim["values"])) if dim["values"] else "*" + elif role == "freq": + vals = dim.get("values", {}) + if "A" in vals: + default = "A" + elif vals: + default = next(iter(vals)) + + params[dim["id"]] = { + "role": role, + "position": dim["position"], + "name": dim["name"], + "codelist_id": dim["codelist_id"], + "codelist_size": dim["codelist_size"], + "values": dim["values"], + "default": default, + } + + return params + + def build_table_query( + self, + dataflow_id: str, + country: str | list[str] | None = None, + frequency: str | None = None, + **selector_overrides: str, + ) -> str: + """Build a dimension filter string optimized for fetching a full table.""" + full_id = self._resolve_dataflow_id(dataflow_id) + table_params = self.get_table_parameters(full_id) + self._ensure_structure(full_id) + dsd = self.datastructures.get(full_id, {}) + all_dims = sorted(dsd.get("dimensions", []), key=lambda d: d["position"]) + country_val = "*" + + if country and str(country).strip().lower() not in ("", "all"): + country_val = ( + "+".join(country) if isinstance(country, list) else str(country) + ) + + primary_country_set = False + parts: list[str] = [] + + for dim in all_dims: + dim_id = dim["id"] + info = table_params.get(dim_id, {}) + role = info.get("role", "") + + if dim_id in selector_overrides: + parts.append(selector_overrides[dim_id]) + elif dim_id == "TIME_PERIOD": + parts.append("*") + elif role == "country": + if not primary_country_set: + parts.append(country_val) + primary_country_set = True + else: + parts.append("*") + elif role == "freq": + parts.append(frequency if frequency else info.get("default", "*")) + elif role == "fixed": + 
parts.append(info.get("default", "*")) + else: + parts.append(info.get("default", "*")) + + return ".".join(parts) + + def describe_table_dimensions(self, dataflow_id: str) -> list[dict]: + """Return a human-readable summary of dimensions and their roles.""" + table_params = self.get_table_parameters(dataflow_id) + result: list[dict] = [] + + for dim_id, info in sorted( + table_params.items(), key=lambda x: x[1]["position"] + ): + sample = [] + + if info["values"]: + items = list(info["values"].items())[:8] + sample = [{"code": k, "label": v} for k, v in items] + + result.append( + { + "id": dim_id, + "name": info["name"], + "role": info["role"], + "codelist_size": info["codelist_size"], + "default": info["default"], + "sample_values": sample, + } + ) + return result + + def fetch_availability( + self, + dataflow_id: str, + pinned: dict[str, str] | None = None, + ) -> dict[str, list[str]]: + """Query the OECD availability endpoint for valid dimension values.""" + agency, full_id, version = self.resolve_dataflow_triplet(dataflow_id) + dims = self.get_dimension_order(full_id) + pinned = pinned or {} + cache_key = ( + f"{full_id}::{'|'.join(f'{k}={v}' for k, v in sorted(pinned.items()))}" + ) + + if cache_key in self._availability_cache: + return self._availability_cache[cache_key] + + parts: list[str] = [] + + for dim_id in dims: + parts.append(pinned.get(dim_id, "*")) + + key_filter = ".".join(parts) + url_id = full_id.replace("@", "%40") + url = ( + f"{BASE_URL}/availability/dataflow/{agency}/{url_id}/{version}/{key_filter}" + ) + + try: + resp = _make_request( + url, + headers={"Accept": _STRUCTURE_ACCEPT}, + timeout=30, + ) + raw = resp.json() + except Exception: + fallback: dict[str, list[str]] = {} + for dim_id in dims: + cl = self.get_codelist_for_dimension(full_id, dim_id) + fallback[dim_id] = sorted(cl.keys()) if cl else [] + self._availability_cache[cache_key] = fallback + return fallback + + available: dict[str, list[str]] = {} + + for cc in raw.get("data", raw).get("contentConstraints", []): + for region in cc.get("cubeRegions", []): + for member in region.get("keyValues", []): + dim_id = member.get("id", "") + if dim_id and dim_id != "TIME_PERIOD": + available[dim_id] = sorted(member.get("values", [])) + + for dim_id in dims: + if dim_id not in available: + cl = self.get_codelist_for_dimension(full_id, dim_id) + available[dim_id] = sorted(cl.keys()) if cl else [] + + constraints = self._dataflow_constraints.get(full_id, {}) + if constraints: + for dim_id in dims: + if dim_id in constraints and dim_id in available: + allowed = set(constraints[dim_id]) + available[dim_id] = [c for c in available[dim_id] if c in allowed] + + self._availability_cache[cache_key] = available + + return available diff --git a/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_search_mixin.py b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_search_mixin.py new file mode 100644 index 00000000000..d7cfdffaf74 --- /dev/null +++ b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_search_mixin.py @@ -0,0 +1,322 @@ +"""Dataflow search, indicator search, table listing mixin.""" + +from openbb_core.app.model.abstract.error import OpenBBError +from openbb_oecd.utils.metadata._constants import _TABLE_GROUP_CANDIDATES +from openbb_oecd.utils.metadata._helpers import ( + _matches_query, + _parse_search_query, +) +from openbb_oecd.utils.metadata._typing import _MixinBase + + +class SearchMixin(_MixinBase): # pylint: disable=abstract-method + """Dataflow and indicator search methods.""" + 
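+    # Lazily-built full-text index over the cached indicators; populated
+    # by _get_search_index() on the first unscoped search and reused on
+    # subsequent calls.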
+ _search_index: list[tuple[str, dict]] | None + + def search_dataflows(self, query: str) -> list[dict]: + """Search dataflows by keyword.""" + self._ensure_dataflows() + terms = [t.lower() for t in query.strip().split() if t.strip()] + if not terms: + return list(self.dataflows.values()) + + results: list[dict] = [] + for fid, entry in self.dataflows.items(): + text = " ".join( + [ + fid, + entry.get("name", ""), + entry.get("description", ""), + entry.get("short_id", ""), + ] + ).lower() + if all(t in text for t in terms): + results.append(entry) + return results + + def describe_dataflow(self, dataflow_id: str) -> dict: + """Return a comprehensive description of a dataflow and its parameters.""" + full_id = self._resolve_dataflow_id(dataflow_id) + self._ensure_description(full_id) + df_meta = self.dataflows.get(full_id, {}) + dim_info = self.get_dimension_info(full_id) + table_groups = self.get_table_groups(full_id) + indicator_dim = self._find_indicator_dimension(full_id) + indicator_tree = self.get_indicator_tree(full_id) + + def _count_leaves(nodes: list[dict]) -> int: + total = 0 + for n in nodes: + children = n.get("children", []) + if children: + total += _count_leaves(children) + else: + total += 1 + return total + + return { + "dataflow_id": full_id, + "short_id": df_meta.get("short_id", full_id.split("@")[-1]), + "name": df_meta.get("name", full_id), + "description": df_meta.get("description", ""), + "dimensions": dim_info, + "table_groups": table_groups, + "indicator_dimension": indicator_dim or "", + "indicator_count": _count_leaves(indicator_tree) if indicator_tree else 0, + "indicator_tree": indicator_tree, + } + + def search_indicators( + self, + query: str | None = None, + dataflows: str | list[str] | None = None, + keywords: str | list[str] | None = None, + ) -> list[dict]: + """Full-text search across dataflow indicators.""" + self._ensure_dataflows() + + scoped = False + if dataflows: + if isinstance(dataflows, str): + dataflows = [d.strip() for d in dataflows.split(",")] + target_ids = dataflows + scoped = True + elif not query and not keywords: + raise OpenBBError( + "At least one of 'query', 'dataflows', or 'keywords' is required." 
+ ) + else: + target_ids = None + + _table_dims = set(_TABLE_GROUP_CANDIDATES) + if scoped: + all_indicators: list[dict] = [] + for df_id in target_ids: # type: ignore[union-attr] + full_id = None + if df_id in self._dataflow_indicators_cache: + full_id = df_id + else: + resolved = self._short_id_map.get(df_id) + if resolved and resolved in self._dataflow_indicators_cache: + full_id = resolved + if full_id is None: + continue + cached = self._dataflow_indicators_cache[full_id] + constraints = self._dataflow_constraints.get(full_id, {}) + ind_dim = self._get_indicator_dim(full_id) + if constraints and cached: + allowed_sets: dict[str, set[str]] = { + k: set(v) for k, v in constraints.items() + } + for ind in cached: + dim_id = ind.get("dimension_id", "") + if dim_id in _table_dims: + continue + if ind_dim and dim_id and dim_id != ind_dim: + continue + if ( + not dim_id + or dim_id not in allowed_sets + or ind.get("indicator") in allowed_sets[dim_id] + ): + all_indicators.append(ind) + else: + all_indicators.extend( + ind + for ind in cached + if ind.get("dimension_id", "") not in _table_dims + and not ( + ind_dim + and ind.get("dimension_id", "") + and ind.get("dimension_id", "") != ind_dim + ) + ) + + if query: + phrases = _parse_search_query(query) + all_indicators = [ + ind + for ind in all_indicators + if _matches_query( + f"{ind.get('label', '')} {ind.get('description', '')} " + f"{ind.get('dataflow_name', '')} {ind.get('dataflow_id', '')} " + f"{ind.get('indicator', '')}".lower(), + phrases, + ) + ] + else: + search_index = self._get_search_index() + phrases = _parse_search_query(query) if query else [] + all_indicators = [] + + for search_text, ind in search_index: + if phrases and not _matches_query(search_text, phrases): + continue + all_indicators.append(ind) + + if keywords: + if isinstance(keywords, str): + keywords = [keywords] + + for raw_kw in keywords: + kw = raw_kw.strip() + + if kw.lower().startswith("not "): + exclude_word = kw[4:].strip().lower() + all_indicators = [ + i + for i in all_indicators + if exclude_word + not in f"{i.get('label', '')} {i.get('description', '')} {i.get('indicator', '')}".lower() + ] + else: + include_word = kw.lower() + all_indicators = [ + i + for i in all_indicators + if include_word + in f"{i.get('label', '')} {i.get('description', '')} {i.get('indicator', '')}".lower() + ] + + return all_indicators + + def _get_search_index(self) -> list[tuple[str, dict]]: + """Return a lazily-built search index: [(search_text, indicator_dict), ...].""" + if hasattr(self, "_search_index") and self._search_index is not None: + return self._search_index + + _table_dims = set(_TABLE_GROUP_CANDIDATES) + index: list[tuple[str, dict]] = [] + for full_id, cached in self._dataflow_indicators_cache.items(): + constraints = self._dataflow_constraints.get(full_id, {}) + allowed_sets: dict[str, set[str]] = ( + {k: set(v) for k, v in constraints.items()} if constraints else {} + ) + ind_dim = self._get_indicator_dim(full_id) + for ind in cached: + dim_id = ind.get("dimension_id", "") + if dim_id in _table_dims: + continue + if ind_dim and dim_id and dim_id != ind_dim: + continue + if ( + allowed_sets + and dim_id + and dim_id in allowed_sets + and ind.get("indicator") not in allowed_sets[dim_id] + ): + continue + text = ( + f"{ind.get('label', '')} {ind.get('description', '')} " + f"{ind.get('dataflow_name', '')} {ind.get('dataflow_id', '')} " + f"{ind.get('indicator', '')}" + ).lower() + index.append((text, ind)) + + self._search_index = index + + return index + + def 
list_tables( + self, + query: str | None = None, + topic: str | None = None, + subtopic: str | None = None, + ) -> list[dict]: + """List all OECD tables (dataflows) with names and topics.""" + rows = self.find_tables(query) if query else self.table_map() + + if topic: + t = topic.upper() + rows = [r for r in rows if r.get("topic_id", "").upper() == t] + + if subtopic: + s = subtopic.upper() + rows = [r for r in rows if r.get("subtopic_id", "").upper() == s] + + return [ + { + "table_id": row["short_id"], + "name": row["table"], + "topic": row["topic"], + "topic_id": row.get("topic_id", ""), + "subtopic": row.get("subtopic", ""), + "subtopic_id": row.get("subtopic_id", ""), + "dataflow_id": row["dataflow_id"], + } + for row in rows + ] + + def get_table(self, table_id: str) -> dict: + """Get full metadata for a single table (dataflow).""" + return self.describe_dataflow(table_id) + + def get_dataflow_hierarchies(self, dataflow_id: str) -> list[dict]: + """Return available table / hierarchy identifiers for a dataflow.""" + full_id = self._resolve_dataflow_id(dataflow_id) + self._ensure_structure(full_id) + dsd = self.datastructures.get(full_id, {}) + + table_dim = None + for candidate in _TABLE_GROUP_CANDIDATES: + for dim in dsd.get("dimensions", []): + if dim["id"] == candidate: + table_dim = dim + break + if table_dim is not None: + break + + if table_dim is None: + return [] + + cl_id = table_dim.get("codelist_id", "") + table_groups = self.get_table_groups(dataflow_id) + + return [ + { + "id": g["value"], + "name": g["label"], + "description": g.get("description", g["label"]), + "codelist_id": cl_id, + } + for g in table_groups + ] + + def get_dataflow_table_structure(self, dataflow_id: str, table_id: str) -> dict: + """Return the hierarchy structure for a specific table.""" + full_id = self._resolve_dataflow_id(dataflow_id) + self._ensure_structure(full_id) + + groups = self.get_table_groups(dataflow_id) + table_meta = next((g for g in groups if g["value"] == table_id), None) + hierarchy_name = table_meta["label"] if table_meta else table_id + + tree = self.get_indicator_tree(dataflow_id) + + flat: list[dict] = [] + counter = [0] + + def _walk(nodes: list[dict], level: int, parent: str | None) -> None: + for node in nodes: + children_codes = [c["code"] for c in node.get("children", [])] + flat.append( + { + "code": node["code"], + "label": node.get("label", node["code"]), + "order": counter[0], + "level": level, + "parent": parent, + "children": children_codes, + } + ) + counter[0] += 1 + _walk(node.get("children", []), level + 1, node["code"]) + + _walk(tree, 0, None) + + return { + "hierarchy_id": table_id, + "hierarchy_name": hierarchy_name, + "indicators": flat, + } diff --git a/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_typing.py b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_typing.py new file mode 100644 index 00000000000..3e4c7fe0970 --- /dev/null +++ b/openbb_platform/providers/oecd/openbb_oecd/utils/metadata/_typing.py @@ -0,0 +1,149 @@ +"""Type stubs for the mixin pattern. + +Each mixin inherits from ``_MixinBase`` so that both mypy and pylint can +see cross-mixin attribute and method references. The methods all raise +``NotImplementedError`` — they exist only so type-checkers know the +signatures; the real implementations live in the individual mixins and +override these via normal MRO. 
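+
+A sketch of the intended composition (illustrative; the concrete class
+and its base order live in ``openbb_oecd.utils.metadata``)::
+
+    class OecdMetadata(
+        CacheMixin,
+        LoaderMixin,
+        PublicApiMixin,
+        IndicatorMixin,
+        SearchMixin,
+        QueryMixin,
+    ):
+        ...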
+""" + +from __future__ import annotations + +import re +import threading +from pathlib import Path + + +class _MixinBase: + + # -- instance attributes (initialised in OecdMetadata.__init__) -- + dataflows: dict[str, dict] + datastructures: dict[str, dict] + codelists: dict[str, dict[str, str]] + _short_id_map: dict[str, str] + _codelist_lock: threading.Lock + _codelist_descriptions: dict[str, dict[str, str]] + _codelist_parents: dict[str, dict[str, str]] + _codelist_comp_rules: dict[str, dict[str, str]] + _dataflow_constraints: dict[str, dict[str, list[str]]] + _dataflow_parameters_cache: dict[str, dict] + _dataflow_indicators_cache: dict[str, list] + _availability_cache: dict[str, dict[str, list[str]]] + _indicator_dim_cache: dict[str, str | None] + _table_map: dict[str, dict] + _full_catalogue_loaded: bool + _cache_dirty: bool + _taxonomy_tree: list[dict] + _df_to_categories: dict[str, list[str]] + _category_to_dfs: dict[str, list[str]] + _category_names: dict[str, str] + _taxonomy_loaded: bool + _search_index: list[tuple[str, dict]] | None + + # -- class attributes -- + _CL_KEY_RE: re.Pattern[str] + + # -- methods from CacheMixin -- + @staticmethod + def _read_cache_file(path: Path) -> dict | None: + raise NotImplementedError + + def _apply_blob(self, blob: dict) -> None: + raise NotImplementedError + + def _infer_orphan_parents(self) -> None: + raise NotImplementedError + + @staticmethod + def _closest_common_ancestor( + codes: list[str], parents: dict[str, str] + ) -> str | None: + raise NotImplementedError + + def _load_from_cache(self) -> bool: + raise NotImplementedError + + def _save_cache(self) -> None: + raise NotImplementedError + + # -- methods from LoaderMixin -- + def _ensure_dataflows(self) -> None: + raise NotImplementedError + + def _rebuild_short_id_map(self) -> None: + raise NotImplementedError + + def _ensure_taxonomy(self) -> None: + raise NotImplementedError + + def _resolve_dataflow_id(self, dataflow_id: str) -> str: + raise NotImplementedError + + def _ensure_description(self, full_id: str) -> None: + raise NotImplementedError + + def _ensure_structure(self, dataflow_id: str, *, force: bool = False) -> None: + raise NotImplementedError + + # -- methods from PublicApiMixin -- + def list_dataflows(self, topic: str | None = None) -> list[dict]: + raise NotImplementedError + + def get_dataflow_parameters(self, dataflow_id: str) -> dict[str, list[dict]]: + raise NotImplementedError + + def get_dimension_order(self, dataflow_id: str) -> list[str]: + raise NotImplementedError + + def get_dimension_info(self, dataflow_id: str) -> list[dict]: + raise NotImplementedError + + def get_table_groups(self, dataflow_id: str) -> list[dict]: + raise NotImplementedError + + def get_constrained_values(self, dataflow_id: str) -> dict[str, list[dict]]: + raise NotImplementedError + + def table_map(self, *, include_empty: bool = False) -> list[dict]: + raise NotImplementedError + + def find_tables(self, query: str) -> list[dict]: + raise NotImplementedError + + def _get_codelist( + self, + codelist_id: str, + _dataflow_id: str | None = None, + ) -> dict[str, str]: + raise NotImplementedError + + # -- methods from IndicatorMixin -- + def _get_indicator_dim(self, full_id: str) -> str | None: + raise NotImplementedError + + def _find_indicator_dimension( + self, + dataflow_id: str, + indicator_code: str | None = None, + ) -> str | None: + raise NotImplementedError + + def get_codelist_for_dimension( + self, dataflow_id: str, dim_id: str + ) -> dict[str, str]: + raise NotImplementedError + 
+ def get_indicator_tree(self, dataflow_id: str) -> list[dict]: + raise NotImplementedError + + # -- methods from SearchMixin -- + def describe_dataflow(self, dataflow_id: str) -> dict: + raise NotImplementedError + + # -- methods from QueryMixin -- + def fetch_availability( + self, + dataflow_id: str, + pinned: dict[str, str] | None = None, + ) -> dict[str, list[str]]: + raise NotImplementedError diff --git a/openbb_platform/providers/oecd/openbb_oecd/utils/progressive_helper.py b/openbb_platform/providers/oecd/openbb_oecd/utils/progressive_helper.py new file mode 100644 index 00000000000..b8d4853a47e --- /dev/null +++ b/openbb_platform/providers/oecd/openbb_oecd/utils/progressive_helper.py @@ -0,0 +1,382 @@ +"""OECD Progressive Query Helper. + +Matches the interface of ImfParamsBuilder for consistent cross-provider +behaviour. All live constraint checking is delegated to +OecdMetadata.fetch_availability(). +""" + +# pylint: disable=W0212 + +from __future__ import annotations + +from openbb_core.app.model.abstract.error import OpenBBError + + +class OecdParamsBuilder: + """Progressive dimension selection with cascading availability checks. + + Walks the DSD dimensions in order. At each step, queries the OECD + availability endpoint so that only values valid given prior + selections are returned. When a dimension is set, all *downstream* + selections (later in DSD order) are cleared — they may now be + invalid. + + Examples + -------- + >>> from openbb_oecd.utils.progressive_helper import OecdParamsBuilder + >>> builder = OecdParamsBuilder("DF_PRICES_ALL") + >>> builder.get_dimensions_in_order() + ['REF_AREA', 'FREQ', 'METHODOLOGY', 'MEASURE', ...] + >>> builder.set_dimension(("REF_AREA", "USA")) + {'REF_AREA': 'USA', 'FREQ': None, ...} + >>> builder.get_options_for_dimension("FREQ") + [{'label': 'Annual', 'value': 'A'}, ...] + """ + + def __init__(self, dataflow_id: str) -> None: + """Initialize the OecdParamsBuilder. + + Parameters + ---------- + dataflow_id : str + Short or full OECD dataflow ID (e.g. ``"DF_PRICES_ALL"``). + + Raises + ------ + OpenBBError + If the dataflow cannot be resolved. + """ + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.metadata import OecdMetadata + + self._metadata = OecdMetadata() + + # Resolve and ensure the DSD is loaded. + self._full_id = self._metadata._resolve_dataflow_id(dataflow_id) + self._metadata._ensure_structure(self._full_id) + + self.dataflow_id = dataflow_id + + # Dimension IDs in DSD position order (excludes TIME_PERIOD). + self._dimensions: list[str] = self._metadata.get_dimension_order(self._full_id) + self.current_dimension: str | None = ( + self._dimensions[0] if self._dimensions else None + ) + + # Current selections: ``None`` = not yet selected. + self._selections: dict[str, str | None] = { + dim: None for dim in self._dimensions + } + + # Availability cache keyed by frozen pinned state. + self._avail_cache: dict[frozenset, dict[str, list[str]]] = {} + + # Codelist label caches (dim_id → {code: label}). + self._labels: dict[str, dict[str, str]] = {} + + def get_dimensions_in_order(self) -> list[str]: + """Return dimension IDs sorted by DSD position, excluding TIME_PERIOD.""" + return list(self._dimensions) + + def get_next_dimension_to_select(self) -> str | None: + """Return the first dimension where no selection has been made. + + Returns + ------- + str or None + Dimension ID, or ``None`` if all dimensions have been selected. 
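+
+        Examples
+        --------
+        Illustrative; actual dimension IDs vary by dataflow:
+
+        >>> builder.set_dimension(("REF_AREA", "USA"))  # doctest: +SKIP
+        >>> builder.get_next_dimension_to_select()  # doctest: +SKIP
+        'FREQ'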
+ """ + for dim in self._dimensions: + if self._selections[dim] is None: + return dim + return None + + def set_dimension(self, dimension: tuple[str, str]) -> dict[str, str | None]: + """Pin *dimension* to *value* and **clear all downstream** selections. + + Parameters + ---------- + dimension : tuple[str, str] + ``(dimension_id, value)`` to set. + + Returns + ------- + dict + Updated selections after setting the dimension. + + Raises + ------ + KeyError + If *dimension_id* is not a valid dimension of this dataflow. + """ + dim_id, value = dimension + + if dim_id not in self._selections: + raise KeyError( + f"Dimension '{dim_id}' not valid for dataflow '{self.dataflow_id}'. " + f"Valid dimensions: {list(self._selections.keys())}" + ) + + self._selections[dim_id] = value + + # Clear everything *after* this dimension in DSD order. + found = False + for d in self._dimensions: + if found: + self._selections[d] = None + if d == dim_id: + found = True + + # Invalidate stale cache entries. + self._avail_cache.clear() + self.current_dimension = self.get_next_dimension_to_select() + + return dict(self._selections) + + def get_options_for_dimension( + self, dimension_id: str | None = None + ) -> list[dict[str, str]]: + """Return available values for a dimension given the current selections. + + Parameters + ---------- + dimension_id : str, optional + The dimension to query. Defaults to the next unselected + dimension. + + Returns + ------- + list[dict] + ``[{label, value}, ...]`` reflecting cascading constraints. + """ + dimension_id = dimension_id or self.get_next_dimension_to_select() + if not dimension_id: + return [] + if dimension_id not in self._selections: + raise ValueError( + f"Dimension '{dimension_id}' not found for dataflow '{self.dataflow_id}'." + ) + + avail = self._fetch_current_availability() + codes = avail.get(dimension_id, []) + labels = self._get_labels(dimension_id) + + return [{"label": labels.get(code, code), "value": code} for code in codes] + + def get_dimensions(self) -> dict[str, str | None]: + """Return the current selections dictionary.""" + return dict(self._selections) + + @property + def dimensions(self) -> list[str]: + """Return dimension IDs in DSD order.""" + return list(self._dimensions) + + @property + def pinned(self) -> dict[str, str]: + """Return currently pinned (non-None) selections.""" + return {k: v for k, v in self._selections.items() if v is not None} + + def available(self, dim_id: str) -> list[dict[str, str]]: + """Alias for :meth:`get_options_for_dimension`.""" + return self.get_options_for_dimension(dim_id) + + def available_values(self, dim_id: str) -> list[str]: + """Return just the available codes for *dim_id* (no labels).""" + if dim_id not in self._selections: + raise OpenBBError( + f"'{dim_id}' is not a dimension of '{self.dataflow_id}'. Dimensions: {self._dimensions}" + ) + avail = self._fetch_current_availability() + return avail.get(dim_id, []) + + def set(self, dim_id: str, value: str) -> OecdParamsBuilder: + """Pin a dimension (chainable). Validates against current availability. + + Parameters + ---------- + dim_id : str + Dimension ID. + value : str + Code or ``+``-separated codes. + + Returns + ------- + OecdParamsBuilder + Self, for chaining. + """ + if dim_id not in self._selections: + raise OpenBBError( + f"'{dim_id}' is not a dimension of '{self.dataflow_id}'. Dimensions: {self._dimensions}" + ) + + # Validate each code. 
+ avail = self._fetch_current_availability() + valid_codes = set(avail.get(dim_id, [])) + codes = [c.strip() for c in value.split("+") if c.strip()] + invalid = [c for c in codes if c not in valid_codes] + + if invalid: + labels = self._get_labels(dim_id) + sample = [f"{c} ({labels.get(c, c)})" for c in sorted(valid_codes)[:20]] + raise OpenBBError( + f"Invalid value(s) {invalid} for '{dim_id}' " + f"given current selections {self.pinned}. " + f"Available ({len(valid_codes)}): {sample}" + + (" …" if len(valid_codes) > 20 else "") + ) + + self._selections[dim_id] = value + + # Clear downstream selections (IMF behaviour). + found = False + for d in self._dimensions: + if found: + self._selections[d] = None + if d == dim_id: + found = True + + # Keep only the cache entry that matches the new state. + key = self._cache_key() + self._avail_cache = {k: v for k, v in self._avail_cache.items() if k == key} + + return self + + def unset(self, dim_id: str) -> OecdParamsBuilder: + """Remove the pin for *dim_id*. Returns self for chaining.""" + if dim_id in self._selections: + self._selections[dim_id] = None + self._avail_cache.clear() + return self + + def reset(self) -> OecdParamsBuilder: + """Clear all pins and caches. Returns self for chaining.""" + for d in self._selections: + self._selections[d] = None + self._avail_cache.clear() + return self + + def describe(self) -> list[dict]: + """Full description of every dimension and its current state.""" + avail = self._fetch_current_availability() + table_params = self._metadata.get_table_parameters(self._full_id) + result: list[dict] = [] + + for dim_id in self._dimensions: + info = table_params.get(dim_id, {}) + codes = avail.get(dim_id, []) + labels = self._get_labels(dim_id) + result.append( + { + "id": dim_id, + "name": info.get("name", dim_id), + "position": info.get("position", -1), + "role": info.get("role", ""), + "pinned": self._selections.get(dim_id), + "available_count": len(codes), + "available": [ + {"value": c, "label": labels.get(c, c)} for c in codes + ], + } + ) + + return result + + def summary(self) -> list[dict]: + """Compact summary: id, name, role, pinned value, available count.""" + avail = self._fetch_current_availability() + table_params = self._metadata.get_table_parameters(self._full_id) + result: list[dict] = [] + + for dim_id in self._dimensions: + info = table_params.get(dim_id, {}) + codes = avail.get(dim_id, []) + result.append( + { + "id": dim_id, + "name": info.get("name", dim_id), + "role": info.get("role", ""), + "pinned": self._selections.get(dim_id), + "available_count": len(codes), + } + ) + + return result + + def build(self) -> str: + """Build the v2 dimension filter string from current pins. + + Unpinned dimensions default to ``*`` (wildcard). + TIME_PERIOD is appended as the final ``*``. + """ + parts: list[str] = [] + for dim_id in self._dimensions: + parts.append(self._selections.get(dim_id) or "*") + # TIME_PERIOD is always the last position as wildcard. + parts.append("*") + return ".".join(parts) + + def build_url( + self, + last_n: int | None = None, + first_n: int | None = None, + ) -> str: + """Build a complete SDMX v2 data URL from the current state.""" + return self._metadata.build_data_url( + self._full_id, + dimension_filter=self.build(), + last_n=last_n, + first_n=first_n, + ) + + def fetch( + self, + start_date: str | None = None, + end_date: str | None = None, + ) -> dict: + """Fetch data based on the current selections. 
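+
+        Thin wrapper around ``OecdQueryBuilder.fetch_data``; the pinned
+        selections are forwarded as dimension keyword arguments.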
+
+        Parameters
+        ----------
+        start_date, end_date : str, optional
+            Date bounds.
+
+        Returns
+        -------
+        dict
+            ``{data: [...], metadata: {...}}``.
+        """
+        # pylint: disable=import-outside-toplevel
+        from openbb_oecd.utils.query_builder import OecdQueryBuilder
+
+        qb = OecdQueryBuilder()
+        return qb.fetch_data(
+            dataflow=self.dataflow_id,
+            start_date=start_date,
+            end_date=end_date,
+            **self.pinned,  # type: ignore[arg-type]
+        )
+
+    def _cache_key(self) -> frozenset:
+        """Hashable key for the current pinned state."""
+        return frozenset((k, v) for k, v in self._selections.items() if v is not None)
+
+    def _fetch_current_availability(self) -> dict[str, list[str]]:
+        """Fetch (or return cached) availability for the current pinned state."""
+        key = self._cache_key()
+        if key not in self._avail_cache:
+            self._avail_cache[key] = self._metadata.fetch_availability(
+                self._full_id, self.pinned
+            )
+        return self._avail_cache[key]
+
+    def _get_labels(self, dim_id: str) -> dict[str, str]:
+        """Get codelist labels for a dimension, caching them."""
+        if dim_id not in self._labels:
+            self._labels[dim_id] = self._metadata.get_codelist_for_dimension(
+                self._full_id, dim_id
+            )
+        return self._labels[dim_id]
+
+    def __repr__(self) -> str:  # noqa: D105
+        return f"OecdParamsBuilder({self.dataflow_id}, pinned={self.pinned}, dims={self._dimensions})"
diff --git a/openbb_platform/providers/oecd/openbb_oecd/utils/query_builder.py b/openbb_platform/providers/oecd/openbb_oecd/utils/query_builder.py
new file mode 100644
index 00000000000..27c5e148364
--- /dev/null
+++ b/openbb_platform/providers/oecd/openbb_oecd/utils/query_builder.py
@@ -0,0 +1,619 @@
+"""OECD Query Builder for constructing and executing SDMX v2 data queries.
+
+Shared data-fetching engine that all OECD fetchers delegate to.
+Uses OecdMetadata for structural metadata (DSD-driven dimension ordering,
+codelist lookups, availability constraints) and SDMX-CSV v2 for data retrieval.
+"""
+
+# pylint: disable=C0302,R0911,R0912,R0913,R0914,R0915,R0917,R1702,W0212
+# flake8: noqa: PLR0911,PLR0912,PLR0913,PLR0917
+
+import warnings
+from io import StringIO
+from typing import TYPE_CHECKING, Any
+
+from openbb_core.app.model.abstract.error import OpenBBError
+from openbb_oecd.utils.metadata import OecdMetadata
+from pandas.api.types import is_string_dtype
+
+if TYPE_CHECKING:
+    from pandas import DataFrame  # type: ignore[import-untyped]
+
+
+class OecdQueryBuilder:
+    """Build and execute OECD SDMX v2 data queries."""
+
+    def __init__(self):
+        """Initialize the query builder with the metadata singleton."""
+        self.metadata = OecdMetadata()
+
+    def build_url(
+        self,
+        dataflow: str,
+        start_date: str | None = None,
+        end_date: str | None = None,
+        limit: int | None = None,
+        **kwargs: Any,
+    ) -> str:
+        """Build an SDMX v2 data URL for *dataflow* with dimension kwargs.
+
+        Parameters
+        ----------
+        dataflow : str
+            Dataflow short ID ("DF_PRICES_ALL") or full v2 ID.
+        start_date : str | None
+            ISO date string ("YYYY-MM-DD" or "YYYY"). Applied as a
+            ``ge:`` bound inside the ``c[TIME_PERIOD]`` constraint.
+        end_date : str | None
+            ISO date string. Applied as a ``le:`` bound inside the
+            ``c[TIME_PERIOD]`` constraint.
+        limit : int | None
+            lastNObservations for the most recent N data points.
+        **kwargs
+            Dimension values keyed by dimension ID (keys are matched
+            case-insensitively against the DSD dimension IDs).
+            Use "+"-separated strings for multi-select.
+            Omitted dimensions default to "*" (wildcard).
+
+        Returns
+        -------
+        str
+            A fully-qualified SDMX v2 data URL.
+        """
+        # Resolve dimension kwargs to a filter string using DSD ordering.
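+        # For example, kwargs {"REF_AREA": "USA", "FREQ": "M"} become a
+        # path key such as "USA.M.*.*" (illustrative; the number and order
+        # of positions follow the dataflow's DSD).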
+        dimension_filter = self._build_dimension_filter(dataflow, **kwargs)
+
+        # When time constraints are present, detail=dataonly must NOT be
+        # used — the OECD API silently ignores c[TIME_PERIOD] filters
+        # when detail=dataonly is set.
+        has_time_constraint = bool(start_date or end_date)
+
+        # Build the base URL via metadata.
+        url = self.metadata.build_data_url(
+            dataflow,
+            dimension_filter=dimension_filter,
+            last_n=limit,
+            detail="full" if has_time_constraint else "dataonly",
+        )
+
+        # v2 uses c[TIME_PERIOD] constraints (not startPeriod/endPeriod).
+        # Both ge: and le: must be in a SINGLE c[TIME_PERIOD] param,
+        # comma-separated. Duplicate query params are silently ignored.
+        _time_parts: list[str] = []
+        if start_date:
+            _time_parts.append(f"ge:{_format_period(start_date)}")
+        if end_date:
+            _time_parts.append(f"le:{_format_period(end_date)}")
+        if _time_parts:
+            url += f"&c[TIME_PERIOD]={','.join(_time_parts)}"
+
+        return url
+
+    def _build_dimension_filter(self, dataflow: str, **kwargs: Any) -> str:
+        """Build a dot-separated dimension filter from keyword args.
+
+        Delegates to OecdMetadata.build_dimension_filter, which includes
+        ALL dimensions (including TIME_PERIOD) in DSD order — required by
+        the v2 API.
+
+        Performs case-insensitive matching of kwarg keys to DSD dimension
+        IDs, so callers can use ref_area="USA" or REF_AREA="USA"
+        interchangeably.
+        """
+        # Get the DSD dimension order for case-insensitive matching.
+        dim_order = self.metadata.get_dimension_order(dataflow)
+        dim_id_map = {d.lower(): d for d in dim_order}
+
+        # Normalise kwargs to canonical dimension IDs.
+        normalised: dict[str, str] = {}
+        for key, value in kwargs.items():
+            matched = dim_id_map.get(key.lower())
+            if matched:
+                normalised[matched] = str(value) if value is not None else "*"
+
+        # Delegate to metadata which includes TIME_PERIOD in the filter.
+        return self.metadata.build_dimension_filter(dataflow, **normalised)
+
+    def validate_dimension_constraints(self, dataflow: str, **kwargs: Any) -> None:
+        """Validate dimension parameters against the dataflow's content constraints.
+
+        Loads the embedded content constraints for the dataflow and
+        checks that each provided value appears among the allowed codes
+        for its dimension. Each dimension is validated independently;
+        this avoids progressive cascading which can wrongly exclude
+        values for dimensions late in DSD order (e.g. TABLE_IDENTIFIER)
+        when earlier auto-pinned dims narrow the context without the
+        later pin.
+
+        Parameters
+        ----------
+        dataflow : str
+            Dataflow short ID or full v2 ID.
+        **kwargs
+            Dimension parameters to validate. Non-dimension kwargs
+            (start_date, end_date, limit) are ignored.
+
+        Raises
+        ------
+        ValueError
+            If any dimension value is not available given the selections.
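+
+        Examples
+        --------
+        Illustrative; valid dimension IDs and codes depend on the dataflow:
+
+        >>> qb = OecdQueryBuilder()
+        >>> qb.validate_dimension_constraints(
+        ...     "DF_PRICES_ALL", REF_AREA="USA", FREQ="M"
+        ... )  # doctest: +SKIP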
+ """ + # pylint: disable=import-outside-toplevel + from openbb_core.app.model.abstract.warning import OpenBBWarning + + dim_order = self.metadata.get_dimension_order(dataflow) + dim_id_map = {d.lower(): d for d in dim_order} + non_dimension_keys = {"start_date", "end_date", "limit"} + + dim_kwargs: dict[str, str] = {} + for key, value in kwargs.items(): + if key in non_dimension_keys: + continue + matched = dim_id_map.get(key.lower()) + if matched and value is not None: + dim_kwargs[matched] = str(value) + + if not dim_kwargs: + return + + try: + constrained = self.metadata.get_constrained_values(dataflow) + except Exception as exc: # noqa: BLE001 + warnings.warn( + f"Could not load constraints for dataflow '{dataflow}': {exc}", + OpenBBWarning, + stacklevel=2, + ) + return + + for dim_id in dim_order: + if dim_id not in dim_kwargs: + continue + + user_value = dim_kwargs[dim_id] + + if "+" in user_value: + user_values = [v.strip() for v in user_value.split("+")] + elif "," in user_value: + user_values = [v.strip() for v in user_value.split(",")] + else: + user_values = [user_value] + + user_values = [v for v in user_values if v and v != "*"] + if not user_values: + continue + + entries = constrained.get(dim_id, []) + if not entries: + continue + + available_codes = {e["value"] for e in entries} + + invalid = [v for v in user_values if v not in available_codes] + if invalid: + prior = { + d: dim_kwargs[d] + for d in dim_order + if d in dim_kwargs and d != dim_id + } + labels = {e["value"]: e["label"] for e in entries} + avail_display = [ + ( + f"{code} ({labels[code]})" + if code in labels and labels[code] != code + else code + ) + for code in sorted(available_codes) + ] + raise ValueError( + f"Invalid value(s) for dimension '{dim_id}': {invalid}. " + + (f"Given selections {prior}, " if prior else "") + + f"Available options ({len(avail_display)}): " + + ", ".join(avail_display) + ) + + def fetch_data( + self, + dataflow: str, + start_date: str | None = None, + end_date: str | None = None, + limit: int | None = None, + _skip_validation: bool = False, + **kwargs: Any, + ) -> dict: + """Fetch data from the OECD SDMX v2 API. + + This is the main entry point for all refactored models. + + Parameters + ---------- + dataflow : str + Dataflow short ID ("DF_PRICES_ALL") or full v2 ID. + start_date, end_date : str | None + Date bounds (ISO format). + limit : int | None + lastNObservations to limit time series depth. + _skip_validation : bool + Skip constraint validation (when caller already validated). + **kwargs + Dimension parameters keyed by dimension ID. + + Returns + ------- + dict + {"data": list[dict], "metadata": dict} + + Each row in data contains: + - One key per dimension ID (the **code**). + - One key per {dim_id}_label (the human-readable label). + - TIME_PERIOD — the time period string ("2024", "2024-Q3", etc.). + - OBS_VALUE — the numeric observation value (float | None). + + metadata contains: + - dataflow_id, dataflow_name, url, row_count. + """ + # pylint: disable=import-outside-toplevel + from openbb_core.provider.utils.errors import EmptyDataError + from pandas import read_csv, to_numeric + + # Validate constraints (unless caller opted out). + if not _skip_validation: + self.validate_dimension_constraints(dataflow, **kwargs) + + url = self.build_url(dataflow, start_date, end_date, limit, **kwargs) + + # Fetch SDMX-CSV v2 with labels=both. 
+ headers = { + "Accept": "application/vnd.sdmx.data+csv; version=2.0.0; labels=both", + "User-Agent": "OpenBB/1.0", + } + + # Attempt the request; if it fails with 404 and there are + # multi-value dimensions (e.g. "USA+DEU"), fall back to + # individual requests per value and merge results. + text = self._fetch_with_multi_value_fallback( + url, + headers, + dataflow, + start_date, + end_date, + limit, + kwargs, + ) + + # Parse the CSV. + try: + df = read_csv(StringIO(text)) + except Exception as exc: + raise OpenBBError( + f"Failed to parse OECD CSV response: {exc}\nURL: {url}" + ) from exc + + if df.empty: + raise OpenBBError( + EmptyDataError(f"No data rows for dataflow '{dataflow}'. URL: {url}") + ) + + # Split "code: label" columns into separate code and label columns. + df = self._split_label_columns(df, dataflow) + + # Ensure OBS_VALUE is numeric. + if "OBS_VALUE" in df.columns: + df["OBS_VALUE"] = to_numeric(df["OBS_VALUE"], errors="coerce") + + # Build result metadata. + df_meta = self.metadata.dataflows.get( + self.metadata._resolve_dataflow_id(dataflow), {} + ) + metadata = { + "dataflow_id": dataflow, + "dataflow_name": df_meta.get("name", dataflow), + "url": url, + "row_count": len(df), + } + + # Convert NaN → None so downstream JSON serialization doesn't break. + records = df.where(df.notna(), other=None).to_dict(orient="records") + + return {"data": records, "metadata": metadata} + + def _fetch_with_multi_value_fallback( + self, + url: str, + headers: dict, + dataflow: str, + start_date: str | None, + end_date: str | None, + limit: int | None, + dim_kwargs: dict[str, Any], + ) -> str: + """Fetch CSV text, falling back to per-value requests if multi-value fails. + + The OECD SDMX v2 API doesn't reliably support the ``+`` + multi-value separator in the path-based dimension key. + When a 404 is received and at least one dimension contains ``+``, + we split the first such dimension into individual requests + and concatenate the CSV results. + """ + # pylint: disable=import-outside-toplevel + from requests.exceptions import HTTPError + + try: + response = _make_request(url, headers=headers, timeout=120) + text = response.text + if text and text.strip(): + return text + raise OpenBBError( + f"Empty response from OECD for dataflow '{dataflow}'. URL: {url}" + ) + except (HTTPError, OpenBBError) as exc: + # Identify dimensions with multi-value (contains +). + multi_dims = { + k: v for k, v in dim_kwargs.items() if isinstance(v, str) and "+" in v + } + if not multi_dims: + raise OpenBBError( + f"OECD data request failed: {exc}\nURL: {url}" + ) from exc + + # Pick the first multi-value dimension to split on. + split_dim = next(iter(multi_dims)) + values = multi_dims[split_dim].split("+") + + csv_parts: list[str] = [] + csv_header: str | None = None + for val in values: + single_kwargs = {**dim_kwargs, split_dim: val} + single_url = self.build_url( + dataflow, + start_date, + end_date, + limit, + **single_kwargs, + ) + try: + resp = _make_request(single_url, headers=headers, timeout=120) + except Exception: # noqa: BLE001, S112 + continue + part = resp.text + if not part or not part.strip(): + continue + lines = part.strip().split("\n") + if csv_header is None: + csv_header = lines[0] + csv_parts.append(part.strip()) + else: + # Skip the header line for subsequent parts. 
+ csv_parts.append("\n".join(lines[1:])) + + if not csv_parts: + raise OpenBBError( + f"OECD data request failed for all values of '{split_dim}': {values}\nURL: {url}" + ) from exc + + return "\n".join(csv_parts) + + def _split_label_columns(self, df: "DataFrame", dataflow: str) -> "DataFrame": + """Process SDMX-CSV v2 labels=both columns. + + The labels=both format gives column headers like + "MEASURE: Measure" and cell values like + "B1GQ: Gross domestic product". + + This method renames columns from "DIM: Label" to "DIM", + splits cell values "code: label" into DIM (code) and + DIM_label (label), and leaves TIME_PERIOD / OBS_VALUE as-is. + """ + + # Build a map of original column names → clean dimension IDs. + rename_map: dict[str, str] = {} + + for col in df.columns: + if ": " in col: + dim_id = col.split(":")[0].strip() + rename_map[col] = dim_id + else: + rename_map[col] = col + + df = df.rename(columns=rename_map) + + # Identify dimension columns that contain "code: label" values. + # Exclude TIME_PERIOD, OBS_VALUE, and any purely numeric columns. + skip_cols = { + "TIME_PERIOD", + "OBS_VALUE", + "DATAFLOW", + "STRUCTURE", + "STRUCTURE_ID", + "ACTION", + } + dim_cols = [ + c for c in df.columns if c not in skip_cols and is_string_dtype(df[c]) + ] + + for col in dim_cols: + # Check if values actually contain ": " separator. + sample = df[col].dropna().head(10) + if sample.empty: + continue + + has_labels = sample.str.contains(": ", regex=False).any() + + if has_labels: + # Split into code and label. + split = df[col].str.split(": ", n=1, expand=True) + df[col] = split[0].str.strip() + if split.shape[1] > 1: + df[f"{col}_label"] = split[1].str.strip() + else: + df[f"{col}_label"] = df[col] + else: + # No labels embedded — try to resolve from codelist. + cl = self.metadata.get_codelist_for_dimension(dataflow, col) + if cl: + df[f"{col}_label"] = df[col].map(cl).fillna(df[col]) + + return df + + def get_translation_maps(self, dataflow: str) -> dict[str, dict[str, str]]: + """Return code-to-label maps for all dimensions. + + Returns + ------- + dict[str, dict[str, str]] + {dim_id: {code: label, ...}} for all dimensions. + """ + params = self.metadata.get_dataflow_parameters(dataflow) + return { + dim_id: {opt["value"]: opt["label"] for opt in options} + for dim_id, options in params.items() + } + + def get_country_dimension(self, dataflow: str) -> str | None: + """Return the dimension ID used for country/reference area, or None.""" + classification = self.metadata.classify_dimensions(dataflow) + country_dims = classification.get("country", []) + return country_dims[0]["id"] if country_dims else None + + def get_frequency_dimension(self, dataflow: str) -> str | None: + """Return the dimension ID used for frequency, or None.""" + classification = self.metadata.classify_dimensions(dataflow) + freq_dims = classification.get("freq", []) + return freq_dims[0]["id"] if freq_dims else None + + def list_tables( + self, + query: str | None = None, + topic: str | None = None, + subtopic: str | None = None, + ) -> list[dict]: + """List all OECD tables (every dataflow is a table). + + Parameters + ---------- + query : str, optional + Keyword search on table name / dataflow ID / topic. + topic : str, optional + Topic code filter (e.g. "ECO", "HEA"). + subtopic : str, optional + Subtopic code filter (e.g. "ECO_OUTLOOK"). + + Returns + ------- + list[dict] + [{table_id, name, topic, topic_id, subtopic, subtopic_id, dataflow_id}, ...] 
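+
+        Examples
+        --------
+        A sketch; the codes shown are illustrative and the real values
+        come from the cached OECD metadata (``qb`` is an instance of
+        this class):
+
+        >>> qb.list_tables(query="balance", topic="ECO")  # doctest: +SKIP
+        [{'table_id': 'DF_BOP', 'name': 'Balance of payments', ...}]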
+ """ + return self.metadata.list_tables(query=query, topic=topic, subtopic=subtopic) + + def get_table(self, table_id: str) -> dict: + """Get full metadata for a table: name, dimensions, allowed values. + + Parameters + ---------- + table_id : str + Dataflow short ID (e.g. "DF_T725R_Q") or full ID. + + Returns + ------- + dict + {'dataflow_id', 'short_id', 'name', 'description', 'dimensions', ...} + """ + return self.metadata.get_table(table_id) + + +def _make_request(url: str, headers: dict | None = None, timeout: int = 30) -> Any: + """HTTP GET with raw URL support (OECD requires un-encoded brackets).""" + # pylint: disable=import-outside-toplevel + import time as _time + + import requests as _requests # type: ignore[import-untyped] + + # Use a prepared request so that brackets in ``c[TIME_PERIOD]`` are + # sent as-is instead of being percent-encoded to ``%5B`` / ``%5D`` + # which the OECD SDMX v2 API rejects with a 404. + max_retries = 5 + for attempt in range(max_retries): + req = _requests.Request("GET", url, headers=headers or {}) + prepared = req.prepare() + prepared.url = url # override — keep raw brackets + + sess = _requests.Session() + resp = sess.send(prepared, timeout=timeout) + + if resp.status_code == 429 and attempt < max_retries - 1: + retry_after = int(resp.headers.get("Retry-After", 15 * (attempt + 1))) + _time.sleep(min(max(retry_after, 15), 90)) + continue + + resp.raise_for_status() + return resp + + # Should not reach here, but just in case: + resp.raise_for_status() + return resp + + +def _format_period(date_str: str) -> str: + """Normalise a date string to SDMX period format. + + Parameters + ---------- + date_str : str + Date in "YYYY-MM-DD", "YYYY-MM", or "YYYY" format. + + Returns + ------- + str + Period string suitable for startPeriod/endPeriod. + """ + if not date_str: + return date_str + + s = str(date_str) + parts = s.split("-") + + if len(parts) == 3: + return f"{parts[0]}-{parts[1]}" + + return s + + +def parse_time_period(time_str: str) -> str: + """Convert SDMX time period strings to standardised date strings. + + Parameters + ---------- + time_str : str + SDMX time period (e.g. "2024", "2024-Q3", + "2024-06", "2024-03-15"). + + Returns + ------- + str + ISO date string ("YYYY-MM-DD"). Returns the original + string if the format is unrecognised. 
+ """ + if not time_str: + return time_str + + s = str(time_str).strip() + + # Daily: already YYYY-MM-DD + if len(s) == 10 and s[4] == "-" and s[7] == "-": + return s + + # Quarterly: YYYY-QN (must check before monthly since both are len 7) + if "Q" in s: + parts = s.split("-Q") + if len(parts) == 2 and parts[0].isdigit() and parts[1].isdigit(): + year = parts[0] + quarter = int(parts[1]) + month = (quarter - 1) * 3 + 1 + return f"{year}-{month:02d}-01" + + # Monthly: YYYY-MM + if len(s) == 7 and s[4] == "-": + return f"{s}-01" + + # Annual: YYYY + if len(s) == 4 and s.isdigit(): + return f"{s}-01-01" + + return s diff --git a/openbb_platform/providers/oecd/openbb_oecd/utils/table_builder.py b/openbb_platform/providers/oecd/openbb_oecd/utils/table_builder.py new file mode 100644 index 00000000000..1d8da1bc334 --- /dev/null +++ b/openbb_platform/providers/oecd/openbb_oecd/utils/table_builder.py @@ -0,0 +1,1227 @@ +"""OECD Table Builder — hierarchical table data fetching with validation.""" + +# pylint: disable=C0302,R0912,R0913,R0914,R0915,R0917,R1702,W0212,W0640 + +from __future__ import annotations + +import warnings +from typing import TYPE_CHECKING, Any + +from openbb_core.app.model.abstract.warning import OpenBBWarning +from openbb_core.provider.utils.errors import OpenBBError + +if TYPE_CHECKING: + from openbb_oecd.utils.metadata import OecdMetadata + from openbb_oecd.utils.query_builder import OecdQueryBuilder + + +def _calculate_depth( + node: dict, + indicator_by_code: dict[str, dict], + visited: set | None = None, +) -> int: + """Trace the parent chain to find tree depth (0 for roots).""" + if visited is None: + visited = set() + code = node.get("code", "") + if not code or code in visited: + return 0 + visited.add(code) + parent = node.get("parent") + if parent is None or parent not in indicator_by_code: + return 0 + return 1 + _calculate_depth(indicator_by_code[parent], indicator_by_code, visited) + + +class OecdTableBuilder: + """Fetch and organise OECD data according to hierarchical table structures. + + Mirrors IMF's ``ImfTableBuilder`` interface while using OECD-specific + metadata (``OecdMetadata``) and data access (``OecdQueryBuilder``). + """ + + def __init__( + self, + metadata: OecdMetadata | None = None, + query_builder: OecdQueryBuilder | None = None, + ) -> None: + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.metadata import OecdMetadata as _Meta + from openbb_oecd.utils.query_builder import OecdQueryBuilder as _QB + + self.metadata: _Meta = metadata or _Meta() + self.query_builder: _QB = query_builder or _QB() + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + + def get_table( # noqa: PLR0912 + self, + dataflow: str | None = None, + table_id: str | None = None, + start_date: str | None = None, + end_date: str | None = None, + limit: int | None = None, + depth: int | None = None, + parent_id: str | None = None, + indicators: list[str] | str | None = None, + country: str | None = None, + frequency: str | None = None, + use_labels: bool = True, + **kwargs: Any, + ) -> dict: + """Fetch hierarchical table data. + + Parameters + ---------- + dataflow : str | None + Dataflow short ID (e.g. ``"DF_EO"``). Can be omitted when + *table_id* uses the ``"DATAFLOW::TABLE"`` format. + table_id : str | None + ``TABLE_IDENTIFIER`` value (e.g. ``"T101"``), or combined + ``"DF_EO::T101"`` format. 
When ``None``, auto-selects the + first available table for the dataflow (if there is only one). + start_date, end_date : str | None + Date bounds (ISO or SDMX period like ``"2024-Q3"``). + limit : int | None + ``lastNObservations`` (limit time series depth). + depth : int | None + Restrict hierarchy to items at this depth (``0`` = top level). + parent_id : str | None + Restrict to children of this parent indicator code. + indicators : list | str | None + Restrict to specific indicator codes. + country : str | None + Country name / ISO code (``"AUT"``). Multi-select: + ``"AUT,DEU"`` or ``"austria,germany"``. + frequency : str | None + Frequency code (``"Q"``, ``"A"``, ``"M"``). + use_labels : bool + Use human-readable labels for dimension columns (default). + **kwargs + Extra dimension filters (e.g. ``MEASURE="CPI"``). + + Returns + ------- + dict + ``{table_metadata, structure, data, series_metadata}`` + """ + # pylint: disable=import-outside-toplevel + from openbb_oecd.utils.metadata import ( + _NON_INDICATOR_DIMENSIONS, + _TABLE_GROUP_CANDIDATES, + ) + + # ---- Parse combined "DATAFLOW::TABLE" format ---- + if table_id and "::" in table_id: + parsed_df, parsed_tid = table_id.split("::", 1) + if dataflow is not None and dataflow != parsed_df: + raise OpenBBError( + f"Dataflow mismatch: provided '{dataflow}' but table_id specifies '{parsed_df}'." + ) + dataflow = parsed_df + table_id = parsed_tid + elif table_id and dataflow is None: + # Bare dataflow ID (e.g. "DF_FDI_FLOW_AGGR") — treat as + # dataflow with no specific table selection. + dataflow = table_id + table_id = None + + if dataflow is None: + raise OpenBBError( + "dataflow is required. Provide it directly or use table_id in 'DATAFLOW::TABLE' format." + ) + + # ---- Resolve dataflow + ensure structure loaded ---- + full_id = self.metadata._resolve_dataflow_id(dataflow) + + # If the dataflow is a section child, resolve up to the parent. + # Section children are organisational groupings — only the parent + # dataflow actually serves data. + _section_map = self.metadata._detect_section_families() + _parent_full = _section_map.get(full_id) + if _parent_full: + full_id = _parent_full + info = self.metadata.dataflows.get(_parent_full, {}) + dataflow = info.get("short_id", _parent_full.split("@")[-1].split("/")[0]) + + self.metadata._ensure_structure(full_id) + dsd = self.metadata.datastructures.get(full_id, {}) + dims = sorted(dsd.get("dimensions", []), key=lambda d: d["position"]) + + # ---- Auto-select table_id if not given ---- + if table_id is None: + available = self.metadata.get_dataflow_hierarchies(dataflow) + if len(available) == 1: + table_id = available[0]["id"] + elif len(available) == 0: + # No TABLE_IDENTIFIER — treat the whole dataflow as one table. + table_id = None + # else: multiple tables exist but caller didn't choose → we'll + # fetch without a TABLE_IDENTIFIER filter (entire dataflow). 
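+
+        # At this point both identifiers are resolved. Using the
+        # docstring's own examples:
+        #   table_id="DF_EO::T101"      -> dataflow="DF_EO", table_id="T101"
+        #   table_id="DF_FDI_FLOW_AGGR" -> whole dataflow; a table is only
+        #                                  auto-selected if exactly one exists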
+ + # ---- Build hierarchy structure ---- + table_structure = self.metadata.get_dataflow_table_structure( + dataflow, table_id or "" + ) + all_hierarchy_entries: list[dict] = table_structure.get("indicators", []) + + # ---- Filter by user request ---- + filtered = list(all_hierarchy_entries) + + if indicators is not None: + ind_set = {indicators} if isinstance(indicators, str) else set(indicators) + filtered = [e for e in filtered if e["code"] in ind_set] + elif parent_id is not None: + filtered = [e for e in filtered if e.get("parent") == parent_id] + elif depth is not None: + filtered = [e for e in filtered if e.get("level") == depth] + + if not filtered: + raise OpenBBError( + f"No indicators match filters (depth={depth}, " + f"parent_id={parent_id}, indicators={indicators}). " + f"Total in hierarchy: {len(all_hierarchy_entries)}" + ) + + # ---- Identify the indicator dimension ---- + indicator_dim = self.metadata._find_indicator_dimension(full_id) + if not indicator_dim: + # Fallback: pick the dimension with the most unique codes. + best_dim, best_count = None, 0 + for dim in dims: + if dim["id"] == "TIME_PERIOD": + continue + cl = self.metadata.codelists.get(dim.get("codelist_id", ""), {}) + if len(cl) > best_count: + best_count = len(cl) + best_dim = dim["id"] + indicator_dim = best_dim or "MEASURE" + + # ---- Map hierarchy codes to the indicator dimension ---- + hierarchy_codes = [e["code"] for e in filtered if e.get("code")] + + # ---- Resolve country / frequency ---- + country_dim = self.query_builder.get_country_dimension(dataflow) + if country and country_dim and country_dim not in kwargs: + codes = self.metadata.resolve_country_codes( + dataflow, country.replace("+", ",") + ) + if codes: + kwargs[country_dim] = "+".join(codes) + + if frequency: + freq_dim = self.query_builder.get_frequency_dimension(dataflow) + if freq_dim and freq_dim not in kwargs: + kwargs[freq_dim] = frequency.upper() + + # ---- Set table-grouping dimension if present ---- + # TABLE_IDENTIFIER is the standard; CHAPTER and others act as + # alternatives (see _TABLE_GROUP_CANDIDATES in metadata.py). + table_group_dim: str | None = None # which dim carries the table_id + if table_id: + for candidate in _TABLE_GROUP_CANDIDATES: + dim_present = any(d["id"] == candidate for d in dims) + if dim_present and candidate not in kwargs: + kwargs[candidate] = table_id + table_group_dim = candidate + break + + # ---- Build indicator post-filter ---- + # For TABLE_IDENTIFIER-based dataflows the indicator codelist is + # partitioned per table, so the hierarchy codes are accurate. + # For dimension-based grouping (e.g. CHAPTER) the hierarchy + # covers the *entire* dataflow — using it as a post-filter would + # wrongly discard indicators. In that case the API query already + # constrains the results via the pinned dimension. + codes_for_post_filter: set[str] | None = None + if hierarchy_codes and ( + table_group_dim is None or table_group_dim == "TABLE_IDENTIFIER" + ): + codes_for_post_filter = set(hierarchy_codes) + + # ---- Pin dimensions ---- + # Metadata dims are parameters (like country / frequency) — they + # describe HOW the data is measured, not WHAT is being measured. + # Pin them in the request using a preference list so we only + # request the data we actually want. + + _SKIP_DIMS = { + "TIME_PERIOD", + indicator_dim, + *(k for k in kwargs), # Already pinned by caller / above logic + } + + # Structural dims — pin to neutral/aggregate value. 
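+        # For example, when SECTOR is present and unpinned, prefer the
+        # aggregate "S1" so a single total-economy series is requested
+        # instead of every sector breakdown. These preference lists are
+        # heuristics chosen as sensible defaults, not API requirements.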
+ _STRUCTURAL_PIN_PREFERENCES: dict[str, list[str]] = { + "SECTOR": ["S1", "_Z", "_T"], + "COUNTERPART_SECTOR": ["S1"], + "INSTR_ASSET": ["_Z"], + } + _NEUTRAL_CODES = {"_Z", "_T"} + + # Country-like dimensions (for secondary country pin logic). + _COUNTRY_DIMS = { + "REF_AREA", + "COUNTERPART_AREA", + "JURISDICTION", + "COUNTRY", + "AREA", + } + + # Metadata / measurement parameter dims — pin to preferred value. + _METADATA_PIN_PREFERENCES: dict[str, list[str]] = { + "ADJUSTMENT": ["Y", "N"], + "UNIT_MEASURE": ["XDC", "USD_EXC", "USD_PPP", "PB", "PS"], + "PRICE_BASE": ["V", "_Z"], + "TRANSFORMATION": ["N", "LA"], + } + + try: + avail = self.metadata.fetch_availability(dataflow, pinned=kwargs) + + # Identify dimensions needing auto-selection. + auto_dims: list[str] = [] + for dim_id, available_vals in avail.items(): + if dim_id in _SKIP_DIMS: + continue + if codes_for_post_filter and ( + set(available_vals) & codes_for_post_filter + ): + continue + auto_dims.append(dim_id) + + # First pass: pin single-value dimensions (unambiguous). + for dim_id in auto_dims: + vals = avail.get(dim_id, []) + if len(vals) == 1: + kwargs[dim_id] = vals[0] + + # Second pass: progressively pin remaining dimensions, + # re-fetching availability after each new pin so that + # cross-dimension constraints are respected. + remaining = [d for d in auto_dims if d not in kwargs] + _refresh = bool(remaining) + for dim_id in remaining: + if _refresh: + avail = self.metadata.fetch_availability(dataflow, pinned=kwargs) + _refresh = False + available_vals = avail.get(dim_id, []) + if not available_vals: + continue + _old_len = len(kwargs) + if len(available_vals) == 1: + kwargs[dim_id] = available_vals[0] + # Metadata parameter dims → pin to best available value. + elif dim_id in _METADATA_PIN_PREFERENCES: + prefs = _METADATA_PIN_PREFERENCES[dim_id] + for pref in prefs: + if pref in available_vals: + kwargs[dim_id] = pref + break + else: + kwargs[dim_id] = available_vals[0] + # Structural dims → pin to preferred aggregate value. + elif dim_id in _STRUCTURAL_PIN_PREFERENCES: + prefs = _STRUCTURAL_PIN_PREFERENCES[dim_id] + if ( + set(prefs) <= _NEUTRAL_CODES + and len(available_vals) > len(prefs) + 1 + ): + pass # data-carrying dimension — wildcard + else: + for pref in prefs: + if pref in available_vals: + kwargs[dim_id] = pref + break + # ACTIVITY / EXPENDITURE may carry real series data. + elif dim_id in ("ACTIVITY", "EXPENDITURE"): + pass + # Secondary country dims (e.g. COUNTERPART_AREA) — pin + # to aggregate / world value so queries don't explode. + elif dim_id in _COUNTRY_DIMS and dim_id != country_dim: + for pref in ("W", "WLD", "_T", "_Z"): + if pref in available_vals: + kwargs[dim_id] = pref + break + else: + # No known aggregate — take first value. 
+ kwargs[dim_id] = available_vals[0] + if len(kwargs) > _old_len: + _refresh = True + except Exception: # noqa: BLE001 + avail = {} + + # ---- Validate constraints ---- + if kwargs or start_date or end_date: + try: + self.query_builder.validate_dimension_constraints( + dataflow, + start_date=start_date, + end_date=end_date, + **kwargs, + ) + except ValueError: + raise + except Exception as exc: # noqa: BLE001 + warnings.warn( + f"Constraint validation failed: {exc}", + OpenBBWarning, + stacklevel=2, + ) + + # ---- Fetch data ---- + raw = self.query_builder.fetch_data( + dataflow=dataflow, + start_date=start_date, + end_date=end_date, + limit=limit, + _skip_validation=True, + **kwargs, + ) + data_rows: list[dict] = raw.get("data", []) + + # Post-fetch filter to hierarchy codes. + if codes_for_post_filter: + data_rows = [ + row + for row in data_rows + if row.get(indicator_dim) in codes_for_post_filter + ] + + # ---- Build hierarchy lookup maps ---- + indicator_by_code: dict[str, dict] = {} + for entry in all_hierarchy_entries: + code = entry.get("code") + if code: + indicator_by_code[code] = entry + + hierarchy_order_map: dict[str, dict] = {} + for entry in all_hierarchy_entries: + code = entry.get("code") + if not code: + continue + depth_val = entry.get("level") + if depth_val is None: + depth_val = _calculate_depth(entry, indicator_by_code) + hierarchy_order_map[code] = { + "order": entry.get("order", 0), + "level": depth_val, + "parent": entry.get("parent"), + "parent_code": entry.get("parent"), + "label": entry.get("label", code), + "children": entry.get("children", []), + } + + # ---- Canonical root ordering ---- + # Ensure Current Account (CA) sorts before Capital Account (KA) + # when both exist as root indicators. + if "CA" in hierarchy_order_map and "KA" in hierarchy_order_map: + _ca = hierarchy_order_map["CA"] + _ka = hierarchy_order_map["KA"] + if ( + _ca.get("parent") is None + and _ka.get("parent") is None + and _ca["order"] > _ka["order"] + ): + _ca["order"], _ka["order"] = _ka["order"], _ca["order"] + + # ---- Enrich data rows ---- + dim_ids = [d["id"] for d in dims if d["id"] != "TIME_PERIOD"] + + # Determine varying / fixed dimensions. + # ACCOUNTING_ENTRY is handled specially as sub-hierarchy — never + # treat it as a generic "varying dim" column. + varying_dims: list[str] = [] + fixed_values: dict[str, dict[str, str]] = {} + _has_acct_entry = any("ACCOUNTING_ENTRY" in r for r in data_rows) + for did in dim_ids: + unique = {row.get(did) for row in data_rows if did in row} + unique.discard(None) + if len(unique) > 1: + if did == "ACCOUNTING_ENTRY" and _has_acct_entry: + continue # handled as sub-hierarchy below + varying_dims.append(did) + elif unique: + code = next(iter(unique)) + label = next( + ( + row.get(f"{did}_label", code) + for row in data_rows + if f"{did}_label" in row + ), + code, + ) + fixed_values[did] = {"code": code, "label": label or code} # type: ignore + + # ---- Compound symbol treatment ---- + # Split varying dims into *content* dims (part of what is being + # measured, e.g. EXPENDITURE, SECTOR) and *metadata* dims (how + # it is measured, e.g. UNIT_MEASURE). Content dims are folded + # into a compound code + label; metadata dims remain as columns. 
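+        # Illustrative: with indicator "B1GQ" ("Gross domestic product")
+        # and a content dim such as EXPENDITURE="P3", the compound code
+        # becomes "B1GQ_P3" and the labels are joined, while a metadata
+        # dim like UNIT_MEASURE remains its own column.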
+ _COUNTRY_DIMS = { + "REF_AREA", + "COUNTERPART_AREA", + "JURISDICTION", + "COUNTRY", + "AREA", + } + _compound_dims: list[str] = [] # content-varying (join into code) + _meta_varying: list[str] = [] # metadata-varying (keep as columns) + for did in varying_dims: + if ( + did == indicator_dim + or did in _NON_INDICATOR_DIMENSIONS + or did in _COUNTRY_DIMS + or did in _TABLE_GROUP_CANDIDATES + ): + _meta_varying.append(did) + else: + _compound_dims.append(did) + # Replace varying_dims with only the metadata-varying ones. + varying_dims = _meta_varying + + # ACCOUNTING_ENTRY sort order: Balance/Net first (parent level), + # then the breakdown entries grouped under the parent. + _ACCT_SORT: dict[str, int] = { + "B": 0, + "N": 0, # Balance / Net → indicator's own level + "C": 1, + "D": 2, # Revenue / Expenditure + "A": 1, + "L": 2, # Assets / Liabilities + } + _BN_ENTRIES = {"B", "N", ""} + + # ---- Build parent-grouped hierarchy ---- + # When a parent indicator has children AND their data contains + # matching accounting entries (A/L or C/D), the parent's entries + # become sub-parent rows with the children's entries nested under + # them. Example: + # Financial account (Balance) + # Assets (FA total) ← sub-parent + # Direct investment ← child's asset portion + # Portfolio investment + # Liabilities (FA total) ← sub-parent + # Direct investment ← child's liability portion + + if _has_acct_entry: + from collections import defaultdict as _dd + + # (indicator_code, acct_entry) → [rows] + _ind_acct: dict[tuple[str, str], list[dict]] = _dd(list) + # indicator_code → set of acct entries + _ind_accts: dict[str, set[str]] = _dd(set) + for _r in data_rows: + _c = _r.get(indicator_dim, "") + _a = _r.get("ACCOUNTING_ENTRY", "") + _ind_acct[(_c, _a)].append(_r) + if _c: + _ind_accts[_c].add(_a) + + # Get a representative accounting-entry label from data rows. 
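+            # (In the recorded BOP fixture these are "B: Balance (revenue
+            # minus expenditure)", "C: Revenue" and "D: Expenditure".)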
+ _acct_labels: dict[str, str] = {} + for _r in data_rows: + _a = _r.get("ACCOUNTING_ENTRY", "") + if _a and _a not in _acct_labels: + _acct_labels[_a] = _r.get("ACCOUNTING_ENTRY_label", _a) + + all_rows: list[dict] = [] + _consumed: set[tuple[str, str]] = set() + + def _enrich_row( + row: dict, + order: int, + level: int, + asort: int, + child_order: int, + label: str, + is_header: bool, + ) -> None: + row["order"] = order + row["level"] = level + row["_acct_sort"] = asort + row["_child_order"] = child_order + row["label"] = label + row["is_category_header"] = is_header + _info = hierarchy_order_map.get(row.get(indicator_dim, ""), {}) + row["parent_id"] = _info.get("parent") + row["parent_code"] = _info.get("parent_code") + all_rows.append(row) + + def _synthetic_header( + code: str, + order: int, + level: int, + asort: int, + child_order: int, + label: str, + ) -> None: + all_rows.append( + { + indicator_dim: code, + "order": order, + "level": level, + "_acct_sort": asort, + "_child_order": child_order, + "label": label, + "is_category_header": True, + "parent_id": hierarchy_order_map.get(code, {}).get("parent"), + "parent_code": hierarchy_order_map.get(code, {}).get( + "parent_code" + ), + "TIME_PERIOD": "", + "OBS_VALUE": None, + } + ) + + def _emit_indicator( # noqa: PLR0912 + code: str, + base_level: int, + parent_order: int | None = None, + acct_filter: str | None = None, + child_order: int = 0, + ) -> None: + """Recursively emit enriched rows for *code*.""" + info = hierarchy_order_map.get(code) + if not info: + return + order = parent_order if parent_order is not None else info["order"] + children = [ + c for c in info.get("children", []) if c in hierarchy_order_map + ] + accts = _ind_accts.get(code, set()) + + # --- Filtered emit (child under parent's acct group) --- + if acct_filter is not None: + rows = _ind_acct.get((code, acct_filter), []) + for row in rows: + _enrich_row( + row, + order, + base_level, + _ACCT_SORT.get(acct_filter, 3), + child_order, + info["label"], + is_header=bool(children), + ) + if not rows: + _synthetic_header( + code, + order, + base_level, + _ACCT_SORT.get(acct_filter, 3), + child_order, + info["label"], + ) + _consumed.add((code, acct_filter)) + # Recurse into children for the same acct entry. + for ch in sorted( + children, + key=lambda c: hierarchy_order_map[c]["order"], + ): + if acct_filter in _ind_accts.get(ch, set()): + _emit_indicator( + ch, + base_level + 1, + parent_order=order, + acct_filter=acct_filter, + child_order=hierarchy_order_map[ch]["order"], + ) + return + + # --- Full emit (top-level or unfiltered) --- + bn = sorted(accts & _BN_ENTRIES) + non_bn = sorted( + accts - _BN_ENTRIES, + key=lambda a: _ACCT_SORT.get(a, 3), + ) + + # Determine which non-B/N entries can group children. + grouped: list[str] = [] + ungrouped: list[str] = [] + if children: + for acct in non_bn: + if any(acct in _ind_accts.get(ch, set()) for ch in children): + grouped.append(acct) + else: + ungrouped.append(acct) + else: + ungrouped = list(non_bn) + + has_sub = bool(grouped) or bool(ungrouped) or bool(children) + + # Emit B/N rows. + for b in bn: + for row in _ind_acct.get((code, b), []): + _enrich_row( + row, + info["order"], + base_level, + 0, + 0, + info["label"], + is_header=has_sub, + ) + _consumed.add((code, b)) + + # Synthetic B/N header when indicator has sub-rows but + # no balance/net data. 
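+            # (e.g. an indicator reporting only Assets/Liabilities rows,
+            # as in the Financial account example above: a header row is
+            # still needed to anchor the children.)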
+ if not bn and has_sub: + _synthetic_header( + code, + info["order"], + base_level, + 0, + 0, + info["label"], + ) + + # Grouped accounting entries — children nest under them. + for acct in grouped: + asort = _ACCT_SORT.get(acct, 3) + parent_rows = _ind_acct.get((code, acct), []) + lbl = _acct_labels.get(acct, acct) + for row in parent_rows: + lbl = row.get("ACCOUNTING_ENTRY_label", lbl) + _enrich_row( + row, + info["order"], + base_level + 1, + asort, + 0, + lbl, + is_header=True, + ) + if not parent_rows: + _synthetic_header( + code, + info["order"], + base_level + 1, + asort, + 0, + lbl, + ) + _consumed.add((code, acct)) + # Children's matching entries. + for ch in sorted( + children, + key=lambda c: hierarchy_order_map[c]["order"], + ): + if acct in _ind_accts.get(ch, set()): + _emit_indicator( + ch, + base_level + 2, + parent_order=info["order"], + acct_filter=acct, + child_order=hierarchy_order_map[ch]["order"], + ) + # Consume children's B/N (redundant once grouped). + if grouped: + for ch in children: + for b in _BN_ENTRIES: + _consumed.add((ch, b)) + + # Ungrouped accounting entries — simple sub-children. + for acct in ungrouped: + asort = _ACCT_SORT.get(acct, 3) + for row in _ind_acct.get((code, acct), []): + _enrich_row( + row, + info["order"], + base_level + 1, + asort, + 0, + row.get("ACCOUNTING_ENTRY_label", acct), + is_header=False, + ) + _consumed.add((code, acct)) + + # Recurse into children that have unconsumed entries. + if children: + for ch in sorted( + children, + key=lambda c: hierarchy_order_map[c]["order"], + ): + ch_accts = _ind_accts.get(ch, set()) + if ch_accts and not all((ch, a) in _consumed for a in ch_accts): + _emit_indicator(ch, base_level + 1) + + # Walk root indicators in hierarchy order. + roots = sorted( + [ + c + for c, inf in hierarchy_order_map.items() + if inf.get("parent") is None + ], + key=lambda c: hierarchy_order_map[c]["order"], + ) + for root_code in roots: + _emit_indicator(root_code, hierarchy_order_map[root_code]["level"]) + else: + # No ACCOUNTING_ENTRY dimension — simple enrichment. + all_rows = [] + for row in data_rows: + ind_code = row.get(indicator_dim, "") + hier_info = hierarchy_order_map.get(ind_code) + if not hier_info and ind_code: + for h_code, h_value in hierarchy_order_map.items(): + if ind_code.startswith(h_code + "_") or h_code.startswith( + ind_code + "_" + ): + hier_info = h_value + break + if hier_info: + row["order"] = hier_info["order"] + row["level"] = hier_info["level"] + row["_acct_sort"] = 0 + row["_child_order"] = 0 + row["label"] = hier_info["label"] + row["is_category_header"] = bool(hier_info["children"]) + row["parent_id"] = hier_info["parent"] + row["parent_code"] = hier_info["parent_code"] + all_rows.append(row) + + # ---- Build clean flat output rows (one per observation) ---- + _USELESS_LABELS = { + "not applicable", + "not specified", + "no breakdown", + "total", + "all items", + "all activities", + "total economy", + "non transformed data", + "nan", + } + + def _clean_label(val: Any) -> str: + """Return a clean string label, or empty string for NaN/useless values.""" + if val is None: + return "" + if isinstance(val, float): + return "" + s = str(val).strip() + if s.lower() in _USELESS_LABELS: + return "" + return s + + clean_rows: list[dict] = [] + for row in all_rows: + clean: dict[str, Any] = {} + # Hierarchy fields. 
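+            # A finished row is shaped like (values from the recorded BOP
+            # fixture, after the UNIT_MULT expansion below):
+            #   {"code": "G", "label": "Goods", "time_period": "2021",
+            #    "value": -208878000000.0, "level": ..., "order": ...}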
+ clean["order"] = row.get("order", 9999) + clean["level"] = row.get("level", 0) + clean["_acct_sort"] = row.get("_acct_sort", 0) + clean["_child_order"] = row.get("_child_order", 0) + clean["parent_id"] = row.get("parent_id") + clean["parent_code"] = row.get("parent_code") + base_label = _clean_label(row.get("label", "")) + _base_from_compounds = False + + # When the hierarchy label is a useless placeholder, derive a + # meaningful label from the data row's dimension labels. + if not base_label or base_label.startswith("_"): + # Try the indicator dimension's own label first. + alt = _clean_label(row.get(f"{indicator_dim}_label", "")) + if alt: + base_label = alt + else: + # Build from all content dimension labels on the row. + parts: list[str] = [] + for cdim in _compound_dims: + lbl = _clean_label(row.get(f"{cdim}_label", "")) + if lbl: + parts.append(lbl) + if parts: + base_label = " - ".join(parts) + _base_from_compounds = True + elif not base_label: + base_label = row.get(indicator_dim, "") or "" + + # Raw neutral dimension codes are never useful labels. + if base_label.startswith("_"): + base_label = "" + + clean["is_category_header"] = row.get("is_category_header", False) + base_code = row.get(indicator_dim, "") + clean["_acct_code"] = row.get("ACCOUNTING_ENTRY", "") + clean["_sub_order"] = 0 + + # ---- Compound symbol: fold content-varying dims into + # code and label so each row has a unique identity. ---- + if _compound_dims: + _code_parts = [base_code] + _label_parts = [] + _is_compound_total = True + _NEUTRAL = {"_Z", "_T", "_X", ""} + _comp_codes: dict[str, str] = {} + for cdim in _compound_dims: + cv = row.get(cdim, "") + if cv in _NEUTRAL: + continue # skip both code and label for neutral values + _code_parts.append(cv) + _comp_codes[cdim] = cv + _is_compound_total = False + cl = _clean_label(row.get(f"{cdim}_label", cv)) + if cl: + _label_parts.append(str(cl)) + clean["code"] = "_".join(p for p in _code_parts if p) + # Build label: when the indicator hierarchy is flat (no + # parent/child nesting), prepend the base indicator label + # for context. When a nested hierarchy exists, the + # parent row already provides that context. + _hierarchy_is_flat = not any( + e.get("parent") for e in all_hierarchy_entries + ) + if _label_parts: + if ( + base_label + and not _is_compound_total + and _hierarchy_is_flat + and not _base_from_compounds + ): + clean["label"] = base_label + " - " + " - ".join(_label_parts) + else: + clean["label"] = " - ".join(_label_parts) + else: + clean["label"] = base_label + # Track which base indicator this row belongs to. + clean["_base_indicator"] = base_code + clean["_compound_codes"] = _comp_codes + # Total/aggregate rows sort first; detail rows indent + # one level below as children. + if _is_compound_total: + clean["_sub_order"] = 0 + else: + clean["_sub_order"] = 1 + clean["level"] = clean.get("level", 0) + 1 + else: + clean["code"] = base_code + clean["label"] = base_label + + # Varying dimension columns. + for did in varying_dims: + key = did.lower() + if use_labels: + clean[key] = row.get(f"{did}_label", row.get(did, "")) + else: + clean[key] = row.get(did, "") + + # Core data. + clean["time_period"] = row.get("TIME_PERIOD", "") + _obs = row.get("OBS_VALUE") + # Expand by UNIT_MULT (power-of-10 code) so values are in units. 
+ _um = row.get("UNIT_MULT") + if _obs is not None and _um is not None: + try: + _exp = int(str(_um).strip()) + if _exp > 0: + _obs = _obs * (10**_exp) + except (ValueError, TypeError): + pass + clean["value"] = _obs + + # Unit metadata from fixed dimensions. + for unit_key in ( + "UNIT_MEASURE", + "CURRENCY_DENOM", + "CURRENCY", + "UNIT_MULT", + "PRICE_BASE", + ): + if unit_key in fixed_values: + clean[unit_key.lower()] = fixed_values[unit_key]["label"] + + clean_rows.append(clean) + + # When compound dims are present, undo the level bump for + # indicators that have no "total" aggregate row — there's no + # parent to nest under, so compound details should stay at + # the indicator's natural hierarchy level. + if _compound_dims: + _indicators_with_total: set[str] = set() + for cr in clean_rows: + if cr.get("_sub_order", 0) == 0 and cr.get("_base_indicator"): + _indicators_with_total.add(cr["_base_indicator"]) + for cr in clean_rows: + base = cr.get("_base_indicator", "") + if ( + cr.get("_sub_order", 0) == 1 + and base + and base not in _indicators_with_total + ): + cr["level"] = max(cr["level"] - 1, 0) + cr["_sub_order"] = 0 + + # ---- Compound-dimension hierarchy from codelist parents ---- + # When a compound dim has codelist parent-child relationships, + # use them to assign proper depth levels and tree-order sorting + # so nested categories (e.g. Transport → Sea transport) display + # correctly instead of appearing flat. + if _compound_dims and clean_rows: + _compound_hier: dict[str, dict[str, dict]] = {} + for cdim in _compound_dims: + # Find the codelist ID for this dimension. + _cl_id = "" + for _d in dims: + if _d["id"] == cdim: + _cl_id = _d.get("codelist_id", "") + break + if not _cl_id: + continue + _parents = self.metadata._codelist_parents.get(_cl_id, {}) + if not _parents: + continue + # Collect codes actually present in the data. + _present = {row.get(cdim) for row in all_rows if row.get(cdim)} + _present.discard(None) + _present.discard("") + if not _present: + continue + + def _cdepth(code: str, depth_cache: dict[str, int]) -> int: + if code in depth_cache: + return depth_cache[code] + p = _parents.get(code) + if p is None or p not in _present: + depth_cache[code] = 0 + return 0 + d = 1 + _cdepth(p, depth_cache) + depth_cache[code] = d + return d + + _dc: dict[str, int] = {} + # Build children map for DFS traversal. + _children: dict[str, list[str]] = {} + for c in _present: + p = _parents.get(c) + # Effective parent: must also be present in data. + while p and p not in _present: + p = _parents.get(p) + if p and p in _present: + _children.setdefault(p, []).append(c) # type: ignore + # Determine effective roots: codes whose effective parent + # (after skipping absent ancestors) is not in _present. + _effective_roots: list = [] + for c in sorted(_present): # type: ignore + p = _parents.get(c) + while p and p not in _present: + p = _parents.get(p) + if not p or p not in _present: + _effective_roots.append(c) + + # DFS traversal to assign hierarchy order. + _hier: dict[str, dict] = {} + _ord = [0] + + def _visit(code: str) -> None: + kids = sorted(_children.get(code, [])) + _hier[code] = { + "depth": _cdepth(code, _dc), + "order": _ord[0], + "has_children": bool(kids), + } + _ord[0] += 1 + for kid in kids: + _visit(kid) + + for root in _effective_roots: + _visit(root) + if _hier: + _compound_hier[cdim] = _hier + + # Apply hierarchy info to clean_rows. 
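+            # e.g. for the Transport -> Sea transport case above, "Sea
+            # transport" gains one extra depth level and a tree order that
+            # places it directly beneath "Transport" instead of sorting
+            # alphabetically among its siblings.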
+ if _compound_hier: + for cr in clean_rows: + _extra_depth = 0 + _hier_order = 0 + _is_header = cr.get("is_category_header", False) + _codes = cr.get("_compound_codes", {}) + for cdim in _compound_dims: + if cdim not in _compound_hier: + continue + _cdim_code = _codes.get(cdim, "") + if not _cdim_code: + continue + _h = _compound_hier[cdim].get(_cdim_code) + if _h: + _extra_depth += _h["depth"] + _hier_order = _h["order"] + if _h["has_children"]: + _is_header = True + if _extra_depth or _hier_order: + cr["level"] = cr.get("level", 0) + _extra_depth + cr["_compound_order"] = _hier_order + cr["is_category_header"] = _is_header + else: + cr["_compound_order"] = -1 # totals sort first + + # Sort by hierarchy order, accounting entry, sub-order, compound + # hierarchy order, then time period. + clean_rows.sort( + key=lambda r: ( + r.get("order", 9999), + r.get("_acct_sort", 0), + r.get("_child_order", 0), + r.get("_sub_order", 0), + r.get("_compound_order", 0), + r.get("code", ""), + r.get("time_period", ""), + ) + ) + + # ---- Supplementary % of GDP rows ---- + # If PT_B1GQ is available and isn't the primary unit already, + # fetch it and insert as sub-rows beneath each indicator. + _primary_unit = kwargs.get("UNIT_MEASURE", "") + _pct_gdp_available = ( + "PT_B1GQ" in avail.get("UNIT_MEASURE", []) and _primary_unit != "PT_B1GQ" + ) + if _pct_gdp_available: + try: + _pct_kwargs = dict(kwargs) + _pct_kwargs["UNIT_MEASURE"] = "PT_B1GQ" + _pct_raw = self.query_builder.fetch_data( + dataflow=dataflow, + start_date=start_date, + end_date=end_date, + limit=limit, + _skip_validation=True, + **_pct_kwargs, + ) + _pct_rows = _pct_raw.get("data", []) + # Build lookup: (indicator, acct_entry, time) → value + _pct_lookup: dict[tuple[str, str, str], float] = {} + for _pr in _pct_rows: + _pk = ( + _pr.get(indicator_dim, ""), + _pr.get("ACCOUNTING_ENTRY", ""), + _pr.get("TIME_PERIOD", ""), + ) + val = _pr.get("OBS_VALUE") + if val is not None: + # Expand by UNIT_MULT for % of GDP too. + _pum = _pr.get("UNIT_MULT") + if _pum is not None: + try: + _pexp = int(str(_pum).strip()) + if _pexp > 0: + val = val * (10**_pexp) + except (ValueError, TypeError): + pass + _pct_lookup[_pk] = val + # Create % of GDP sub-rows. + pct_sub_rows: list[dict] = [] + for cr in clean_rows: + _ck = ( + cr.get("code", ""), + cr.get("_acct_code", ""), + cr.get("time_period", ""), + ) + if _ck in _pct_lookup: + pct_row: dict[str, Any] = { + "order": cr["order"], + "level": cr["level"] + 1, + "_acct_sort": cr["_acct_sort"], + "_child_order": cr["_child_order"], + "_sub_order": 1, + "parent_id": cr.get("parent_id"), + "parent_code": cr.get("parent_code"), + "label": "% of GDP", + "is_category_header": False, + "code": cr.get("code", "") + "_PCTGDP", + "_acct_code": cr.get("_acct_code", ""), + "time_period": cr["time_period"], + "value": _pct_lookup[_ck], + } + # Copy varying dim columns. + for did in varying_dims: + key = did.lower() + if key in cr: + pct_row[key] = cr[key] + pct_sub_rows.append(pct_row) + if pct_sub_rows: + clean_rows.extend(pct_sub_rows) + clean_rows.sort( + key=lambda r: ( + r.get("order", 9999), + r.get("_acct_sort", 0), + r.get("_child_order", 0), + r.get("_sub_order", 0), + r.get("code", ""), + r.get("time_period", ""), + ) + ) + except Exception: # noqa: BLE001, S110 + pass # Supplementary fetch failure is non-fatal. 
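+
+        # Illustrative, from the recorded BOP fixture: the 2021 Current
+        # account row (USD_EXC value, expanded to units above) gains a
+        # "% of GDP" sub-row one level deeper with value -0.7560323.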
+ + # ---- Metadata ---- + df_meta = self.metadata.dataflows.get(full_id, {}) + + def _fixed_label(dim_candidates: list[str]) -> str: + for d in dim_candidates: + if d in fixed_values: + return fixed_values[d]["label"] + return "" + + # Extract unit metadata from data-level attributes when not + # available as fixed dimensions (UNIT_MULT, CURRENCY are often + # observation-level attributes, not DSD dimensions). + def _attr_label(attr: str) -> str: + """Get the most common label for a data-level attribute.""" + label_key = f"{attr}_label" + for row in data_rows: + if label_key in row: + v = _clean_label(row[label_key]) + if v: + return v + if attr in row: + v = _clean_label(row[attr]) + if v: + return v + return "" + + unit_measure = _fixed_label(["UNIT_MEASURE"]) or _attr_label("UNIT_MEASURE") + unit_mult = _fixed_label(["UNIT_MULT"]) or _attr_label("UNIT_MULT") + + # Also get the raw UNIT_MULT code for consumers. + def _attr_code(attr: str) -> str: + for row in data_rows: + if attr in row and row[attr]: + return str(row[attr]) + return "" + + unit_mult_code = ( + fixed_values["UNIT_MULT"]["code"] + if "UNIT_MULT" in fixed_values + else _attr_code("UNIT_MULT") + ) + currency = _fixed_label(["CURRENCY_DENOM", "CURRENCY"]) or _attr_label( + "CURRENCY" + ) + price_base = _fixed_label(["PRICE_BASE"]) or _attr_label("PRICE_BASE") + + table_metadata = { + "table_id": table_id or df_meta.get("short_id", dataflow), + "table_name": table_structure.get("hierarchy_name", ""), + "dataflow_id": full_id, + "dataflow_name": df_meta.get("name", dataflow), + "url": raw.get("metadata", {}).get("url", ""), + "row_count": len(clean_rows), + "total_indicators": len(hierarchy_codes), + "unit_measure": unit_measure, + "unit_multiplier": unit_mult, + "unit_multiplier_code": unit_mult_code, + "currency": currency, + "price_base": price_base, + "fixed_dimensions": fixed_values, + } + + return { + "table_metadata": table_metadata, + "structure": table_structure, + "data": clean_rows, + "series_metadata": raw.get("metadata", {}), + } diff --git a/openbb_platform/providers/oecd/poetry.lock b/openbb_platform/providers/oecd/poetry.lock index 608ee515991..b42e755f7ac 100644 --- a/openbb_platform/providers/oecd/poetry.lock +++ b/openbb_platform/providers/oecd/poetry.lock @@ -420,17 +420,29 @@ files = [ ] [[package]] -name = "defusedxml" -version = "0.7.1" -description = "XML bomb protection for Python stdlib modules" +name = "deepdiff" +version = "9.0.0" +description = "Deep Difference and Search of any Python object/data. Recreate objects by adding adding deltas to each other." 
optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = ">=3.10" groups = ["main"] files = [ - {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"}, - {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, + {file = "deepdiff-9.0.0-py3-none-any.whl", hash = "sha256:b1ae0dd86290d86a03de5fbee728fde43095c1472ae4974bdab23ab4656305bd"}, + {file = "deepdiff-9.0.0.tar.gz", hash = "sha256:4872005306237b5b50829803feff58a1dfd20b2b357a55de22e7ded65b2008a7"}, ] +[package.dependencies] +orderly-set = ">=5.5.0,<6" + +[package.extras] +cli = ["click (>=8.3.1,<8.4.0)", "pyyaml (>=6.0.3,<6.1.0)"] +coverage = ["coverage (>=7.13.5,<7.14.0)"] +dev = ["bump2version (>=1.0.1,<1.1.0)", "flit-core (==3.12.0)", "ipdb (>=0.13.13,<0.14.0)", "jsonpickle (>=4.1.1,<4.2.0)", "nox (==2026.2.9)", "numpy (>=2.2.0,<2.3.0) ; python_version < \"3.14\"", "numpy (>=2.4.3,<2.5.0) ; python_version >= \"3.14\"", "orjson (>=3.11.7,<3.12.0)", "pandas (>=2.2.0,<2.3.0) ; python_version < \"3.11\"", "pandas (>=3.0.1,<3.1.0) ; python_version >= \"3.11\"", "polars (>=1.39.3,<1.40.0)", "python-dateutil (>=2.9.0.post0,<2.10.0)", "pytz", "tomli (>=2.4.0,<2.5.0)", "tomli-w (>=1.2.0,<1.3.0)", "uuid6 (==2025.0.1)"] +docs = ["Sphinx (>=8.1.3,<8.2.0)", "furo (>=2024.8.6)", "sphinx-sitemap (>=2.9.0,<2.10.0)", "sphinxemoji (>=0.3.2,<0.4.0)"] +optimize = ["orjson"] +static = ["flake8 (>=7.3.0,<7.4.0)", "flake8-pyproject (>=1.2.4,<1.3.0)", "pydantic (>=2.12.5,<2.13.0)"] +test = ["pytest (>=9.0.2,<9.1.0)", "pytest-benchmark (>=5.2.3,<5.3.0)", "pytest-cov (>=7.1.0,<7.2.0)", "python-dotenv (>=1.2.2,<1.3.0)"] + [[package]] name = "exceptiongroup" version = "1.3.1" @@ -1023,6 +1035,56 @@ uuid7 = ">=0.1.0,<0.2.0" uvicorn = ">=0.40.0,<0.41.0" websockets = ">=15.0" +[[package]] +name = "openbb-economy" +version = "1.6.1" +description = "Economy extension for OpenBB" +optional = false +python-versions = "<4,>=3.10" +groups = ["main"] +files = [ + {file = "openbb_economy-1.6.1-py3-none-any.whl", hash = "sha256:d9da6acb0ca56fa8316fd04d54c88bb4edd7df3fad68128e80c429c8499b7669"}, + {file = "openbb_economy-1.6.1.tar.gz", hash = "sha256:ff69ac293742d4b9d2812f9d58faa5f4e8b3d7cc1e681c598434ff3371310c26"}, +] + +[package.dependencies] +openbb-core = ">=1.6.1,<2.0.0" + +[[package]] +name = "openbb-platform-api" +version = "1.3.5" +description = "OpenBB Platform API: Launch script and widgets builder for the Open Data Platform REST API and Workspace Backend Connector." 
+optional = false +python-versions = "<4,>=3.10" +groups = ["main"] +files = [ + {file = "openbb_platform_api-1.3.5-py3-none-any.whl", hash = "sha256:dfa0175006aede9e09906b81832105075747d836b135e945e73af51060731066"}, + {file = "openbb_platform_api-1.3.5.tar.gz", hash = "sha256:40816bb6ffe18481d7e023a5c36304761e5334d18098c72af7bd6d44f4235932"}, +] + +[package.dependencies] +deepdiff = ">=8.6.2" +openbb-core = ">=1.6.4,<2.0.0" + +[[package]] +name = "orderly-set" +version = "5.5.0" +description = "Orderly set" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "orderly_set-5.5.0-py3-none-any.whl", hash = "sha256:46f0b801948e98f427b412fcabb831677194c05c3b699b80de260374baa0b1e7"}, + {file = "orderly_set-5.5.0.tar.gz", hash = "sha256:e87185c8e4d8afa64e7f8160ee2c542a475b738bc891dc3f58102e654125e6ce"}, +] + +[package.extras] +coverage = ["coverage (>=7.6.0,<7.7.0)"] +dev = ["bump2version (>=1.0.0,<1.1.0)", "ipdb (>=0.13.0,<0.14.0)"] +optimize = ["orjson"] +static = ["flake8 (>=7.1.0,<7.2.0)", "flake8-pyproject (>=1.2.3,<1.3.0)"] +test = ["pytest (>=8.3.0,<8.4.0)", "pytest-benchmark (>=5.1.0,<5.2.0)", "pytest-cov (>=6.0.0,<6.1.0)", "python-dotenv (>=1.0.0,<1.1.0)"] + [[package]] name = "pandas" version = "2.3.3" @@ -1996,4 +2058,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<4" -content-hash = "06474912b46bc32cd7356b838e803da594bcc173d358b1a68794bb02740ea0fd" +content-hash = "4210a4e2386b77601c80eb88eb724a69c476485a5b903cdc55f5c844afa4e812" diff --git a/openbb_platform/providers/oecd/pyproject.toml b/openbb_platform/providers/oecd/pyproject.toml index 13b30fd2adb..b2992f6e6b2 100644 --- a/openbb_platform/providers/oecd/pyproject.toml +++ b/openbb_platform/providers/oecd/pyproject.toml @@ -9,8 +9,9 @@ packages = [{ include = "openbb_oecd" }] [tool.poetry.dependencies] python = ">=3.10,<4" -openbb-core = "^1.6.3" -defusedxml = ">=0.7.1" +openbb-core = "^1.6.8" +openbb-platform-api = "^1.3.5" +openbb-economy = "^1.6.1" [build-system] requires = ["poetry-core"] @@ -18,3 +19,12 @@ build-backend = "poetry.core.masonry.api" [tool.poetry.plugins."openbb_provider_extension"] oecd = "openbb_oecd:oecd_provider" + +[tool.poetry.plugins."openbb_core_extension"] +oecd_utils = "openbb_oecd.oecd_router:router" + +[tool.poetry.scripts] +generate-oecd-cache = "openbb_oecd.utils.generate_cache:main" + +[tool.mypy] +explicit_package_bases = true diff --git a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_balance_of_payments_fetcher_urllib3_v2.yaml b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_balance_of_payments_fetcher_urllib3_v2.yaml new file mode 100644 index 00000000000..58fc8c85f84 --- /dev/null +++ b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_balance_of_payments_fetcher_urllib3_v2.yaml @@ -0,0 +1,202 @@ +interactions: +- request: + body: null + headers: + Accept: + - application/vnd.sdmx.data+csv; version=2.0.0; labels=both + Accept-Encoding: + - gzip, deflate, br + Connection: + - keep-alive + method: GET + uri: https://sdmx.oecd.org/public/rest/data/OECD.SDD.TPS,DSD_BOP@DF_BOP,1.0/GBR..CA+G+IN1+IN2+S.B+C+D..A.PT_B1GQ+PT_CA+PT_GS+USD_EXC.Y?endPeriod=2021&startPeriod=2020 + response: + body: + string: "STRUCTURE,STRUCTURE_ID,ACTION,REF_AREA: Reference area,COUNTERPART_AREA: + Counterpart area,MEASURE: Measure,ACCOUNTING_ENTRY: Accounting entry,FS_ENTRY: + Flow or stock entry,FREQ: Frequency of observation,UNIT_MEASURE: Unit of 
measure,ADJUSTMENT: + Adjustment,TIME_PERIOD: Time period,OBS_VALUE,OBS_STATUS: Observation status,UNIT_MULT: + Unit multiplier,CURRENCY: Currency,DECIMALS: Decimals\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,IN1: Primary income,B: Balance (revenue + minus expenditure),T: Transactions,A: Annual,\"USD_EXC: US dollars, exchange + rate converted\",Y: Calendar and seasonally adjusted,2020,-57669.58,A: Normal + value,6: Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,CA: Current account,B: Balance (revenue + minus expenditure),T: Transactions,A: Annual,PT_B1GQ: Percentage of GDP,Y: + Calendar and seasonally adjusted,2021,-0.7560323,A: Normal value,0: Units,_Z: + Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,S: Services,D: Expenditure,T: Transactions,A: + Annual,PT_CA: Percentage of current account,Y: Calendar and seasonally adjusted,2021,19.73371,A: + Normal value,0: Units,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,S: Services,C: Revenue,T: Transactions,A: + Annual,PT_GS: Percentage of goods and services,Y: Calendar and seasonally + adjusted,2021,49.49661,A: Normal value,0: Units,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,CA: Current account,B: Balance (revenue + minus expenditure),T: Transactions,A: Annual,\"USD_EXC: US dollars, exchange + rate converted\",Y: Calendar and seasonally adjusted,2021,-24155.07,A: Normal + value,6: Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,CA: Current account,B: Balance (revenue + minus expenditure),T: Transactions,A: Annual,\"USD_EXC: US dollars, exchange + rate converted\",Y: Calendar and seasonally adjusted,2020,-74129.38,A: Normal + value,6: Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,IN2: Secondary income,B: Balance (revenue + minus expenditure),T: Transactions,A: Annual,\"USD_EXC: US dollars, exchange + rate converted\",Y: Calendar and seasonally adjusted,2021,-25633.81,A: Normal + value,6: Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,S: Services,D: Expenditure,T: Transactions,A: + Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: Calendar and seasonally + adjusted,2021,253014.7,A: Normal value,6: Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,S: Services,D: Expenditure,T: Transactions,A: + Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: Calendar and seasonally + adjusted,2020,228174.8,A: Normal value,6: Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,IN2: Secondary income,D: Expenditure,T: + Transactions,A: Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: + Calendar and seasonally adjusted,2021,63136.06,A: Normal value,6: Millions,_Z: + Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,IN2: Secondary income,D: Expenditure,T: + Transactions,A: Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: + Calendar and seasonally adjusted,2020,70334.41,A: Normal value,6: Millions,_Z: + Not applicable,2: 
Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,G: Goods,D: Expenditure,T: Transactions,A: + Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: Calendar and seasonally + adjusted,2021,681483.8,A: Normal value,6: Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,G: Goods,D: Expenditure,T: Transactions,A: + Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: Calendar and seasonally + adjusted,2020,567827.2,A: Normal value,6: Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,IN2: Secondary income,B: Balance (revenue + minus expenditure),T: Transactions,A: Annual,\"USD_EXC: US dollars, exchange + rate converted\",Y: Calendar and seasonally adjusted,2020,-36554.25,A: Normal + value,6: Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,CA: Current account,C: Revenue,T: Transactions,A: + Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: Calendar and seasonally + adjusted,2021,1257989,A: Normal value,6: Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,CA: Current account,C: Revenue,T: Transactions,A: + Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: Calendar and seasonally + adjusted,2020,1025694,A: Normal value,6: Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,G: Goods,C: Revenue,T: Transactions,A: + Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: Calendar and seasonally + adjusted,2021,472605.7,A: Normal value,6: Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,G: Goods,C: Revenue,T: Transactions,A: + Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: Calendar and seasonally + adjusted,2020,416917.5,A: Normal value,6: Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,IN2: Secondary income,C: Revenue,T: + Transactions,A: Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: + Calendar and seasonally adjusted,2021,37502.25,A: Normal value,6: Millions,_Z: + Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,IN2: Secondary income,C: Revenue,T: + Transactions,A: Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: + Calendar and seasonally adjusted,2020,33780.17,A: Normal value,6: Millions,_Z: + Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,IN1: Primary income,C: Revenue,T: Transactions,A: + Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: Calendar and seasonally + adjusted,2021,284696.9,A: Normal value,6: Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,IN1: Primary income,C: Revenue,T: Transactions,A: + Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: Calendar and seasonally + adjusted,2020,175817.4,A: Normal value,6: Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,S: Services,C: Revenue,T: Transactions,A: + Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: Calendar and seasonally + adjusted,2021,463184.4,A: Normal value,6: Millions,_Z: 
Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,S: Services,C: Revenue,T: Transactions,A: + Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: Calendar and seasonally + adjusted,2020,399179,A: Normal value,6: Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,IN1: Primary income,D: Expenditure,T: + Transactions,A: Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: + Calendar and seasonally adjusted,2021,284509.8,A: Normal value,6: Millions,_Z: + Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,IN1: Primary income,D: Expenditure,T: + Transactions,A: Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: + Calendar and seasonally adjusted,2020,233487,A: Normal value,6: Millions,_Z: + Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,CA: Current account,D: Expenditure,T: + Transactions,A: Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: + Calendar and seasonally adjusted,2021,1282144,A: Normal value,6: Millions,_Z: + Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,CA: Current account,D: Expenditure,T: + Transactions,A: Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: + Calendar and seasonally adjusted,2020,1099824,A: Normal value,6: Millions,_Z: + Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,G: Goods,B: Balance (revenue minus expenditure),T: + Transactions,A: Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: + Calendar and seasonally adjusted,2021,-208878,A: Normal value,6: Millions,_Z: + Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,G: Goods,B: Balance (revenue minus expenditure),T: + Transactions,A: Annual,\"USD_EXC: US dollars, exchange rate converted\",Y: + Calendar and seasonally adjusted,2020,-150909.7,A: Normal value,6: Millions,_Z: + Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,S: Services,B: Balance (revenue minus + expenditure),T: Transactions,A: Annual,\"USD_EXC: US dollars, exchange rate + converted\",Y: Calendar and seasonally adjusted,2021,210169.7,A: Normal value,6: + Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,S: Services,B: Balance (revenue minus + expenditure),T: Transactions,A: Annual,\"USD_EXC: US dollars, exchange rate + converted\",Y: Calendar and seasonally adjusted,2020,171004.2,A: Normal value,6: + Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,IN1: Primary income,B: Balance (revenue + minus expenditure),T: Transactions,A: Annual,\"USD_EXC: US dollars, exchange + rate converted\",Y: Calendar and seasonally adjusted,2021,187.078,A: Normal + value,6: Millions,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,CA: Current account,B: Balance (revenue + minus expenditure),T: Transactions,A: Annual,PT_B1GQ: Percentage of GDP,Y: + Calendar and seasonally adjusted,2020,-2.720358,A: Normal value,0: Units,_Z: + Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,S: Services,D: Expenditure,T: 
Transactions,A: + Annual,PT_CA: Percentage of current account,Y: Calendar and seasonally adjusted,2020,20.7465,A: + Normal value,0: Units,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,S: Services,C: Revenue,T: Transactions,A: + Annual,PT_CA: Percentage of current account,Y: Calendar and seasonally adjusted,2021,36.81942,A: + Normal value,0: Units,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,S: Services,C: Revenue,T: Transactions,A: + Annual,PT_CA: Percentage of current account,Y: Calendar and seasonally adjusted,2020,38.91794,A: + Normal value,0: Units,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,S: Services,C: Revenue,T: Transactions,A: + Annual,PT_GS: Percentage of goods and services,Y: Calendar and seasonally + adjusted,2020,48.91321,A: Normal value,0: Units,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,S: Services,D: Expenditure,T: Transactions,A: + Annual,PT_GS: Percentage of goods and services,Y: Calendar and seasonally + adjusted,2021,27.07492,A: Normal value,0: Units,_Z: Not applicable,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_BOP(1.0),I,GBR: + United Kingdom,WXD: Rest of the world,S: Services,D: Expenditure,T: Transactions,A: + Annual,PT_GS: Percentage of goods and services,Y: Calendar and seasonally + adjusted,2020,28.66511,A: Normal value,0: Units,_Z: Not applicable,2: Two\r\n" + headers: + CF-Cache-Status: + - EXPIRED + CF-RAY: + - 9dc5fe830c570c8d-YVR + Cache-Control: + - public,max-age=7200 + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Language: + - en,en-US + Content-Type: + - application/vnd.sdmx.data+csv; charset=utf-8; labels=both; version=2.0.0 + Date: + - Sat, 14 Mar 2026 20:23:07 GMT + Last-Modified: + - Sat, 14 Mar 2026 20:23:07 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=2592000 + Transfer-Encoding: + - chunked + Vary: + - X-Range, Accept, Accept-Language, Accept-Encoding, Accept-Charset,Accept,Accept-Encoding,Accept-Encoding + X-Server-Node: + - Server 1 + alt-svc: + - h3=":443"; ma=86400 + api-supported-versions: + - '1' + set-cookie: + - __cf_bm=0PTJFRZ0IuKcLrkqJOYOrA.Ef6jAn2HPVakOGa.xhxk-1773519785.449739-1.0.1.1-AeqBy1_q5FPjZO4C9CeySQKuTe.KOxkn83_2.h.vR8RLdGLYWGRJtKj2HwsA5gVl_w6Q9WMgYauKxBT9xXWdwJZwjIaAOmXSCFkgrcB6.nZIHUKXZL3HDG4kOZ50Kxt.; + HttpOnly; Secure; Path=/; Domain=oecd.org; Expires=Sat, 14 Mar 2026 20:53:07 + GMT + - _cfuvid=PyH7cC.5leMnEDQrlR_XsAGbKA0E.rGvcu_EMJbHmOU-1773519785.449739-1.0.1.1-P6IjZZAVdtBoUgp0KHYB3.hpzJbMroAICmXxmqsZwCM; + HttpOnly; SameSite=None; Secure; Path=/; Domain=oecd.org + status: + code: 200 + message: OK +version: 1 diff --git a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_composite_leading_indicator_fetcher_urllib3_v2.yaml b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_composite_leading_indicator_fetcher_urllib3_v2.yaml index 30db430e175..99fb24feb52 100644 --- a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_composite_leading_indicator_fetcher_urllib3_v2.yaml +++ b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_composite_leading_indicator_fetcher_urllib3_v2.yaml @@ -3,47 +3,72 @@ interactions: body: null headers: Accept: - - application/vnd.sdmx.data+csv; charset=utf-8 + - application/vnd.sdmx.data+csv; 
version=2.0.0; labels=both method: GET - uri: https://sdmx.oecd.org/public/rest/data/OECD.SDD.STES,DSD_STES@DF_CLI,4.1/G20.M.LI...AA.IX..H?detail=dataonly&dimensionAtObservation=TIME_PERIOD&endPeriod=2023-06-06&format=csvfile&startPeriod=2023-01-01 + uri: https://sdmx.oecd.org/public/rest/v2/data/dataflow/OECD.SDD.STES/DSD_STES%40DF_CLI/4.1/G20.M.LI.*.*.AA.IX.*.H?c%5BTIME_PERIOD%5D=ge%3A2023-01%2Cle%3A2023-06&detail=full&dimensionAtObservation=TIME_PERIOD response: body: - string: !!binary | - H4sIAAAAAAAEAO29B2AcSZYlJi9tynt/SvVK1+B0oQiAYBMk2JBAEOzBiM3mkuwdaUcjKasqgcpl - VmVdZhZAzO2dvPfee++999577733ujudTif33/8/XGZkAWz2zkrayZ4hgKrIHz9+fB8/Ip4evzl+ - 9vzL745enT77/Y9fnR6Pnr06/YnRF6fHr796dTr66sXZm9/f/HF88ubsJ8/e/D6j46ff+er1my9O - X7wZvXl1/OL1sy9ffXH85uzLF6M3Z1+c/v7f/vLV2U8RiDff/vLpl8+//Pz3kY9fnr46+/Lp6Msn - r3//nzx+/tUp//b6zfGbr15rP189fzN6enpy9sXx89ejJ8ev+Z3fOPny9OTp+PVT+v+b09ePnr5+ - Sm+dvv49nz77/U+en23tj3fvjD7f2xl9MXp+Njr7vUe//0+Njo/1l2+P9nb27m3v7I0ePhzv3dvZ - vzei5xsBuQuQOw/vPTwgiN8MyPsA+em9T/cI4DcDcR8Q7+98+mCfIH4zIO8B5L0H93bvE8TRb5z8 - P5vP+95CAgAA + string: "STRUCTURE,STRUCTURE_ID,ACTION,REF_AREA: Reference area,FREQ: Frequency + of observation,MEASURE: Measure,UNIT_MEASURE: Unit of measure,ACTIVITY: Economic + activity,ADJUSTMENT: Adjustment,TRANSFORMATION: Transformation,TIME_HORIZ: + Time horizon,METHODOLOGY: Calculation methodology,TIME_PERIOD: Time period,OBS_VALUE,OBS_STATUS: + Observation status,UNIT_MULT: Unit multiplier,DECIMALS: Decimals,BASE_PER: + Base period\r\nDATAFLOW,OECD.SDD.STES:DF_CLI(4.1),I,G20: G20,M: Monthly,LI: + Composite leading indicator (CLI),IX: Index,_Z: Not applicable,AA: Amplitude + adjusted,IX: Index,_Z: Not applicable,H: OECD harmonised,2023-04,99.61441,A: + Normal value,0: Units,2: Two,\r\nDATAFLOW,OECD.SDD.STES:DF_CLI(4.1),I,G20: + G20,M: Monthly,LI: Composite leading indicator (CLI),IX: Index,_Z: Not applicable,AA: + Amplitude adjusted,IX: Index,_Z: Not applicable,H: OECD harmonised,2023-05,99.74016,A: + Normal value,0: Units,2: Two,\r\nDATAFLOW,OECD.SDD.STES:DF_CLI(4.1),I,G20: + G20,M: Monthly,LI: Composite leading indicator (CLI),IX: Index,_Z: Not applicable,AA: + Amplitude adjusted,IX: Index,_Z: Not applicable,H: OECD harmonised,2023-06,99.85122,A: + Normal value,0: Units,2: Two,\r\nDATAFLOW,OECD.SDD.STES:DF_CLI(4.1),I,G20: + G20,M: Monthly,LI: Composite leading indicator (CLI),IX: Index,_Z: Not applicable,AA: + Amplitude adjusted,IX: Index,_Z: Not applicable,H: OECD harmonised,2023-01,99.20787,A: + Normal value,0: Units,2: Two,\r\nDATAFLOW,OECD.SDD.STES:DF_CLI(4.1),I,G20: + G20,M: Monthly,LI: Composite leading indicator (CLI),IX: Index,_Z: Not applicable,AA: + Amplitude adjusted,IX: Index,_Z: Not applicable,H: OECD harmonised,2023-02,99.34254,A: + Normal value,0: Units,2: Two,\r\nDATAFLOW,OECD.SDD.STES:DF_CLI(4.1),I,G20: + G20,M: Monthly,LI: Composite leading indicator (CLI),IX: Index,_Z: Not applicable,AA: + Amplitude adjusted,IX: Index,_Z: Not applicable,H: OECD harmonised,2023-03,99.48336,A: + Normal value,0: Units,2: Two,\r\n" headers: - Accept-Ranges: - - values + CF-RAY: + - 9dc5fe091884497b-YVR Cache-Control: - - no-store,no-cache - Content-Disposition: - - attachment; filename="OECD.SDD.STES,DSD_STES@DF_CLI,4.1+G20.M.LI...AA.IX..H.csv" - Content-Encoding: - - gzip + - public,max-age=7200 + Connection: + - keep-alive Content-Language: - en,en-US Content-Type: - - application/vnd.sdmx.data+csv; charset=utf-8 + - application/vnd.sdmx.data+csv; version=2.0.0; labels=both; charset=utf-8 Date: - - Fri, 12 Jul 2024 22:07:45 GMT - Pragma: - - 
no-cache + - Sat, 14 Mar 2026 20:22:47 GMT + Server: + - cloudflare Strict-Transport-Security: - max-age=2592000 Transfer-Encoding: - chunked Vary: - - Accept,Accept-Encoding,Accept-Encoding + - X-Range, Accept, Accept-Language, Accept-Encoding, Accept-Charset,Accept,Accept-Encoding,Accept-Encoding X-Server-Node: - - Server 3 + - Server 2 + alt-svc: + - h3=":443"; ma=86400 api-supported-versions: - - '1' + - '2' + cf-cache-status: + - MISS + last-modified: + - Sat, 14 Mar 2026 20:22:47 GMT + set-cookie: + - __cf_bm=tcF6EsCXREWUlm_hXUHuU._ojE9s1zRqBPqU79w4Gzo-1773519765.9402282-1.0.1.1-ojxSSyhZyxC_vbcccL7uvRXCP1sBh3USJz95UN1l8_dw_joxoCtU8D7ullyRoNYcomUqrMTL7Kel0gzVQIBEcbAoqDql30HSwtPr2cqTjoFekxJ_OFwnykNBkA.1SXYs; + HttpOnly; Secure; Path=/; Domain=oecd.org; Expires=Sat, 14 Mar 2026 20:52:47 + GMT + - _cfuvid=jeGCkjlxzC2Kealz_Pa0lZSDYFprZUg9td7hOrlDhck-1773519765.9402282-1.0.1.1-wMdJ1YRsxVSHKNSOqgiYA9lk1crbG5YXkX_My3b6iLY; + HttpOnly; SameSite=None; Secure; Path=/; Domain=oecd.org status: code: 200 message: OK diff --git a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_country_interest_rates_fetcher_urllib3_v2.yaml b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_country_interest_rates_fetcher_urllib3_v2.yaml index a75c7cf6207..793976ccb1d 100644 --- a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_country_interest_rates_fetcher_urllib3_v2.yaml +++ b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_country_interest_rates_fetcher_urllib3_v2.yaml @@ -3,41 +3,92 @@ interactions: body: null headers: Accept: - - application/vnd.sdmx.data+csv; charset=utf-8 - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive + - application/vnd.sdmx.data+csv; version=2.0.0; labels=both method: GET - uri: https://sdmx.oecd.org/public/rest/data/OECD.SDD.STES,DSD_KEI@DF_KEI,4.0/GBR.M.IRLT....?detail=dataonly&dimensionAtObservation=TIME_PERIOD&endPeriod=2024-01&startPeriod=2023-01 + uri: https://sdmx.oecd.org/public/rest/v2/data/dataflow/OECD.SDD.STES/DSD_KEI%40DF_KEI/4.0/GBR.M.IRLT.PA._Z._Z._Z?c%5BTIME_PERIOD%5D=ge%3A2023-01%2Cle%3A2024-01&detail=full&dimensionAtObservation=TIME_PERIOD response: body: - string: "DATAFLOW,REF_AREA,FREQ,MEASURE,UNIT_MEASURE,ACTIVITY,ADJUSTMENT,TRANSFORMATION,TIME_PERIOD,OBS_VALUE,OBS_STATUS,UNIT_MULT,DECIMALS,BASE_PER\r\nOECD.SDD.STES:DSD_KEI@DF_KEI(4.0),GBR,M,IRLT,PA,_Z,_Z,_Z,2023-03,3.5638,,,,\r\nOECD.SDD.STES:DSD_KEI@DF_KEI(4.0),GBR,M,IRLT,PA,_Z,_Z,_Z,2023-02,3.5553,,,,\r\nOECD.SDD.STES:DSD_KEI@DF_KEI(4.0),GBR,M,IRLT,PA,_Z,_Z,_Z,2023-01,3.5115,,,,\r\nOECD.SDD.STES:DSD_KEI@DF_KEI(4.0),GBR,M,IRLT,PA,_Z,_Z,_Z,2024-01,3.9319,,,,\r\nOECD.SDD.STES:DSD_KEI@DF_KEI(4.0),GBR,M,IRLT,PA,_Z,_Z,_Z,2023-12,3.8622,,,,\r\nOECD.SDD.STES:DSD_KEI@DF_KEI(4.0),GBR,M,IRLT,PA,_Z,_Z,_Z,2023-11,4.2721,,,,\r\nOECD.SDD.STES:DSD_KEI@DF_KEI(4.0),GBR,M,IRLT,PA,_Z,_Z,_Z,2023-10,4.5695,,,,\r\nOECD.SDD.STES:DSD_KEI@DF_KEI(4.0),GBR,M,IRLT,PA,_Z,_Z,_Z,2023-09,4.4199,,,,\r\nOECD.SDD.STES:DSD_KEI@DF_KEI(4.0),GBR,M,IRLT,PA,_Z,_Z,_Z,2023-08,4.5298,,,,\r\nOECD.SDD.STES:DSD_KEI@DF_KEI(4.0),GBR,M,IRLT,PA,_Z,_Z,_Z,2023-07,4.4372,,,,\r\nOECD.SDD.STES:DSD_KEI@DF_KEI(4.0),GBR,M,IRLT,PA,_Z,_Z,_Z,2023-06,4.3659,,,,\r\nOECD.SDD.STES:DSD_KEI@DF_KEI(4.0),GBR,M,IRLT,PA,_Z,_Z,_Z,2023-05,3.9621,,,,\r\nOECD.SDD.STES:DSD_KEI@DF_KEI(4.0),GBR,M,IRLT,PA,_Z,_Z,_Z,2023-04,3.6503,,,,\r\n" + string: "STRUCTURE,STRUCTURE_ID,ACTION,REF_AREA: Reference area,FREQ: Frequency + of observation,MEASURE: 
Measure,UNIT_MEASURE: Unit of measure,ACTIVITY: Economic + activity,ADJUSTMENT: Adjustment,TRANSFORMATION: Transformation,TIME_PERIOD: + Time period,OBS_VALUE,OBS_STATUS: Observation status,UNIT_MULT: Unit multiplier,DECIMALS: + Decimals,BASE_PER: Base period\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: + United Kingdom,M: Monthly,IRLT: Long-term interest rates,PA: Percent per annum,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,2023-08,4.5298,A: Normal + value,0: Units,1: One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: + United Kingdom,M: Monthly,IRLT: Long-term interest rates,PA: Percent per annum,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,2023-07,4.4372,A: Normal + value,0: Units,1: One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: + United Kingdom,M: Monthly,IRLT: Long-term interest rates,PA: Percent per annum,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,2023-06,4.3659,A: Normal + value,0: Units,1: One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: + United Kingdom,M: Monthly,IRLT: Long-term interest rates,PA: Percent per annum,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,2023-05,3.9621,A: Normal + value,0: Units,1: One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: + United Kingdom,M: Monthly,IRLT: Long-term interest rates,PA: Percent per annum,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,2023-04,3.6503,A: Normal + value,0: Units,1: One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: + United Kingdom,M: Monthly,IRLT: Long-term interest rates,PA: Percent per annum,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,2023-03,3.5638,A: Normal + value,0: Units,1: One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: + United Kingdom,M: Monthly,IRLT: Long-term interest rates,PA: Percent per annum,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,2023-02,3.5553,A: Normal + value,0: Units,1: One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: + United Kingdom,M: Monthly,IRLT: Long-term interest rates,PA: Percent per annum,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,2023-01,3.5115,A: Normal + value,0: Units,1: One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: + United Kingdom,M: Monthly,IRLT: Long-term interest rates,PA: Percent per annum,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,2024-01,3.9319,A: Normal + value,0: Units,1: One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: + United Kingdom,M: Monthly,IRLT: Long-term interest rates,PA: Percent per annum,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,2023-12,3.8622,A: Normal + value,0: Units,1: One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: + United Kingdom,M: Monthly,IRLT: Long-term interest rates,PA: Percent per annum,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,2023-11,4.2721,A: Normal + value,0: Units,1: One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: + United Kingdom,M: Monthly,IRLT: Long-term interest rates,PA: Percent per annum,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,2023-10,4.5695,A: Normal + value,0: Units,1: One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: + United Kingdom,M: Monthly,IRLT: Long-term interest rates,PA: Percent per annum,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,2023-09,4.4199,A: Normal + value,0: Units,1: One,\r\n" headers: - Accept-Ranges: - - values + CF-RAY: + - 9dc5fe40bae8bdbf-YVR Cache-Control: - - no-store,no-cache - Content-Encoding: - - gzip + - public,max-age=7200 + Connection: + - keep-alive Content-Language: - en,en-US Content-Type: 
- - application/vnd.sdmx.data+csv; charset=utf-8 + - application/vnd.sdmx.data+csv; charset=utf-8; labels=both; version=2.0.0 Date: - - Wed, 31 Jul 2024 22:54:54 GMT - Pragma: - - no-cache + - Sat, 14 Mar 2026 20:22:57 GMT + Server: + - cloudflare Strict-Transport-Security: - max-age=2592000 Transfer-Encoding: - chunked Vary: - - Accept,Accept-Encoding,Accept-Encoding + - X-Range, Accept, Accept-Language, Accept-Encoding, Accept-Charset,Accept,Accept-Encoding,Accept-Encoding X-Server-Node: - Server 1 + alt-svc: + - h3=":443"; ma=86400 api-supported-versions: - - '1' + - '2' + cf-cache-status: + - MISS + last-modified: + - Sat, 14 Mar 2026 20:22:57 GMT + set-cookie: + - __cf_bm=82amJxWYb7GVtbyeli64QHHz1LBueA7j2rMPEcePhd4-1773519774.8349323-1.0.1.1-MWUxrK9b_yWbGiIOObFKyGmB2M9k.NxkTg9YqDOS6.L1wJbDT4gpf4OJHnxJuehvsLaQRVCzXxQwFlpjaTdjSO_Cmm8tacKqBKs9J2OG67a.X4OD9zu26RsL1N.nJXns; + HttpOnly; Secure; Path=/; Domain=oecd.org; Expires=Sat, 14 Mar 2026 20:52:57 + GMT + - _cfuvid=eK3W1NiJF1a_mybmkwvkEbCZj.Oer0JFCTY4Lkz4P_Y-1773519774.8349323-1.0.1.1-AfZVf7B8jYT0ZtxVQaBqQiWd9Hbk43W5c5fxs3Wj5Hs; + HttpOnly; SameSite=None; Secure; Path=/; Domain=oecd.org status: code: 200 message: OK diff --git a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_cpi_fetcher_urllib3_v2.yaml b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_cpi_fetcher_urllib3_v2.yaml index 9ba24df6ff3..802f11e8f06 100644 --- a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_cpi_fetcher_urllib3_v2.yaml +++ b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_cpi_fetcher_urllib3_v2.yaml @@ -3,190 +3,60 @@ interactions: body: null headers: Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive + - application/vnd.sdmx.data+csv; version=2.0.0; labels=both method: GET - uri: https://sdmx.oecd.org/public/rest/data/OECD.SDD.TPS,DSD_PRICES@DF_PRICES_ALL,1.0/GBR.A.N.CPI.PA._T.N. 
+ uri: https://sdmx.oecd.org/public/rest/v2/data/dataflow/OECD.SDD.TPS/DSD_PRICES%40DF_PRICES_ALL/1.0/GBR.A.N.CPI.*._T.*.GY?c%5BTIME_PERIOD%5D=ge%3A2020-01%2Cle%3A2022-01&detail=full&dimensionAtObservation=TIME_PERIOD response: body: - string: IREF020005true2024-06-28T18:10:59InformationDSD_PRICES + string: "STRUCTURE,STRUCTURE_ID,ACTION,REF_AREA: Reference area,FREQ: Frequency + of observation,METHODOLOGY: Methodology,MEASURE: Measure,UNIT_MEASURE: Unit + of measure,EXPENDITURE: Expenditure,ADJUSTMENT: Adjustment,TRANSFORMATION: + Transformation,TIME_PERIOD: Time period,OBS_VALUE,OBS_STATUS: Observation + status,UNIT_MULT: Unit multiplier,BASE_PER: Base period,DURABILITY: Durability,DECIMALS: + Decimals\r\nDATAFLOW,OECD.SDD.TPS:DF_PRICES_ALL(1.0),I,GBR: United + Kingdom,A: Annual,N: National,CPI: Consumer price index,PA: Percent per annum,_T: + Total,N: Neither seasonally adjusted nor calendar adjusted,\"GY: Growth rate, + over 1 year\",2020,1,A: Normal value,,,,2: Two\r\nDATAFLOW,OECD.SDD.TPS:DF_PRICES_ALL(1.0),I,GBR: + United Kingdom,A: Annual,N: National,CPI: Consumer price index,PA: Percent + per annum,_T: Total,N: Neither seasonally adjusted nor calendar adjusted,\"GY: + Growth rate, over 1 year\",2021,2.5,A: Normal value,,,,2: Two\r\n" headers: - Accept-Ranges: - - values + CF-RAY: + - 9dc5fdb1fac4b646-YVR Cache-Control: - - no-store,no-cache - Content-Encoding: - - gzip + - public,max-age=7200 + Connection: + - keep-alive + Content-Language: + - en,en-US Content-Type: - - application/vnd.sdmx.genericdata+xml; charset=utf-8; version=2.1 + - application/vnd.sdmx.data+csv; version=2.0.0; labels=both; charset=utf-8 Date: - - Fri, 28 Jun 2024 16:10:59 GMT - Pragma: - - no-cache + - Sat, 14 Mar 2026 20:22:33 GMT + Server: + - cloudflare Strict-Transport-Security: - max-age=2592000 Transfer-Encoding: - chunked Vary: - - Accept,Accept-Encoding,Accept-Encoding + - X-Range, Accept, Accept-Language, Accept-Encoding, Accept-Charset,Accept,Accept-Encoding,Accept-Encoding X-Server-Node: - - Server 3 + - Server 2 + alt-svc: + - h3=":443"; ma=86400 api-supported-versions: - - '1' + - '2' + cf-cache-status: + - MISS + last-modified: + - Sat, 14 Mar 2026 20:22:33 GMT + set-cookie: + - __cf_bm=Y._VioO0SKiW5XlmlNxfl_gDEvrMuqna39Ibkn0Hijo-1773519751.995473-1.0.1.1-0v4lXGn6873k7l7CZkFHUaVLcrI33Crd8F1zexLmQVqgfzJ1VOq5oeecR1Q9HHYIs1nu642IDnolYwksB3k8aImb7CIGdUZQbSIFgYuDf8_aRtisn2PqZ1fxzMU.8BVY; + HttpOnly; Secure; Path=/; Domain=oecd.org; Expires=Sat, 14 Mar 2026 20:52:33 + GMT + - _cfuvid=Hxt4an.BkDFVCkqN3WjOAlEjD03._Z_lFeiSWhK81a8-1773519751.995473-1.0.1.1-fzUtMB7vJi4IhwJu6lkqPtxFScQdesLm3wzmW_fOA9M; + HttpOnly; SameSite=None; Secure; Path=/; Domain=oecd.org status: code: 200 message: OK diff --git a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_economic_indicators_fetcher_urllib3_v2.yaml b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_economic_indicators_fetcher_urllib3_v2.yaml new file mode 100644 index 00000000000..7b0ab131d3c --- /dev/null +++ b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_economic_indicators_fetcher_urllib3_v2.yaml @@ -0,0 +1,256 @@ +interactions: +- request: + body: null + headers: + Accept: + - application/vnd.sdmx.structure+json; version=1.0; charset=utf-8 + Accept-Encoding: + - gzip, deflate, br + Connection: + - keep-alive + method: GET + uri: https://sdmx.oecd.org/public/rest/v2/availability/dataflow/OECD.SDD.STES/DSD_KEI%40DF_KEI/4.0/*.*.*.*.*.*.* + response: + body: + string: 
'{"data":{"contentConstraints":[{"id":"CC","version":"1.0","agencyID":"SDMX","name":"Autogenerated + content constraint","names":{"en":"Autogenerated content constraint"},"annotations":[{"id":"obs_count","title":"1570442","type":"sdmx_metrics"}],"type":"Actual","constraintAttachment":{"dataflows":["urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=OECD.SDD.STES:DF_KEI(4.0)"]},"cubeRegions":[{"isIncluded":true,"keyValues":[{"id":"REF_AREA","values":["ARG","AUS","AUT","BEL","BRA","CAN","CHE","CHL","CHN","COL","CRI","CZE","DEU","DNK","EA","EA19","EA20","ESP","EST","EU27_2020","FIN","FRA","G20","G7","GBR","GRC","HRV","HUN","IDN","IND","IRL","ISL","ISR","ITA","JPN","KOR","LTU","LUX","LVA","MEX","NLD","NOR","NZL","OECD","OECDE","POL","PRT","RUS","SAU","SVK","SVN","SWE","TUR","USA","ZAF"]},{"id":"FREQ","values":["A","M","Q"]},{"id":"MEASURE","values":["B1GQ_Q","BCICP","CA_GDP","CC","CCICP","CP","EMP","EX","H_EARN","IM","IR3TIB","IRLT","IRSTCI","LI","MABM","MANM","NODW","P3_S13_Q","P3_S1M_Q","P51G_Q","P6_Q","P7_Q","PP","PRVM","RS","SHARE","TOCAPA","TOVM","ULC","UNEMP"]},{"id":"UNIT_MEASURE","values":["GR","IX","PA","PB","PS","PT_B1GQ","PT_LF","USD","XDC_USD"]},{"id":"ACTIVITY","values":["_T","_Z","BTE","C","F","F41","G45","G47"]},{"id":"ADJUSTMENT","values":["_Z","AA","RT","Y"]},{"id":"TRANSFORMATION","values":["_Z","G1","GY"]},{"id":"TIME_PERIOD","timeRange":{"startPeriod":{"period":"1914-01-01T00:00:00","isInclusive":true},"endPeriod":{"period":"2026-03-30T22:00:00","isInclusive":true}}}]}]}]},"meta":{"schema":"https://raw.githubusercontent.com/sdmx-twg/sdmx-json/develop/structure-message/tools/schemas/1.0/sdmx-json-structure-schema.json","contentLanguages":["en"],"id":"IDREF7335","prepared":"2026-03-14T20:22:58Z","test":false,"sender":{"id":"Unknown"},"receiver":[{"id":"Unknown"}]}}' + headers: + Accept-Ranges: + - values + CF-RAY: + - 9dc5fe50fc157f0a-YVR + Cache-Control: + - public,max-age=86400 + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/vnd.sdmx.structure+json; charset=utf-8; version=1.0 + Date: + - Sat, 14 Mar 2026 20:22:58 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=2592000 + Transfer-Encoding: + - chunked + Vary: + - Accept, Accept-Language, Accept-Encoding, Accept-Charset,Accept,Accept-Encoding,Accept-Encoding + X-Server-Node: + - Server 2 + alt-svc: + - h3=":443"; ma=86400 + api-supported-versions: + - '2' + cf-cache-status: + - MISS + last-modified: + - Sat, 14 Mar 2026 20:22:58 GMT + set-cookie: + - __cf_bm=inJ1CCI0ejv8huyPHYoD7w7gsxY5MUiDy8XPszcL80I-1773519777.4343143-1.0.1.1-E9ONQAF6VSuBdc0RCClopUzyG7JX0L0wC7jFmvlRuChbOtylKa1OawrS8PicaKx0Smr0Cvt2_h6J3L89ugsXgO7Gyatx3hp75B0JW0zwK3f.y.x35IM7Y60s5T8LZJeF; + HttpOnly; Secure; Path=/; Domain=oecd.org; Expires=Sat, 14 Mar 2026 20:52:58 + GMT + - _cfuvid=3UdG3cr2_ELDfic6n2aftYSv577FzT9u6dAmSixOsCI-1773519777.4343143-1.0.1.1-5JVt1TookDce536rKIYWCUOC3Um1adOGDbiI3XkKIAk; + HttpOnly; SameSite=None; Secure; Path=/; Domain=oecd.org + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - application/vnd.sdmx.structure+json; version=1.0; charset=utf-8 + Accept-Encoding: + - gzip, deflate, br + Connection: + - keep-alive + method: GET + uri: https://sdmx.oecd.org/public/rest/v2/availability/dataflow/OECD.SDD.STES/DSD_KEI%40DF_KEI/4.0/GBR.*.*.*.*.*.* + response: + body: + string: '{"data":{"contentConstraints":[{"id":"CC","version":"1.0","agencyID":"SDMX","name":"Autogenerated + content constraint","names":{"en":"Autogenerated content 
constraint"},"annotations":[{"id":"obs_count","title":"46988","type":"sdmx_metrics"}],"type":"Actual","constraintAttachment":{"dataflows":["urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=OECD.SDD.STES:DF_KEI(4.0)"]},"cubeRegions":[{"isIncluded":true,"keyValues":[{"id":"REF_AREA","values":["GBR"]},{"id":"FREQ","values":["A","M","Q"]},{"id":"MEASURE","values":["B1GQ_Q","BCICP","CA_GDP","CC","CCICP","CP","EMP","EX","H_EARN","IM","IR3TIB","IRLT","IRSTCI","LI","MABM","MANM","NODW","P3_S13_Q","P3_S1M_Q","P51G_Q","P6_Q","P7_Q","PP","PRVM","RS","SHARE","TOCAPA","TOVM","ULC","UNEMP"]},{"id":"UNIT_MEASURE","values":["GR","IX","PA","PB","PS","PT_B1GQ","PT_LF","USD","XDC_USD"]},{"id":"ACTIVITY","values":["_T","_Z","BTE","C","F","F41","G45","G47"]},{"id":"ADJUSTMENT","values":["_Z","AA","RT","Y"]},{"id":"TRANSFORMATION","values":["_Z","G1","GY"]},{"id":"TIME_PERIOD","timeRange":{"startPeriod":{"period":"1948-01-01T00:00:00","isInclusive":true},"endPeriod":{"period":"2026-02-27T23:00:00","isInclusive":true}}}]}]}]},"meta":{"schema":"https://raw.githubusercontent.com/sdmx-twg/sdmx-json/develop/structure-message/tools/schemas/1.0/sdmx-json-structure-schema.json","contentLanguages":["en"],"id":"IDREF7240","prepared":"2026-03-14T20:23:00Z","test":false,"sender":{"id":"Unknown"},"receiver":[{"id":"Unknown"}]}}' + headers: + Accept-Ranges: + - values + CF-RAY: + - 9dc5fe5f989aba29-YVR + Cache-Control: + - public,max-age=86400 + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/vnd.sdmx.structure+json; version=1.0; charset=utf-8 + Date: + - Sat, 14 Mar 2026 20:23:00 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=2592000 + Transfer-Encoding: + - chunked + Vary: + - Accept, Accept-Language, Accept-Encoding, Accept-Charset,Accept,Accept-Encoding,Accept-Encoding + X-Server-Node: + - Server 3 + alt-svc: + - h3=":443"; ma=86400 + api-supported-versions: + - '2' + cf-cache-status: + - MISS + last-modified: + - Sat, 14 Mar 2026 20:23:00 GMT + set-cookie: + - __cf_bm=zO16t6tSNb0mrkNA2MlPk5dbAgIljEOrXYYMMP.AiOY-1773519779.7743244-1.0.1.1-BAXnXWC7CjNfTSwDaMjCWuNSCLOvb5aaCQ1Ex9181AkJef2QzoYHbIuadLbWdBrvR.PsiaZKzaYI5z8wugLSAtdtuYIEpGJLB8jv2XaVPyDKOle3kRBXLvokc..RcM.t; + HttpOnly; Secure; Path=/; Domain=oecd.org; Expires=Sat, 14 Mar 2026 20:53:00 + GMT + - _cfuvid=kquRsnlPCvosEBFaT6.EXctEdkuc7_cGsY8cxxs_wPU-1773519779.7743244-1.0.1.1-VG8.9KYQfFRiAU0pz1ukkbkahlRWOX9GVi8S7HFuSq8; + HttpOnly; SameSite=None; Secure; Path=/; Domain=oecd.org + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - application/vnd.sdmx.structure+json; version=1.0; charset=utf-8 + Accept-Encoding: + - gzip, deflate, br + Connection: + - keep-alive + method: GET + uri: https://sdmx.oecd.org/public/rest/v2/availability/dataflow/OECD.SDD.STES/DSD_KEI%40DF_KEI/4.0/GBR.Q.*.*.*.*.* + response: + body: + string: '{"data":{"contentConstraints":[{"id":"CC","version":"1.0","agencyID":"SDMX","name":"Autogenerated + content constraint","names":{"en":"Autogenerated content 
constraint"},"annotations":[{"id":"obs_count","title":"13997","type":"sdmx_metrics"}],"type":"Actual","constraintAttachment":{"dataflows":["urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=OECD.SDD.STES:DF_KEI(4.0)"]},"cubeRegions":[{"isIncluded":true,"keyValues":[{"id":"REF_AREA","values":["GBR"]},{"id":"FREQ","values":["Q"]},{"id":"MEASURE","values":["B1GQ_Q","BCICP","CA_GDP","CC","CP","EMP","EX","H_EARN","IM","IR3TIB","IRLT","IRSTCI","MABM","MANM","NODW","P3_S13_Q","P3_S1M_Q","P51G_Q","P6_Q","P7_Q","PP","PRVM","SHARE","TOVM","ULC","UNEMP"]},{"id":"UNIT_MEASURE","values":["GR","IX","PA","PB","PS","PT_B1GQ","PT_LF","USD","XDC_USD"]},{"id":"ACTIVITY","values":["_T","_Z","BTE","C","F","F41","G47"]},{"id":"ADJUSTMENT","values":["_Z","Y"]},{"id":"TRANSFORMATION","values":["_Z","G1","GY"]},{"id":"TIME_PERIOD","timeRange":{"startPeriod":{"period":"1948-01-01T00:00:00","isInclusive":true},"endPeriod":{"period":"2025-12-30T23:00:00","isInclusive":true}}}]}]}]},"meta":{"schema":"https://raw.githubusercontent.com/sdmx-twg/sdmx-json/develop/structure-message/tools/schemas/1.0/sdmx-json-structure-schema.json","contentLanguages":["en"],"id":"IDREF7336","prepared":"2026-03-14T20:23:01Z","test":false,"sender":{"id":"Unknown"},"receiver":[{"id":"Unknown"}]}}' + headers: + Accept-Ranges: + - values + CF-RAY: + - 9dc5fe65cb9458a4-YVR + Cache-Control: + - public,max-age=86400 + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/vnd.sdmx.structure+json; charset=utf-8; version=1.0 + Date: + - Sat, 14 Mar 2026 20:23:01 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=2592000 + Transfer-Encoding: + - chunked + Vary: + - Accept, Accept-Language, Accept-Encoding, Accept-Charset,Accept,Accept-Encoding,Accept-Encoding + X-Server-Node: + - Server 2 + alt-svc: + - h3=":443"; ma=86400 + api-supported-versions: + - '2' + cf-cache-status: + - MISS + last-modified: + - Sat, 14 Mar 2026 20:23:01 GMT + set-cookie: + - __cf_bm=dd6OLEb8NrBnmKp4cR3MtQktfMOguGc4fCrvdmXMoF8-1773519780.767934-1.0.1.1-qz2S8Y6j4vhhhSHIBvjnsk0lIXXA7ArHE0.I4EgooVftEeXC.UcbO9IGpCyFHOjb4S7dUHwICq5a.BPkarSQzvM.Paqac9qoezmJDFI8j6m7XNj71oz46bAugAJG9smR; + HttpOnly; Secure; Path=/; Domain=oecd.org; Expires=Sat, 14 Mar 2026 20:53:01 + GMT + - _cfuvid=nzQJkzJKjdLgF1goKxhcwjrud5.w.TznEsmAIfAkAFo-1773519780.767934-1.0.1.1-U6AC2Hgol8pTbtUwq2qh.ZYmXuN116CeuOll9DTyZgg; + HttpOnly; SameSite=None; Secure; Path=/; Domain=oecd.org + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - application/vnd.sdmx.data+csv; version=2.0.0; labels=both + method: GET + uri: https://sdmx.oecd.org/public/rest/v2/data/dataflow/OECD.SDD.STES/DSD_KEI%40DF_KEI/4.0/GBR.Q.B1GQ_Q.*.*.*.*?c%5BTIME_PERIOD%5D=ge%3A2023-01%2Cle%3A2024-01&detail=full&dimensionAtObservation=TIME_PERIOD + response: + body: + string: "STRUCTURE,STRUCTURE_ID,ACTION,REF_AREA: Reference area,FREQ: Frequency + of observation,MEASURE: Measure,UNIT_MEASURE: Unit of measure,ACTIVITY: Economic + activity,ADJUSTMENT: Adjustment,TRANSFORMATION: Transformation,TIME_PERIOD: + Time period,OBS_VALUE,OBS_STATUS: Observation status,UNIT_MULT: Unit multiplier,DECIMALS: + Decimals,BASE_PER: Base period\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: + United Kingdom,Q: Quarterly,\"B1GQ_Q: Gross domestic product, volume\",GR: + Growth rate,_T: Total - all activities,Y: Calendar and seasonally adjusted,\"G1: + Growth rate, period on period\",2023-Q1,0.0564736352323347,A: Normal value,0: + Units,1: 
One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: United + Kingdom,Q: Quarterly,\"B1GQ_Q: Gross domestic product, volume\",GR: Growth + rate,_T: Total - all activities,Y: Calendar and seasonally adjusted,\"GY: + Growth rate, over 1 year\",2023-Q3,0.117898953319528,A: Normal value,0: Units,1: + One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: United Kingdom,Q: + Quarterly,\"B1GQ_Q: Gross domestic product, volume\",GR: Growth rate,_T: Total + - all activities,Y: Calendar and seasonally adjusted,\"G1: Growth rate, period + on period\",2023-Q3,-0.239597036073858,A: Normal value,0: Units,1: One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: + United Kingdom,Q: Quarterly,\"B1GQ_Q: Gross domestic product, volume\",GR: + Growth rate,_T: Total - all activities,Y: Calendar and seasonally adjusted,\"GY: + Growth rate, over 1 year\",2023-Q1,0.978569345974423,A: Normal value,0: Units,1: + One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: United Kingdom,Q: + Quarterly,\"B1GQ_Q: Gross domestic product, volume\",GR: Growth rate,_T: Total + - all activities,Y: Calendar and seasonally adjusted,\"GY: Growth rate, over + 1 year\",2023-Q4,-0.462967667754943,A: Normal value,0: Units,1: One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: + United Kingdom,Q: Quarterly,\"B1GQ_Q: Gross domestic product, volume\",GR: + Growth rate,_T: Total - all activities,Y: Calendar and seasonally adjusted,\"G1: + Growth rate, period on period\",2023-Q4,-0.321441500118347,A: Normal value,0: + Units,1: One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: United + Kingdom,Q: Quarterly,\"B1GQ_Q: Gross domestic product, volume\",GR: Growth + rate,_T: Total - all activities,Y: Calendar and seasonally adjusted,\"GY: + Growth rate, over 1 year\",2023-Q2,0.459833754652594,A: Normal value,0: Units,1: + One,\r\nDATAFLOW,OECD.SDD.STES:DF_KEI(4.0),I,GBR: United Kingdom,Q: + Quarterly,\"B1GQ_Q: Gross domestic product, volume\",GR: Growth rate,_T: Total + - all activities,Y: Calendar and seasonally adjusted,\"G1: Growth rate, period + on period\",2023-Q2,0.041351932513245,A: Normal value,0: Units,1: One,\r\n" + headers: + CF-RAY: + - 9dc5fe6bbbbd1c74-YVR + Cache-Control: + - public,max-age=7200 + Connection: + - keep-alive + Content-Language: + - en,en-US + Content-Type: + - application/vnd.sdmx.data+csv; charset=utf-8; labels=both; version=2.0.0 + Date: + - Sat, 14 Mar 2026 20:23:04 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=2592000 + Transfer-Encoding: + - chunked + Vary: + - X-Range, Accept, Accept-Language, Accept-Encoding, Accept-Charset,Accept,Accept-Encoding,Accept-Encoding + X-Server-Node: + - Server 1 + alt-svc: + - h3=":443"; ma=86400 + api-supported-versions: + - '2' + cf-cache-status: + - MISS + last-modified: + - Sat, 14 Mar 2026 20:23:04 GMT + set-cookie: + - __cf_bm=aoyemoNsfR8Cbeb54Lu765Anpc.vgnfHodRaK46D0Mc-1773519781.7131035-1.0.1.1-DIIrKyLe9cwCUktB7LccQaIMwPF6jiSEvLsICNJnKwzrQqoWM6VlU3ggfJdHzKCe7NQXwm0DBZtmgEmphKnn5hE5.KRzlEv51rVOFo8SXcDGCz7QrkZ3b.58.ziGhskj; + HttpOnly; Secure; Path=/; Domain=oecd.org; Expires=Sat, 14 Mar 2026 20:53:04 + GMT + - _cfuvid=_d7VoAVsBaBm.J361NTEAu2WUyz1MNol6YDDALKBpbc-1773519781.7131035-1.0.1.1-6Q7.rSVWRs57QnuwTFabiGN99GRPd_4InECX4UPVnI8; + HttpOnly; SameSite=None; Secure; Path=/; Domain=oecd.org + status: + code: 200 + message: OK +version: 1 diff --git a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_gdp_forecast_fetcher_urllib3_v2.yaml 
b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_gdp_forecast_fetcher_urllib3_v2.yaml index e093a14492c..582d44b5e9e 100644 --- a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_gdp_forecast_fetcher_urllib3_v2.yaml +++ b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_gdp_forecast_fetcher_urllib3_v2.yaml @@ -3,45 +3,56 @@ interactions: body: null headers: Accept: - - application/vnd.sdmx.data+csv; charset=utf-8 + - application/vnd.sdmx.data+csv; version=2.0.0; labels=both method: GET - uri: https://sdmx.oecd.org/public/rest/data/OECD.ECO.MAD,DSD_EO@DF_EO,1.1/USA.GDPV_USD.A?detail=dataonly&dimensionAtObservation=TIME_PERIOD&endPeriod=2024-01-01&format=csvfile&startPeriod=2023-01-01 + uri: https://sdmx.oecd.org/public/rest/v2/data/dataflow/OECD.ECO.MAD/DSD_EO%40DF_EO/1.4/USA.GDPV_USD.A?c%5BTIME_PERIOD%5D=ge%3A2023-01%2Cle%3A2024-01&detail=full&dimensionAtObservation=TIME_PERIOD response: body: - string: !!binary | - H4sIAAAAAAAEAO29B2AcSZYlJi9tynt/SvVK1+B0oQiAYBMk2JBAEOzBiM3mkuwdaUcjKasqgcpl - VmVdZhZAzO2dvPfee++999577733ujudTif33/8/XGZkAWz2zkrayZ4hgKrIHz9+fB8/Ip4evzl+ - 9vzL745enT77/Y9fnR6Pvjg9fv3Vq9PRs1enPzF6c/bF6e//8vTV2ZdPR18+ef37/+Tx869O+bfX - b47ffPV69NWLsze/v3lF/vjq+ZvRyVevXp2+OPl9Rk+OXzMAAvvm218+/fL5l5//PqOnpydnXxw/ - fz16+ers5PT3R5vR8dPvfPX6zRenL978xsmXpydPx6cnX46/OH766Onrp7//6Ze/59Nn9O/W7nj3 - zuir18ejz5++/Mnf/6vXT0fHo72dvXujvd0HD+4/vL9z79Pde7v3Rub5jZP/B2ir18HhAAAA + string: "STRUCTURE,STRUCTURE_ID,ACTION,REF_AREA: Reference area,MEASURE: Measure,FREQ: + Frequency of observation,TIME_PERIOD: Time period,OBS_VALUE,OBS_STATUS: Observation + status,UNIT_MEASURE: Unit of measure,UNIT_MULT: Unit multiplier,CURRENCY: + Currency,BASE_PER: Base period,METHODOLOGY: Methodology,DECIMALS: Decimals,PRICE_BASE: + Price base,ADJUSTMENT: Adjustment\r\nDATAFLOW,OECD.ECO.MAD:DF_EO(1.4),I,USA: + United States,\"GDPV_USD: Gross domestic product, volume in USD, constant + exchange rates\",A: Annual,2023,25031611545935,,\"USD_EXC: US dollars, exchange + rate converted\",0: Units,,2021,,2: Two,L: Chain linked volume,\r\n" headers: - Accept-Ranges: - - values + CF-RAY: + - 9dc5fde08968ef12-YVR Cache-Control: - - no-store,no-cache - Content-Disposition: - - attachment; filename="OECD.ECO.MAD,DSD_EO@DF_EO,1.1+USA.GDPV_USD.A.csv" - Content-Encoding: - - gzip + - public,max-age=7200 + Connection: + - keep-alive Content-Language: - en,en-US Content-Type: - - application/vnd.sdmx.data+csv; charset=utf-8 + - application/vnd.sdmx.data+csv; charset=utf-8; labels=both; version=2.0.0 Date: - - Sat, 13 Jul 2024 18:47:04 GMT - Pragma: - - no-cache + - Sat, 14 Mar 2026 20:22:42 GMT + Server: + - cloudflare Strict-Transport-Security: - max-age=2592000 Transfer-Encoding: - chunked Vary: - - Accept,Accept-Encoding,Accept-Encoding + - X-Range, Accept, Accept-Language, Accept-Encoding, Accept-Charset,Accept,Accept-Encoding,Accept-Encoding X-Server-Node: - - Server 2 + - Server 3 + alt-svc: + - h3=":443"; ma=86400 api-supported-versions: - - '1' + - '2' + cf-cache-status: + - MISS + last-modified: + - Sat, 14 Mar 2026 20:22:42 GMT + set-cookie: + - __cf_bm=G75AQgFiVwLSvlKsvjZiKXtdc0Q58ycOaiQugSC4mEc-1773519759.4410257-1.0.1.1-OB1cC0N8sNRLFVfKvD8AEYP2GoLxuUKsuw9w50XW.BcUXGB_wLOXuDfJ9Lr0mr1vYvJuej4eSLUbKE8NJ3kAB.rn5OKDFQzD8dQmy0EoCUYQ1xEgH4Lz9OK6G6Zbs1S8; + HttpOnly; Secure; Path=/; Domain=oecd.org; Expires=Sat, 14 Mar 2026 20:52:42 + GMT + - 
_cfuvid=yk2Tj_Lkd0RTJzFZI4iNJHZcpPhvi4k4Sn5zfmbVLwY-1773519759.4410257-1.0.1.1-KD8YGE2bBkrzsyJhG9Y6aYAV_aQnk4z9bxtXieRxo.U; + HttpOnly; SameSite=None; Secure; Path=/; Domain=oecd.org status: code: 200 message: OK diff --git a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_house_price_index_fetcher_urllib3_v2.yaml b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_house_price_index_fetcher_urllib3_v2.yaml index 9054a558f92..71ce155fc35 100644 --- a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_house_price_index_fetcher_urllib3_v2.yaml +++ b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_house_price_index_fetcher_urllib3_v2.yaml @@ -3,41 +3,105 @@ interactions: body: null headers: Accept: - - application/vnd.sdmx.data+csv; charset=utf-8 - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive + - application/vnd.sdmx.data+csv; version=2.0.0; labels=both method: GET - uri: https://sdmx.oecd.org/public/rest/data/OECD.SDD.TPS,DSD_RHPI_TARGET@DF_RHPI_TARGET,1.0/COU.GBR.Q.RHPI.IX....?detail=dataonly&dimensionAtObservation=TIME_PERIOD&endPeriod=2024-04&startPeriod=2020-01 + uri: https://sdmx.oecd.org/public/rest/v2/data/dataflow/OECD.SDD.TPS/DSD_RHPI_TARGET%40DF_RHPI_TARGET/1.0/COU.GBR.Q.RHPI.IX.*.*.*.*?c%5BTIME_PERIOD%5D=ge%3A2020-01%2Cle%3A2024-04&detail=full&dimensionAtObservation=TIME_PERIOD response: body: - string: "DATAFLOW,REF_AREA_TYPE,REF_AREA,FREQ,MEASURE,UNIT_MEASURE,ADJUSTMENT,TRANSFORMATION,VINTAGE,DWELLINGS,TIME_PERIOD,OBS_VALUE,OBS_STATUS,UNIT_MULT,DECIMALS,BASE_PER\r\nOECD.SDD.TPS:DSD_RHPI_TARGET@DF_RHPI_TARGET(1.0),COU,GBR,Q,RHPI,IX,N,_Z,_T,_T,2020-Q3,120.812,,,,\r\nOECD.SDD.TPS:DSD_RHPI_TARGET@DF_RHPI_TARGET(1.0),COU,GBR,Q,RHPI,IX,N,_Z,_T,_T,2022-Q1,136.43,,,,\r\nOECD.SDD.TPS:DSD_RHPI_TARGET@DF_RHPI_TARGET(1.0),COU,GBR,Q,RHPI,IX,N,_Z,_T,_T,2020-Q4,124.248,,,,\r\nOECD.SDD.TPS:DSD_RHPI_TARGET@DF_RHPI_TARGET(1.0),COU,GBR,Q,RHPI,IX,N,_Z,_T,_T,2021-Q1,126.185,,,,\r\nOECD.SDD.TPS:DSD_RHPI_TARGET@DF_RHPI_TARGET(1.0),COU,GBR,Q,RHPI,IX,N,_Z,_T,_T,2021-Q2,128.433,,,,\r\nOECD.SDD.TPS:DSD_RHPI_TARGET@DF_RHPI_TARGET(1.0),COU,GBR,Q,RHPI,IX,N,_Z,_T,_T,2021-Q3,130.745,,,,\r\nOECD.SDD.TPS:DSD_RHPI_TARGET@DF_RHPI_TARGET(1.0),COU,GBR,Q,RHPI,IX,N,_Z,_T,_T,2021-Q4,133.186,,,,\r\nOECD.SDD.TPS:DSD_RHPI_TARGET@DF_RHPI_TARGET(1.0),COU,GBR,Q,RHPI,IX,N,_Z,_T,_T,2022-Q2,139.995,,,,\r\nOECD.SDD.TPS:DSD_RHPI_TARGET@DF_RHPI_TARGET(1.0),COU,GBR,Q,RHPI,IX,N,_Z,_T,_T,2022-Q3,145.294,,,,\r\nOECD.SDD.TPS:DSD_RHPI_TARGET@DF_RHPI_TARGET(1.0),COU,GBR,Q,RHPI,IX,N,_Z,_T,_T,2022-Q4,145.455,,,,\r\nOECD.SDD.TPS:DSD_RHPI_TARGET@DF_RHPI_TARGET(1.0),COU,GBR,Q,RHPI,IX,N,_Z,_T,_T,2023-Q1,141.858,,,,\r\nOECD.SDD.TPS:DSD_RHPI_TARGET@DF_RHPI_TARGET(1.0),COU,GBR,Q,RHPI,IX,N,_Z,_T,_T,2023-Q2,141.215,,,,\r\nOECD.SDD.TPS:DSD_RHPI_TARGET@DF_RHPI_TARGET(1.0),COU,GBR,Q,RHPI,IX,N,_Z,_T,_T,2023-Q3,143.72,,,,\r\nOECD.SDD.TPS:DSD_RHPI_TARGET@DF_RHPI_TARGET(1.0),COU,GBR,Q,RHPI,IX,N,_Z,_T,_T,2023-Q4,142.468,,,,\r\nOECD.SDD.TPS:DSD_RHPI_TARGET@DF_RHPI_TARGET(1.0),COU,GBR,Q,RHPI,IX,N,_Z,_T,_T,2024-Q1,142.179,,,,\r\nOECD.SDD.TPS:DSD_RHPI_TARGET@DF_RHPI_TARGET(1.0),COU,GBR,Q,RHPI,IX,N,_Z,_T,_T,2020-Q1,117.108,,,,\r\nOECD.SDD.TPS:DSD_RHPI_TARGET@DF_RHPI_TARGET(1.0),COU,GBR,Q,RHPI,IX,N,_Z,_T,_T,2020-Q2,117.326,,,,\r\n" + string: "STRUCTURE,STRUCTURE_ID,ACTION,REF_AREA_TYPE: Reference area type,REF_AREA: + Reference area,FREQ: Frequency of observation,MEASURE: Measure,UNIT_MEASURE: + Unit of measure,ADJUSTMENT: 
Adjustment,TRANSFORMATION: Transformation,VINTAGE: + Vintage,DWELLINGS: Dwellings,TIME_PERIOD: Time period,OBS_VALUE,OBS_STATUS: + Observation status,UNIT_MULT: Unit multiplier,DECIMALS: Decimals,BASE_PER: + Base period\r\nDATAFLOW,OECD.SDD.TPS:DF_RHPI_TARGET(1.0),I,COU: + Country,GBR: United Kingdom,Q: Quarterly,RHPI: House price index,IX: Index,N: + Neither seasonally adjusted nor calendar adjusted,_Z: Not applicable,_T: Total,_T: + Total,2022-Q4,145.264,A: Normal value,0: Units,1: One,2015\r\nDATAFLOW,OECD.SDD.TPS:DF_RHPI_TARGET(1.0),I,COU: + Country,GBR: United Kingdom,Q: Quarterly,RHPI: House price index,IX: Index,N: + Neither seasonally adjusted nor calendar adjusted,_Z: Not applicable,_T: Total,_T: + Total,2020-Q1,117.108,A: Normal value,0: Units,1: One,2015\r\nDATAFLOW,OECD.SDD.TPS:DF_RHPI_TARGET(1.0),I,COU: + Country,GBR: United Kingdom,Q: Quarterly,RHPI: House price index,IX: Index,N: + Neither seasonally adjusted nor calendar adjusted,_Z: Not applicable,_T: Total,_T: + Total,2023-Q1,141.875,A: Normal value,0: Units,1: One,2015\r\nDATAFLOW,OECD.SDD.TPS:DF_RHPI_TARGET(1.0),I,COU: + Country,GBR: United Kingdom,Q: Quarterly,RHPI: House price index,IX: Index,N: + Neither seasonally adjusted nor calendar adjusted,_Z: Not applicable,_T: Total,_T: + Total,2023-Q2,141.183,A: Normal value,0: Units,1: One,2015\r\nDATAFLOW,OECD.SDD.TPS:DF_RHPI_TARGET(1.0),I,COU: + Country,GBR: United Kingdom,Q: Quarterly,RHPI: House price index,IX: Index,N: + Neither seasonally adjusted nor calendar adjusted,_Z: Not applicable,_T: Total,_T: + Total,2023-Q3,143.546,A: Normal value,0: Units,1: One,2015\r\nDATAFLOW,OECD.SDD.TPS:DF_RHPI_TARGET(1.0),I,COU: + Country,GBR: United Kingdom,Q: Quarterly,RHPI: House price index,IX: Index,N: + Neither seasonally adjusted nor calendar adjusted,_Z: Not applicable,_T: Total,_T: + Total,2023-Q4,141.685,A: Normal value,0: Units,1: One,2015\r\nDATAFLOW,OECD.SDD.TPS:DF_RHPI_TARGET(1.0),I,COU: + Country,GBR: United Kingdom,Q: Quarterly,RHPI: House price index,IX: Index,N: + Neither seasonally adjusted nor calendar adjusted,_Z: Not applicable,_T: Total,_T: + Total,2024-Q1,140.062,A: Normal value,0: Units,1: One,2015\r\nDATAFLOW,OECD.SDD.TPS:DF_RHPI_TARGET(1.0),I,COU: + Country,GBR: United Kingdom,Q: Quarterly,RHPI: House price index,IX: Index,N: + Neither seasonally adjusted nor calendar adjusted,_Z: Not applicable,_T: Total,_T: + Total,2020-Q2,117.347,A: Normal value,0: Units,1: One,2015\r\nDATAFLOW,OECD.SDD.TPS:DF_RHPI_TARGET(1.0),I,COU: + Country,GBR: United Kingdom,Q: Quarterly,RHPI: House price index,IX: Index,N: + Neither seasonally adjusted nor calendar adjusted,_Z: Not applicable,_T: Total,_T: + Total,2020-Q3,120.878,A: Normal value,0: Units,1: One,2015\r\nDATAFLOW,OECD.SDD.TPS:DF_RHPI_TARGET(1.0),I,COU: + Country,GBR: United Kingdom,Q: Quarterly,RHPI: House price index,IX: Index,N: + Neither seasonally adjusted nor calendar adjusted,_Z: Not applicable,_T: Total,_T: + Total,2020-Q4,124.266,A: Normal value,0: Units,1: One,2015\r\nDATAFLOW,OECD.SDD.TPS:DF_RHPI_TARGET(1.0),I,COU: + Country,GBR: United Kingdom,Q: Quarterly,RHPI: House price index,IX: Index,N: + Neither seasonally adjusted nor calendar adjusted,_Z: Not applicable,_T: Total,_T: + Total,2021-Q1,126.175,A: Normal value,0: Units,1: One,2015\r\nDATAFLOW,OECD.SDD.TPS:DF_RHPI_TARGET(1.0),I,COU: + Country,GBR: United Kingdom,Q: Quarterly,RHPI: House price index,IX: Index,N: + Neither seasonally adjusted nor calendar adjusted,_Z: Not applicable,_T: Total,_T: + Total,2021-Q2,128.418,A: Normal value,0: Units,1: 
One,2015\r\nDATAFLOW,OECD.SDD.TPS:DF_RHPI_TARGET(1.0),I,COU: + Country,GBR: United Kingdom,Q: Quarterly,RHPI: House price index,IX: Index,N: + Neither seasonally adjusted nor calendar adjusted,_Z: Not applicable,_T: Total,_T: + Total,2021-Q3,130.804,A: Normal value,0: Units,1: One,2015\r\nDATAFLOW,OECD.SDD.TPS:DF_RHPI_TARGET(1.0),I,COU: + Country,GBR: United Kingdom,Q: Quarterly,RHPI: House price index,IX: Index,N: + Neither seasonally adjusted nor calendar adjusted,_Z: Not applicable,_T: Total,_T: + Total,2021-Q4,133.238,A: Normal value,0: Units,1: One,2015\r\nDATAFLOW,OECD.SDD.TPS:DF_RHPI_TARGET(1.0),I,COU: + Country,GBR: United Kingdom,Q: Quarterly,RHPI: House price index,IX: Index,N: + Neither seasonally adjusted nor calendar adjusted,_Z: Not applicable,_T: Total,_T: + Total,2022-Q1,136.387,A: Normal value,0: Units,1: One,2015\r\nDATAFLOW,OECD.SDD.TPS:DF_RHPI_TARGET(1.0),I,COU: + Country,GBR: United Kingdom,Q: Quarterly,RHPI: House price index,IX: Index,N: + Neither seasonally adjusted nor calendar adjusted,_Z: Not applicable,_T: Total,_T: + Total,2022-Q2,139.919,A: Normal value,0: Units,1: One,2015\r\nDATAFLOW,OECD.SDD.TPS:DF_RHPI_TARGET(1.0),I,COU: + Country,GBR: United Kingdom,Q: Quarterly,RHPI: House price index,IX: Index,N: + Neither seasonally adjusted nor calendar adjusted,_Z: Not applicable,_T: Total,_T: + Total,2022-Q3,145.168,A: Normal value,0: Units,1: One,2015\r\n" headers: - Accept-Ranges: - - values + CF-RAY: + - 9dc5fe2aab896052-YVR Cache-Control: - - no-store,no-cache - Content-Encoding: - - gzip + - public,max-age=7200 + Connection: + - keep-alive Content-Language: - en,en-US Content-Type: - - application/vnd.sdmx.data+csv; charset=utf-8 + - application/vnd.sdmx.data+csv; charset=utf-8; labels=both; version=2.0.0 Date: - - Thu, 27 Jun 2024 10:14:42 GMT - Pragma: - - no-cache + - Sat, 14 Mar 2026 20:22:54 GMT + Server: + - cloudflare Strict-Transport-Security: - max-age=2592000 Transfer-Encoding: - chunked Vary: - - Accept,Accept-Encoding,Accept-Encoding + - X-Range, Accept, Accept-Language, Accept-Encoding, Accept-Charset,Accept,Accept-Encoding,Accept-Encoding X-Server-Node: - - Server 1 + - Server 3 + alt-svc: + - h3=":443"; ma=86400 api-supported-versions: - - '1' + - '2' + cf-cache-status: + - MISS + last-modified: + - Sat, 14 Mar 2026 20:22:54 GMT + set-cookie: + - __cf_bm=dUEoyCNAW1KHcAcY7EZD7HTTG4Vuhcja0KfB1xBQzJg-1773519771.301906-1.0.1.1-td7FBazwr5zrcYRm0bW2uKvVfcEHtsduFhJZFJSxBb1.gupE.ZjZYo2KxC8BNnaUTZ5omsNc2nRHGK3SgXarKZXl7s6AadP30kveJi34Tg3aVA5PVwkM4QTleBDC6fuz; + HttpOnly; Secure; Path=/; Domain=oecd.org; Expires=Sat, 14 Mar 2026 20:52:54 + GMT + - _cfuvid=GQkZhvyWxf0zQQ3d3TpNsfE0o6huu1KN3cII0YKJ8Kc-1773519771.301906-1.0.1.1-jVvczkuK2gXcb.FjfMfiPqo8U_hybDDG.rd.bu8whNc; + HttpOnly; SameSite=None; Secure; Path=/; Domain=oecd.org status: code: 200 message: OK diff --git a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_nominal_gdp_fetcher_urllib3_v2.yaml b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_nominal_gdp_fetcher_urllib3_v2.yaml index 0e0f719d023..71b0a9a8a2c 100644 --- a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_nominal_gdp_fetcher_urllib3_v2.yaml +++ b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_nominal_gdp_fetcher_urllib3_v2.yaml @@ -3,55 +3,141 @@ interactions: body: null headers: Accept: - - application/json - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive + - application/vnd.sdmx.data+csv; 
version=2.0.0; labels=both method: GET - uri: https://sdmx.oecd.org/public/rest/data/OECD.SDD.NAD,DSD_NAMAIN1@DF_QNA_EXPENDITURE_USD,1.1/Q..USA.S1..B1GQ.....V..?detail=dataonly&dimensionAtObservation=TIME_PERIOD&endPeriod=2023-06-06&format=csvfile&startPeriod=2020-01-01 + uri: https://sdmx.oecd.org/public/rest/v2/data/dataflow/OECD.SDD.NAD/DSD_NAMAIN1%40DF_QNA_EXPENDITURE_USD/1.1/Q.*.USA.S1.*.B1GQ.*.*.*.*.V.*.*?c%5BTIME_PERIOD%5D=ge%3A2020-01%2Cle%3A2023-06&detail=full&dimensionAtObservation=TIME_PERIOD response: body: - string: !!binary | - H4sIAAAAAAAEAO29B2AcSZYlJi9tynt/SvVK1+B0oQiAYBMk2JBAEOzBiM3mkuwdaUcjKasqgcpl - VmVdZhZAzO2dvPfee++999577733ujudTif33/8/XGZkAWz2zkrayZ4hgKrIHz9+fB8/Ip4evzl+ - 9vzL746evTr9idHx0+989frNF6cv3oxenT77/Y9fnR6PXp+evPny1ejky69evDl99fL41ZvfXz96 - 8+r4xevjkzdnX74Ynb14/ebV73/8+vXpmxE++smzN7/P6PT3fnn64unZm69enY6+enH25vf/4vT4 - Nf54+ers5PT3f3L8+lSAPPvy1RfHDOfN8ZPnp7//2VNC4ezZ2Sl1cvbF6e//8vTV2ZdPR18+ef37 - /+Tx869OGbvf5/T41e/PkEaAhEaE5Ytnv//rN8dvvno9enp6cvbF8fPX/Jp+Jlh89fzN6OSrV69O - X5z8Pr9x8uXpydPx66dPxy+Onz56+vrp7//i+Ivjsxe7v+fTZ7//T7w4/v29Ufz+X71+urU73r0z - +onR7zP66jVRZxf/e7L7+U+Mfv+f0v9Ro9//5cuXo58cPT8evdnZ3dkb7e3s7W3/xP5o79P9nYP9 - nfsjeX5one9s/8S90d7up/sPCAHqGM8PrfPd7Z/YHe3tfbqzs3vwQx85dU4/7+093Lv36R51jOeH - 2TmR/d7B3sHDB/eoYzw/zM6J4fY/vb//6c5g5z9bnRO305zf39l7uLv7KY0azw9t5NQ5/bx/f39/ - 74dP9ns88k8Pdu99urNLHeP5oXVOIyeGu//w4f6n9x5Sx3h+aJ2TkqGR7z7Y+fT+7g+d7NT53mj3 - 4cPde7v7Pxedk6jRL/vE8dQxnt84+X8AYyF/4lwHAAA= + string: "STRUCTURE,STRUCTURE_ID,ACTION,FREQ: Frequency of observation,ADJUSTMENT: + Adjustment,REF_AREA: Reference area,SECTOR: Institutional sector,COUNTERPART_SECTOR: + Counterpart institutional sector,TRANSACTION: Transaction,INSTR_ASSET: Financial + instruments and non-financial assets,ACTIVITY: Economic activity,EXPENDITURE: + Expenditure,UNIT_MEASURE: Unit of measure,PRICE_BASE: Price base,TRANSFORMATION: + Transformation,TABLE_IDENTIFIER: Table identifier,TIME_PERIOD: Time period,OBS_VALUE,REF_YEAR_PRICE: + Price reference year,BASE_PER: Base period,CONF_STATUS: Confidentiality status,DECIMALS: + Decimals,OBS_STATUS: Observation status,UNIT_MULT: Unit multiplier,CURRENCY: + Currency\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA_EXPENDITURE_USD(1.1),I,Q: + Quarterly,Y: Calendar and seasonally adjusted,USA: United States,S1: Total + economy,S1: Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,\"USD_PPP: US dollars, PPP converted\",V: + Current prices,LA: Annual levels,T0102: Table 0102 - GDP identity from the + expenditure side,2020-Q2,19958291,,,F: Free (free for publication),1: One,A: + Normal value,6: Millions,USD: US dollar\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA_EXPENDITURE_USD(1.1),I,Q: + Quarterly,Y: Calendar and seasonally adjusted,USA: United States,S1: Total + economy,S1: Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,\"USD_PPP: US dollars, PPP converted\",V: + Current prices,LA: Annual levels,T0102: Table 0102 - GDP identity from the + expenditure side,2021-Q2,23425910,,,F: Free (free for publication),1: One,A: + Normal value,6: Millions,USD: US dollar\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA_EXPENDITURE_USD(1.1),I,Q: + Quarterly,Y: Calendar and seasonally adjusted,USA: United States,S1: Total + economy,S1: Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,\"USD_PPP: US dollars, PPP converted\",V: + Current prices,LA: Annual levels,T0102: Table 0102 - GDP identity from the + expenditure 
side,2021-Q1,22680693,,,F: Free (free for publication),1: One,A: + Normal value,6: Millions,USD: US dollar\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA_EXPENDITURE_USD(1.1),I,Q: + Quarterly,Y: Calendar and seasonally adjusted,USA: United States,S1: Total + economy,S1: Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,\"USD_PPP: US dollars, PPP converted\",V: + Current prices,LA: Annual levels,T0102: Table 0102 - GDP identity from the + expenditure side,2023-Q2,27530055,,,F: Free (free for publication),1: One,A: + Normal value,6: Millions,USD: US dollar\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA_EXPENDITURE_USD(1.1),I,Q: + Quarterly,Y: Calendar and seasonally adjusted,USA: United States,S1: Total + economy,S1: Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,\"USD_PPP: US dollars, PPP converted\",V: + Current prices,LA: Annual levels,T0102: Table 0102 - GDP identity from the + expenditure side,2020-Q1,21751238,,,F: Free (free for publication),1: One,A: + Normal value,6: Millions,USD: US dollar\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA_EXPENDITURE_USD(1.1),I,Q: + Quarterly,Y: Calendar and seasonally adjusted,USA: United States,S1: Total + economy,S1: Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,\"USD_PPP: US dollars, PPP converted\",V: + Current prices,LA: Annual levels,T0102: Table 0102 - GDP identity from the + expenditure side,2023-Q1,27216445,,,F: Free (free for publication),1: One,A: + Normal value,6: Millions,USD: US dollar\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA_EXPENDITURE_USD(1.1),I,Q: + Quarterly,Y: Calendar and seasonally adjusted,USA: United States,S1: Total + economy,S1: Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,\"USD_PPP: US dollars, PPP converted\",V: + Current prices,LA: Annual levels,T0102: Table 0102 - GDP identity from the + expenditure side,2022-Q4,26770514,,,F: Free (free for publication),1: One,A: + Normal value,6: Millions,USD: US dollar\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA_EXPENDITURE_USD(1.1),I,Q: + Quarterly,Y: Calendar and seasonally adjusted,USA: United States,S1: Total + economy,S1: Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,\"USD_PPP: US dollars, PPP converted\",V: + Current prices,LA: Annual levels,T0102: Table 0102 - GDP identity from the + expenditure side,2022-Q3,26336304,,,F: Free (free for publication),1: One,A: + Normal value,6: Millions,USD: US dollar\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA_EXPENDITURE_USD(1.1),I,Q: + Quarterly,Y: Calendar and seasonally adjusted,USA: United States,S1: Total + economy,S1: Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,\"USD_PPP: US dollars, PPP converted\",V: + Current prices,LA: Annual levels,T0102: Table 0102 - GDP identity from the + expenditure side,2022-Q2,25861292,,,F: Free (free for publication),1: One,A: + Normal value,6: Millions,USD: US dollar\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA_EXPENDITURE_USD(1.1),I,Q: + Quarterly,Y: Calendar and seasonally adjusted,USA: United States,S1: Total + economy,S1: Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,\"USD_PPP: US dollars, PPP converted\",V: + Current prices,LA: Annual levels,T0102: Table 0102 - GDP identity from the + expenditure side,2022-Q1,25250347,,,F: Free (free for publication),1: One,A: + Normal value,6: Millions,USD: US 
dollar\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA_EXPENDITURE_USD(1.1),I,Q: + Quarterly,Y: Calendar and seasonally adjusted,USA: United States,S1: Total + economy,S1: Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,\"USD_PPP: US dollars, PPP converted\",V: + Current prices,LA: Annual levels,T0102: Table 0102 - GDP identity from the + expenditure side,2021-Q4,24813600,,,F: Free (free for publication),1: One,A: + Normal value,6: Millions,USD: US dollar\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA_EXPENDITURE_USD(1.1),I,Q: + Quarterly,Y: Calendar and seasonally adjusted,USA: United States,S1: Total + economy,S1: Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,\"USD_PPP: US dollars, PPP converted\",V: + Current prices,LA: Annual levels,T0102: Table 0102 - GDP identity from the + expenditure side,2021-Q3,23982379,,,F: Free (free for publication),1: One,A: + Normal value,6: Millions,USD: US dollar\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA_EXPENDITURE_USD(1.1),I,Q: + Quarterly,Y: Calendar and seasonally adjusted,USA: United States,S1: Total + economy,S1: Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,\"USD_PPP: US dollars, PPP converted\",V: + Current prices,LA: Annual levels,T0102: Table 0102 - GDP identity from the + expenditure side,2020-Q4,22087160,,,F: Free (free for publication),1: One,A: + Normal value,6: Millions,USD: US dollar\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA_EXPENDITURE_USD(1.1),I,Q: + Quarterly,Y: Calendar and seasonally adjusted,USA: United States,S1: Total + economy,S1: Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,\"USD_PPP: US dollars, PPP converted\",V: + Current prices,LA: Annual levels,T0102: Table 0102 - GDP identity from the + expenditure side,2020-Q3,21704437,,,F: Free (free for publication),1: One,A: + Normal value,6: Millions,USD: US dollar\r\n" headers: - Accept-Ranges: - - values + CF-RAY: + - 9dc5fdbe4fc53753-YVR Cache-Control: - - no-store,no-cache - Content-Disposition: - - attachment; filename="OECD.SDD.NAD,DSD_NAMAIN1@DF_QNA_EXPENDITURE_USD,1.1+Q..USA.S1..B1GQ.....V...csv" - Content-Encoding: - - gzip + - public,max-age=7200 + Connection: + - keep-alive Content-Language: - en,en-US Content-Type: - - application/vnd.sdmx.data+csv; charset=utf-8 + - application/vnd.sdmx.data+csv; charset=utf-8; labels=both; version=2.0.0 Date: - - Fri, 12 Jul 2024 19:23:22 GMT - Pragma: - - no-cache + - Sat, 14 Mar 2026 20:22:36 GMT + Server: + - cloudflare Strict-Transport-Security: - max-age=2592000 Transfer-Encoding: - chunked Vary: - - Accept,Accept-Encoding,Accept-Encoding + - X-Range, Accept, Accept-Language, Accept-Encoding, Accept-Charset,Accept,Accept-Encoding,Accept-Encoding X-Server-Node: - - Server 3 + - Server 1 + alt-svc: + - h3=":443"; ma=86400 api-supported-versions: - - '1' + - '2' + cf-cache-status: + - MISS + last-modified: + - Sat, 14 Mar 2026 20:22:36 GMT + set-cookie: + - __cf_bm=57pZB9DDD.KbMQ1bjz.nrIONKUpXnDuQbhxsQ.LjSD4-1773519753.961757-1.0.1.1-J_DnbU5R5bKJFeMBmOVx8jvrHdR2bL8wE4kDviZJMl12mqc_p4SCXyo8_ColT.9aRAXMAL9MsmifgWNtPgLqSatDUJGE2W7_ZQlWEAUzvRxDP6e009kSxilrsJWIRAtJ; + HttpOnly; Secure; Path=/; Domain=oecd.org; Expires=Sat, 14 Mar 2026 20:52:36 + GMT + - _cfuvid=YT_Z.CyEfd4MaOGCfM44D7CJC_pUo702WjNFoLbN0po-1773519753.961757-1.0.1.1-oITCoUxnKH99fVwXWUi0_fsGhYjRYcqTdBJkFWo6sis; + HttpOnly; SameSite=None; Secure; Path=/; Domain=oecd.org status: code: 200 message: OK diff --git 
a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_real_gdp_fetcher_urllib3_v2.yaml b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_real_gdp_fetcher_urllib3_v2.yaml index 3fd043cdae7..8cc646218d2 100644 --- a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_real_gdp_fetcher_urllib3_v2.yaml +++ b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_real_gdp_fetcher_urllib3_v2.yaml @@ -3,53 +3,81 @@ interactions: body: null headers: Accept: - - application/json - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive + - application/vnd.sdmx.data+csv; version=2.0.0; labels=both method: GET - uri: https://sdmx.oecd.org/public/rest/data/OECD.SDD.NAD,DSD_NAMAIN1@DF_QNA,1.1/Q..USA.S1..B1GQ._Z...USD_PPP.LR.LA.T0102?detail=dataonly&dimensionAtObservation=TIME_PERIOD&endPeriod=2024-01-01&format=csvfile&startPeriod=2023-01-01 + uri: https://sdmx.oecd.org/public/rest/v2/data/dataflow/OECD.SDD.NAD/DSD_NAMAIN1%40DF_QNA/1.1/Q.*.USA.S1.*.B1GQ._Z.*.*.USD_PPP.LR.LA.T0102?c%5BTIME_PERIOD%5D=ge%3A2023-01%2Cle%3A2024-01&detail=full&dimensionAtObservation=TIME_PERIOD response: body: - string: !!binary | - H4sIAAAAAAAEAO29B2AcSZYlJi9tynt/SvVK1+B0oQiAYBMk2JBAEOzBiM3mkuwdaUcjKasqgcpl - VmVdZhZAzO2dvPfee++999577733ujudTif33/8/XGZkAWz2zkrayZ4hgKrIHz9+fB8/Ip4evzl+ - 9vzL746evTr9idHx0+989frNF6cv3oxenT77/Y9fnR6PXp+evPny1ejky69evDl99fL41ZvfXz96 - 8+r4xevjkzdnX74Ynb14/ebV73/8+vXpmxE++smzN7/P6PT3fnn64unZm69enY6+enH25vf/4vT4 - Nf54+ers5PT3f3L8+lSAPPvy1RfHDOfN8ZPnp7//2VNC4ezZ2Sl1cvbF6e//8vTV2ZdPR18+ef37 - /+Tx869OGbvf5/T41e/PkEaAhEaE5Ytnv//rN8dvvno9enp6cvbF8fPX/Jp+Jlh89fzN6OSrV69O - X5z8Pr9x8uXpydPx66dPxy+Onz56+vrp7//i+Ivjsxe7v+fTZ7//T7w43tod794Z/cTo9xl99Zqo - sYv/Pdn9/CdGv/9P6f++ondevnw5ev5q9Px49GZnd2dvtLezd2/7J+jn7qd7Bw8efjr+dCTPz2p/ - 96i/g4MHO7ufju9TX3h+VvvbH+3t7TzYub97ML5HfeH5We1vl8ZHnR3sPBzvUV94fuPk/wHpk0hF - wwIAAA== + string: "STRUCTURE,STRUCTURE_ID,ACTION,FREQ: Frequency of observation,ADJUSTMENT: + Adjustment,REF_AREA: Reference area,SECTOR: Institutional sector,COUNTERPART_SECTOR: + Counterpart institutional sector,TRANSACTION: Transaction,INSTR_ASSET: Financial + instruments and non-financial assets,ACTIVITY: Economic activity,EXPENDITURE: + Expenditure,UNIT_MEASURE: Unit of measure,PRICE_BASE: Price base,TRANSFORMATION: + Transformation,TABLE_IDENTIFIER: Table identifier,TIME_PERIOD: Time period,OBS_VALUE,REF_YEAR_PRICE: + Price reference year,BASE_PER: Base period,CONF_STATUS: Confidentiality status,DECIMALS: + Decimals,OBS_STATUS: Observation status,UNIT_MULT: Unit multiplier,CURRENCY: + Currency\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA(1.1),I,Q: Quarterly,Y: + Calendar and seasonally adjusted,USA: United States,S1: Total economy,S1: + Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: Not applicable,_Z: + Not applicable,\"USD_PPP: US dollars, PPP converted\",LR: Chain linked volume + (rebased),LA: Annual levels,T0102: Table 0102 - GDP identity from the expenditure + side,2023-Q1,23646277.5,2020,,F: Free (free for publication),1: One,A: Normal + value,6: Millions,USD: US dollar\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA(1.1),I,Q: + Quarterly,Y: Calendar and seasonally adjusted,USA: United States,S1: Total + economy,S1: Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,\"USD_PPP: US dollars, PPP converted\",LR: + Chain linked volume (rebased),LA: Annual levels,T0102: Table 0102 - GDP identity + from the expenditure 
side,2023-Q4,24272401.7,2020,,F: Free (free for publication),1: + One,A: Normal value,6: Millions,USD: US dollar\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA(1.1),I,Q: + Quarterly,Y: Calendar and seasonally adjusted,USA: United States,S1: Total + economy,S1: Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,\"USD_PPP: US dollars, PPP converted\",LR: + Chain linked volume (rebased),LA: Annual levels,T0102: Table 0102 - GDP identity + from the expenditure side,2023-Q3,24069243.5,2020,,F: Free (free for publication),1: + One,A: Normal value,6: Millions,USD: US dollar\r\nDATAFLOW,OECD.SDD.NAD:DF_QNA(1.1),I,Q: + Quarterly,Y: Calendar and seasonally adjusted,USA: United States,S1: Total + economy,S1: Total economy,B1GQ: Gross domestic product,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,\"USD_PPP: US dollars, PPP converted\",LR: + Chain linked volume (rebased),LA: Annual levels,T0102: Table 0102 - GDP identity + from the expenditure side,2023-Q2,23794745.9,2020,,F: Free (free for publication),1: + One,A: Normal value,6: Millions,USD: US dollar\r\n" headers: - Accept-Ranges: - - values + CF-RAY: + - 9dc5fdd2484c30ff-YVR Cache-Control: - - no-store,no-cache - Content-Disposition: - - attachment; filename="OECD.SDD.NAD,DSD_NAMAIN1@DF_QNA,1.1+Q..USA.S1..B1GQ._Z...USD_PPP.LR.LA.T0102.csv" - Content-Encoding: - - gzip + - public,max-age=7200 + Connection: + - keep-alive Content-Language: - en,en-US Content-Type: - - application/vnd.sdmx.data+csv; charset=utf-8 + - application/vnd.sdmx.data+csv; version=2.0.0; labels=both; charset=utf-8 Date: - - Tue, 16 Jul 2024 20:47:43 GMT - Pragma: - - no-cache + - Sat, 14 Mar 2026 20:22:39 GMT + Server: + - cloudflare Strict-Transport-Security: - max-age=2592000 Transfer-Encoding: - chunked Vary: - - Accept,Accept-Encoding,Accept-Encoding + - X-Range, Accept, Accept-Language, Accept-Encoding, Accept-Charset,Accept,Accept-Encoding,Accept-Encoding X-Server-Node: - - Server 3 + - Server 2 + alt-svc: + - h3=":443"; ma=86400 api-supported-versions: - - '1' + - '2' + cf-cache-status: + - MISS + last-modified: + - Sat, 14 Mar 2026 20:22:39 GMT + set-cookie: + - __cf_bm=wQTR5d0ejYWYrFu29bWqLz1GnGMYXt90dvkWcPD20Aw-1773519757.1685205-1.0.1.1-n0mEkQsSm47wJZncrFxiqQNwRRZctGWffbxfLLZ1KkCPG_wR0Yc49DdS3GEj8Is3cBOkGo5DnO3V3X2hRbg9iwDxwCskh6vuP.D5miXP8C6olgAUO13IkbSMn1eL1LwJ; + HttpOnly; Secure; Path=/; Domain=oecd.org; Expires=Sat, 14 Mar 2026 20:52:39 + GMT + - _cfuvid=pzuFDAs.EDt6Y69d_iRxmyEk2lA22Y51uFw3_DN7dPs-1773519757.1685205-1.0.1.1-tmi.AxKEj4zAheTfLNBoyKkQktkKYh9PRkKfBPhqVr0; + HttpOnly; SameSite=None; Secure; Path=/; Domain=oecd.org status: code: 200 message: OK diff --git a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_share_price_index_fetcher_urllib3_v2.yaml b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_share_price_index_fetcher_urllib3_v2.yaml index 2b59f82783b..58e99402503 100644 --- a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_share_price_index_fetcher_urllib3_v2.yaml +++ b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_share_price_index_fetcher_urllib3_v2.yaml @@ -3,41 +3,210 @@ interactions: body: null headers: Accept: - - application/vnd.sdmx.data+csv; charset=utf-8 - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive + - application/vnd.sdmx.data+csv; version=2.0.0; labels=both method: GET - uri: 
https://sdmx.oecd.org/public/rest/data/OECD.SDD.STES,DSD_STES@DF_FINMARK,4.0/GBR.M.SHARE......?detail=dataonly&dimensionAtObservation=TIME_PERIOD&endPeriod=2024-04&startPeriod=2020-01 + uri: https://sdmx.oecd.org/public/rest/v2/data/dataflow/OECD.SDD.STES/DSD_STES%40DF_FINMARK/4.0/GBR.M.SHARE.*.*.*.*.*.*?c%5BTIME_PERIOD%5D=ge%3A2020-01%2Cle%3A2024-04&detail=full&dimensionAtObservation=TIME_PERIOD response: body: - string: "DATAFLOW,REF_AREA,FREQ,MEASURE,UNIT_MEASURE,ACTIVITY,ADJUSTMENT,TRANSFORMATION,TIME_HORIZ,METHODOLOGY,TIME_PERIOD,OBS_VALUE,OBS_STATUS,UNIT_MULT,DECIMALS,BASE_PER\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2021-08,108.2514,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2021-09,107.1312,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2020-03,87.06705,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2020-04,87.03309,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2020-05,90.33038,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2023-08,112.8674,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2023-09,114.8787,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2020-09,89.9651,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2020-10,88.72897,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2020-11,94.35523,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2020-12,98.82664,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2021-01,101.4009,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2021-02,99.82986,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2021-03,101.8014,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2021-04,104.7217,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2021-05,106.4692,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2020-01,114.5953,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2020-02,110.8974,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2021-10,108.6177,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2021-11,110.1591,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2021-12,110.5146,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2022-01,113.6568,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2022-02,114.2632,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2022-03,110.9183,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2022-04,114.6437,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2022-05,112.9969,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2022-06,110.677,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2022-07,109.5683,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2022-08,113.2166,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2022-09,109.0082,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2022-10,105.719,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2022-11,111.7236,,,,\r\nOECD.SDD.STES:DSD_STE
S@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2022-12,113.3291,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2023-01,117.266,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2023-02,120.0188,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2023-03,115.8184,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2023-04,118.566,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2023-05,116.9485,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2023-06,114.5883,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2023-07,113.9178,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2020-06,94.69035,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2020-07,93.51866,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2020-08,92.02005,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2023-10,113.4461,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2023-11,112.8533,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2023-12,115.4689,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2024-01,115.3066,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2024-02,115.8311,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2024-03,117.9558,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2024-04,120.9609,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2021-06,107.6213,,,,\r\nOECD.SDD.STES:DSD_STES@DF_FINMARK(4.0),GBR,M,SHARE,IX,_Z,_Z,_Z,_Z,N,2021-07,106.8666,,,,\r\n" + string: "STRUCTURE,STRUCTURE_ID,ACTION,REF_AREA: Reference area,FREQ: Frequency + of observation,MEASURE: Measure,UNIT_MEASURE: Unit of measure,ACTIVITY: Economic + activity,ADJUSTMENT: Adjustment,TRANSFORMATION: Transformation,TIME_HORIZ: + Time horizon,METHODOLOGY: Calculation methodology,TIME_PERIOD: Time period,OBS_VALUE,OBS_STATUS: + Observation status,UNIT_MULT: Unit multiplier,DECIMALS: Decimals,BASE_PER: + Base period\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: United + Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: Not + applicable,_Z: Not applicable,_Z: Not applicable,N: National,2022-10,105.719,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2022-12,113.3291,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2020-07,93.51866,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2020-08,92.02005,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2021-02,99.82986,A: + Normal value,0: Units,2: 
Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2021-03,101.8014,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2020-02,110.8974,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2021-09,107.1312,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2021-10,108.6177,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2021-11,110.1591,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2021-12,110.5146,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2022-01,113.6568,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2022-02,114.2632,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2022-03,110.9183,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2022-04,114.6437,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2021-07,106.8666,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2020-06,94.69035,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2022-11,111.7236,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2023-01,117.266,A: + Normal value,0: Units,2: 
Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2023-02,120.0188,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2023-03,115.8184,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2023-04,118.566,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2023-05,116.9485,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2023-06,114.5883,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2023-07,113.9178,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2023-08,112.8674,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2023-09,114.8787,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2023-10,113.4461,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2023-11,112.8533,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2023-12,115.4689,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2024-01,115.3066,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2024-02,115.8311,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2024-03,117.9558,A: + Normal value,0: Units,2: 
Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2024-04,120.9609,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2020-09,89.9651,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2020-10,88.72897,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2020-11,94.35523,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2020-12,98.82664,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2021-01,101.4009,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2022-06,110.677,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2020-01,114.5953,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2021-04,104.7217,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2021-05,106.4692,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2020-03,87.06705,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2020-04,87.03309,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2020-05,90.33038,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2021-08,108.2514,A: + Normal value,0: Units,2: 
Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2021-06,107.6213,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2022-05,112.9969,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2022-07,109.5683,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2022-08,113.2166,A: + Normal value,0: Units,2: Two,2015\r\nDATAFLOW,OECD.SDD.STES:DF_FINMARK(4.0),I,GBR: + United Kingdom,M: Monthly,SHARE: Share prices,IX: Index,_Z: Not applicable,_Z: + Not applicable,_Z: Not applicable,_Z: Not applicable,N: National,2022-09,109.0082,A: + Normal value,0: Units,2: Two,2015\r\n" headers: - Accept-Ranges: - - values + CF-RAY: + - 9dc5fe158ad9b3af-YVR Cache-Control: - - no-store,no-cache - Content-Encoding: - - gzip + - public,max-age=7200 + Connection: + - keep-alive Content-Language: - en,en-US Content-Type: - - application/vnd.sdmx.data+csv; charset=utf-8 + - application/vnd.sdmx.data+csv; charset=utf-8; labels=both; version=2.0.0 Date: - - Thu, 27 Jun 2024 10:14:36 GMT - Pragma: - - no-cache + - Sat, 14 Mar 2026 20:22:51 GMT + Server: + - cloudflare Strict-Transport-Security: - max-age=2592000 Transfer-Encoding: - chunked Vary: - - Accept,Accept-Encoding,Accept-Encoding + - X-Range, Accept, Accept-Language, Accept-Encoding, Accept-Charset,Accept,Accept-Encoding,Accept-Encoding X-Server-Node: - - Server 1 + - Server 3 + alt-svc: + - h3=":443"; ma=86400 api-supported-versions: - - '1' + - '2' + cf-cache-status: + - MISS + last-modified: + - Sat, 14 Mar 2026 20:22:50 GMT + set-cookie: + - __cf_bm=J.mEX9IoQXNDpm_yVHUn9VPTqoJRGun1Rya9Nf.WugQ-1773519767.924175-1.0.1.1-PL73IQinwVRjWKQwsbepx6.3tDLfJoowKN3NUqcWmpi7s7kpzaxZbh8z_yAHkr.kvJ49czjIi41gKSNxFhoHBvhCZpS0j9l8B2h4hB8iUx4IgE270m8N9QbLPsIY8_iq; + HttpOnly; Secure; Path=/; Domain=oecd.org; Expires=Sat, 14 Mar 2026 20:52:51 + GMT + - _cfuvid=WigyecAjGGnGlppGlbHH_EQ6sa4xf4LMZ.1ImMpuU1k-1773519767.924175-1.0.1.1-b3yVA_oftF4SO87.jax4D9tRdEqncjjXtfmQ2UqOPpA; + HttpOnly; SameSite=None; Secure; Path=/; Domain=oecd.org status: code: 200 message: OK diff --git a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_unemployment_fetcher_urllib3_v2.yaml b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_unemployment_fetcher_urllib3_v2.yaml index 28d4ac40222..c343149489c 100644 --- a/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_unemployment_fetcher_urllib3_v2.yaml +++ b/openbb_platform/providers/oecd/tests/record/http/test_oecd_fetchers/test_oecd_unemployment_fetcher_urllib3_v2.yaml @@ -3,41 +3,77 @@ interactions: body: null headers: Accept: - - application/vnd.sdmx.data+csv; charset=utf-8 - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive + - application/vnd.sdmx.data+csv; version=2.0.0; labels=both method: GET - uri: 
https://sdmx.oecd.org/public/rest/data/OECD.SDD.TPS,DSD_LFS@DF_IALFS_UNE_M,1.0/USA..._Z.N._T.Y_GE15..M?detail=dataonly&dimensionAtObservation=TIME_PERIOD&endPeriod=2023-06&startPeriod=2023-01 + uri: https://sdmx.oecd.org/public/rest/v2/data/dataflow/OECD.SDD.TPS/DSD_LFS%40DF_IALFS_UNE_M/1.0/USA.*.*.*.N._T.Y_GE15.*.M?c%5BTIME_PERIOD%5D=ge%3A2023-01%2Cle%3A2023-06&detail=full&dimensionAtObservation=TIME_PERIOD response: body: - string: "DATAFLOW,REF_AREA,MEASURE,UNIT_MEASURE,TRANSFORMATION,ADJUSTMENT,SEX,AGE,ACTIVITY,FREQ,TIME_PERIOD,OBS_VALUE,BASE_PER,OBS_STATUS,UNIT_MULT,DECIMALS\r\nOECD.SDD.TPS:DSD_LFS@DF_IALFS_UNE_M(1.0),USA,UNE_LF_M,PT_LF_SUB,_Z,N,_T,Y_GE15,_Z,M,2023-02,3.9,,,,\r\nOECD.SDD.TPS:DSD_LFS@DF_IALFS_UNE_M(1.0),USA,UNE_LF_M,PT_LF_SUB,_Z,N,_T,Y_GE15,_Z,M,2023-01,3.9,,,,\r\nOECD.SDD.TPS:DSD_LFS@DF_IALFS_UNE_M(1.0),USA,UNE_LF_M,PT_LF_SUB,_Z,N,_T,Y_GE15,_Z,M,2023-06,3.8,,,,\r\nOECD.SDD.TPS:DSD_LFS@DF_IALFS_UNE_M(1.0),USA,UNE_LF_M,PT_LF_SUB,_Z,N,_T,Y_GE15,_Z,M,2023-05,3.4,,,,\r\nOECD.SDD.TPS:DSD_LFS@DF_IALFS_UNE_M(1.0),USA,UNE_LF_M,PT_LF_SUB,_Z,N,_T,Y_GE15,_Z,M,2023-04,3.1,,,,\r\nOECD.SDD.TPS:DSD_LFS@DF_IALFS_UNE_M(1.0),USA,UNE_LF_M,PT_LF_SUB,_Z,N,_T,Y_GE15,_Z,M,2023-03,3.6,,,,\r\n" + string: "STRUCTURE,STRUCTURE_ID,ACTION,REF_AREA: Reference area,MEASURE: Measure,UNIT_MEASURE: + Unit of measure,TRANSFORMATION: Transformation,ADJUSTMENT: Adjustment,SEX: + Sex,AGE: Age,ACTIVITY: Economic activity,FREQ: Frequency of observation,TIME_PERIOD: + Time period,OBS_VALUE,BASE_PER: Base period,OBS_STATUS: Observation status,UNIT_MULT: + Unit multiplier,DECIMALS: Decimals\r\nDATAFLOW,OECD.SDD.TPS:DF_IALFS_UNE_M(1.0),I,USA: + United States,UNE_LF_M: Monthly unemployment rate,PT_LF_SUB: Percentage of + labour force in the same subgroup,_Z: Not applicable,N: Neither seasonally + adjusted nor calendar adjusted,_T: Total,Y_GE15: 15 years or over,_Z: Not + applicable,M: Monthly,2023-03,3.6,,A: Normal value,0: Units,1: One\r\nDATAFLOW,OECD.SDD.TPS:DF_IALFS_UNE_M(1.0),I,USA: + United States,UNE_LF_M: Monthly unemployment rate,PT_LF_SUB: Percentage of + labour force in the same subgroup,_Z: Not applicable,N: Neither seasonally + adjusted nor calendar adjusted,_T: Total,Y_GE15: 15 years or over,_Z: Not + applicable,M: Monthly,2023-04,3.1,,A: Normal value,0: Units,1: One\r\nDATAFLOW,OECD.SDD.TPS:DF_IALFS_UNE_M(1.0),I,USA: + United States,UNE_LF_M: Monthly unemployment rate,PT_LF_SUB: Percentage of + labour force in the same subgroup,_Z: Not applicable,N: Neither seasonally + adjusted nor calendar adjusted,_T: Total,Y_GE15: 15 years or over,_Z: Not + applicable,M: Monthly,2023-05,3.4,,A: Normal value,0: Units,1: One\r\nDATAFLOW,OECD.SDD.TPS:DF_IALFS_UNE_M(1.0),I,USA: + United States,UNE_LF_M: Monthly unemployment rate,PT_LF_SUB: Percentage of + labour force in the same subgroup,_Z: Not applicable,N: Neither seasonally + adjusted nor calendar adjusted,_T: Total,Y_GE15: 15 years or over,_Z: Not + applicable,M: Monthly,2023-06,3.8,,A: Normal value,0: Units,1: One\r\nDATAFLOW,OECD.SDD.TPS:DF_IALFS_UNE_M(1.0),I,USA: + United States,UNE_LF_M: Monthly unemployment rate,PT_LF_SUB: Percentage of + labour force in the same subgroup,_Z: Not applicable,N: Neither seasonally + adjusted nor calendar adjusted,_T: Total,Y_GE15: 15 years or over,_Z: Not + applicable,M: Monthly,2023-01,3.9,,A: Normal value,0: Units,1: One\r\nDATAFLOW,OECD.SDD.TPS:DF_IALFS_UNE_M(1.0),I,USA: + United States,UNE_LF_M: Monthly unemployment rate,PT_LF_SUB: Percentage of + labour force in the same subgroup,_Z: Not applicable,N: Neither 
seasonally + adjusted nor calendar adjusted,_T: Total,Y_GE15: 15 years or over,_Z: Not + applicable,M: Monthly,2023-02,3.9,,A: Normal value,0: Units,1: One\r\n" headers: - Accept-Ranges: - - values + CF-RAY: + - 9dc5fdf34c1caf9b-YVR Cache-Control: - - no-store,no-cache - Content-Encoding: - - gzip + - public,max-age=7200 + Connection: + - keep-alive Content-Language: - en,en-US Content-Type: - - application/vnd.sdmx.data+csv; charset=utf-8 + - application/vnd.sdmx.data+csv; charset=utf-8; labels=both; version=2.0.0 Date: - - Thu, 27 Jun 2024 10:14:29 GMT - Pragma: - - no-cache + - Sat, 14 Mar 2026 20:22:45 GMT + Server: + - cloudflare Strict-Transport-Security: - max-age=2592000 Transfer-Encoding: - chunked Vary: - - Accept,Accept-Encoding,Accept-Encoding + - X-Range, Accept, Accept-Language, Accept-Encoding, Accept-Charset,Accept,Accept-Encoding,Accept-Encoding X-Server-Node: - - Server 1 + - Server 3 + alt-svc: + - h3=":443"; ma=86400 api-supported-versions: - - '1' + - '2' + cf-cache-status: + - MISS + last-modified: + - Sat, 14 Mar 2026 20:22:45 GMT + set-cookie: + - __cf_bm=yD8wRlodB.i5FUQLXwhKTa4Yb8xby6ZWGa0tluKw41I-1773519762.4496157-1.0.1.1-5SVXMFQ50hj9E.AJRqmqRUY_e4toW6RldXnM7OoEyvu_4TRc2Y8rQSxM6CtZY6fZUa7VulHCyIZxjjo8Pc6reBES5DEWkb2YfdK9nmN8OIQJurymo7bJiBxNe.cpeAO6; + HttpOnly; Secure; Path=/; Domain=oecd.org; Expires=Sat, 14 Mar 2026 20:52:45 + GMT + - _cfuvid=wZQcdom5Wgq_SNeKVTCPov9HbNpqSNHCkI0vL6jBNCQ-1773519762.4496157-1.0.1.1-ZyrShrehrhEpRIq0CP1mJ2ygGTrCw7FRZ.yfnqNi7g4; + HttpOnly; SameSite=None; Secure; Path=/; Domain=oecd.org status: code: 200 message: OK diff --git a/openbb_platform/providers/oecd/tests/test_metadata.py b/openbb_platform/providers/oecd/tests/test_metadata.py new file mode 100644 index 00000000000..de0b378fc47 --- /dev/null +++ b/openbb_platform/providers/oecd/tests/test_metadata.py @@ -0,0 +1,1261 @@ +"""Comprehensive unit tests for openbb_oecd.utils.metadata. + +All tests are entirely offline — no network calls are made. +HTTP is mocked via unittest.mock wherever _ensure_* methods are exercised. +The OecdMetadata singleton is reset between every test that uses it. 
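+
+The `meta` fixture pre-loads one minimal test dataflow (_TEST_DATAFLOW,
+_TEST_DSD, _TEST_CODELISTS, _TEST_CONSTRAINTS) with three dimensions
+(REF_AREA, MEASURE, FREQ) so assertions stay small and readable.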
+""" + +from __future__ import annotations + +import threading +from unittest.mock import MagicMock, patch + +import pytest +from openbb_core.app.model.abstract.error import OpenBBError +from openbb_oecd.utils.metadata import ( + OecdMetadata, + _build_code_tree, + _extract_codelist_id_from_urn, + _extract_concept_id_from_urn, + _matches_query, + _normalize_label, + _parse_sdmx_json_codelists, + _parse_search_query, + _term_matches, +) + +# pylint: disable=C1803, C0302, W0212, W0621 +# flake8: noqa: D101, D102 + +_FULL_ID = "DSD_TEST@DF_TEST" +_SHORT_ID = "DF_TEST" + +_TEST_DATAFLOW = { + "id": _FULL_ID, + "short_id": _SHORT_ID, + "agency_id": "OECD", + "version": "1.0", + "name": "Test Dataflow", + "description": "", + "structure_ref": "", +} + +_TEST_DSD = { + "dsd_id": "DSD_TEST", + "agency_id": "OECD", + "version": "1.0", + "dimensions": [ + { + "id": "REF_AREA", + "position": 1, + "codelist_id": "OECD:CL_AREA(1.0)", + "concept_id": "REF_AREA", + "name": "Reference Area", + }, + { + "id": "MEASURE", + "position": 2, + "codelist_id": "OECD:CL_MEASURE(1.0)", + "concept_id": "MEASURE", + "name": "Measure", + }, + { + "id": "FREQ", + "position": 3, + "codelist_id": "OECD:CL_FREQ(1.0)", + "concept_id": "FREQ", + "name": "Frequency", + }, + ], + "attributes": [], + "has_time_dimension": True, +} + +_TEST_CODELISTS = { + "OECD:CL_AREA(1.0)": { + "USA": "United States", + "GBR": "United Kingdom", + "DEU": "Germany", + }, + "OECD:CL_MEASURE(1.0)": { + "CPI": "Consumer Price Index", + "PPI": "Producer Price Index", + }, + "OECD:CL_FREQ(1.0)": {"A": "Annual", "Q": "Quarterly", "M": "Monthly"}, +} + +_TEST_CONSTRAINTS = { + _FULL_ID: { + "REF_AREA": ["USA", "GBR"], + "MEASURE": ["CPI"], + "FREQ": ["A", "Q"], + } +} + + +@pytest.fixture +def meta(monkeypatch): + """Yield a fresh, empty OecdMetadata instance with test data pre-loaded.""" + OecdMetadata._reset() + monkeypatch.setattr(OecdMetadata, "_load_from_cache", lambda self: True) + instance = OecdMetadata() + # Inject minimal test data so tests don't need the network. + instance.dataflows[_FULL_ID] = _TEST_DATAFLOW.copy() + instance._short_id_map[_SHORT_ID] = _FULL_ID + instance._full_catalogue_loaded = True + instance.datastructures[_FULL_ID] = { + k: list(v) if isinstance(v, list) else v for k, v in _TEST_DSD.items() + } + instance.datastructures[_FULL_ID]["dimensions"] = [ + dict(d) for d in _TEST_DSD["dimensions"] + ] + instance.codelists.update({k: dict(v) for k, v in _TEST_CODELISTS.items()}) + instance._dataflow_constraints.update( + { + k: {dk: list(dv) for dk, dv in v.items()} + for k, v in _TEST_CONSTRAINTS.items() + } + ) + yield instance + OecdMetadata._reset() + + +# =========================================================================== +# 1. 
Pure module-level functions +# =========================================================================== + + +class TestNormalizeLabel: + def test_basic_replacement(self): + assert _normalize_label("United States") == "united_states" + + def test_hyphen_to_underscore(self): + assert _normalize_label("Czech-Republic") == "czech_republic" + + def test_parenthetical_stripped(self): + assert _normalize_label("Korea (Republic of)") == "korea" + + def test_comma_suffix_stripped(self): + assert _normalize_label("China, People's Republic of") == "china" + + def test_leading_trailing_underscores(self): + assert not _normalize_label(" Germany ").startswith("_") + assert not _normalize_label(" Germany ").endswith("_") + + def test_multiple_spaces_collapsed(self): + assert _normalize_label("New Zealand") == "new_zealand" + + def test_already_normalized(self): + assert _normalize_label("france") == "france" + + def test_all_caps(self): + result = _normalize_label("USA") + assert result == "usa" + + +class TestBuildCodeTree: + def test_flat_no_parents(self): + codes = {"A": "Alpha", "B": "Beta", "C": "Gamma"} + tree = _build_code_tree(codes, {}, {}) + assert len(tree) == 3 + assert all(n["children"] == [] for n in tree) + labels = [n["label"] for n in tree] + assert labels == sorted(labels) + + def test_child_attached_to_parent(self): + codes = {"P": "Parent", "C": "Child"} + parents = {"C": "P"} + tree = _build_code_tree(codes, parents, {}) + root_codes = [n["code"] for n in tree] + assert "P" in root_codes + assert "C" not in root_codes + parent_node = next(n for n in tree if n["code"] == "P") + assert len(parent_node["children"]) == 1 + assert parent_node["children"][0]["code"] == "C" + + def test_description_fallback_to_label(self): + codes = {"X": "Label X"} + tree = _build_code_tree(codes, {}, {}) + assert tree[0]["description"] == "Label X" + + def test_description_override(self): + codes = {"X": "Label X"} + descs = {"X": "Custom description"} + tree = _build_code_tree(codes, {}, descs) + assert tree[0]["description"] == "Custom description" + + def test_orphaned_parent_reference(self): + """Child referencing a non-existent parent becomes a root.""" + codes = {"C": "Child Only"} + parents = {"C": "MISSING_PARENT"} + tree = _build_code_tree(codes, parents, {}) + assert len(tree) == 1 + assert tree[0]["code"] == "C" + + def test_sorting(self): + codes = {"Z": "Zebra", "A": "Apple", "M": "Mango"} + tree = _build_code_tree(codes, {}, {}) + labels = [n["label"] for n in tree] + assert labels == sorted(labels) + + def test_empty(self): + assert _build_code_tree({}, {}, {}) == [] + + def test_multi_level_hierarchy(self): + codes = {"G": "Grand", "P": "Parent", "C": "Child"} + parents = {"P": "G", "C": "P"} + tree = _build_code_tree(codes, parents, {}) + assert len(tree) == 1 + grand = tree[0] + assert grand["code"] == "G" + assert len(grand["children"]) == 1 + parent_node = grand["children"][0] + assert parent_node["code"] == "P" + assert len(parent_node["children"]) == 1 + assert parent_node["children"][0]["code"] == "C" + + +class TestParseSdmxJsonCodelists: + def _make_raw(self, codelists_data): + return {"data": {"codelists": codelists_data}} + + def test_basic_codelist(self): + raw = self._make_raw( + [ + { + "id": "CL_TEST", + "agencyID": "OECD", + "version": "1.0", + "codes": [ + {"id": "A", "names": {"en": "Apple"}}, + {"id": "B", "names": {"en": "Banana"}}, + ], + } + ] + ) + cls, parents = _parse_sdmx_json_codelists(raw) + assert "OECD:CL_TEST(1.0)" in cls + assert 
cls["OECD:CL_TEST(1.0)"]["A"] == "Apple" + assert cls["OECD:CL_TEST(1.0)"]["B"] == "Banana" + assert parents == {} + + def test_parent_hierarchy(self): + raw = self._make_raw( + [ + { + "id": "CL_H", + "agencyID": "OECD", + "version": "2.0", + "codes": [ + {"id": "P", "names": {"en": "Parent"}}, + {"id": "C", "names": {"en": "Child"}, "parent": "P"}, + ], + } + ] + ) + _, parents = _parse_sdmx_json_codelists(raw) + key = "OECD:CL_H(2.0)" + assert key in parents + assert parents[key]["C"] == "P" + assert "P" not in parents[key] # parent itself has no parent + + def test_name_fallback(self): + """Falls back to 'name' string when 'names' dict is absent.""" + raw = self._make_raw( + [ + { + "id": "CL_FALLBACK", + "agencyID": "OECD", + "version": "1.0", + "codes": [{"id": "X", "name": "Fallback Label"}], + } + ] + ) + cls, _ = _parse_sdmx_json_codelists(raw) + assert cls["OECD:CL_FALLBACK(1.0)"]["X"] == "Fallback Label" + + def test_empty_codelists(self): + raw = self._make_raw([]) + cls, parents = _parse_sdmx_json_codelists(raw) + assert cls == {} + assert parents == {} + + def test_missing_data_key(self): + raw = {"codelists": []} + cls, _ = _parse_sdmx_json_codelists(raw) + assert cls == {} + + def test_multiple_codelists(self): + raw = self._make_raw( + [ + { + "id": "CL_A", + "agencyID": "OECD", + "version": "1.0", + "codes": [{"id": "X", "names": {"en": "Ex"}}], + }, + { + "id": "CL_B", + "agencyID": "OECD", + "version": "1.0", + "codes": [{"id": "Y", "names": {"en": "Why"}}], + }, + ] + ) + cls, _ = _parse_sdmx_json_codelists(raw) + assert "OECD:CL_A(1.0)" in cls + assert "OECD:CL_B(1.0)" in cls + + def test_id_fallback_code_label(self): + """When names dict lang key absent, falls back to code id.""" + raw = self._make_raw( + [ + { + "id": "CL_X", + "agencyID": "OECD", + "version": "1.0", + "codes": [{"id": "FOO", "names": {}}], + } + ] + ) + cls, _ = _parse_sdmx_json_codelists(raw) + assert cls["OECD:CL_X(1.0)"]["FOO"] == "FOO" + + +class TestExtractCodelistIdFromUrn: + def test_full_qualified_urn(self): + urn = "urn:sdmx:org.sdmx.infomodel.codelist.Codelist=OECD.SDD.TPS:CL_REF_AREA(3.0)" + result = _extract_codelist_id_from_urn(urn) + assert result == "OECD.SDD.TPS:CL_REF_AREA(3.0)" + + def test_simple_urn(self): + urn = "urn:sdmx:org.sdmx.infomodel.codelist.Codelist=OECD:CL_FREQ(2.1)" + result = _extract_codelist_id_from_urn(urn) + assert result == "OECD:CL_FREQ(2.1)" + + def test_no_version_fallback(self): + urn = "some:CL_TEST" + result = _extract_codelist_id_from_urn(urn) + assert "CL_TEST" in result + + def test_plain_string_passthrough(self): + result = _extract_codelist_id_from_urn("PLAIN") + assert result == "PLAIN" + + +class TestExtractConceptIdFromUrn: + def test_dotted_urn(self): + urn = ( + "urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=OECD:CS_COMMON(2.0).FREQ" + ) + assert _extract_concept_id_from_urn(urn) == "FREQ" + + def test_no_dot(self): + assert _extract_concept_id_from_urn("MEASURE") == "MEASURE" + + def test_nested_dot(self): + urn = "urn:sdmx:...=OECD:CS(1.0).SECTOR.SUB" + assert _extract_concept_id_from_urn(urn) == "SUB" + + +class TestParseSearchQuery: + def test_single_term(self): + result = _parse_search_query("GDP") + assert result == [["gdp"]] + + def test_multiple_terms_implicit_and(self): + result = _parse_search_query("consumer price index") + assert result == [["consumer", "price", "index"]] + + def test_semicolon_or(self): + result = _parse_search_query("GDP; CPI") + assert result == [["gdp"], ["cpi"]] + + def test_empty_string(self): + assert 
_parse_search_query("") == [] + + def test_whitespace_only(self): + assert _parse_search_query(" ") == [] + + def test_pipe_preserved_in_term(self): + result = _parse_search_query("gdp|gross") + assert result == [["gdp|gross"]] + + def test_mixed_case_lowercased(self): + result = _parse_search_query("GDP Consumer") + assert result == [["gdp", "consumer"]] + + +class TestMatchesQuery: + def test_empty_phrases_always_true(self): + assert _matches_query("anything", []) is True + + def test_single_phrase_match(self): + assert _matches_query("consumer price index", [["consumer"]]) is True + + def test_single_phrase_no_match(self): + assert _matches_query("unemployment rate", [["cpi"]]) is False + + def test_and_logic(self): + assert _matches_query("consumer price index", [["consumer", "price"]]) is True + assert _matches_query("consumer goods index", [["consumer", "price"]]) is False + + def test_or_phrases(self): + # "GDP" OR "CPI" + phrases = [["gdp"], ["cpi"]] + assert _matches_query("consumer price index cpi", phrases) is True + assert _matches_query("gdp growth", phrases) is True + assert _matches_query("unemployment", phrases) is False + + def test_pipe_or_within_term(self): + phrases = [["gdp|gross"]] + assert _matches_query("gross domestic product", phrases) is True + assert _matches_query("gdp per capita", phrases) is True + assert _matches_query("inflation rate", phrases) is False + + +class TestTermMatches: + def test_simple_match(self): + assert _term_matches("hello world", "world") is True + + def test_simple_no_match(self): + assert _term_matches("hello world", "foo") is False + + def test_pipe_or(self): + assert _term_matches("hello world", "hello|goodbye") is True + assert _term_matches("hello world", "foo|bar") is False + + def test_empty_alternatives_stripped(self): + # pipe with empty side + assert _term_matches("hello", "hello|") is True + + +class TestSingleton: + def test_singleton_returns_same_instance(self, meta): + """Two OecdMetadata() calls return the exact same object.""" + instance2 = OecdMetadata() + assert meta is instance2 + + def test_reset_creates_new_instance(self, monkeypatch): + """After _reset, a new instance is created.""" + OecdMetadata._reset() + monkeypatch.setattr(OecdMetadata, "_load_from_cache", lambda self: True) + a = OecdMetadata() + OecdMetadata._reset() + monkeypatch.setattr(OecdMetadata, "_load_from_cache", lambda self: True) + b = OecdMetadata() + assert a is not b + OecdMetadata._reset() + + def test_thread_safety(self, monkeypatch): + """Concurrent instantiation always yields the same object.""" + OecdMetadata._reset() + monkeypatch.setattr(OecdMetadata, "_load_from_cache", lambda self: True) + instances = [] + + def get_instance(): + instances.append(OecdMetadata()) + + threads = [threading.Thread(target=get_instance) for _ in range(10)] + for t in threads: + t.start() + for t in threads: + t.join() + + assert len(set(id(i) for i in instances)) == 1 + OecdMetadata._reset() + + +class TestApplyBlob: + def test_basic_blob_population(self, meta): + blob = { + "dataflows": { + "DSD_X@DF_Y": {"id": "DSD_X@DF_Y", "short_id": "DF_Y", "name": "Y"} + }, + "codelists": {"OECD:CL_NEW(1.0)": {"A": "Alpha"}}, + "short_id_map": {"DF_Y": "DSD_X@DF_Y"}, + "taxonomy_tree": [], + } + meta._apply_blob(blob) + assert "DSD_X@DF_Y" in meta.dataflows + assert "OECD:CL_NEW(1.0)" in meta.codelists + + def test_blob_compact_indicators(self, meta): + blob = { + "dataflow_indicators": { + _FULL_ID: { + "dim_id": "MEASURE", + "codes": [ + {"indicator": "CPI", "label": 
"Consumer Price Index"}, + { + "indicator": "PPI", + "label": "Producer Price Index", + "parent": "CPI", + }, + ], + } + } + } + meta._apply_blob(blob) + expanded = meta._dataflow_indicators_cache.get(_FULL_ID, []) + assert len(expanded) == 2 + assert expanded[0]["indicator"] == "CPI" + assert expanded[1]["parent"] == "CPI" + assert "symbol" in expanded[0] + + def test_blob_taxonomy_loaded_flag(self, meta): + meta._taxonomy_loaded = False + blob = { + "taxonomy_tree": [ + {"id": "ECO", "name": "Economy", "path": "ECO", "children": []} + ], + "df_to_categories": {}, + "category_to_dfs": {}, + "category_names": {"ECO": "Economy"}, + } + meta._apply_blob(blob) + assert meta._taxonomy_loaded is True + assert meta._taxonomy_tree[0]["id"] == "ECO" + + +class TestResolveDataflowId: + def test_full_id_passthrough(self, meta): + assert meta._resolve_dataflow_id(_FULL_ID) == _FULL_ID + + def test_short_id_resolution(self, meta): + assert meta._resolve_dataflow_id(_SHORT_ID) == _FULL_ID + + def test_unknown_id_raises(self, meta): + meta._full_catalogue_loaded = True # prevent network call + with pytest.raises(OpenBBError, match="Unknown OECD dataflow"): + meta._resolve_dataflow_id("DF_NONEXISTENT_XYZ") + + +class TestListDataflows: + def test_returns_all_when_no_filter(self, meta): + meta._taxonomy_loaded = True + result = meta.list_dataflows() + assert len(result) == 1 + assert result[0]["value"] == _FULL_ID + + def test_topic_filter_match(self, meta): + meta._taxonomy_loaded = True + meta._df_to_categories[_FULL_ID] = ["ECO.PRICES"] + result = meta.list_dataflows(topic="ECO") + assert len(result) == 1 + + def test_topic_filter_no_match(self, meta): + meta._taxonomy_loaded = True + meta._df_to_categories[_FULL_ID] = ["ECO.PRICES"] + result = meta.list_dataflows(topic="HEA") + assert result == [] + + def test_result_sorted_by_value(self, meta): + meta._taxonomy_loaded = True + meta.dataflows["DSD_Z@DF_ZZZ"] = { + "id": "DSD_Z@DF_ZZZ", + "short_id": "DF_ZZZ", + "name": "ZZZ", + "description": "", + } + meta._short_id_map["DF_ZZZ"] = "DSD_Z@DF_ZZZ" + result = meta.list_dataflows() + values = [r["value"] for r in result] + assert values == sorted(values) + + def test_result_includes_topic_names(self, meta): + meta._taxonomy_loaded = True + meta._df_to_categories[_FULL_ID] = ["ECO.PRICES"] + meta._category_names["ECO"] = "Economy" + meta._category_names["ECO.PRICES"] = "Prices" + result = meta.list_dataflows() + row = result[0] + assert row["topic"] == "ECO" + assert row["topic_name"] == "Economy" + assert row["subtopic"] == "PRICES" + assert row["subtopic_name"] == "Prices" + + +class TestListTopics: + def _seed_taxonomy(self, meta): + meta._taxonomy_tree = [ + { + "id": "ECO", + "name": "Economy", + "path": "ECO", + "children": [ + { + "id": "PRICES", + "name": "Prices", + "path": "ECO.PRICES", + "children": [], + }, + ], + } + ] + meta._category_to_dfs = { + "ECO": [], + "ECO.PRICES": [_FULL_ID], + } + meta._taxonomy_loaded = True + + def test_basic_tree_structure(self, meta): + self._seed_taxonomy(meta) + result = meta.list_topics() + assert len(result) == 1 + top = result[0] + assert top["id"] == "ECO" + assert top["dataflow_count"] == 1 + assert len(top["subtopics"]) == 1 + assert top["subtopics"][0]["id"] == "PRICES" + + def test_empty_subtopics_excluded(self, meta): + """Subtopics with zero dataflows are dropped.""" + meta._taxonomy_tree = [ + { + "id": "ECO", + "name": "Economy", + "path": "ECO", + "children": [ + { + "id": "EMPTY", + "name": "Empty", + "path": "ECO.EMPTY", + "children": [], + 
}, + { + "id": "PRICES", + "name": "Prices", + "path": "ECO.PRICES", + "children": [], + }, + ], + } + ] + meta._category_to_dfs = {"ECO.PRICES": [_FULL_ID]} + meta._taxonomy_loaded = True + topics = meta.list_topics() + assert len(topics[0]["subtopics"]) == 1 + assert topics[0]["subtopics"][0]["id"] == "PRICES" + + +class TestParseCategoryTree: + def test_flat_categories(self): + cats = [ + {"id": "A", "names": {"en": "Alpha"}, "categories": []}, + {"id": "B", "names": {"en": "Beta"}, "categories": []}, + ] + tree, names = OecdMetadata._parse_category_tree(cats) + assert len(tree) == 2 + assert names["A"] == "Alpha" + assert names["B"] == "Beta" + + def test_nested_categories(self): + cats = [ + { + "id": "ECO", + "names": {"en": "Economy"}, + "categories": [ + {"id": "CPI", "names": {"en": "Prices"}, "categories": []}, + ], + } + ] + tree, names = OecdMetadata._parse_category_tree(cats) + assert tree[0]["children"][0]["id"] == "CPI" + assert "ECO.CPI" in names + assert names["ECO.CPI"] == "Prices" + + def test_name_fallback(self): + cats = [{"id": "X", "name": "Fallback Name", "categories": []}] + _, names = OecdMetadata._parse_category_tree(cats) + assert names["X"] == "Fallback Name" + + def test_path_prefixed_for_child(self): + cats = [ + { + "id": "TOP", + "names": {"en": "Top"}, + "categories": [{"id": "SUB", "names": {"en": "Sub"}, "categories": []}], + } + ] + _, names = OecdMetadata._parse_category_tree(cats, prefix="PARENT") + assert "PARENT.TOP" in names + assert "PARENT.TOP.SUB" in names + + +class TestParseCategorisations: + def test_basic_mapping(self, meta): + meta._df_to_categories = {} + meta._category_to_dfs = {} + # Test the regex patterns directly + df_re = OecdMetadata._CATEGORISATION_DF_RE + cat_re = OecdMetadata._CATEGORISATION_CAT_RE + m_df = df_re.search("Dataflow=OECD:DSD_TEST@DF_TEST(1.0)") + m_cat = cat_re.search("OECDCS1(v1).ECO.PRICES") + assert m_df is not None + assert m_cat is not None + assert m_df.group(2) == "DSD_TEST@DF_TEST" + assert m_cat.group(1) == "ECO.PRICES" + + +class TestGetDimensionOrder: + def test_returns_dims_excluding_time_period(self, meta): + order = meta.get_dimension_order(_SHORT_ID) + assert "TIME_PERIOD" not in order + assert order == ["REF_AREA", "MEASURE", "FREQ"] + + def test_full_id_works(self, meta): + order = meta.get_dimension_order(_FULL_ID) + assert order == ["REF_AREA", "MEASURE", "FREQ"] + + +class TestGetDataflowParameters: + def test_basic_parameters(self, meta): + params = meta.get_dataflow_parameters(_SHORT_ID) + assert "REF_AREA" in params + assert "MEASURE" in params + assert "FREQ" in params + assert "TIME_PERIOD" not in params + + def test_values_are_label_value_dicts(self, meta): + params = meta.get_dataflow_parameters(_SHORT_ID) + for entry in params["REF_AREA"]: + assert "label" in entry + assert "value" in entry + + def test_usa_in_ref_area(self, meta): + params = meta.get_dataflow_parameters(_SHORT_ID) + codes = {e["value"] for e in params["REF_AREA"]} + assert "USA" in codes + + def test_cache_hit_returns_same_object(self, meta): + p1 = meta.get_dataflow_parameters(_SHORT_ID) + p2 = meta.get_dataflow_parameters(_SHORT_ID) + assert p1 is p2 + + def test_empty_codelist_dimension(self, meta): + """Dimension with no codelist_id gets an empty list.""" + meta.datastructures[_FULL_ID]["dimensions"].append( + { + "id": "NAKED", + "position": 99, + "codelist_id": "", + "concept_id": "NAKED", + "name": "Naked", + } + ) + params = meta.get_dataflow_parameters(_FULL_ID) + assert params.get("NAKED") == [] + + +class 
TestGetCodelistForDimension: + def test_known_dimension(self, meta): + result = meta.get_codelist_for_dimension(_SHORT_ID, "REF_AREA") + assert "USA" in result + assert result["USA"] == "United States" + + def test_unknown_dimension_returns_empty(self, meta): + result = meta.get_codelist_for_dimension(_SHORT_ID, "NONEXISTENT") + assert result == {} + + def test_time_period_returns_empty(self, meta): + """TIME_PERIOD has no codelist_id → empty.""" + meta.datastructures[_FULL_ID]["dimensions"].append( + { + "id": "TIME_PERIOD", + "position": 99, + "codelist_id": "", + "concept_id": "TIME_PERIOD", + "name": "Time", + } + ) + result = meta.get_codelist_for_dimension(_SHORT_ID, "TIME_PERIOD") + assert result == {} + + +class TestResolveCountryCodes: + def test_empty_input_returns_empty(self, meta): + # Seed codelist so the country dim is resolvable. + meta._dataflow_constraints.clear() + assert meta.resolve_country_codes(_SHORT_ID, "") == [] + + def test_all_returns_empty(self, meta): + assert meta.resolve_country_codes(_SHORT_ID, "all") == [] + + def test_uppercase_code_matched(self, meta): + result = meta.resolve_country_codes(_SHORT_ID, "USA") + assert result == ["USA"] + + def test_lowercase_code_matched(self, meta): + result = meta.resolve_country_codes(_SHORT_ID, "usa") + assert result == ["USA"] + + def test_label_matched(self, meta): + result = meta.resolve_country_codes(_SHORT_ID, "United States") + assert result == ["USA"] + + def test_normalized_label_matched(self, meta): + result = meta.resolve_country_codes(_SHORT_ID, "united_states") + assert result == ["USA"] + + def test_multiple_countries(self, meta): + result = meta.resolve_country_codes(_SHORT_ID, "USA,GBR") + assert "USA" in result + assert "GBR" in result + + def test_invalid_country_raises(self, meta): + with pytest.raises(OpenBBError, match="Invalid country"): + meta.resolve_country_codes(_SHORT_ID, "ZZZNOTACOUNTRY") + + +class TestGetDimensionInfo: + def test_returns_list_of_dims(self, meta): + result = meta.get_dimension_info(_SHORT_ID) + assert isinstance(result, list) + assert len(result) == 3 # REF_AREA, MEASURE, FREQ (TIME_PERIOD excluded) + + def test_each_entry_has_required_keys(self, meta): + result = meta.get_dimension_info(_SHORT_ID) + required_keys = { + "id", + "position", + "name", + "codelist_id", + "total_codes", + "constrained_codes", + "has_hierarchy", + "values", + } + for entry in result: + assert required_keys.issubset(entry.keys()), f"Missing keys in {entry}" + + def test_constraints_applied(self, meta): + result = meta.get_dimension_info(_SHORT_ID) + ref_area = next(d for d in result if d["id"] == "REF_AREA") + # Constraint limits to USA+GBR + values = {v["value"] for v in ref_area["values"]} + assert values == {"USA", "GBR"} + assert "DEU" not in values + + def test_total_codes_vs_constrained(self, meta): + result = meta.get_dimension_info(_SHORT_ID) + ref_area = next(d for d in result if d["id"] == "REF_AREA") + assert ref_area["total_codes"] == 3 # full codelist size + assert ref_area["constrained_codes"] == 2 # after constraint + + def test_measure_constrained_to_one(self, meta): + result = meta.get_dimension_info(_SHORT_ID) + measure = next(d for d in result if d["id"] == "MEASURE") + assert measure["constrained_codes"] == 1 + assert measure["values"][0]["value"] == "CPI" + + +class TestGetTableGroups: + def test_no_table_identifier_dimension(self, meta): + """Returns empty list when TABLE_IDENTIFIER is not a dimension.""" + result = meta.get_table_groups(_SHORT_ID) + assert result == [] + + 
def test_with_table_identifier(self, meta): + meta.datastructures[_FULL_ID]["dimensions"].append( + { + "id": "TABLE_IDENTIFIER", + "position": 99, + "codelist_id": "OECD:CL_TBL(1.0)", + "concept_id": "TABLE_IDENTIFIER", + "name": "Table", + } + ) + meta.codelists["OECD:CL_TBL(1.0)"] = {"T01": "Table One", "T02": "Table Two"} + meta._dataflow_constraints[_FULL_ID]["TABLE_IDENTIFIER"] = ["T01"] + params_cache = meta._dataflow_parameters_cache + params_cache.pop(_FULL_ID, None) + params_cache.pop(_SHORT_ID, None) + + result = meta.get_table_groups(_SHORT_ID) + assert len(result) == 1 + assert result[0]["value"] == "T01" + assert result[0]["label"] == "Table One" + + +class TestGetConstrainedValues: + def test_returns_per_dimension(self, meta): + result = meta.get_constrained_values(_SHORT_ID) + assert "REF_AREA" in result + assert "MEASURE" in result + assert "FREQ" in result + + def test_constrained_values_filtered(self, meta): + result = meta.get_constrained_values(_SHORT_ID) + freq_values = {v["value"] for v in result["FREQ"]} + assert freq_values == {"A", "Q"} + assert "M" not in freq_values + + def test_each_value_has_label_and_description(self, meta): + result = meta.get_constrained_values(_SHORT_ID) + for dim_values in result.values(): + for v in dim_values: + assert "value" in v + assert "label" in v + assert "description" in v + + +class TestFindIndicatorDimension: + def test_measure_identified(self, meta): + dim = meta._find_indicator_dimension(_SHORT_ID) + assert dim == "MEASURE" + + def test_with_indicator_code(self, meta): + dim = meta._find_indicator_dimension(_SHORT_ID, "CPI") + assert dim == "MEASURE" + + def test_invalid_code_returns_none(self, meta): + dim = meta._find_indicator_dimension(_SHORT_ID, "NONEXISTENT_CODE_XYZ") + assert dim is None + + +class TestGetIndicatorsIn: + def test_from_cache(self, meta): + meta._dataflow_indicators_cache[_FULL_ID] = [ + { + "indicator": "CPI", + "label": "Consumer Price Index", + "dimension_id": "MEASURE", + "dataflow_id": _SHORT_ID, + "dataflow_name": "Test", + "description": "CPI", + "symbol": f"{_SHORT_ID}::CPI", + }, + ] + result = meta.get_indicators_in(_SHORT_ID) + assert len(result) == 1 + assert result[0]["indicator"] == "CPI" + + def test_constraint_filtering(self, meta): + meta._dataflow_indicators_cache[_FULL_ID] = [ + { + "indicator": "CPI", + "label": "CPI", + "dimension_id": "MEASURE", + "dataflow_id": _SHORT_ID, + "dataflow_name": "Test", + "description": "CPI", + "symbol": f"{_SHORT_ID}::CPI", + }, + { + "indicator": "PPI", + "label": "PPI", + "dimension_id": "MEASURE", + "dataflow_id": _SHORT_ID, + "dataflow_name": "Test", + "description": "PPI", + "symbol": f"{_SHORT_ID}::PPI", + }, + ] + # Constraint only allows CPI + result = meta.get_indicators_in(_SHORT_ID) + codes = [r["indicator"] for r in result] + assert "CPI" in codes + assert "PPI" not in codes + + +class TestGetIndicatorDataflows: + def test_finds_correct_dataflow(self, meta): + meta._dataflow_indicators_cache[_FULL_ID] = [ + { + "indicator": "CPI", + "dataflow_id": _SHORT_ID, + "label": "CPI", + "dimension_id": "MEASURE", + "dataflow_name": "Test", + "description": "", + "symbol": "", + }, + ] + result = meta.get_indicator_dataflows("CPI") + assert _SHORT_ID in result + + def test_missing_indicator_returns_empty(self, meta): + meta._dataflow_indicators_cache[_FULL_ID] = [] + result = meta.get_indicator_dataflows("NONEXISTENT") + assert result == [] + + +class TestDescribeDataflow: + def test_basic_structure(self, meta): + 
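+ # describe_dataflow bundles identity fields (dataflow_id, short_id, name) + # with "dimensions", "table_groups", and "indicator_tree" in one dict; + # each key is asserted individually below. + 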
meta._dataflow_indicators_cache[_FULL_ID] = [] + result = meta.describe_dataflow(_SHORT_ID) + assert result["dataflow_id"] == _FULL_ID + assert result["short_id"] == _SHORT_ID + assert result["name"] == "Test Dataflow" + assert "dimensions" in result + assert "table_groups" in result + assert "indicator_tree" in result + + def test_ensure_description_called(self, meta): + """describe_dataflow should attempt to fetch description.""" + meta._dataflow_indicators_cache[_FULL_ID] = [] + # Pre-set description so _ensure_description skips network + meta.dataflows[_FULL_ID]["description"] = "Pre-set description" + result = meta.describe_dataflow(_SHORT_ID) + assert result["description"] == "Pre-set description" + + +class TestEnsureDescription: + def test_already_has_description(self, meta): + """Skips fetch when description already present.""" + meta.dataflows[_FULL_ID]["description"] = "Existing" + with patch( + "openbb_oecd.utils.metadata._loader_mixin._make_request" + ) as mock_req: + meta._ensure_description(_FULL_ID) + mock_req.assert_not_called() + assert meta.dataflows[_FULL_ID]["description"] == "Existing" + + def test_fetches_and_strips_html(self, meta): + """Strips HTML tags from raw description.""" + meta.dataflows[_FULL_ID]["description"] = "" + mock_resp = MagicMock() + mock_resp.json.return_value = { + "data": { + "dataflows": [{"descriptions": {"en": "
<p>Hello World</p>"}}] + } + } + with patch( + "openbb_oecd.utils.metadata._loader_mixin._make_request", + return_value=mock_resp, + ): + meta._ensure_description(_FULL_ID) + assert meta.dataflows[_FULL_ID]["description"] == "Hello World" + + def test_deduplicated_whitespace(self, meta): + meta.dataflows[_FULL_ID]["description"] = "" + mock_resp = MagicMock() + mock_resp.json.return_value = { + "data": { + "dataflows": [{"descriptions": {"en": "Word1\t\t Word2 Word3"}}] + } + } + with patch( + "openbb_oecd.utils.metadata._loader_mixin._make_request", + return_value=mock_resp, + ): + meta._ensure_description(_FULL_ID) + assert meta.dataflows[_FULL_ID]["description"] == "Word1 Word2 Word3" + + def test_network_error_silenced(self, meta): + """Exception during fetch is silently ignored.""" + meta.dataflows[_FULL_ID]["description"] = "" + with patch( + "openbb_oecd.utils.metadata._loader_mixin._make_request", + side_effect=Exception("network error"), + ): + meta._ensure_description(_FULL_ID) # must not raise + assert meta.dataflows[_FULL_ID]["description"] == "" + + def test_cached_after_fetch(self, meta): + """Second call does not go to network again.""" + meta.dataflows[_FULL_ID]["description"] = "" + mock_resp = MagicMock() + mock_resp.json.return_value = { + "data": {"dataflows": [{"descriptions": {"en": "Desc"}}]} + } + with patch( + "openbb_oecd.utils.metadata._loader_mixin._make_request", + return_value=mock_resp, + ) as mock_req: + meta._ensure_description(_FULL_ID) + meta._ensure_description(_FULL_ID) + assert mock_req.call_count == 1 + + def test_html_entities_decoded(self, meta): + meta.dataflows[_FULL_ID]["description"] = "" + mock_resp = MagicMock() + mock_resp.json.return_value = { + "data": { + "dataflows": [{"descriptions": {"en": "A &amp; B &lt;C&gt; D E"}}] + } + } + with patch( + "openbb_oecd.utils.metadata._loader_mixin._make_request", + return_value=mock_resp, + ): + meta._ensure_description(_FULL_ID) + desc = meta.dataflows[_FULL_ID]["description"] + assert "&amp;" not in desc + assert "A & B" in desc + + +class TestFindTables: + def _seed_table_map(self, meta): + meta._taxonomy_loaded = True + meta._taxonomy_tree = [ + { + "id": "ECO", + "name": "Economy", + "path": "ECO", + "children": [ + { + "id": "PRICES", + "name": "Prices", + "path": "ECO.PRICES", + "children": [], + } + ], + } + ] + meta._category_to_dfs = {"ECO.PRICES": [_FULL_ID]} + meta._df_to_categories = {_FULL_ID: ["ECO.PRICES"]} + + def test_finds_by_name(self, meta): + self._seed_table_map(meta) + results = meta.find_tables("Test") + assert any(r["dataflow_id"] == _FULL_ID for r in results) + + def test_empty_query_returns_all(self, meta): + self._seed_table_map(meta) + results = meta.find_tables("") + assert len(results) >= 1 + + def test_no_match_returns_empty(self, meta): + self._seed_table_map(meta) + results = meta.find_tables("ZZZNONEXISTENTXYZ") + assert results == [] + + def test_multi_word_and_logic(self, meta): + self._seed_table_map(meta) + # Both "Test" and "Dataflow" should appear in the row text + results = meta.find_tables("Test Dataflow") + assert any(r["dataflow_id"] == _FULL_ID for r in results) + results2 = meta.find_tables("Test NOMATCH") + assert not any(r["dataflow_id"] == _FULL_ID for r in results2) + + +class TestDetectCountryFamilies: + def test_no_family_for_small_group(self, meta): + # Only one dataflow → too small to be a family + result = meta._detect_country_families() + assert result == {} + + def test_family_detected(self, meta): + """Add enough per-country dataflows to trigger family detection.""" + dsd = 
"DSD_REV" + country_suffixes = [ + "_AUT", + "_BEL", + "_CAN", + "_DEU", + "_ESP", + "_FIN", + "_FRA", + "_GBR", + "_GRC", + "_IRL", + ] + for sfx in country_suffixes: + fid = f"{dsd}@DF_REV{sfx}" + meta.dataflows[fid] = { + "id": fid, + "short_id": f"DF_REV{sfx}", + "agency_id": "OECD", + "version": "1.0", + "name": f"Tax revenues ({sfx[1:]})", + "description": "", + } + # Add an ALL variant + rep_fid = f"{dsd}@DF_REV_ALL" + meta.dataflows[rep_fid] = { + "id": rep_fid, + "short_id": "DF_REV_ALL", + "agency_id": "OECD", + "version": "1.0", + "name": "Tax revenues (OECD)", + "description": "", + } + families = meta._detect_country_families() + # Some members should be in the family map + in_family = [fid for fid in families if "DF_REV" in fid] + assert len(in_family) > 0 + # Representative should be the _ALL variant + first = families[in_family[0]] + assert first["representative"] == rep_fid + + +class TestSearchIndicators: + def test_raises_without_params(self, meta): + with pytest.raises(OpenBBError, match="At least one"): + meta.search_indicators() + + def test_finds_by_dataflow(self, meta): + meta._dataflow_indicators_cache[_FULL_ID] = [ + { + "indicator": "CPI", + "label": "Consumer Price Index", + "dimension_id": "MEASURE", + "dataflow_id": _SHORT_ID, + "dataflow_name": "Test", + "description": "The CPI", + "symbol": "x::CPI", + }, + ] + results = meta.search_indicators(dataflows=_SHORT_ID) + assert len(results) == 1 + assert results[0]["indicator"] == "CPI" + + def test_finds_by_query_label(self, meta): + meta._dataflow_indicators_cache[_FULL_ID] = [ + { + "indicator": "CPI", + "label": "Consumer Price Index", + "dimension_id": "MEASURE", + "dataflow_id": _SHORT_ID, + "dataflow_name": "Test", + "description": "Consumer prices", + "symbol": "x::CPI", + }, + { + "indicator": "UNE", + "label": "Unemployment Rate", + "dimension_id": "MEASURE", + "dataflow_id": _SHORT_ID, + "dataflow_name": "Test", + "description": "Jobs", + "symbol": "x::UNE", + }, + ] + results = meta.search_indicators(query="consumer", dataflows=_SHORT_ID) + codes = [r["indicator"] for r in results] + assert "CPI" in codes + assert "UNE" not in codes + + +class TestBuildDataUrl: + def test_basic_url_format(self, meta): + url = meta.build_data_url(_SHORT_ID) + assert "sdmx.oecd.org" in url + assert "DF_TEST" in url + + def test_last_n_appended(self, meta): + url = meta.build_data_url(_SHORT_ID, last_n=5) + assert "lastNObservations=5" in url + + def test_dimension_filter_in_url(self, meta): + url = meta.build_data_url(_SHORT_ID, dimension_filter="USA.*.*") + assert "USA" in url + + +class TestBuildDimensionFilter: + def test_wildcard_for_all(self, meta): + result = meta.build_dimension_filter(_SHORT_ID) + # Default: wildcard for each dimension + time + assert result.count("*") >= 3 + + def test_specific_value_inserted(self, meta): + result = meta.build_dimension_filter(_SHORT_ID, REF_AREA="USA") + assert result.startswith("USA.") + + def test_freq_value_at_correct_position(self, meta): + result = meta.build_dimension_filter(_SHORT_ID, FREQ="A") + parts = result.split(".") + # Dimension order: REF_AREA(0), MEASURE(1), FREQ(2) + assert parts[2] == "A" diff --git a/openbb_platform/providers/oecd/tests/test_oecd_fetchers.py b/openbb_platform/providers/oecd/tests/test_oecd_fetchers.py index fd9a12be7a5..1456c41f08d 100644 --- a/openbb_platform/providers/oecd/tests/test_oecd_fetchers.py +++ b/openbb_platform/providers/oecd/tests/test_oecd_fetchers.py @@ -4,11 +4,14 @@ import pytest from openbb_core.app.service.user_service 
import UserService +from openbb_oecd.models.available_indicators import OecdAvailableIndicatorsFetcher +from openbb_oecd.models.balance_of_payments import OECDBalanceOfPaymentsFetcher from openbb_oecd.models.composite_leading_indicator import ( OECDCompositeLeadingIndicatorFetcher, ) from openbb_oecd.models.consumer_price_index import OECDCPIFetcher from openbb_oecd.models.country_interest_rates import OecdCountryInterestRatesFetcher +from openbb_oecd.models.economic_indicators import OecdEconomicIndicatorsFetcher from openbb_oecd.models.gdp_forecast import OECDGdpForecastFetcher from openbb_oecd.models.gdp_nominal import OECDGdpNominalFetcher from openbb_oecd.models.gdp_real import OECDGdpRealFetcher @@ -38,6 +41,8 @@ def test_oecd_cpi_fetcher(credentials=test_credentials): params = { "country": "united_kingdom", "frequency": "annual", + "start_date": datetime.date(2020, 1, 1), + "end_date": datetime.date(2022, 1, 1), } fetcher = OECDCPIFetcher() @@ -155,3 +160,44 @@ def test_oecd_country_interest_rates_fetcher(credentials=test_credentials): fetcher = OecdCountryInterestRatesFetcher() result = fetcher.test(params, credentials) assert result is None + + +@pytest.mark.record_http +def test_oecd_economic_indicators_fetcher(credentials=test_credentials): + """Test the OECD Economic Indicators fetcher.""" + params = { + "start_date": datetime.date(2023, 1, 1), + "end_date": datetime.date(2024, 1, 1), + "country": "united_kingdom", + "symbol": "DF_KEI::B1GQ_Q", + "frequency": "quarterly", + } + + fetcher = OecdEconomicIndicatorsFetcher() + result = fetcher.test(params, credentials) + assert result is None + + +# This fetcher will not make a network call. +def test_oecd_available_indicators_fetcher(credentials=test_credentials): + """Test the OECD Available Indicators fetcher.""" + params = {"dataflow": "DF_KEI", "query": "GDP"} + + fetcher = OecdAvailableIndicatorsFetcher() + result = fetcher.test(params, credentials) + assert result is None + + +@pytest.mark.record_http +def test_oecd_balance_of_payments_fetcher(credentials=test_credentials): + """Test the OECD Balance of Payments fetcher.""" + params = { + "country": "united_kingdom", + "start_date": datetime.date(2020, 1, 1), + "end_date": datetime.date(2021, 1, 1), + "frequency": "annual", + } + + fetcher = OECDBalanceOfPaymentsFetcher() + result = fetcher.test(params, credentials) + assert result is None diff --git a/openbb_platform/providers/oecd/tests/test_query_builder.py b/openbb_platform/providers/oecd/tests/test_query_builder.py new file mode 100644 index 00000000000..d376a052263 --- /dev/null +++ b/openbb_platform/providers/oecd/tests/test_query_builder.py @@ -0,0 +1,651 @@ +"""Unit tests for openbb_oecd.utils.query_builder.""" + +from __future__ import annotations + +import warnings +from unittest.mock import MagicMock, patch + +import pandas as pd +import pytest +from openbb_core.app.model.abstract.error import OpenBBError +from openbb_oecd.utils.metadata import OecdMetadata +from openbb_oecd.utils.query_builder import ( + OecdQueryBuilder, + _format_period, + parse_time_period, +) +from requests.exceptions import HTTPError + +# pylint: disable=C1803, C0302, W0212, W0613, W0621 +# flake8: noqa: D101, D102, SIM117 + +_FULL_ID = "DSD_TEST@DF_TEST" +_SHORT_ID = "DF_TEST" + +_TEST_DATAFLOW = { + "id": _FULL_ID, + "short_id": _SHORT_ID, + "agency_id": "OECD", + "version": "1.0", + "name": "Test Dataflow", + "description": "", + "structure_ref": "", +} + +_TEST_DSD = { + "dsd_id": "DSD_TEST", + "agency_id": "OECD", + "version": "1.0", + 
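# Three coded dimensions at positions 1-3; TIME_PERIOD is intentionally + # absent here and is modeled via has_time_dimension below instead. + 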
"dimensions": [ + { + "id": "REF_AREA", + "position": 1, + "codelist_id": "OECD:CL_AREA(1.0)", + "concept_id": "REF_AREA", + "name": "Reference Area", + }, + { + "id": "MEASURE", + "position": 2, + "codelist_id": "OECD:CL_MEASURE(1.0)", + "concept_id": "MEASURE", + "name": "Measure", + }, + { + "id": "FREQ", + "position": 3, + "codelist_id": "OECD:CL_FREQ(1.0)", + "concept_id": "FREQ", + "name": "Frequency", + }, + ], + "attributes": [], + "has_time_dimension": True, +} + +_TEST_CODELISTS = { + "OECD:CL_AREA(1.0)": { + "USA": "United States", + "GBR": "United Kingdom", + "DEU": "Germany", + }, + "OECD:CL_MEASURE(1.0)": { + "CPI": "Consumer Price Index", + "PPI": "Producer Price Index", + }, + "OECD:CL_FREQ(1.0)": {"A": "Annual", "Q": "Quarterly", "M": "Monthly"}, +} + +_TEST_CONSTRAINTS = { + _FULL_ID: { + "REF_AREA": ["USA", "GBR"], + "MEASURE": ["CPI", "PPI"], + "FREQ": ["A", "Q"], + } +} + + +@pytest.fixture +def seeded_meta(monkeypatch): + """Fresh OecdMetadata singleton with test data injected, no I/O.""" + OecdMetadata._reset() + monkeypatch.setattr(OecdMetadata, "_load_from_cache", lambda self: True) + m = OecdMetadata() + m.dataflows[_FULL_ID] = _TEST_DATAFLOW.copy() + m._short_id_map[_SHORT_ID] = _FULL_ID + m._full_catalogue_loaded = True + m.datastructures[_FULL_ID] = { + "dsd_id": "DSD_TEST", + "agency_id": "OECD", + "version": "1.0", + "dimensions": [dict(d) for d in _TEST_DSD["dimensions"]], + "attributes": [], + "has_time_dimension": True, + } + m.codelists.update({k: dict(v) for k, v in _TEST_CODELISTS.items()}) + m._dataflow_constraints.update( + { + k: {dk: list(dv) for dk, dv in v.items()} + for k, v in _TEST_CONSTRAINTS.items() + } + ) + yield m + OecdMetadata._reset() + + +@pytest.fixture +def qb(seeded_meta): + """OecdQueryBuilder with seeded_meta already in place.""" + builder = OecdQueryBuilder() + assert builder.metadata is seeded_meta + return builder + + +class TestFormatPeriod: + def test_full_date_truncated(self): + assert _format_period("2024-03-15") == "2024-03" + + def test_year_month_passthrough(self): + assert _format_period("2024-03") == "2024-03" + + def test_year_only_passthrough(self): + assert _format_period("2024") == "2024" + + def test_empty_string(self): + assert _format_period("") == "" + + def test_none_like_empty(self): + assert _format_period("") == "" + + def test_quarter_string_passthrough(self): + result = _format_period("2024-Q3") + # "2024-Q3" splits into ["2024", "Q3"] — not 3 parts → passthrough + assert result == "2024-Q3" + + +class TestParseTimePeriod: + def test_daily_passthrough(self): + assert parse_time_period("2024-03-15") == "2024-03-15" + + def test_quarterly_q1(self): + result = parse_time_period("2024-Q1") + assert result == "2024-01-01" + + def test_quarterly_q2(self): + assert parse_time_period("2023-Q2") == "2023-04-01" + + def test_quarterly_q3(self): + assert parse_time_period("2022-Q3") == "2022-07-01" + + def test_quarterly_q4(self): + assert parse_time_period("2021-Q4") == "2021-10-01" + + def test_monthly(self): + assert parse_time_period("2024-06") == "2024-06-01" + + def test_annual(self): + assert parse_time_period("2024") == "2024-01-01" + + def test_empty_string_passthrough(self): + assert parse_time_period("") == "" + + def test_unknown_format_passthrough(self): + assert parse_time_period("S1/2024") == "S1/2024" + + def test_already_iso(self): + assert parse_time_period("2024-01-01") == "2024-01-01" + + +class TestBuildUrl: + def test_basic_url_contains_dataflow(self, qb): + url = qb.build_url(_SHORT_ID) + 
assert "DF_TEST" in url + assert "sdmx.oecd.org" in url + + def test_start_date_appended(self, qb): + url = qb.build_url(_SHORT_ID, start_date="2020-01") + assert "TIME_PERIOD" in url + assert "ge:2020-01" in url + + def test_end_date_appended(self, qb): + url = qb.build_url(_SHORT_ID, end_date="2023-12") + assert "le:2023-12" in url + + def test_both_dates(self, qb): + url = qb.build_url(_SHORT_ID, start_date="2020", end_date="2023") + assert "ge:2020" in url + assert "le:2023" in url + + def test_limit_appended(self, qb): + url = qb.build_url(_SHORT_ID, limit=10) + assert "lastNObservations=10" in url + + def test_dimension_kwarg_included(self, qb): + url = qb.build_url(_SHORT_ID, REF_AREA="USA") + assert "USA" in url + + def test_date_full_truncated_to_year_month(self, qb): + url = qb.build_url(_SHORT_ID, start_date="2020-06-15") + assert "ge:2020-06" in url + + def test_no_start_date_no_time_period_param(self, qb): + url = qb.build_url(_SHORT_ID) + assert "ge:" not in url + assert "le:" not in url + + +class TestBuildDimensionFilter: + def test_all_wildcards_by_default(self, qb): + result = qb._build_dimension_filter(_SHORT_ID) + parts = result.split(".") + assert all(p == "*" for p in parts) + + def test_case_insensitive_key(self, qb): + result_upper = qb._build_dimension_filter(_SHORT_ID, REF_AREA="USA") + result_lower = qb._build_dimension_filter(_SHORT_ID, ref_area="USA") + assert result_upper == result_lower + + def test_first_position_dimension(self, qb): + result = qb._build_dimension_filter(_SHORT_ID, REF_AREA="GBR") + assert result.startswith("GBR.") + + def test_unknown_kwarg_ignored(self, qb): + result = qb._build_dimension_filter(_SHORT_ID, UNKNOWN_DIM="XYZ") + # Unknown dims are ignored; the filter is still a valid structure + parts = result.split(".") + assert "XYZ" not in parts + + def test_multi_value_encoded(self, qb): + result = qb._build_dimension_filter(_SHORT_ID, REF_AREA="USA+GBR") + parts = result.split(".") + assert parts[0] == "USA+GBR" + + def test_specific_freq(self, qb): + result = qb._build_dimension_filter(_SHORT_ID, FREQ="Q") + parts = result.split(".") + # FREQ is position 2 (0-indexed) + assert parts[2] == "Q" + + +class TestSplitLabelColumns: + def _make_df(self, data: dict) -> pd.DataFrame: + return pd.DataFrame(data) + + def test_renames_dim_label_columns(self, qb): + df = self._make_df( + { + "REF_AREA: Reference Area": [ + "USA: United States", + "GBR: United Kingdom", + ], + "OBS_VALUE": [1.0, 2.0], + "TIME_PERIOD": ["2024", "2024"], + } + ) + result = qb._split_label_columns(df, _SHORT_ID) + assert "REF_AREA" in result.columns + assert "REF_AREA: Reference Area" not in result.columns + + def test_splits_code_label_values(self, qb): + df = self._make_df( + { + "REF_AREA: Reference Area": [ + "USA: United States", + "GBR: United Kingdom", + ], + "OBS_VALUE": [1.0, 2.0], + "TIME_PERIOD": ["2024", "2024"], + } + ) + result = qb._split_label_columns(df, _SHORT_ID) + assert list(result["REF_AREA"]) == ["USA", "GBR"] + assert "REF_AREA_label" in result.columns + assert list(result["REF_AREA_label"]) == ["United States", "United Kingdom"] + + def test_obs_value_not_split(self, qb): + df = self._make_df( + { + "REF_AREA: Reference Area": ["USA: United States"], + "OBS_VALUE": [3.14], + "TIME_PERIOD": ["2024"], + } + ) + result = qb._split_label_columns(df, _SHORT_ID) + assert "OBS_VALUE" in result.columns + assert list(result["OBS_VALUE"]) == [3.14] + + def test_time_period_not_split(self, qb): + df = self._make_df( + { + "TIME_PERIOD": ["2024-Q3"], + 
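# Quarter labels like "2024-Q3" contain no ": " code/label separator, + # so TIME_PERIOD cells must pass through the splitter untouched. + 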
"REF_AREA: Reference Area": ["USA: United States"], + "OBS_VALUE": [5.0], + } + ) + result = qb._split_label_columns(df, _SHORT_ID) + assert list(result["TIME_PERIOD"]) == ["2024-Q3"] + + def test_no_label_column_codelist_fallback(self, qb): + """When cell values are plain codes (no ': '), labels come from codelist.""" + df = self._make_df( + { + "REF_AREA": ["USA", "GBR"], + "OBS_VALUE": [1.0, 2.0], + "TIME_PERIOD": ["2024", "2024"], + } + ) + result = qb._split_label_columns(df, _SHORT_ID) + if "REF_AREA_label" in result.columns: + assert list(result["REF_AREA_label"]) == ["United States", "United Kingdom"] + + def test_empty_column_not_split(self, qb): + df = self._make_df( + { + "REF_AREA: Reference Area": [None, None], + "OBS_VALUE": [1.0, 2.0], + "TIME_PERIOD": ["2024", "2024"], + } + ) + result = qb._split_label_columns(df, _SHORT_ID) + assert "REF_AREA" in result.columns + + def test_multiple_dimensions_all_renamed(self, qb): + df = self._make_df( + { + "REF_AREA: Reference Area": ["USA: United States"], + "FREQ: Frequency": ["A: Annual"], + "OBS_VALUE": [1.0], + "TIME_PERIOD": ["2024"], + } + ) + result = qb._split_label_columns(df, _SHORT_ID) + assert "REF_AREA" in result.columns + assert "FREQ" in result.columns + assert result["FREQ"].iloc[0] == "A" + + +class TestGetCountryDimension: + def test_gets_ref_area(self, qb): + with patch.object(qb.metadata, "classify_dimensions") as mock_cls: + mock_cls.return_value = {"country": [{"id": "REF_AREA"}], "freq": []} + result = qb.get_country_dimension(_SHORT_ID) + assert result == "REF_AREA" + + def test_returns_none_when_no_country_dim(self, qb): + with patch.object(qb.metadata, "classify_dimensions") as mock_cls: + mock_cls.return_value = {"country": [], "freq": []} + result = qb.get_country_dimension(_SHORT_ID) + assert result is None + + +class TestGetFrequencyDimension: + def test_gets_freq(self, qb): + with patch.object(qb.metadata, "classify_dimensions") as mock_cls: + mock_cls.return_value = {"country": [], "freq": [{"id": "FREQ"}]} + result = qb.get_frequency_dimension(_SHORT_ID) + assert result == "FREQ" + + def test_returns_none_when_no_freq_dim(self, qb): + with patch.object(qb.metadata, "classify_dimensions") as mock_cls: + mock_cls.return_value = {"country": [], "freq": []} + result = qb.get_frequency_dimension(_SHORT_ID) + assert result is None + + +class TestGetTranslationMaps: + def test_returns_code_to_label(self, qb): + result = qb.get_translation_maps(_SHORT_ID) + assert "REF_AREA" in result + assert result["REF_AREA"]["USA"] == "United States" + assert result["FREQ"]["Q"] == "Quarterly" + + def test_all_dimensions_included(self, qb): + result = qb.get_translation_maps(_SHORT_ID) + assert "MEASURE" in result + assert "FREQ" in result + + +class TestListTables: + def test_delegates_to_metadata(self, qb): + with patch.object( + qb.metadata, "list_tables", return_value=[{"table_id": "X"}] + ) as mock: + result = qb.list_tables(query="test") + mock.assert_called_once_with(query="test", topic=None, subtopic=None) + assert result == [{"table_id": "X"}] + + def test_with_topic(self, qb): + with patch.object(qb.metadata, "list_tables", return_value=[]) as mock: + qb.list_tables(topic="ECO") + mock.assert_called_once_with(query=None, topic="ECO", subtopic=None) + + +class TestGetTable: + def test_delegates_to_metadata(self, qb): + mock_result = {"dataflow_id": _FULL_ID, "short_id": _SHORT_ID} + with patch.object(qb.metadata, "get_table", return_value=mock_result) as mock: + result = qb.get_table(_SHORT_ID) + 
mock.assert_called_once_with(_SHORT_ID) + assert result == mock_result + + +class TestValidateDimensionConstraints: + def test_no_dims_skips_validation(self, qb): + """When no dimension kwargs, returns immediately without network call.""" + # Should not raise + qb.validate_dimension_constraints(_SHORT_ID) + + def test_non_dimension_keys_skipped(self, qb): + """start_date, end_date, limit are not validated as dimensions.""" + qb.validate_dimension_constraints( + _SHORT_ID, start_date="2020", end_date="2023", limit=10 + ) + + def test_valid_dimension_passes(self, qb): + """A valid dimension value should pass without raising.""" + qb.metadata.get_constrained_values = MagicMock( + return_value={ + "REF_AREA": [{"value": "USA", "label": "United States"}], + } + ) + qb.validate_dimension_constraints(_SHORT_ID, REF_AREA="USA") + + def test_invalid_dimension_raises(self, qb): + """An invalid dimension value raises ValueError with helpful message.""" + qb.metadata.get_constrained_values = MagicMock( + return_value={ + "REF_AREA": [{"value": "USA", "label": "United States"}], + } + ) + with pytest.raises(ValueError, match="Invalid value"): + qb.validate_dimension_constraints(_SHORT_ID, REF_AREA="ZZZNOTVALID") + + def test_constraint_load_failure_warns(self, qb): + """If get_constrained_values fails, only a warning is issued.""" + qb.metadata.get_constrained_values = MagicMock(side_effect=Exception("fail")) + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + qb.validate_dimension_constraints(_SHORT_ID, REF_AREA="USA") + assert len(w) == 1 + assert "Could not load constraints" in str(w[0].message) + + def test_multi_value_validated(self, qb): + """Multi-value dimension values are validated.""" + qb.metadata.get_constrained_values = MagicMock( + return_value={ + "REF_AREA": [ + {"value": "USA", "label": "United States"}, + {"value": "GBR", "label": "United Kingdom"}, + ], + } + ) + qb.validate_dimension_constraints(_SHORT_ID, REF_AREA="USA+GBR") + + +class TestFetchData: + def _make_csv_text(self, with_headers=True) -> str: + lines = [ + "REF_AREA: Reference Area,MEASURE: Measure,FREQ: Frequency,TIME_PERIOD,OBS_VALUE", + "USA: United States,CPI: Consumer Price Index,A: Annual,2024,105.2", + "GBR: United Kingdom,CPI: Consumer Price Index,A: Annual,2024,103.7", + ] + if with_headers: + return "\n".join(lines) + return "\n".join(lines[1:]) + + def test_basic_fetch_returns_data_and_metadata(self, qb): + mock_resp = MagicMock() + mock_resp.text = self._make_csv_text() + with patch( + "openbb_oecd.utils.query_builder._make_request", return_value=mock_resp + ): + result = qb.fetch_data(_SHORT_ID, _skip_validation=True) + assert "data" in result + assert "metadata" in result + assert len(result["data"]) == 2 + + def test_data_row_has_expected_keys(self, qb): + mock_resp = MagicMock() + mock_resp.text = self._make_csv_text() + with patch( + "openbb_oecd.utils.query_builder._make_request", return_value=mock_resp + ): + result = qb.fetch_data(_SHORT_ID, _skip_validation=True) + row = result["data"][0] + assert "TIME_PERIOD" in row + assert "OBS_VALUE" in row + + def test_obs_value_numeric(self, qb): + mock_resp = MagicMock() + mock_resp.text = self._make_csv_text() + with patch( + "openbb_oecd.utils.query_builder._make_request", return_value=mock_resp + ): + result = qb.fetch_data(_SHORT_ID, _skip_validation=True) + for row in result["data"]: + assert isinstance(row["OBS_VALUE"], float) + + def test_metadata_fields(self, qb): + mock_resp = MagicMock() + mock_resp.text = 
self._make_csv_text() + with patch( + "openbb_oecd.utils.query_builder._make_request", return_value=mock_resp + ): + result = qb.fetch_data(_SHORT_ID, _skip_validation=True) + meta = result["metadata"] + assert meta["dataflow_id"] == _SHORT_ID + assert meta["row_count"] == 2 + + def test_empty_response_raises(self, qb): + mock_resp = MagicMock() + mock_resp.text = "" + with patch( + "openbb_oecd.utils.query_builder._make_request", return_value=mock_resp + ): + with pytest.raises(Exception): + qb.fetch_data(_SHORT_ID, _skip_validation=True) + + def test_start_end_date_url_params(self, qb): + """start/end date are appended to the URL as query params.""" + captured_urls = [] + mock_resp = MagicMock() + mock_resp.text = self._make_csv_text() + + def capture_req(url, **kwargs): + captured_urls.append(url) + return mock_resp + + with patch( + "openbb_oecd.utils.query_builder._make_request", side_effect=capture_req + ): + qb.fetch_data( + _SHORT_ID, start_date="2020", end_date="2023", _skip_validation=True + ) + + assert captured_urls, "No request was made" + assert any("ge:2020" in u for u in captured_urls) + assert any("le:2023" in u for u in captured_urls) + + +class TestFetchWithMultiValueFallback: + def _make_csv(self, rows=1) -> str: + lines = ["REF_AREA,TIME_PERIOD,OBS_VALUE"] + for i in range(rows): + lines.append(f"USA,202{i},10{i}.0") + return "\n".join(lines) + + def test_success_on_first_try(self, qb): + mock_resp = MagicMock() + mock_resp.text = self._make_csv(2) + with patch( + "openbb_oecd.utils.query_builder._make_request", return_value=mock_resp + ): + text = qb._fetch_with_multi_value_fallback( + "http://test", + {}, + _SHORT_ID, + None, + None, + None, + {}, + ) + assert "OBS_VALUE" in text + + def test_fallback_on_404_with_multi_value(self, qb): + """When 404 and a dimension contains '+', falls back to per-value requests.""" + call_count = [0] + csv_parts = [self._make_csv(1), "USA,2024,105.0"] + + def fake_req(url, **kwargs): + resp = MagicMock() + if call_count[0] == 0: + call_count[0] += 1 + http_err = HTTPError("404") + http_err.response = MagicMock(status_code=404) + raise http_err + resp.text = csv_parts[0] if call_count[0] == 1 else "USA,2024,105.0\n" + call_count[0] += 1 + return resp + + with patch( + "openbb_oecd.utils.query_builder._make_request", side_effect=fake_req + ): + with patch.object(qb, "build_url", return_value="http://single"): + text = qb._fetch_with_multi_value_fallback( + "http://test", + {}, + _SHORT_ID, + None, + None, + None, + {"REF_AREA": "USA+GBR"}, + ) + assert text is not None + + def test_raises_when_no_multi_value_on_404(self, qb): + """Without '+' in any dimension, raises immediately on 404.""" + + def fake_req(url, **kwargs): + http_err = HTTPError("404") + http_err.response = MagicMock(status_code=404) + raise http_err + + with patch( + "openbb_oecd.utils.query_builder._make_request", side_effect=fake_req + ): + with pytest.raises(OpenBBError, match="OECD data request failed"): + qb._fetch_with_multi_value_fallback( + "http://test", + {}, + _SHORT_ID, + None, + None, + None, + {"REF_AREA": "USA"}, # No '+' here + ) + + +class TestEdgeCases: + def test_format_period_already_year_month(self): + assert _format_period("2024-01") == "2024-01" + + def test_parse_time_period_invalid_quarter(self): + result = parse_time_period("2024-Q5") + assert result is not None + + def test_build_url_no_kwargs(self, qb): + url = qb.build_url(_SHORT_ID) + assert "DF_TEST" in url + assert "http" in url + + def test_split_label_columns_no_label_format(self, qb): 
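+ # Complements the codelist-fallback case above; this test only asserts + # passthrough of the plain-code column.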
+ """Columns without ': ' in header are passed through unchanged.""" + df = pd.DataFrame( + { + "REF_AREA": ["USA"], + "OBS_VALUE": [1.0], + "TIME_PERIOD": ["2024"], + } + ) + result = qb._split_label_columns(df, _SHORT_ID) + assert "REF_AREA" in result.columns
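+ + +# How these pieces compose in practice (an illustrative sketch based on the tests +# above; not itself a test): +# qb = OecdQueryBuilder() +# url = qb.build_url("DF_TEST", start_date="2020", REF_AREA="USA") +# result = qb.fetch_data("DF_TEST", REF_AREA="USA", _skip_validation=True) +# rows = result["data"]  # row dicts keyed by dimension codes, TIME_PERIOD, OBS_VALUE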