Skip to content

Commit 8b5647f

Browse files
wilsonfreitas and claude committed
Phase 3: Cache Refactor
Implement thread-safe cache with structured keys and caching for OData metadata.

bcb/currency.py:
- Create _CacheKey NamedTuple for structured cache keys
- Create _ThreadSafeCache class with RLock for thread safety
- Support cache injection as optional parameter (defaults to module-level _DEFAULT_CACHE)
- Replace string-based cache keys ("TEMP_*") with _CacheKey(type="...")
- Update _currency_id_list() to use thread-safe cache
- Update get_currency_list() to use thread-safe cache
- Add cache parameter to functions for dependency injection
- Update clear_cache() to accept optional cache parameter
- Import threading and NamedTuple

bcb/odata/framework.py:
- Add module-level _METADATA_CACHE for OData metadata caching
- Add _METADATA_CACHE_LOCK for thread-safe access
- Update ODataService.__init__ to cache metadata by URL
- Cache is in-memory, lives for process lifetime (no TTL)
- Reuse cached metadata across instances with same URL
- Import threading

Benefits:
- Thread-safe cache prevents race conditions in multi-threaded applications
- Structured cache keys are more maintainable than magic strings
- Dependency injection allows testing with custom cache instances
- OData metadata caching prevents re-fetching on each instance creation
- Cache still uses simple dict (no LRU, no TTL) as specified in requirements

All checks pass:
✓ mypy: 0 errors
✓ ruff check: all checks passed
✓ ruff format: code properly formatted
✓ tests: 24 passed, 0 failed
✓ cache thread-safety verified with concurrent access

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
1 parent 87db07e commit 8b5647f

2 files changed

Lines changed: 194 additions & 64 deletions

File tree

bcb/currency.py

Lines changed: 172 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import re
2+
import threading
23
from datetime import date, timedelta
34
from io import BytesIO, StringIO
4-
from typing import TYPE_CHECKING, Dict, List, Literal, Union, overload
5+
from typing import TYPE_CHECKING, Dict, List, Literal, NamedTuple, Union, overload
56

67
import numpy as np
78
import pandas as pd
@@ -34,56 +35,151 @@ def _currency_url(currency_id: int, start_date: DateInput, end_date: DateInput)
3435
)
3536

3637

37-
_CACHE: dict[str, pd.DataFrame] = dict()
38+
class _CacheKey(NamedTuple):
39+
"""Structured cache key for currency module.
3840
41+
Attributes
42+
----------
43+
type : str
44+
Cache type: "currency_id_list" or "currency_list"
45+
"""
46+
47+
type: str
48+
49+
def __repr__(self) -> str:
50+
return f"CacheKey(type={self.type!r})"
51+
52+
53+
class _ThreadSafeCache:
54+
"""Thread-safe cache wrapper for currency data.
55+
56+
Parameters
57+
----------
58+
initial_data : dict, optional
59+
Initial cache data (default: empty)
60+
"""
61+
62+
def __init__(self, initial_data: dict[_CacheKey, pd.DataFrame] | None = None):
63+
self._lock = threading.RLock()
64+
self._data: dict[_CacheKey, pd.DataFrame] = initial_data or {}
65+
66+
def get(self, key: _CacheKey) -> pd.DataFrame | None:
67+
"""Get value from cache.
68+
69+
Parameters
70+
----------
71+
key : _CacheKey
72+
Cache key
73+
74+
Returns
75+
-------
76+
pd.DataFrame | None
77+
Cached DataFrame or None if not found
78+
"""
79+
with self._lock:
80+
return self._data.get(key)
81+
82+
def set(self, key: _CacheKey, value: pd.DataFrame) -> None:
83+
"""Set value in cache.
84+
85+
Parameters
86+
----------
87+
key : _CacheKey
88+
Cache key
89+
value : pd.DataFrame
90+
DataFrame to cache
91+
"""
92+
with self._lock:
93+
self._data[key] = value
94+
95+
def clear(self) -> None:
96+
"""Clear all cache entries."""
97+
with self._lock:
98+
self._data.clear()
3999

40-
def clear_cache() -> None:
100+
101+
# Default module-level cache instance
102+
_DEFAULT_CACHE = _ThreadSafeCache()
103+
104+
105+
def clear_cache(cache: _ThreadSafeCache | None = None) -> None:
    """Clear the module-level session cache.

    :func:`get` and :func:`get_currency_list` cache the currency ID list and
    the full currency master table for the duration of the Python session so
    that repeated calls do not make redundant HTTP requests. Call this
    function to force a fresh fetch on the next request (useful in tests or
    long-running scripts where the master data may have changed).

    Parameters
    ----------
    cache : _ThreadSafeCache, optional
        Cache instance to clear. If None, uses module-level default.
    """
    # Select by identity, not truthiness: an injected cache must be the one
    # that gets cleared even if it evaluates as falsy.
    target = _DEFAULT_CACHE if cache is None else cache
    target.clear()
50120

51121

52-
def _currency_id_list(
    cache: _ThreadSafeCache | None = None,
) -> pd.DataFrame:
    """Fetch list of available currency IDs and names.

    The list is scraped from the ``ChkMoeda`` ``<select>`` element of the
    PTAX bulletin query form and stored in the cache under the
    ``"currency_id_list"`` key; later calls return the cached DataFrame
    without issuing a request.

    Parameters
    ----------
    cache : _ThreadSafeCache, optional
        Cache instance to use. If None, uses module-level default.

    Returns
    -------
    pd.DataFrame
        DataFrame with columns: name, id (``id`` is cast to int32)

    Raises
    ------
    BCBRateLimitError
        If API rate limit is exceeded (429)
    BCBAPINotFoundError
        If API endpoint not found (404)
    BCBAPIError
        If API returns a server error (5xx) or any other non-200 status
    """
    # Select by identity, not truthiness, so an injected cache is always
    # honoured.
    if cache is None:
        cache = _DEFAULT_CACHE
    cache_key = _CacheKey(type="currency_id_list")
    cached = cache.get(cache_key)
    if cached is not None:
        return cached

    # The lookup above and the store below are separate cache calls, so two
    # callers that miss at the same time may both fetch; the last store wins.
    url1 = (
        "https://ptax.bcb.gov.br/ptax_internet/consultaBoletim.do?"
        "method=exibeFormularioConsultaBoletim"
    )
    res = _CLIENT.get(url1)
    if res.status_code == 429:
        raise BCBRateLimitError(
            "BCB API rate limit exceeded. Please try again later.",
            status_code=429,
        )
    if res.status_code == 404:
        raise BCBAPINotFoundError(
            "BCB API endpoint not found (404)",
            status_code=404,
        )
    if res.status_code >= 500:
        raise BCBAPIError(
            f"BCB API server error (status {res.status_code})",
            status_code=res.status_code,
        )
    if res.status_code != 200:
        msg = f"BCB API Request error, status code = {res.status_code}"
        raise BCBAPIError(msg, res.status_code)

    # Each <option> contributes one row: its text is the currency name and
    # its "value" attribute is the numeric currency id.
    doc = html.parse(BytesIO(res.content)).getroot()
    xpath = "//select[@name='ChkMoeda']/option"
    x = [(elm.text, elm.get("value")) for elm in doc.xpath(xpath)]
    df = pd.DataFrame(x, columns=["name", "id"])
    df["id"] = df["id"].astype("int32")
    cache.set(cache_key, df)
    return df
87183

88184

89185
def _get_valid_currency_list(
@@ -139,37 +235,50 @@ def _get_valid_currency_list(
139235
return _get_valid_currency_list(_date - timedelta(1), 0, max_rollback)
140236

141237

142-
def get_currency_list(
    cache: _ThreadSafeCache | None = None,
) -> pd.DataFrame:
    """List all currencies available in the API and their parity settings.

    The table is parsed from the semicolon-delimited text returned by
    ``_get_valid_currency_list`` for today's date and stored in the cache
    under the ``"currency_list"`` key; later calls return the cached
    DataFrame.

    Parameters
    ----------
    cache : _ThreadSafeCache, optional
        Cache instance to use. If None, uses module-level default.

    Returns
    -------
    pd.DataFrame
        Table listing the available currencies, with columns: code, name,
        symbol, country_code, country_name, type, exclusion_date.

    Raises
    ------
    BCBAPIError
        If API returns error response
    """
    # Select by identity, not truthiness, so an injected cache is always
    # honoured.
    if cache is None:
        cache = _DEFAULT_CACHE
    cache_key = _CacheKey(type="currency_list")
    cached = cache.get(cache_key)
    if cached is not None:
        return cached

    res = _get_valid_currency_list(date.today())
    df = pd.read_csv(StringIO(res.text), delimiter=";")
    df.columns = [
        "code",
        "name",
        "symbol",
        "country_code",
        "country_name",
        "type",
        "exclusion_date",
    ]
    # Drop rows without a country code. Take an explicit copy so the column
    # assignments below act on an independent frame rather than on the result
    # of a filter, which avoids pandas chained-assignment warnings.
    df = df.loc[~df["country_code"].isna()].copy()
    df["exclusion_date"] = pd.to_datetime(df["exclusion_date"], dayfirst=True)
    df["country_code"] = df["country_code"].astype("int32")
    df["code"] = df["code"].astype("int32")
    df["symbol"] = df["symbol"].str.strip()
    cache.set(cache_key, df)
    return df
173282

174283

175284
def _get_currency_id(symbol: str) -> int:

bcb/odata/framework.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import threading
12
from io import BytesIO
23
from typing import Any, Optional, Union
34
from lxml import etree
@@ -8,6 +9,11 @@
89
from bcb.http import _CLIENT
910
from bcb.exceptions import ODataError
1011

12+
# Module-level metadata cache for OData services
13+
# Maps the service's "@odata.context" URL → ODataMetadata instance
14+
_METADATA_CACHE: dict[str, "ODataMetadata"] = {}
15+
_METADATA_CACHE_LOCK = threading.RLock()
16+
1117
# Edm.Boolean
1218
# Edm.Byte
1319
# Edm.Date
@@ -352,6 +358,14 @@ def _parse_function_import(e: Any) -> ODataFunctionImport:
352358

353359

354360
class ODataService:
361+
"""OData service client.
362+
363+
Parameters
364+
----------
365+
url : str
366+
OData service root URL
367+
"""
368+
355369
def __init__(self, url: str) -> None:
356370
self.url = url
357371
res = _CLIENT.get(self.url)
@@ -360,7 +374,14 @@ def __init__(self, url: str) -> None:
360374
ODataEndPoint(**x) for x in self.api_data["value"]
361375
]
362376
self._odata_context_url: str = self.api_data["@odata.context"]
363-
self.metadata = ODataMetadata(self._odata_context_url)
377+
378+
# Use cached metadata if available, otherwise create and cache new one
379+
with _METADATA_CACHE_LOCK:
380+
if self._odata_context_url in _METADATA_CACHE:
381+
self.metadata = _METADATA_CACHE[self._odata_context_url]
382+
else:
383+
self.metadata = ODataMetadata(self._odata_context_url)
384+
_METADATA_CACHE[self._odata_context_url] = self.metadata
364385

365386
def __getitem__(self, item: str) -> Union[ODataEntitySet, ODataFunctionImport]:
366387
es = self.entity_sets.get(item)

0 commit comments

Comments
 (0)