Skip to content

Commit edc68ca

Browse files
wilsonfreitas and claude committed
Phase 4: Data Validation & Type Safety
- CSV validation in currency.py: _validate_currency_csv() (8-column check), _parse_currency_dates() (format validation), _parse_currency_types() (type conversion with error handling); _get_symbol() now uses all three validators
- SGSCode refactored to @dataclass(frozen=True) with from_code() and from_named() classmethods; added _validate_sgs_code() to reject non-positive codes; updated _codes() generator to use classmethods + validation
- Added CurrencyTextResult type alias and improved get() overloads to distinguish single-symbol (str) vs multi-symbol (Dict[str, str]) text returns
- Added `from __future__ import annotations` to all 10 bcb/ modules
- Converted all relative imports to absolute imports across the package
- Updated tests to use new SGSCode classmethod constructors

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
1 parent 8b5647f commit edc68ca

11 files changed

Lines changed: 249 additions & 36 deletions

File tree

bcb/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1-
from .exceptions import (
1+
from __future__ import annotations
2+
3+
from bcb.exceptions import (
24
BCBError,
35
BCBAPIError,
46
CurrencyNotFoundError,
57
SGSError,
68
ODataError,
79
)
8-
from .odata.api import (
10+
from bcb.odata.api import (
911
ODataAPI,
1012
Expectativas,
1113
PTAX,

bcb/currency.py

Lines changed: 122 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from __future__ import annotations
2+
13
import re
24
import threading
35
from datetime import date, timedelta
@@ -359,6 +361,93 @@ def _fetch_symbol_response(
359361
return res
360362

361363

364+
def _validate_currency_csv(csv_text: str) -> pd.DataFrame:
365+
"""Parse and validate currency CSV format.
366+
367+
Parameters
368+
----------
369+
csv_text : str
370+
CSV content from BCB API
371+
372+
Returns
373+
-------
374+
pd.DataFrame
375+
Parsed DataFrame with all columns
376+
377+
Raises
378+
------
379+
BCBAPIError
380+
If CSV format is invalid (wrong column count)
381+
"""
382+
df = pd.read_csv(StringIO(csv_text), delimiter=";", header=None, dtype=str)
383+
384+
# Validate column count
385+
if len(df.columns) != 8:
386+
raise BCBAPIError(
387+
f"Invalid CSV format: expected 8 columns, got {len(df.columns)}",
388+
status_code=400,
389+
)
390+
391+
# Assign meaningful names
392+
df.columns = ["Date", "_col1", "_col2", "_col3", "bid", "ask", "_col6", "_col7"]
393+
return df
394+
395+
396+
def _parse_currency_dates(df: pd.DataFrame) -> pd.DataFrame:
397+
"""Parse and validate date column in currency CSV.
398+
399+
Parameters
400+
----------
401+
df : pd.DataFrame
402+
DataFrame with Date column as strings
403+
404+
Returns
405+
-------
406+
pd.DataFrame
407+
DataFrame with parsed DatetimeIndex
408+
409+
Raises
410+
------
411+
BCBAPIError
412+
If date parsing fails
413+
"""
414+
try:
415+
df["Date"] = pd.to_datetime(df["Date"], format="%d%m%Y")
416+
except ValueError as e:
417+
raise BCBAPIError(
418+
f"Failed to parse currency date column: {str(e)}", status_code=400
419+
)
420+
return df
421+
422+
423+
def _parse_currency_types(df: pd.DataFrame) -> pd.DataFrame:
424+
"""Parse and validate data types in currency DataFrame.
425+
426+
Parameters
427+
----------
428+
df : pd.DataFrame
429+
DataFrame with mixed types
430+
431+
Returns
432+
-------
433+
pd.DataFrame
434+
DataFrame with validated types
435+
436+
Raises
437+
------
438+
BCBAPIError
439+
If type conversion fails
440+
"""
441+
try:
442+
df["bid"] = df["bid"].str.replace(",", ".").astype(np.float64)
443+
df["ask"] = df["ask"].str.replace(",", ".").astype(np.float64)
444+
except (ValueError, TypeError) as e:
445+
raise BCBAPIError(
446+
f"Failed to parse currency numeric columns: {str(e)}", status_code=400
447+
)
448+
return df
449+
450+
362451
def _get_symbol(
363452
symbol: str, start_date: DateInput, end_date: DateInput
364453
) -> pd.DataFrame:
@@ -383,18 +472,12 @@ def _get_symbol(
383472
CurrencyNotFoundError
384473
If currency not found
385474
BCBAPIError
386-
If API returns error
475+
If API returns error or data format is invalid
387476
"""
388477
res = _fetch_symbol_response(symbol, start_date, end_date)
389-
columns = ["Date", "aa", "bb", "cc", "bid", "ask", "dd", "ee"]
390-
df = pd.read_csv(
391-
StringIO(res.text), delimiter=";", header=None, names=columns, dtype=str
392-
)
393-
df = df.assign(
394-
Date=lambda x: pd.to_datetime(x["Date"], format="%d%m%Y"),
395-
bid=lambda x: x["bid"].str.replace(",", ".").astype(np.float64),
396-
ask=lambda x: x["ask"].str.replace(",", ".").astype(np.float64),
397-
)
478+
df = _validate_currency_csv(res.text)
479+
df = _parse_currency_dates(df)
480+
df = _parse_currency_types(df)
398481
df1 = df.set_index("Date")
399482
n = ["bid", "ask"]
400483
df1 = df1[n]
@@ -431,9 +514,13 @@ def _get_symbol_text(symbol: str, start_date: DateInput, end_date: DateInput) ->
431514
return res.text
432515

433516

517+
# Type alias for text output with multiple symbols
518+
CurrencyTextResult = Dict[str, str] # Maps symbol → CSV text
519+
520+
434521
@overload
435522
def get(
436-
symbols: Union[str, List[str]],
523+
symbols: str,
437524
start: DateInput,
438525
end: DateInput,
439526
side: str = ...,
@@ -444,13 +531,35 @@ def get(
444531

445532
@overload
446533
def get(
447-
symbols: Union[str, List[str]],
534+
symbols: List[str],
535+
start: DateInput,
536+
end: DateInput,
537+
side: str = ...,
538+
groupby: str = ...,
539+
output: Literal["dataframe"] = ...,
540+
) -> pd.DataFrame: ...
541+
542+
543+
@overload
544+
def get(
545+
symbols: str,
546+
start: DateInput,
547+
end: DateInput,
548+
side: str = ...,
549+
groupby: str = ...,
550+
output: Literal["text"] = ...,
551+
) -> str: ...
552+
553+
554+
@overload
555+
def get(
556+
symbols: List[str],
448557
start: DateInput,
449558
end: DateInput,
450559
side: str = ...,
451560
groupby: str = ...,
452561
output: Literal["text"] = ...,
453-
) -> Union[str, Dict[str, str]]: ...
562+
) -> CurrencyTextResult: ...
454563

455564

456565
def get(

bcb/exceptions.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from __future__ import annotations
2+
3+
14
class BCBError(Exception):
25
"""Base exception for all python-bcb errors."""
36

bcb/http.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Shared HTTP client and utilities for python-bcb."""
22

3+
from __future__ import annotations
4+
35
from typing import Callable, TypeVar
46

57
import httpx

bcb/odata/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
from .api import (
1+
from __future__ import annotations
2+
3+
from bcb.odata.api import (
24
ODataAPI,
35
Expectativas,
46
PTAX,

bcb/odata/api.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
from __future__ import annotations
2+
13
from typing import Any, Literal, Optional, Union, overload
2-
from .framework import (
4+
5+
from bcb.odata.framework import (
36
ODataEntitySet,
47
ODataFunctionImport,
58
ODataQuery,

bcb/odata/framework.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1+
from __future__ import annotations
2+
13
import threading
24
from io import BytesIO
35
from typing import Any, Optional, Union
6+
47
from lxml import etree
58
import json
69
from urllib.parse import quote

bcb/sgs/__init__.py

Lines changed: 99 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1+
from __future__ import annotations
2+
13
import json
4+
from dataclasses import dataclass
25
from io import StringIO
36
from typing import (
47
Dict,
@@ -29,18 +32,58 @@
2932
"""
3033

3134

35+
@dataclass(frozen=True)
class SGSCode:
    """SGS time series code with optional human-readable name.

    Instances are immutable. Use :meth:`from_code` when only the numeric
    code is known and :meth:`from_named` when a label is supplied.

    Attributes
    ----------
    value : int
        Numeric SGS code
    name : str
        Human-readable name or string representation of code
    """

    value: int
    name: str

    @classmethod
    def from_code(cls, code: int | str) -> "SGSCode":
        """Create SGSCode from numeric or string code.

        Parameters
        ----------
        code : int | str
            SGS code; converted with ``int()``

        Returns
        -------
        SGSCode
            New instance whose name is the string form of the converted
            integer (so ``"0433"`` yields the name ``"433"``)
        """
        code_int = int(code)
        return cls(value=code_int, name=str(code_int))

    @classmethod
    def from_named(cls, code: int | str, name: str) -> "SGSCode":
        """Create SGSCode with explicit name.

        Parameters
        ----------
        code : int | str
            SGS code; converted with ``int()``
        name : str
            Human-readable name

        Returns
        -------
        SGSCode
            New instance with value and name
        """
        # Coerce the label so the ``name: str`` field holds even when a
        # caller passes a non-string key (e.g. from a tuple or mapping).
        return cls(value=int(code), name=str(name))

    def __repr__(self) -> str:
        return f"{self.value} - {self.name}"
4487

4588

4689
SGSCodeInput: TypeAlias = Union[
@@ -52,20 +95,62 @@ def __repr__(self) -> str:
5295
]
5396

5497

98+
def _validate_sgs_code(code: SGSCode) -> None:
99+
"""Validate SGSCode value.
100+
101+
Parameters
102+
----------
103+
code : SGSCode
104+
Code to validate
105+
106+
Raises
107+
------
108+
ValueError
109+
If code value is not positive integer
110+
"""
111+
if code.value <= 0:
112+
raise ValueError(f"SGS code must be positive integer, got {code.value}")
113+
114+
55115
def _codes(codes: SGSCodeInput) -> Generator[SGSCode, None, None]:
    """Turn any supported code specification into validated SGSCode objects.

    Parameters
    ----------
    codes : SGSCodeInput
        One of: a single ``int``/``str`` code, a ``(name, code)`` tuple, a
        list mixing bare codes and ``(name, code)`` tuples, or a mapping
        of name to code

    Yields
    ------
    SGSCode
        One instance per input code, each passed through
        ``_validate_sgs_code`` before being yielded

    Raises
    ------
    ValueError
        If any code is a non-positive integer
    """
    # Build a lazy stream of SGSCode objects for the detected input shape;
    # validation then happens in a single place below.
    if isinstance(codes, (int, str)):
        pending = (SGSCode.from_code(item) for item in (codes,))
    elif isinstance(codes, tuple):
        # Tuples are ordered (name, code).
        pending = (SGSCode.from_named(pair[1], pair[0]) for pair in (codes,))
    elif isinstance(codes, list):
        pending = (
            SGSCode.from_named(item[1], item[0])
            if isinstance(item, tuple)
            else SGSCode.from_code(item)
            for item in codes
        )
    elif isinstance(codes, Mapping):
        pending = (SGSCode.from_named(code, label) for label, code in codes.items())
    else:
        # Any other input type yields nothing.
        pending = iter(())

    for candidate in pending:
        _validate_sgs_code(candidate)
        yield candidate
69154

70155

71156
def _get_url_and_payload(

bcb/sgs/regional_economy.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from __future__ import annotations
2+
13
from typing import Dict, List, Optional, Union
24

35
import pandas as pd

bcb/utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from __future__ import annotations
2+
13
from datetime import date, datetime
24
from typing import TypeAlias, Union
35

0 commit comments

Comments
 (0)