Skip to content

Commit a66efc0

Browse files
Fix currency.get() by migrating from broken BCB CSV to PTAX OData API
Co-authored-by: wilsonfreitas <1583922+wilsonfreitas@users.noreply.github.com>
Agent-Logs-Url: https://github.com/wilsonfreitas/python-bcb/sessions/a113f59b-1247-4385-80e1-8e951d69a4d7
1 parent 9e9b331 commit a66efc0

3 files changed

Lines changed: 140 additions & 205 deletions

File tree

bcb/currency.py

Lines changed: 53 additions & 109 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,7 @@
1-
import re
2-
import warnings
3-
from datetime import date, timedelta
4-
from io import BytesIO, StringIO
51
from typing import Dict, List, Literal, Optional, Union, overload
62

73
import httpx
8-
import numpy as np
94
import pandas as pd
10-
from lxml import html
115

126
from .exceptions import BCBAPIError, CurrencyNotFoundError
137
from .utils import Date, DateInput
@@ -16,68 +10,23 @@
1610
O módulo :py:mod:`bcb.currency` tem como objetivo fazer consultas no site do conversor de moedas do BCB.
1711
"""
1812

19-
20-
def _currency_url(currency_id: int, start_date: DateInput, end_date: DateInput) -> str:
21-
start_date = Date(start_date)
22-
end_date = Date(end_date)
23-
return (
24-
f"https://ptax.bcb.gov.br/ptax_internet/consultaBoletim.do?"
25-
f"method=gerarCSVFechamentoMoedaNoPeriodo&"
26-
f"ChkMoeda={currency_id}&DATAINI={start_date.date:%d/%m/%Y}&DATAFIM={end_date.date:%d/%m/%Y}"
27-
)
28-
13+
_PTAX_BASE_URL = "https://olinda.bcb.gov.br/olinda/servico/PTAX/versao/v1/odata"
2914

3015
_CACHE: dict[str, pd.DataFrame] = dict()
3116

3217

3318
def clear_cache() -> None:
3419
"""Clear the module-level session cache.
3520
36-
:func:`get` and :func:`get_currency_list` cache the currency ID list and
37-
the full currency master table for the duration of the Python session so
38-
that repeated calls do not make redundant HTTP requests. Call this
39-
function to force a fresh fetch on the next request (useful in tests or
40-
long-running scripts where the master data may have changed).
21+
:func:`get` and :func:`get_currency_list` cache the currency list for the
22+
duration of the Python session so that repeated calls do not make redundant
23+
HTTP requests. Call this function to force a fresh fetch on the next
24+
request (useful in tests or long-running scripts where the data may have
25+
changed).
4126
"""
4227
_CACHE.clear()
4328

4429

45-
def _currency_id_list() -> pd.DataFrame:
46-
if _CACHE.get("TEMP_CURRENCY_ID_LIST") is not None:
47-
return _CACHE.get("TEMP_CURRENCY_ID_LIST")
48-
else:
49-
url1 = (
50-
"https://ptax.bcb.gov.br/ptax_internet/consultaBoletim.do?"
51-
"method=exibeFormularioConsultaBoletim"
52-
)
53-
res = httpx.get(url1, follow_redirects=True)
54-
if res.status_code != 200:
55-
msg = f"BCB API Request error, status code = {res.status_code}"
56-
raise BCBAPIError(msg, res.status_code)
57-
58-
doc = html.parse(BytesIO(res.content)).getroot()
59-
xpath = "//select[@name='ChkMoeda']/option"
60-
x = [(elm.text, elm.get("value")) for elm in doc.xpath(xpath)]
61-
df = pd.DataFrame(x, columns=["name", "id"])
62-
df["id"] = df["id"].astype("int32")
63-
_CACHE["TEMP_CURRENCY_ID_LIST"] = df
64-
return df
65-
66-
67-
def _get_valid_currency_list(_date: date, n: int = 0) -> httpx.Response:
68-
url2 = f"http://www4.bcb.gov.br/Download/fechamento/M{_date:%Y%m%d}.csv"
69-
try:
70-
res = httpx.get(url2, follow_redirects=True)
71-
except httpx.ConnectError as ex:
72-
if n >= 3:
73-
raise ex
74-
return _get_valid_currency_list(_date, n + 1)
75-
if res.status_code == 200:
76-
return res
77-
else:
78-
return _get_valid_currency_list(_date - timedelta(1), 0)
79-
80-
8130
def get_currency_list() -> pd.DataFrame:
8231
"""
8332
Listagem com todas as moedas disponíveis na API e suas configurações de paridade.
@@ -86,59 +35,55 @@ def get_currency_list() -> pd.DataFrame:
8635
-------
8736
8837
DataFrame :
89-
Tabela com a listagem de moedas disponíveis.
38+
Tabela com a listagem de moedas disponíveis (colunas: ``symbol``,
39+
``name``, ``type``).
9040
"""
91-
if _CACHE.get("TEMP_FILE_CURRENCY_LIST") is not None:
92-
return _CACHE.get("TEMP_FILE_CURRENCY_LIST")
93-
else:
94-
res = _get_valid_currency_list(date.today())
95-
df = pd.read_csv(StringIO(res.text), delimiter=";")
96-
df.columns = [
97-
"code",
98-
"name",
99-
"symbol",
100-
"country_code",
101-
"country_name",
102-
"type",
103-
"exclusion_date",
104-
]
105-
df = df.loc[~df["country_code"].isna()]
106-
df["exclusion_date"] = pd.to_datetime(df["exclusion_date"], dayfirst=True)
107-
df["country_code"] = df["country_code"].astype("int32")
108-
df["code"] = df["code"].astype("int32")
109-
df["symbol"] = df["symbol"].str.strip()
110-
_CACHE["TEMP_FILE_CURRENCY_LIST"] = df
111-
return df
112-
113-
114-
def _get_currency_id(symbol: str) -> int:
115-
id_list = _currency_id_list()
41+
cached = _CACHE.get("TEMP_FILE_CURRENCY_LIST")
42+
if cached is not None:
43+
return cached
44+
url = f"{_PTAX_BASE_URL}/Moedas?$format=json"
45+
res = httpx.get(url, follow_redirects=True)
46+
if res.status_code != 200:
47+
msg = f"BCB API Request error, status code = {res.status_code}"
48+
raise BCBAPIError(msg, res.status_code)
49+
data = res.json()
50+
df = pd.DataFrame(data["value"])
51+
df = df.rename(
52+
columns={"simbolo": "symbol", "nomeFormatado": "name", "tipoMoeda": "type"}
53+
)
54+
_CACHE["TEMP_FILE_CURRENCY_LIST"] = df
55+
return df
56+
57+
58+
def _validate_currency_symbol(symbol: str) -> None:
11659
all_currencies = get_currency_list()
117-
x = pd.merge(id_list, all_currencies, on=["name"])
118-
matches = x.loc[x["symbol"] == symbol, "id"]
119-
if matches.empty:
60+
if symbol not in all_currencies["symbol"].values:
12061
raise CurrencyNotFoundError(f"Unknown currency symbol: {symbol}")
121-
return int(matches.max())
62+
63+
64+
def _currency_url(symbol: str, start_date: DateInput, end_date: DateInput) -> str:
65+
start_date = Date(start_date)
66+
end_date = Date(end_date)
67+
return (
68+
f"{_PTAX_BASE_URL}/CotacaoMoedaPeriodo("
69+
f"moeda=@moeda,dataInicial=@dataInicial,dataFinalCotacao=@dataFinalCotacao)?"
70+
f"@moeda='{symbol}'&"
71+
f"@dataInicial='{start_date.date:%m-%d-%Y}'&"
72+
f"@dataFinalCotacao='{end_date.date:%m-%d-%Y}'&"
73+
f"$format=json"
74+
)
12275

12376

12477
def _fetch_symbol_response(
12578
symbol: str, start_date: DateInput, end_date: DateInput
12679
) -> Optional[httpx.Response]:
12780
try:
128-
cid = _get_currency_id(symbol)
81+
_validate_currency_symbol(symbol)
12982
except CurrencyNotFoundError:
13083
return None
131-
url = _currency_url(cid, start_date, end_date)
84+
url = _currency_url(symbol, start_date, end_date)
13285
res = httpx.get(url, follow_redirects=True)
133-
if res.headers["Content-Type"].startswith("text/html"):
134-
doc = html.parse(BytesIO(res.content)).getroot()
135-
xpath = "//div[@class='msgErro']"
136-
elm = doc.xpath(xpath)[0]
137-
x = elm.text
138-
x = re.sub(r"^\W+", "", x)
139-
x = re.sub(r"\W+$", "", x)
140-
msg = f"BCB API returned error: {x} - {symbol}"
141-
warnings.warn(msg)
86+
if res.status_code != 200:
14287
return None
14388
return res
14489

@@ -149,18 +94,17 @@ def _get_symbol(
14994
res = _fetch_symbol_response(symbol, start_date, end_date)
15095
if res is None:
15196
return None
152-
columns = ["Date", "aa", "bb", "cc", "bid", "ask", "dd", "ee"]
153-
df = pd.read_csv(
154-
StringIO(res.text), delimiter=";", header=None, names=columns, dtype=str
155-
)
156-
df = df.assign(
157-
Date=lambda x: pd.to_datetime(x["Date"], format="%d%m%Y"),
158-
bid=lambda x: x["bid"].str.replace(",", ".").astype(np.float64),
159-
ask=lambda x: x["ask"].str.replace(",", ".").astype(np.float64),
160-
)
161-
df1 = df.set_index("Date")
97+
data = res.json()
98+
if not data.get("value"):
99+
return None
100+
df = pd.DataFrame(data["value"])
101+
df = df[df["tipoBoletim"] == "Fechamento"].copy()
102+
if df.empty:
103+
return None
104+
df["Date"] = pd.to_datetime(df["dataHoraCotacao"]).dt.normalize()
105+
df = df.rename(columns={"cotacaoCompra": "bid", "cotacaoVenda": "ask"})
162106
n = ["bid", "ask"]
163-
df1 = df1[n]
107+
df1 = df.set_index("Date")[n]
164108
tuples = list(zip([symbol] * len(n), n))
165109
df1.columns = pd.MultiIndex.from_tuples(tuples)
166110
return df1

tests/conftest.py

Lines changed: 45 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,31 +1,57 @@
1+
import json
2+
13
import pytest
24
from bcb import currency
35

46
# ---------------------------------------------------------------------------
57
# Mock data constants
68
# ---------------------------------------------------------------------------
79

8-
CURRENCY_ID_LIST_HTML = b"""
9-
<html><body><form>
10-
<select name="ChkMoeda">
11-
<option value="61">DOLLAR DOS EUA</option>
12-
</select>
13-
</form></body></html>
14-
"""
15-
16-
# First row is treated as header by pd.read_csv, then overwritten by df.columns = [...]
17-
CURRENCY_LIST_CSV = (
18-
"Codigo;Nome;Simbolo;CodPais;NomePais;Tipo;DataExclusao\n"
19-
"61;DOLLAR DOS EUA;USD;249;EUA;A;\n"
10+
# OData /Moedas response
11+
CURRENCY_LIST_JSON = json.dumps(
12+
{
13+
"value": [
14+
{"simbolo": "USD", "nomeFormatado": "DOLLAR DOS EUA", "tipoMoeda": "A"},
15+
]
16+
}
2017
)
2118

22-
# 8 columns, no header, date format DDMMYYYY, comma as decimal separator
23-
CURRENCY_RATE_CSV = (
24-
"01122020;0;0;0;5,0000;5,1000;0;0\n"
25-
"02122020;0;0;0;5,0100;5,1100;0;0\n"
26-
"03122020;0;0;0;5,0200;5,1200;0;0\n"
27-
"04122020;0;0;0;5,0300;5,1300;0;0\n"
28-
"07122020;0;0;0;5,0400;5,1400;0;0\n"
19+
# OData /CotacaoMoedaPeriodo response — one "Fechamento" bulletin per trading day
20+
CURRENCY_RATE_ODATA_JSON = json.dumps(
21+
{
22+
"value": [
23+
{
24+
"cotacaoCompra": 5.0000,
25+
"cotacaoVenda": 5.1000,
26+
"dataHoraCotacao": "2020-12-01 13:03:38.273",
27+
"tipoBoletim": "Fechamento",
28+
},
29+
{
30+
"cotacaoCompra": 5.0100,
31+
"cotacaoVenda": 5.1100,
32+
"dataHoraCotacao": "2020-12-02 13:03:38.273",
33+
"tipoBoletim": "Fechamento",
34+
},
35+
{
36+
"cotacaoCompra": 5.0200,
37+
"cotacaoVenda": 5.1200,
38+
"dataHoraCotacao": "2020-12-03 13:03:38.273",
39+
"tipoBoletim": "Fechamento",
40+
},
41+
{
42+
"cotacaoCompra": 5.0300,
43+
"cotacaoVenda": 5.1300,
44+
"dataHoraCotacao": "2020-12-04 13:03:38.273",
45+
"tipoBoletim": "Fechamento",
46+
},
47+
{
48+
"cotacaoCompra": 5.0400,
49+
"cotacaoVenda": 5.1400,
50+
"dataHoraCotacao": "2020-12-07 13:03:38.273",
51+
"tipoBoletim": "Fechamento",
52+
},
53+
]
54+
}
2955
)
3056

3157
SGS_JSON_5 = (

0 commit comments

Comments
 (0)