Skip to content

Commit 8ddc382

Browse files
Merge pull request #60 from datagouv/feat/specific-user-agent-for-mcp
feat: mcp sends a specific user agent to datagouv services
2 parents 9a2ee3b + 42291f3 commit 8ddc382

6 files changed

Lines changed: 46 additions & 13 deletions

File tree

helpers/crawler_api_client.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import httpx
66

77
from helpers import env_config
8+
from helpers.user_agent import USER_AGENT
89

910
logger = logging.getLogger("datagouv_mcp")
1011

@@ -19,7 +20,7 @@ async def _get_session(
1920
) -> tuple[httpx.AsyncClient, bool]:
2021
if session is not None:
2122
return session, False
22-
new_session = httpx.AsyncClient()
23+
new_session = httpx.AsyncClient(headers={"User-Agent": USER_AGENT})
2324
return new_session, True
2425

2526

helpers/datagouv_api_client.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import yaml
77

88
from helpers import env_config
9+
from helpers.user_agent import USER_AGENT
910

1011
logger = logging.getLogger("datagouv_mcp")
1112

@@ -29,7 +30,7 @@ async def get_resource_details(
2930
"""
3031
own = session is None
3132
if own:
32-
session = httpx.AsyncClient()
33+
session = httpx.AsyncClient(headers={"User-Agent": USER_AGENT})
3334
assert session is not None
3435
try:
3536
base_url: str = env_config.get_base_url("datagouv_api")
@@ -45,7 +46,7 @@ async def get_resource_metadata(
4546
) -> dict[str, Any]:
4647
own = session is None
4748
if own:
48-
session = httpx.AsyncClient()
49+
session = httpx.AsyncClient(headers={"User-Agent": USER_AGENT})
4950
assert session is not None
5051
try:
5152
data = await get_resource_details(resource_id, session=session)
@@ -69,7 +70,7 @@ async def get_dataset_details(
6970
"""
7071
own = session is None
7172
if own:
72-
session = httpx.AsyncClient()
73+
session = httpx.AsyncClient(headers={"User-Agent": USER_AGENT})
7374
assert session is not None
7475
try:
7576
base_url: str = env_config.get_base_url("datagouv_api")
@@ -85,7 +86,7 @@ async def get_dataset_metadata(
8586
) -> dict[str, Any]:
8687
own = session is None
8788
if own:
88-
session = httpx.AsyncClient()
89+
session = httpx.AsyncClient(headers={"User-Agent": USER_AGENT})
8990
assert session is not None
9091
try:
9192
data = await get_dataset_details(dataset_id, session=session)
@@ -105,7 +106,7 @@ async def get_resource_and_dataset_metadata(
105106
) -> dict[str, Any]:
106107
own = session is None
107108
if own:
108-
session = httpx.AsyncClient()
109+
session = httpx.AsyncClient(headers={"User-Agent": USER_AGENT})
109110
try:
110111
res: dict[str, Any] = await get_resource_metadata(resource_id, session=session)
111112
ds: dict[str, Any] = {}
@@ -129,7 +130,7 @@ async def get_resources_for_dataset(
129130
"""
130131
own = session is None
131132
if own:
132-
session = httpx.AsyncClient()
133+
session = httpx.AsyncClient(headers={"User-Agent": USER_AGENT})
133134
try:
134135
ds = await get_dataset_metadata(dataset_id, session=session)
135136
base_url: str = env_config.get_base_url("datagouv_api")
@@ -164,7 +165,7 @@ async def fetch_openapi_spec(
164165
"""
165166
own = session is None
166167
if own:
167-
session = httpx.AsyncClient()
168+
session = httpx.AsyncClient(headers={"User-Agent": USER_AGENT})
168169
assert session is not None
169170
try:
170171
logger.debug("Fetching OpenAPI spec from %s", url)
@@ -196,7 +197,7 @@ async def get_dataservice_details(
196197
"""
197198
own = session is None
198199
if own:
199-
session = httpx.AsyncClient()
200+
session = httpx.AsyncClient(headers={"User-Agent": USER_AGENT})
200201
assert session is not None
201202
try:
202203
base_url: str = env_config.get_base_url("datagouv_api")
@@ -226,7 +227,7 @@ async def search_dataservices(
226227
"""
227228
own = session is None
228229
if own:
229-
session = httpx.AsyncClient()
230+
session = httpx.AsyncClient(headers={"User-Agent": USER_AGENT})
230231
assert session is not None
231232
try:
232233
base_url: str = env_config.get_base_url("datagouv_api")
@@ -295,7 +296,7 @@ async def search_datasets(
295296
"""
296297
own = session is None
297298
if own:
298-
session = httpx.AsyncClient()
299+
session = httpx.AsyncClient(headers={"User-Agent": USER_AGENT})
299300
assert session is not None
300301
try:
301302
base_url: str = env_config.get_base_url("datagouv_api")

helpers/metrics_api_client.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import httpx
55

66
from helpers import env_config
7+
from helpers.user_agent import USER_AGENT
78

89
logger = logging.getLogger("datagouv_mcp")
910

@@ -13,7 +14,7 @@ async def _get_session(
1314
) -> tuple[httpx.AsyncClient, bool]:
1415
if session is not None:
1516
return session, False
16-
new_session = httpx.AsyncClient()
17+
new_session = httpx.AsyncClient(headers={"User-Agent": USER_AGENT})
1718
return new_session, True
1819

1920

helpers/tabular_api_client.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import httpx
55

66
from helpers import env_config
7+
from helpers.user_agent import USER_AGENT
78

89
logger = logging.getLogger("datagouv_mcp")
910

@@ -17,7 +18,7 @@ async def _get_session(
1718
) -> tuple[httpx.AsyncClient, bool]:
1819
if session is not None:
1920
return session, False
20-
new_session = httpx.AsyncClient()
21+
new_session = httpx.AsyncClient(headers={"User-Agent": USER_AGENT})
2122
return new_session, True
2223

2324

helpers/user_agent.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
"""User-Agent sent to data.gouv.fr services for identification and support."""

from importlib.metadata import PackageNotFoundError, version

# Distribution name whose installed metadata provides the version; it is also
# used as the product token of the User-Agent string.
_PACKAGE_NAME = "datagouv-mcp"

# Version token used when the distribution metadata cannot be found.
_UNKNOWN_VERSION = "unknown"


def _resolve_version() -> str:
    """Return the installed version of the package, or a placeholder.

    Returns:
        The version string reported by ``importlib.metadata.version`` for
        ``datagouv-mcp``, or ``"unknown"`` when no metadata is installed.
    """
    try:
        return version(_PACKAGE_NAME)
    except PackageNotFoundError:
        # This module is imported by every API client at import time, so a
        # missing distribution (e.g. running from an uninstalled source
        # checkout) must not make those imports fail.
        return _UNKNOWN_VERSION


# Header value passed to httpx clients, formatted as "<product>/<version>".
USER_AGENT = f"{_PACKAGE_NAME}/{_resolve_version()}"

tests/test_datagouv_api.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
"""Tests for the datagouv_api_client helper."""
22

33
import os
4+
from unittest.mock import AsyncMock, MagicMock, patch
45

56
import pytest
67

78
from helpers import datagouv_api_client
9+
from helpers.user_agent import USER_AGENT
810

911

1012
@pytest.fixture
@@ -34,6 +36,28 @@ async def test_get_dataset_metadata(self, known_dataset_id):
3436
assert "title" in metadata
3537
assert metadata["title"] is not None
3638

async def test_get_dataset_metadata_sends_user_agent(self, known_dataset_id):
    """Check that an owned client is built with the User-Agent header.

    ``httpx.AsyncClient`` is patched so no network call is made; the test
    only verifies the keyword arguments used to construct the client when
    no session is supplied.
    """
    # Canned HTTP response returned by the fake client's ``get``.
    fake_response = MagicMock()
    fake_response.raise_for_status = MagicMock()
    fake_response.json.return_value = {
        "id": known_dataset_id,
        "title": "Test Dataset",
    }

    # Stand-in for the client instance; ``get`` and ``aclose`` are awaitable.
    fake_client = MagicMock()
    fake_client.aclose = AsyncMock(return_value=None)
    fake_client.get = AsyncMock(return_value=fake_response)

    patch_target = "helpers.datagouv_api_client.httpx.AsyncClient"
    with patch(patch_target, return_value=fake_client) as client_factory:
        await datagouv_api_client.get_dataset_metadata(
            known_dataset_id, session=None
        )

    expected_headers = {"User-Agent": USER_AGENT}
    client_factory.assert_called_once_with(headers=expected_headers)
60+
3761
async def test_get_resource_metadata(self, known_resource_id):
3862
"""Test fetching resource metadata."""
3963
metadata = await datagouv_api_client.get_resource_metadata(known_resource_id)

0 commit comments

Comments
 (0)