diff --git a/backend/core/commons_imagery.py b/backend/core/commons_imagery.py index e7b19e1d..eb16d32f 100644 --- a/backend/core/commons_imagery.py +++ b/backend/core/commons_imagery.py @@ -45,10 +45,10 @@ from db_utils import get_db_connection from core import image_quality as iq +from core.http_client import make_session logger = logging.getLogger("commons_imagery") -USER_AGENT = "ApproachNote/1.0 (+support@approachnote.com)" COMMONS_API = "https://commons.wikimedia.org/w/api.php" WIKIDATA_API = "https://www.wikidata.org/w/api.php" WIKIPEDIA_API = "https://en.wikipedia.org/w/api.php" @@ -157,12 +157,7 @@ def db_fields(self) -> Dict[str, Any]: # --------------------------------------------------------------------------- # HTTP helpers # --------------------------------------------------------------------------- - -def make_session() -> requests.Session: - s = requests.Session() - s.headers.update({"User-Agent": USER_AGENT, "Accept": "application/json"}) - return s - +# Session construction lives in core.http_client.make_session (imported above). def download(session: requests.Session, url: Optional[str], max_bytes: int = 15_000_000, timeout: int = 30) -> Optional[bytes]: diff --git a/backend/integrations/apple_music/client.py b/backend/integrations/apple_music/client.py index 86ed270e..ba884895 100644 --- a/backend/integrations/apple_music/client.py +++ b/backend/integrations/apple_music/client.py @@ -25,6 +25,7 @@ import requests from core.cache_utils import get_cache_dir +from core.http_client import make_session logger = logging.getLogger(__name__) @@ -97,10 +98,7 @@ def __init__(self, cache_days: int = 30, force_refresh: bool = False, self.track_cache_dir.mkdir(parents=True, exist_ok=True) # HTTP session for connection reuse - self.session = requests.Session() - self.session.headers.update({ - 'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)' - }) + self.session = make_session(accept_json=False) # Stats tracking self.stats = { diff --git a/backend/integrations/coverart/utils.py b/backend/integrations/coverart/utils.py index bf1c6f3b..4e35129a 100755 --- a/backend/integrations/coverart/utils.py +++ b/backend/integrations/coverart/utils.py @@ -22,6 +22,7 @@ import requests from core.cache_utils import get_cache_dir +from core.http_client import make_session logger = logging.getLogger(__name__) @@ -66,11 +67,7 @@ def __init__(self, cache_days: int = 30, force_refresh: bool = False): cache_days: Number of days before cache is considered stale force_refresh: If True, bypass cache and fetch fresh data """ - self.session = requests.Session() - self.session.headers.update({ - 'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)', - 'Accept': 'application/json' - }) + self.session = make_session() # Rate limiting (CAA has no limit, but be courteous) self.last_request_time = 0 diff --git a/backend/integrations/musicbrainz/client.py b/backend/integrations/musicbrainz/client.py index 8f2dfa39..5698e45d 100644 --- a/backend/integrations/musicbrainz/client.py +++ b/backend/integrations/musicbrainz/client.py @@ -33,6 +33,7 @@ from pathlib import Path from core.cache_utils import get_cache_dir +from core.http_client import make_session logger = logging.getLogger(__name__) @@ -48,11 +49,7 @@ def __init__(self, cache_days=30, force_refresh=False): cache_days: Number of days before cache is considered stale force_refresh: If True, always fetch fresh data ignoring cache """ - self.session = requests.Session() - self.session.headers.update({ - 'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)', - 'Accept': 'application/json' - }) + self.session = make_session() # Rate limiting self.last_request_time = 0 diff --git a/backend/integrations/wikipedia/utils.py b/backend/integrations/wikipedia/utils.py index f29dbe7a..4778f24e 100644 --- a/backend/integrations/wikipedia/utils.py +++ b/backend/integrations/wikipedia/utils.py @@ -16,6 +16,7 @@ from pathlib import Path from core.cache_utils import get_cache_dir +from core.http_client import make_session logger = logging.getLogger(__name__) @@ -51,11 +52,7 @@ def __init__(self, cache_days=7, force_refresh=False): # Remove cache_dir param cache_days: Number of days before cache is considered stale force_refresh: If True, always fetch fresh data ignoring cache """ - self.session = requests.Session() - self.session.headers.update({ - 'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)', - 'Accept': 'application/json' - }) + self.session = make_session() # Rate limiting self.last_request_time = 0 diff --git a/backend/integrations/youtube/client.py b/backend/integrations/youtube/client.py index 2a70ca89..507dea57 100644 --- a/backend/integrations/youtube/client.py +++ b/backend/integrations/youtube/client.py @@ -27,6 +27,7 @@ import requests from core.cache_utils import get_cache_dir +from core.http_client import make_session logger = logging.getLogger(__name__) @@ -99,10 +100,7 @@ def __init__( self.search_cache_dir.mkdir(parents=True, exist_ok=True) self.videos_cache_dir.mkdir(parents=True, exist_ok=True) - self.session = requests.Session() - self.session.headers.update({ - 'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)', - }) + self.session = make_session(accept_json=False) # Stats — mirrors what the other matchers expose so a CLI can print # a consistent summary. `quota_units` is the YouTube-specific one. diff --git a/backend/routes/admin.py b/backend/routes/admin.py index e8fd96ec..6187a9ca 100644 --- a/backend/routes/admin.py +++ b/backend/routes/admin.py @@ -22,6 +22,7 @@ from pathlib import Path from core.auth_utils import hash_password +from core.http_client import make_session from db_utils import get_db_connection from integrations.musicbrainz.release_importer import MBReleaseImporter from integrations.musicbrainz.parsing import parse_release_data @@ -1307,10 +1308,7 @@ def diagnose_mb_recording(song_id): diagnosis['recommendation']['id'] = str(rec['id']) # ===== CHECK 1: Fetch from MusicBrainz ===== - mb_session = requests.Session() - mb_session.headers.update({ - 'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)' - }) + mb_session = make_session(accept_json=False) # Fetch recording with work-rels and releases mb_response = mb_session.get( diff --git a/backend/scripts/add_wikipedia_image.py b/backend/scripts/add_wikipedia_image.py index 64721e9a..7216a286 100755 --- a/backend/scripts/add_wikipedia_image.py +++ b/backend/scripts/add_wikipedia_image.py @@ -24,6 +24,7 @@ find_performer_by_id, get_performer_images ) +from core.http_client import make_session # Configure logging logging.basicConfig( @@ -39,10 +40,7 @@ class WikipediaImageProcessor: def __init__(self, dry_run: bool = False, debug: bool = False): self.dry_run = dry_run self.debug = debug - self.session = requests.Session() - self.session.headers.update({ - 'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)' - }) + self.session = make_session(accept_json=False) if debug: logger.setLevel(logging.DEBUG) diff --git a/backend/scripts/diagnostics/fix_unknown_leaders.py b/backend/scripts/diagnostics/fix_unknown_leaders.py index 039951d5..c329376e 100755 --- a/backend/scripts/diagnostics/fix_unknown_leaders.py +++ b/backend/scripts/diagnostics/fix_unknown_leaders.py @@ -15,6 +15,7 @@ import requests from db_utils import get_db_connection +from core.http_client import make_session # Configure logging logging.basicConfig( @@ -41,11 +42,7 @@ def __init__(self, dry_run=False, request_delay=1.5): """ self.dry_run = dry_run self.request_delay = request_delay - self.session = requests.Session() - self.session.headers.update({ - 'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)', - 'Accept': 'application/json' - }) + self.session = make_session() self.stats = { 'recordings_checked': 0, 'recordings_no_leader': 0, diff --git a/backend/scripts/fetch_artist_images.py b/backend/scripts/fetch_artist_images.py index 1b2efe37..0d25f61a 100755 --- a/backend/scripts/fetch_artist_images.py +++ b/backend/scripts/fetch_artist_images.py @@ -48,6 +48,7 @@ # Import Wikipedia utilities with caching from integrations.wikipedia.utils import WikipediaSearcher +from core.http_client import make_session # Configure logging logging.basicConfig( @@ -83,11 +84,8 @@ def __init__(self, dry_run: bool = False, debug: bool = False, force_refresh: bo self.dry_run = dry_run self.debug = debug self.force_refresh = force_refresh - self.session = requests.Session() - self.session.headers.update({ - 'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)' - }) - + self.session = make_session(accept_json=False) + # Initialize Wikipedia searcher with caching self.wiki_searcher = WikipediaSearcher( cache_days=7, diff --git a/backend/scripts/fetch_loc_images.py b/backend/scripts/fetch_loc_images.py index 4d67b5e1..ad745f90 100755 --- a/backend/scripts/fetch_loc_images.py +++ b/backend/scripts/fetch_loc_images.py @@ -32,6 +32,7 @@ normalize_apostrophes, get_performer_images ) +from core.http_client import make_session # Configure logging logging.basicConfig( @@ -68,11 +69,7 @@ def __init__(self, dry_run: bool = False, debug: bool = False): """ self.dry_run = dry_run self.debug = debug - self.session = requests.Session() - self.session.headers.update({ - 'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)', - 'Accept': 'application/json' - }) + self.session = make_session() if debug: logger.setLevel(logging.DEBUG) diff --git a/backend/scripts/gather_performers_from_mb_for_release.py b/backend/scripts/gather_performers_from_mb_for_release.py index d1269760..d5544812 100755 --- a/backend/scripts/gather_performers_from_mb_for_release.py +++ b/backend/scripts/gather_performers_from_mb_for_release.py @@ -17,6 +17,7 @@ sys.path.insert(0, '/mnt/project/scripts') from db_utils import get_db_connection from integrations.musicbrainz.performer_importer import PerformerImporter +from core.http_client import make_session # Configure logging logging.basicConfig( @@ -32,11 +33,7 @@ class SingleRecordingImporter: def __init__(self, dry_run=False): self.dry_run = dry_run - self.session = requests.Session() - self.session.headers.update({ - 'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)', - 'Accept': 'application/json' - }) + self.session = make_session() self.performer_importer = PerformerImporter(dry_run=dry_run) self.stats = { 'recordings_created': 0, diff --git a/backend/scripts/jazzs_extract.py b/backend/scripts/jazzs_extract.py index 713c57c8..624d5231 100755 --- a/backend/scripts/jazzs_extract.py +++ b/backend/scripts/jazzs_extract.py @@ -32,6 +32,7 @@ pass # python-dotenv not installed, skip from db_utils import get_db_connection +from core.http_client import make_session # Configure logging logging.basicConfig( @@ -76,10 +77,7 @@ def __init__(self, dry_run: bool = False, force_refresh: bool = False, cache_day ITUNES_CACHE_DIR.mkdir(parents=True, exist_ok=True) # Setup session for HTTP requests - self.session = requests.Session() - self.session.headers.update({ - 'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)' - }) + self.session = make_session(accept_json=False) def get_cache_path(self, url: str) -> Path: """Generate cache file path for a URL""" diff --git a/backend/scripts/jazzs_match.py b/backend/scripts/jazzs_match.py index a60f3e8f..d3a072d8 100755 --- a/backend/scripts/jazzs_match.py +++ b/backend/scripts/jazzs_match.py @@ -31,6 +31,7 @@ pass # python-dotenv not installed, skip from db_utils import get_db_connection +from core.http_client import make_session # Configure logging logging.basicConfig( @@ -67,10 +68,7 @@ def __init__(self, dry_run: bool = False, force_refresh: bool = False, show_unma self.unmatched_songs = [] # Setup session for HTTP requests - self.session = requests.Session() - self.session.headers.update({ - 'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)' - }) + self.session = make_session(accept_json=False) def normalize_title(self, title: str, strip_parentheses: bool = False) -> str: """ diff --git a/backend/scripts/jazzs_match_authorityrecs.py b/backend/scripts/jazzs_match_authorityrecs.py index 0678500f..d51e8e0f 100755 --- a/backend/scripts/jazzs_match_authorityrecs.py +++ b/backend/scripts/jazzs_match_authorityrecs.py @@ -42,6 +42,7 @@ pass # python-dotenv not installed, skip from db_utils import get_db_connection +from core.http_client import make_session from rapidfuzz import fuzz # Ensure log directory exists BEFORE logging configuration @@ -91,10 +92,7 @@ def __init__(self, dry_run: bool = False, min_confidence: str = 'medium', } # Setup HTTP session for iTunes API calls - self.session = requests.Session() - self.session.headers.update({ - 'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)' - }) + self.session = make_session(accept_json=False) # Matching thresholds self.thresholds = { diff --git a/backend/scripts/onetime_scripts/one_time_song_wiki_intro.py b/backend/scripts/onetime_scripts/one_time_song_wiki_intro.py index c3aebac1..fde7c15c 100644 --- a/backend/scripts/onetime_scripts/one_time_song_wiki_intro.py +++ b/backend/scripts/onetime_scripts/one_time_song_wiki_intro.py @@ -32,6 +32,7 @@ pass from db_utils import get_db_connection # noqa: E402 +from core.http_client import make_session # noqa: E402 LOG_DIR = Path(__file__).resolve().parent.parent / 'log' LOG_DIR.mkdir(exist_ok=True) @@ -46,7 +47,6 @@ ) logger = logging.getLogger(__name__) -USER_AGENT = "ApproachNote/1.0 (+support@approachnote.com)" RATE_LIMIT_SECONDS = 1.0 @@ -165,8 +165,7 @@ def main(): ) logger.info("") - session = requests.Session() - session.headers.update({'User-Agent': USER_AGENT, 'Accept': 'application/json'}) + session = make_session() stats = {'processed': 0, 'updated': 0, 'no_intro': 0, 'bad_url': 0, 'errors': 0} last_request = 0.0 diff --git a/backend/scripts/save_review_thumbs.py b/backend/scripts/save_review_thumbs.py index 803cd4d0..bee5fcbe 100644 --- a/backend/scripts/save_review_thumbs.py +++ b/backend/scripts/save_review_thumbs.py @@ -13,11 +13,17 @@ import argparse import json import re +import sys from pathlib import Path import requests -UA = {"User-Agent": "ApproachNote/1.0 (+support@approachnote.com)"} +# This is an otherwise standalone utility; put backend/ on the path so the +# shared outbound User-Agent can be imported rather than re-hardcoded. +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) +from core.http_client import HTTP_USER_AGENT + +UA = {"User-Agent": HTTP_USER_AGENT} def slug(s: str, n: int = 40) -> str: