Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions backend/core/commons_imagery.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@

from db_utils import get_db_connection
from core import image_quality as iq
from core.http_client import make_session

logger = logging.getLogger("commons_imagery")

USER_AGENT = "ApproachNote/1.0 (+support@approachnote.com)"
COMMONS_API = "https://commons.wikimedia.org/w/api.php"
WIKIDATA_API = "https://www.wikidata.org/w/api.php"
WIKIPEDIA_API = "https://en.wikipedia.org/w/api.php"
Expand Down Expand Up @@ -157,12 +157,7 @@ def db_fields(self) -> Dict[str, Any]:
# ---------------------------------------------------------------------------
# HTTP helpers
# ---------------------------------------------------------------------------

def make_session() -> requests.Session:
s = requests.Session()
s.headers.update({"User-Agent": USER_AGENT, "Accept": "application/json"})
return s

# Session construction lives in core.http_client.make_session (imported above).

def download(session: requests.Session, url: Optional[str],
max_bytes: int = 15_000_000, timeout: int = 30) -> Optional[bytes]:
Expand Down
6 changes: 2 additions & 4 deletions backend/integrations/apple_music/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import requests

from core.cache_utils import get_cache_dir
from core.http_client import make_session

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -97,10 +98,7 @@ def __init__(self, cache_days: int = 30, force_refresh: bool = False,
self.track_cache_dir.mkdir(parents=True, exist_ok=True)

# HTTP session for connection reuse
self.session = requests.Session()
self.session.headers.update({
'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)'
})
self.session = make_session(accept_json=False)

# Stats tracking
self.stats = {
Expand Down
7 changes: 2 additions & 5 deletions backend/integrations/coverart/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import requests

from core.cache_utils import get_cache_dir
from core.http_client import make_session

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -66,11 +67,7 @@ def __init__(self, cache_days: int = 30, force_refresh: bool = False):
cache_days: Number of days before cache is considered stale
force_refresh: If True, bypass cache and fetch fresh data
"""
self.session = requests.Session()
self.session.headers.update({
'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)',
'Accept': 'application/json'
})
self.session = make_session()

# Rate limiting (CAA has no limit, but be courteous)
self.last_request_time = 0
Expand Down
7 changes: 2 additions & 5 deletions backend/integrations/musicbrainz/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from pathlib import Path

from core.cache_utils import get_cache_dir
from core.http_client import make_session

logger = logging.getLogger(__name__)

Expand All @@ -48,11 +49,7 @@ def __init__(self, cache_days=30, force_refresh=False):
cache_days: Number of days before cache is considered stale
force_refresh: If True, always fetch fresh data ignoring cache
"""
self.session = requests.Session()
self.session.headers.update({
'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)',
'Accept': 'application/json'
})
self.session = make_session()

# Rate limiting
self.last_request_time = 0
Expand Down
7 changes: 2 additions & 5 deletions backend/integrations/wikipedia/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from pathlib import Path

from core.cache_utils import get_cache_dir
from core.http_client import make_session

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -51,11 +52,7 @@ def __init__(self, cache_days=7, force_refresh=False): # Remove cache_dir param
cache_days: Number of days before cache is considered stale
force_refresh: If True, always fetch fresh data ignoring cache
"""
self.session = requests.Session()
self.session.headers.update({
'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)',
'Accept': 'application/json'
})
self.session = make_session()

# Rate limiting
self.last_request_time = 0
Expand Down
6 changes: 2 additions & 4 deletions backend/integrations/youtube/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import requests

from core.cache_utils import get_cache_dir
from core.http_client import make_session

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -99,10 +100,7 @@ def __init__(
self.search_cache_dir.mkdir(parents=True, exist_ok=True)
self.videos_cache_dir.mkdir(parents=True, exist_ok=True)

self.session = requests.Session()
self.session.headers.update({
'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)',
})
self.session = make_session(accept_json=False)

# Stats — mirrors what the other matchers expose so a CLI can print
# a consistent summary. `quota_units` is the YouTube-specific one.
Expand Down
6 changes: 2 additions & 4 deletions backend/routes/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from pathlib import Path

from core.auth_utils import hash_password
from core.http_client import make_session
from db_utils import get_db_connection
from integrations.musicbrainz.release_importer import MBReleaseImporter
from integrations.musicbrainz.parsing import parse_release_data
Expand Down Expand Up @@ -1307,10 +1308,7 @@ def diagnose_mb_recording(song_id):
diagnosis['recommendation']['id'] = str(rec['id'])

# ===== CHECK 1: Fetch from MusicBrainz =====
mb_session = requests.Session()
mb_session.headers.update({
'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)'
})
mb_session = make_session(accept_json=False)

# Fetch recording with work-rels and releases
mb_response = mb_session.get(
Expand Down
6 changes: 2 additions & 4 deletions backend/scripts/add_wikipedia_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
find_performer_by_id,
get_performer_images
)
from core.http_client import make_session

# Configure logging
logging.basicConfig(
Expand All @@ -39,10 +40,7 @@ class WikipediaImageProcessor:
def __init__(self, dry_run: bool = False, debug: bool = False):
self.dry_run = dry_run
self.debug = debug
self.session = requests.Session()
self.session.headers.update({
'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)'
})
self.session = make_session(accept_json=False)

if debug:
logger.setLevel(logging.DEBUG)
Expand Down
7 changes: 2 additions & 5 deletions backend/scripts/diagnostics/fix_unknown_leaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import requests

from db_utils import get_db_connection
from core.http_client import make_session

# Configure logging
logging.basicConfig(
Expand All @@ -41,11 +42,7 @@ def __init__(self, dry_run=False, request_delay=1.5):
"""
self.dry_run = dry_run
self.request_delay = request_delay
self.session = requests.Session()
self.session.headers.update({
'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)',
'Accept': 'application/json'
})
self.session = make_session()
self.stats = {
'recordings_checked': 0,
'recordings_no_leader': 0,
Expand Down
8 changes: 3 additions & 5 deletions backend/scripts/fetch_artist_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@

# Import Wikipedia utilities with caching
from integrations.wikipedia.utils import WikipediaSearcher
from core.http_client import make_session

# Configure logging
logging.basicConfig(
Expand Down Expand Up @@ -83,11 +84,8 @@ def __init__(self, dry_run: bool = False, debug: bool = False, force_refresh: bo
self.dry_run = dry_run
self.debug = debug
self.force_refresh = force_refresh
self.session = requests.Session()
self.session.headers.update({
'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)'
})

self.session = make_session(accept_json=False)

# Initialize Wikipedia searcher with caching
self.wiki_searcher = WikipediaSearcher(
cache_days=7,
Expand Down
7 changes: 2 additions & 5 deletions backend/scripts/fetch_loc_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
normalize_apostrophes,
get_performer_images
)
from core.http_client import make_session

# Configure logging
logging.basicConfig(
Expand Down Expand Up @@ -68,11 +69,7 @@ def __init__(self, dry_run: bool = False, debug: bool = False):
"""
self.dry_run = dry_run
self.debug = debug
self.session = requests.Session()
self.session.headers.update({
'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)',
'Accept': 'application/json'
})
self.session = make_session()

if debug:
logger.setLevel(logging.DEBUG)
Expand Down
7 changes: 2 additions & 5 deletions backend/scripts/gather_performers_from_mb_for_release.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
sys.path.insert(0, '/mnt/project/scripts')
from db_utils import get_db_connection
from integrations.musicbrainz.performer_importer import PerformerImporter
from core.http_client import make_session

# Configure logging
logging.basicConfig(
Expand All @@ -32,11 +33,7 @@
class SingleRecordingImporter:
def __init__(self, dry_run=False):
self.dry_run = dry_run
self.session = requests.Session()
self.session.headers.update({
'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)',
'Accept': 'application/json'
})
self.session = make_session()
self.performer_importer = PerformerImporter(dry_run=dry_run)
self.stats = {
'recordings_created': 0,
Expand Down
6 changes: 2 additions & 4 deletions backend/scripts/jazzs_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
pass # python-dotenv not installed, skip

from db_utils import get_db_connection
from core.http_client import make_session

# Configure logging
logging.basicConfig(
Expand Down Expand Up @@ -76,10 +77,7 @@ def __init__(self, dry_run: bool = False, force_refresh: bool = False, cache_day
ITUNES_CACHE_DIR.mkdir(parents=True, exist_ok=True)

# Setup session for HTTP requests
self.session = requests.Session()
self.session.headers.update({
'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)'
})
self.session = make_session(accept_json=False)

def get_cache_path(self, url: str) -> Path:
"""Generate cache file path for a URL"""
Expand Down
6 changes: 2 additions & 4 deletions backend/scripts/jazzs_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
pass # python-dotenv not installed, skip

from db_utils import get_db_connection
from core.http_client import make_session

# Configure logging
logging.basicConfig(
Expand Down Expand Up @@ -67,10 +68,7 @@ def __init__(self, dry_run: bool = False, force_refresh: bool = False, show_unma
self.unmatched_songs = []

# Setup session for HTTP requests
self.session = requests.Session()
self.session.headers.update({
'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)'
})
self.session = make_session(accept_json=False)

def normalize_title(self, title: str, strip_parentheses: bool = False) -> str:
"""
Expand Down
6 changes: 2 additions & 4 deletions backend/scripts/jazzs_match_authorityrecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
pass # python-dotenv not installed, skip

from db_utils import get_db_connection
from core.http_client import make_session
from rapidfuzz import fuzz

# Ensure log directory exists BEFORE logging configuration
Expand Down Expand Up @@ -91,10 +92,7 @@ def __init__(self, dry_run: bool = False, min_confidence: str = 'medium',
}

# Setup HTTP session for iTunes API calls
self.session = requests.Session()
self.session.headers.update({
'User-Agent': 'ApproachNote/1.0 (+support@approachnote.com)'
})
self.session = make_session(accept_json=False)

# Matching thresholds
self.thresholds = {
Expand Down
5 changes: 2 additions & 3 deletions backend/scripts/onetime_scripts/one_time_song_wiki_intro.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
pass

from db_utils import get_db_connection # noqa: E402
from core.http_client import make_session # noqa: E402

LOG_DIR = Path(__file__).resolve().parent.parent / 'log'
LOG_DIR.mkdir(exist_ok=True)
Expand All @@ -46,7 +47,6 @@
)
logger = logging.getLogger(__name__)

USER_AGENT = "ApproachNote/1.0 (+support@approachnote.com)"
RATE_LIMIT_SECONDS = 1.0


Expand Down Expand Up @@ -165,8 +165,7 @@ def main():
)
logger.info("")

session = requests.Session()
session.headers.update({'User-Agent': USER_AGENT, 'Accept': 'application/json'})
session = make_session()

stats = {'processed': 0, 'updated': 0, 'no_intro': 0, 'bad_url': 0, 'errors': 0}
last_request = 0.0
Expand Down
8 changes: 7 additions & 1 deletion backend/scripts/save_review_thumbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,17 @@
import argparse
import json
import re
import sys
from pathlib import Path

import requests

UA = {"User-Agent": "ApproachNote/1.0 (+support@approachnote.com)"}
# This is an otherwise standalone utility; put backend/ on the path so the
# shared outbound User-Agent can be imported rather than re-hardcoded.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from core.http_client import HTTP_USER_AGENT

UA = {"User-Agent": HTTP_USER_AGENT}


def slug(s: str, n: int = 40) -> str:
Expand Down
Loading