Skip to content

Commit 586235d

Browse files
committed
fix: correct module path in odds-kenpom workflow
Updated the command in the odds-kenpom workflow to use the correct module path for the odds_pipeline schema execution.
1 parent 4816c74 commit 586235d

3 files changed

Lines changed: 186 additions & 1 deletion

File tree

.github/workflows/odds-kenpom.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
env:
2222
DATABASE_URL: ${{ secrets.DATABASE_URL }}
2323
working-directory: odds
24-
run: uv run python -m odds_pipeline schema
24+
run: uv run python -m odds_pipeline.schema
2525

2626
- name: Collect KenPom ratings
2727
env:
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
from __future__ import annotations
2+
3+
import json
4+
from dataclasses import dataclass
5+
from datetime import date
6+
7+
from odds_pipeline.config import load_settings
8+
from odds_pipeline.db import connect
9+
from odds_pipeline.util import now_utc
10+
11+
12+
@dataclass(frozen=True)
class KenPomAuth:
    """Immutable pair of KenPom login credentials.

    The generated dataclass ``__repr__`` would print the password verbatim,
    so it is replaced with one that masks it; this keeps the secret out of
    logs and tracebacks that happen to format the object.
    """

    email: str
    password: str

    def __repr__(self) -> str:
        # dataclass() leaves an explicitly defined __repr__ untouched.
        return f"{type(self).__name__}(email={self.email!r}, password='***')"
16+
17+
18+
def _require_auth() -> KenPomAuth:
    """Read the KenPom credentials from the environment.

    Returns:
        A ``KenPomAuth`` built from ``KENPOM_EMAIL`` and ``KENPOM_PASSWORD``.

    Raises:
        RuntimeError: If either variable is unset or empty.
    """
    from os import environ

    credentials = {
        name: environ.get(name)
        for name in ("KENPOM_EMAIL", "KENPOM_PASSWORD")
    }
    # An empty string counts as missing, same as an unset variable.
    if not all(credentials.values()):
        raise RuntimeError("KENPOM_EMAIL and KENPOM_PASSWORD are required for kenpompy scraping")

    return KenPomAuth(
        email=credentials["KENPOM_EMAIL"],
        password=credentials["KENPOM_PASSWORD"],
    )
26+
27+
28+
def collect_kenpom_team_metrics(*, season: int, metric_type: str) -> int:
    """Scrape one KenPom table via kenpompy and store it in ``raw_kenpom_team_metrics``.

    One row is inserted per scraped record, keyed by the record's team column
    (``Team``, ``team`` or ``TEAM``) with the whole record serialized as JSON
    in ``raw``. If the scraped table has none of those columns, the entire
    table is stored as a single row under ``team='__all__'``. An empty scrape
    inserts nothing.

    Args:
        season: Season passed to kenpompy (as a string) and stored on each row.
        metric_type: One of ``pomeroy_ratings``, ``efficiency`` or
            ``four_factors``.

    Returns:
        The sum of ``cursor.rowcount`` over the executed inserts (0 when the
        scrape returned no records).

    Raises:
        RuntimeError: If the KenPom credentials are missing (``_require_auth``).
        ValueError: If ``metric_type`` is not one of the supported values.
    """
    settings = load_settings()
    auth = _require_auth()

    # Import locally so the kenpompy dependency stays optional outside this pipeline.
    from kenpompy.utils import login
    from kenpompy import misc, summary

    fetchers = {
        "pomeroy_ratings": misc.get_pomeroy_ratings,
        "efficiency": summary.get_efficiency,
        "four_factors": summary.get_fourfactors,
    }
    fetch = fetchers.get(metric_type)
    if fetch is None:
        # Reject bad input before attempting the KenPom login.
        raise ValueError(f"Unsupported metric_type: {metric_type}")

    browser = login(auth.email, auth.password)
    df = fetch(browser, season=str(season))

    collected_at = now_utc()

    records = df.to_dict(orient="records")
    if not records:
        # Nothing scraped: do not write an empty placeholder row.
        return 0

    # Best-effort: locate a likely team column from the first record.
    team_key = next(
        (candidate for candidate in ("Team", "team", "TEAM") if candidate in records[0]),
        None,
    )
    if team_key is None:
        # Fall back: store the whole table as a single blob under team='__all__'.
        # The label is set explicitly; deriving it from the blob's "__all__"
        # flag would yield the string "True".
        rows = [("__all__", {"__all__": True, "rows": records})]
    else:
        rows = [(str(r.get(team_key) or "__unknown__"), r) for r in records]

    # NOTE(review): json.dumps emits the bare token NaN for missing float
    # values; if `raw` is a json/jsonb column that token may be rejected.
    # Confirm against the schema.
    inserted = 0
    with connect(settings.database_url) as conn:
        with conn.cursor() as cur:
            for team, payload in rows:
                cur.execute(
                    """
                    INSERT INTO raw_kenpom_team_metrics (
                        season, team, metric_type, collected_at, raw
                    ) VALUES (
                        %(season)s, %(team)s, %(metric_type)s, %(collected_at)s, %(raw)s
                    )
                    ON CONFLICT DO NOTHING
                    """,
                    {
                        "season": int(season),
                        "team": team,
                        "metric_type": metric_type,
                        "collected_at": collected_at,
                        "raw": json.dumps(payload),
                    },
                )
                inserted += cur.rowcount
        conn.commit()

    return inserted
95+
96+
97+
def collect_kenpom_fanmatch(*, game_date: date) -> int:
    """Scrape KenPom FanMatch predictions for ``game_date`` and store them raw.

    Every FanMatch record is inserted into ``raw_kenpom_team_metrics`` with
    ``team='__fanmatch__'`` and ``metric_type='fanmatch'``; the record itself
    is serialized as JSON in ``raw``.

    Args:
        game_date: Date whose FanMatch page is requested (sent in ISO format).

    Returns:
        The sum of ``cursor.rowcount`` over the executed inserts, or 0 when
        kenpompy exposes no DataFrame (``fm_df`` is ``None``).

    Raises:
        RuntimeError: If the KenPom credentials are missing (``_require_auth``).
    """
    settings = load_settings()
    auth = _require_auth()

    from kenpompy.utils import login
    from kenpompy.FanMatch import FanMatch

    session = login(auth.email, auth.password)
    frame = FanMatch(session, date=game_date.isoformat()).fm_df
    if frame is None:
        return 0

    collected_at = now_utc()
    total = 0
    rows = frame.to_dict(orient="records")

    # Values shared by every inserted row; only `raw` varies per record.
    # NOTE(review): season is the calendar year of game_date - confirm this
    # matches how `season` is keyed for the other metric types.
    shared = {
        "season": int(game_date.year),
        "team": "__fanmatch__",
        "metric_type": "fanmatch",
        "collected_at": collected_at,
    }
    insert_sql = """
        INSERT INTO raw_kenpom_team_metrics (
            season, team, metric_type, collected_at, raw
        ) VALUES (
            %(season)s, %(team)s, %(metric_type)s, %(collected_at)s, %(raw)s
        )
        ON CONFLICT DO NOTHING
        """

    with connect(settings.database_url) as conn:
        with conn.cursor() as cur:
            for row in rows:
                # FanMatch rows may have matchup/team columns; they are kept
                # inside `raw` rather than mapped to the `team` column.
                cur.execute(insert_sql, {**shared, "raw": json.dumps(row)})
                total += cur.rowcount
        conn.commit()

    return total
142+

odds/src/odds_pipeline/config.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from __future__ import annotations
2+
3+
from datetime import timedelta
4+
from os import getenv
5+
6+
from pydantic import BaseModel, Field
7+
8+
9+
class Settings(BaseModel):
    # Pipeline configuration. Every field is populated through its upper-case
    # alias, which matches the environment variable name used by
    # load_settings(). (Kept as comments rather than a class docstring so the
    # model's generated schema description is unchanged.)

    database_url: str = Field(
        ...,
        description="Postgres connection string",
        alias="DATABASE_URL",
    )
    odds_api_key: str | None = Field(
        default=None,
        description="API key for collectors",
        alias="ODDS_API_KEY",
    )

    # Defaults for orchestration. Override via env vars if needed.
    window_days: int = Field(default=5, alias="WINDOW_DAYS")
    odds_stale_minutes: int = Field(default=180, alias="ODDS_STALE_MINUTES")
    scores_stale_hours: int = Field(default=24, alias="SCORES_STALE_HOURS")

    @property
    def window(self) -> timedelta:
        """``window_days`` expressed as a ``timedelta``."""
        days = int(self.window_days)
        return timedelta(days=days)

    @property
    def odds_stale_for(self) -> timedelta:
        """``odds_stale_minutes`` expressed as a ``timedelta``."""
        minutes = int(self.odds_stale_minutes)
        return timedelta(minutes=minutes)

    @property
    def scores_stale_for(self) -> timedelta:
        """``scores_stale_hours`` expressed as a ``timedelta``."""
        hours = int(self.scores_stale_hours)
        return timedelta(hours=hours)
29+
30+
31+
def load_settings() -> Settings:
    """Build ``Settings`` from the process environment.

    Reads ``DATABASE_URL``, ``ODDS_API_KEY``, ``WINDOW_DAYS``,
    ``ODDS_STALE_MINUTES`` and ``SCORES_STALE_HOURS``. Only variables set to a
    non-blank value are handed to the model, so unset or blank variables fall
    back to the field defaults, and a missing or blank ``DATABASE_URL`` is
    reported by model validation instead of slipping through as ``""``.

    Returns:
        The validated ``Settings`` instance.
    """
    names = (
        "DATABASE_URL",
        "ODDS_API_KEY",
        "WINDOW_DAYS",
        "ODDS_STALE_MINUTES",
        "SCORES_STALE_HOURS",
    )
    env = {}
    for name in names:
        value = getenv(name)
        # CI runners export an unset secret/variable as an empty string;
        # treat blank the same as unset so defaults apply and integer
        # fields are not asked to parse "".
        if value is None or not value.strip():
            continue
        env[name] = value
    return Settings.model_validate(env)
43+

0 commit comments

Comments
 (0)