From 6b3b3e00fb5af8f9e3241ebd0238b36511dae5f8 Mon Sep 17 00:00:00 2001 From: Siva Date: Sat, 4 Apr 2026 18:55:28 +0800 Subject: [PATCH 1/4] feat: add Claude Vision form filler + browser reliability improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two improvements to the AIHawk browser automation layer: 1. src/utils/visual_form_filler.py (new) A Claude Vision-powered utility that fills any web-based job application form using screenshots + Selenium. Useful for non-standard forms that can't be handled by DOM inspection alone. Flow: - Takes a screenshot of the current page - Sends it to Claude Sonnet with the candidate profile - Claude identifies visible fields and values to fill - Selenium fills each field using label/placeholder/aria strategies - Handles Next/Submit navigation, cookie banners, and CAPTCHA pauses - Pauses before final submit for human review Profile data (name, contact, salary, notice period, education, skills, work authorization) is read entirely from plain_text_resume.yaml — nothing is hardcoded. 2. 
src/utils/chrome_utils.py (improved) - Add user-agent rotation across 5 real Chrome UA strings - Add undetected-chromedriver support with graceful fallback to standard Selenium when the package is not installed - Patch navigator.webdriver via CDP to reduce automation detection - Suppress automation extension flags (excludeSwitches, useAutomationExtension) - Add _clear_profile_locks() to kill orphaned Chrome processes and remove stale SingletonLock files before init — fixes crashes on restart after unclean shutdown - Make persistent profile directory configurable via BROWSER_PROFILE_DIR env var (defaults to ~/.aihawk_browser_profile) - Remove --incognito flag (incompatible with persistent profiles) Co-Authored-By: Claude Sonnet 4.6 --- src/utils/chrome_utils.py | 101 ++++- src/utils/visual_form_filler.py | 726 ++++++++++++++++++++++++++++++++ 2 files changed, 818 insertions(+), 9 deletions(-) create mode 100644 src/utils/visual_form_filler.py diff --git a/src/utils/chrome_utils.py b/src/utils/chrome_utils.py index 639a29584..82542509f 100644 --- a/src/utils/chrome_utils.py +++ b/src/utils/chrome_utils.py @@ -1,12 +1,23 @@ import os +import random import time from selenium import webdriver from selenium.webdriver.chrome.service import Service as ChromeService from selenium.webdriver.chrome.options import Options -from webdriver_manager.chrome import ChromeDriverManager # Import webdriver_manager +from webdriver_manager.chrome import ChromeDriverManager import urllib from src.logging import logger +# Rotate through realistic user agents so each session looks different to LinkedIn +_USER_AGENTS = [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", + "Mozilla/5.0 
(Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", +] + + def chrome_browser_options(): logger.debug("Setting Chrome browser options") options = Options() @@ -15,7 +26,7 @@ def chrome_browser_options(): options.add_argument("--disable-dev-shm-usage") options.add_argument("--ignore-certificate-errors") options.add_argument("--disable-extensions") - options.add_argument("--disable-gpu") # Opzionale, utile in alcuni ambienti + options.add_argument("--disable-gpu") options.add_argument("window-size=1200x800") options.add_argument("--disable-background-timer-throttling") options.add_argument("--disable-backgrounding-occluded-windows") @@ -28,19 +39,91 @@ def chrome_browser_options(): options.add_argument("--disable-plugins") options.add_argument("--disable-animations") options.add_argument("--disable-cache") - options.add_argument("--incognito") - options.add_argument("--allow-file-access-from-files") # Consente l'accesso ai file locali - options.add_argument("--disable-web-security") # Disabilita la sicurezza web - logger.debug("Using Chrome in incognito mode") - + options.add_argument("--allow-file-access-from-files") + options.add_argument("--disable-web-security") + + # Use a persistent profile so the browser remembers the session across runs. + # Override by setting the BROWSER_PROFILE_DIR environment variable. 
+ import os + profile_dir = os.environ.get( + "BROWSER_PROFILE_DIR", + os.path.expanduser("~/.aihawk_browser_profile") + ) + options.add_argument(f"--user-data-dir={profile_dir}") + + ua = _USER_AGENTS[0] + options.add_argument(f"--user-agent={ua}") + logger.debug(f"Using user agent: {ua[:60]}...") + + # Suppress automation detection flags + options.add_experimental_option("excludeSwitches", ["enable-automation"]) + options.add_experimental_option("useAutomationExtension", False) + return options + +def _clear_profile_locks(): + """Kill any orphaned Chrome processes using the bot profile and remove stale lock files.""" + import subprocess + profile_dir = os.environ.get( + "BROWSER_PROFILE_DIR", + os.path.expanduser("~/.aihawk_browser_profile") + ) + # Kill any Chrome processes still using the bot profile (orphans from previous runs) + try: + result = subprocess.run( + ["pgrep", "-f", profile_dir], + capture_output=True, text=True + ) + pids = result.stdout.strip().split() + if pids: + subprocess.run(["kill", "-9"] + pids, capture_output=True) + import time as _t + _t.sleep(1) + logger.debug(f"Killed {len(pids)} orphaned Chrome process(es).") + except Exception: + pass + + # Remove stale lock files + for lock_file in ["SingletonLock", "SingletonCookie", "SingletonSocket", "lockfile"]: + path = os.path.join(profile_dir, lock_file) + try: + if os.path.exists(path) or os.path.islink(path): + os.remove(path) + logger.debug(f"Removed stale lock: {path}") + except Exception: + pass + + def init_browser() -> webdriver.Chrome: + _clear_profile_locks() + + # Try undetected-chromedriver first (much harder for LinkedIn to fingerprint) + try: + import undetected_chromedriver as uc + options = uc.ChromeOptions() + options.add_argument("--start-maximized") + options.add_argument("--no-sandbox") + options.add_argument("--disable-dev-shm-usage") + ua = random.choice(_USER_AGENTS) + options.add_argument(f"--user-agent={ua}") + driver = uc.Chrome(options=options) + 
logger.info("Browser initialized with undetected-chromedriver (stealth mode).") + return driver + except ImportError: + logger.info("undetected-chromedriver not installed — falling back to standard Selenium.") + except Exception as e: + logger.warning(f"undetected-chromedriver failed ({e}) — falling back to standard Selenium.") + + # Standard Selenium fallback try: options = chrome_browser_options() - # Use webdriver_manager to handle ChromeDriver driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options) - logger.debug("Chrome browser initialized successfully.") + # Patch navigator.webdriver to reduce detection + driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", { + "source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})" + }) + logger.debug("Chrome browser initialized (standard Selenium).") return driver except Exception as e: logger.error(f"Failed to initialize browser: {str(e)}") diff --git a/src/utils/visual_form_filler.py b/src/utils/visual_form_filler.py new file mode 100644 index 000000000..66f68cd21 --- /dev/null +++ b/src/utils/visual_form_filler.py @@ -0,0 +1,726 @@ +""" +VisualFormFiller — uses Claude Vision to fill arbitrary job application forms. + +Flow: + 1. Take a screenshot of the current page + 2. Send to Claude Sonnet with candidate profile + 3. Claude identifies visible fields + values to fill + 4. Selenium fills each field + 5. Click Next/Continue if present + 6. Repeat until done or human input needed + 7. 
Pause before final submit for human confirmation +""" +import base64 +import json +import os +import re +import subprocess +import time + +from anthropic import Anthropic +from selenium.webdriver.common.by import By +from selenium.webdriver.common.action_chains import ActionChains +from selenium.webdriver.common.keys import Keys +from selenium.webdriver.support.ui import Select, WebDriverWait +from selenium.webdriver.support import expected_conditions as EC + +from src.logging import logger + + +class VisualFormFiller: + """Fill any job application form using Claude Vision + Selenium.""" + + def __init__(self, driver, api_key: str, profile: dict, resume_pdf_path=None, + resume_data: dict = None): + self.driver = driver + self.client = Anthropic(api_key=api_key) + self.profile = profile + self.resume_data = resume_data or {} + self.resume_pdf_path = resume_pdf_path + self._profile_text = self._build_profile_text() + + # ------------------------------------------------------------------ # + # Voice notification + # ------------------------------------------------------------------ # + + def _speak(self, message: str): + """Speak a message aloud (non-blocking). 
Falls back to print if TTS unavailable.""" + import platform + print(f"\n[ATTENTION] {message}") + try: + system = platform.system() + if system == "Darwin": + subprocess.Popen(["say", message]) + elif system == "Linux": + subprocess.Popen(["espeak", message], stderr=subprocess.DEVNULL) + # Windows: no built-in CLI TTS; print fallback above is sufficient + except Exception: + pass # print fallback already shown above + + # ------------------------------------------------------------------ # + # Profile helpers + # ------------------------------------------------------------------ # + + def _build_profile_text(self) -> str: + """Format the candidate profile dict into plain text for Claude.""" + pi = self.profile if isinstance(self.profile, dict) else {} + rd = self.resume_data + + name = f"{pi.get('name', '')} {pi.get('surname', '')}".strip() + phone = f"{pi.get('phone_prefix', '')} {pi.get('phone', '')}".strip() + + # Notice period + notice_period = rd.get("availability", {}).get("notice_period", "") + + # Salary + salary = rd.get("salary_expectations", {}).get("salary_range_usd", "") + + # Current role — first entry in experience_details + exp_list = rd.get("experience_details", []) + current_role = "" + if exp_list: + e = exp_list[0] + pos = e.get("position", "") + comp = e.get("company", "") + current_role = f"{pos} at {comp}".strip(" at") if pos or comp else "" + + # Years of experience — count from experience_details + years_of_exp = len(exp_list) and f"{len(exp_list)}+ roles" or "" + + # Education — first entry in education_details + edu_list = rd.get("education_details", []) + education = "" + if edu_list: + e = edu_list[0] + level = e.get("education_level", "") + field = e.get("field_of_study", "") + inst = e.get("institution", "") + year = e.get("year_of_completion", "") + education = ", ".join(p for p in [f"{level} in {field}" if level or field else "", + inst, year] if p) + + # Skills — collect from all experience_details entries + all_skills = [] + for 
exp in exp_list: + all_skills.extend(exp.get("skills_acquired", [])) + skills = ", ".join(dict.fromkeys(all_skills)) # deduplicated, order-preserved + + # Work authorization summary + legal = rd.get("legal_authorization", {}) + auth_parts = [] + for region, key in [("EU", "eu_work_authorization"), ("US", "us_work_authorization"), + ("UK", "uk_work_authorization"), ("Canada", "canada_work_authorization")]: + if str(legal.get(key, "")).lower() == "yes": + auth_parts.append(region) + work_auth = f"Authorized to work in: {', '.join(auth_parts)}" if auth_parts else "" + + lines = [ + f"Full Name: {name}", + f"Email: {pi.get('email', '')}", + f"Phone: {phone}", + f"Location: {pi.get('city', '')}, {pi.get('country', '')}", + f"Address: {pi.get('address', '')}", + f"LinkedIn: {pi.get('linkedin', '')}", + f"GitHub: {pi.get('github', '')}", + f"Notice Period: {notice_period}", + f"Current Role: {current_role}", + f"Years of Experience: {years_of_exp}", + f"Desired Salary: {salary}", + f"Work Authorization: {work_auth}", + f"Education: {education}", + f"Skills: {skills}", + ] + return "\n".join(line for line in lines if line.split(": ", 1)[-1].strip()) + + # ------------------------------------------------------------------ # + # Auto-dismiss blocking modals + # ------------------------------------------------------------------ # + + def _dismiss_blocking_modals(self) -> bool: + """ + Auto-click through common blocking popups: cookie banners, confirmation + dialogs, navigation warnings, GDPR notices. Returns True if something was dismissed. 
+ """ + # --- Cookie / GDPR banners (common vendor selectors) --- + cookie_selectors = [ + "#onetrust-accept-btn-handler", + "#accept-cookies", + "#cookieConsent button", + "button[id*='accept'][id*='cookie']", + "button[class*='accept'][class*='cookie']", + "button[data-testid*='cookie']", + ".cookie-consent button", + ".cookie-banner button", + ".cc-btn.cc-allow", + ] + for sel in cookie_selectors: + try: + btn = self.driver.find_element(By.CSS_SELECTOR, sel) + if btn.is_displayed(): + btn.click() + logger.info(f"Auto-dismissed cookie banner via: {sel}") + time.sleep(1.5) + return True + except Exception: + continue + + # --- Generic dialog / modal buttons --- + # Ordered by preference: OK > Accept > Agree > Continue (before Reject/Close) + accept_keywords = ["ok", "accept all", "accept cookies", "accept", "agree", + "got it", "allow all", "i agree", "continue", "proceed"] + + # Try buttons inside known dialog containers first + dialog_containers = [ + "[role='dialog']", "[role='alertdialog']", + ".modal", ".popup", ".overlay", ".dialog", + ".confirmation", ".alert", "[aria-modal='true']", + ] + for container in dialog_containers: + try: + btns = self.driver.find_elements(By.CSS_SELECTOR, f"{container} button") + for kw in accept_keywords: + for btn in btns: + try: + if btn.is_displayed() and btn.text.strip().lower() == kw: + btn.click() + logger.info(f"Auto-dismissed modal via button text '{kw}'") + time.sleep(1.5) + return True + except Exception: + continue + except Exception: + continue + + # --- Fallback: any visible button with exact OK / Accept text --- + for kw in ["ok", "accept", "agree"]: + try: + btns = self.driver.find_elements(By.TAG_NAME, "button") + for btn in btns: + try: + if btn.is_displayed() and btn.text.strip().lower() == kw: + btn.click() + logger.info(f"Auto-dismissed popup via fallback button '{kw}'") + time.sleep(1.5) + return True + except Exception: + continue + except Exception: + continue + + return False + + # 
------------------------------------------------------------------ # + # Screenshot + # ------------------------------------------------------------------ # + + def _screenshot_b64(self) -> str: + """Take a full-page screenshot and return as base64 PNG.""" + png = self.driver.get_screenshot_as_png() + return base64.b64encode(png).decode() + + # ------------------------------------------------------------------ # + # Claude Vision analysis + # ------------------------------------------------------------------ # + + def _analyse_page(self, b64: str, job_title: str, company: str) -> dict: + """Send screenshot to Claude and get structured fill instructions.""" + prompt = f"""You are helping fill a job application form for the position of "{job_title}" at "{company}". + +Candidate profile: +{self._profile_text} + +Look at this screenshot carefully and identify every visible, fillable form field. + +Return ONLY valid JSON in this exact format — no markdown, no explanation: +{{ + "fields": [ + {{ + "description": "human-readable field name (e.g. 
First Name, Email, Phone Number)", + "value": "the value to fill based on the candidate profile above", + "field_type": "text|email|phone|textarea|select|file|checkbox|radio", + "label_text": "exact label text visible next to the field, or empty string", + "placeholder": "placeholder text inside the field if visible, or empty string", + "skip": false, + "reason": "why skip=true if applicable" + }} + ], + "page_status": "form|job_listing|success|error|captcha|already_applied|other", + "has_next_button": true, + "has_submit_button": false, + "needs_human": false, + "human_note": "" +}} + +Rules: +- For file upload fields (resume, CV, cover letter upload), set skip=true +- For CAPTCHA or verification, set needs_human=true with a clear human_note +- If page shows success/confirmation, set page_status="success" +- For checkboxes like "I agree to terms", set field_type="checkbox" and value="check" +- For select/dropdown, set value to the most appropriate visible option text +- Only include fields currently visible on screen +- If a field is already filled correctly, still include it with the correct value +- For textarea fields (cover letter, additional info), write a brief professional response""" + + try: + resp = self.client.messages.create( + model="claude-sonnet-4-6", + max_tokens=2000, + messages=[{ + "role": "user", + "content": [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": b64, + } + }, + {"type": "text", "text": prompt} + ] + }] + ) + text = resp.content[0].text.strip() + # Strip markdown code fences if present + text = re.sub(r'^```[a-z]*\n?', '', text) + text = re.sub(r'\n?```$', '', text) + return json.loads(text) + except json.JSONDecodeError as e: + logger.warning(f"Claude JSON parse error: {e}") + return { + "fields": [], "page_status": "other", + "has_next_button": False, "has_submit_button": False, + "needs_human": True, "human_note": "Could not parse Claude response" + } + except Exception as e: + 
logger.error(f"Claude Vision error: {e}") + return { + "fields": [], "page_status": "other", + "has_next_button": False, "has_submit_button": False, + "needs_human": True, "human_note": f"API error: {e}" + } + + # ------------------------------------------------------------------ # + # Field filling + # ------------------------------------------------------------------ # + + def _set_react_value(self, element, value: str): + """Set value on React/Angular controlled inputs that ignore send_keys.""" + self.driver.execute_script(""" + var el = arguments[0]; + var val = arguments[1]; + var nativeSetter = Object.getOwnPropertyDescriptor( + window.HTMLInputElement.prototype, 'value' + ).set; + nativeSetter.call(el, val); + el.dispatchEvent(new Event('input', { bubbles: true })); + el.dispatchEvent(new Event('change', { bubbles: true })); + """, element, value) + + def _find_input_by_label(self, label_text: str): + """Find input associated with a label containing the given text.""" + try: + # Find label containing the text + labels = self.driver.find_elements(By.TAG_NAME, "label") + for label in labels: + if label_text.lower() in label.text.lower(): + for_attr = label.get_attribute("for") + if for_attr: + try: + return self.driver.find_element(By.ID, for_attr) + except Exception: + pass + # Try sibling/child input + try: + return label.find_element(By.CSS_SELECTOR, "input, textarea, select") + except Exception: + pass + except Exception: + pass + return None + + def _find_input_by_placeholder(self, placeholder: str): + """Find input by placeholder attribute (partial match).""" + try: + for tag in ["input", "textarea"]: + els = self.driver.find_elements(By.TAG_NAME, tag) + for el in els: + ph = (el.get_attribute("placeholder") or "").lower() + if placeholder.lower() in ph: + return el + except Exception: + pass + return None + + def _find_input_by_type(self, field_type: str): + """Find first visible input of a given type.""" + type_map = {"email": "email", "phone": "tel", 
"text": "text"} + html_type = type_map.get(field_type) + if not html_type: + return None + try: + els = self.driver.find_elements(By.CSS_SELECTOR, f"input[type='{html_type}']") + for el in els: + if el.is_displayed() and el.is_enabled(): + return el + except Exception: + pass + return None + + def _find_input_by_aria(self, description: str): + """Find input by aria-label (partial match).""" + try: + for tag in ["input", "textarea", "select"]: + els = self.driver.find_elements(By.TAG_NAME, tag) + for el in els: + aria = (el.get_attribute("aria-label") or "").lower() + name = (el.get_attribute("name") or "").lower() + if description.lower() in aria or description.lower() in name: + return el + except Exception: + pass + return None + + def _fill_one_field(self, field: dict) -> bool: + """Try to fill a single field. Returns True if successful.""" + if field.get("skip"): + logger.debug(f"Skipping field: {field.get('description')} — {field.get('reason', '')}") + return True + + description = field.get("description", "") + value = field.get("value", "") + field_type = field.get("field_type", "text") + label_text = field.get("label_text", "") + placeholder = field.get("placeholder", "") + + if not value: + return True + + # Find the element using multiple strategies + element = None + if label_text: + element = self._find_input_by_label(label_text) + if not element and placeholder: + element = self._find_input_by_placeholder(placeholder) + if not element: + element = self._find_input_by_aria(description) + if not element and field_type in ("email", "phone"): + element = self._find_input_by_type(field_type) + + if not element: + logger.warning(f"Could not find element for: {description}") + return False + + try: + if not element.is_displayed() or not element.is_enabled(): + return False + + tag = element.tag_name.lower() + + if tag == "select" or field_type == "select": + sel = Select(element) + try: + sel.select_by_visible_text(value) + except Exception: + # Try partial 
match + for opt in sel.options: + if value.lower() in opt.text.lower(): + sel.select_by_visible_text(opt.text) + break + logger.debug(f"Selected '{value}' for: {description}") + return True + + if field_type == "checkbox": + if not element.is_selected(): + element.click() + logger.debug(f"Checked checkbox: {description}") + return True + + if field_type == "radio": + element.click() + logger.debug(f"Selected radio: {description}") + return True + + # Text / email / phone / textarea + self.driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", element) + time.sleep(0.3) + element.click() + element.clear() + element.send_keys(value) + + # Verify value was set — if not, try the React setter + actual = element.get_attribute("value") or "" + if actual.strip() != value.strip(): + self._set_react_value(element, value) + + logger.debug(f"Filled '{description}' = '{value[:40]}...' " if len(value) > 40 else f"Filled '{description}' = '{value}'") + return True + + except Exception as e: + logger.warning(f"Error filling '{description}': {e}") + return False + + def _fill_all_fields(self, fields: list) -> int: + """Fill all fields. Returns count of successfully filled fields.""" + filled = 0 + for field in fields: + if self._fill_one_field(field): + filled += 1 + time.sleep(0.4) # brief pause between fields + return filled + + # ------------------------------------------------------------------ # + # Navigation + # ------------------------------------------------------------------ # + + def _click_next_or_submit(self, has_next: bool, has_submit: bool) -> str: + """Click Next or Submit button. 
Returns 'next', 'submit', or 'none'.""" + next_texts = ["next", "continue", "proceed", "save and continue", "下一步", "继续"] + submit_texts = ["submit", "apply", "send application", "complete application", + "submit application", "提交"] + + # --- Strategy 1: LinkedIn Easy Apply modal CSS selectors --- + linkedin_sels = [ + "button[aria-label='Continue to next step']", + "button[aria-label='Submit application']", + "button[aria-label='Review your application']", + "footer button.artdeco-button--primary", + ".jobs-easy-apply-modal footer button", + ".artdeco-modal__actionbar button.artdeco-button--primary", + ".artdeco-modal footer button", + "div[data-test-modal] button.artdeco-button--primary", + ] + for sel in linkedin_sels: + try: + btn = self.driver.find_element(By.CSS_SELECTOR, sel) + if btn.is_enabled(): + self.driver.execute_script("arguments[0].click();", btn) + logger.info(f"Clicked modal button via CSS: {sel}") + return "next" if has_next else "submit" + except Exception: + continue + + # --- Strategy 2: JS scan all buttons by visible text --- + target_texts = next_texts if has_next else submit_texts + js_result = self.driver.execute_script(""" + var targets = arguments[0]; + var buttons = document.querySelectorAll('button, input[type="submit"]'); + for (var btn of buttons) { + var txt = (btn.innerText || btn.value || '').trim().toLowerCase(); + for (var t of targets) { + if (txt === t || txt.indexOf(t) !== -1) { + if (btn.offsetParent !== null || btn.offsetWidth > 0) { + btn.click(); + return btn.innerText || btn.value; + } + } + } + } + return null; + """, target_texts) + + if js_result: + logger.info(f"Clicked button via JS: '{js_result}'") + return "next" if has_next else "submit" + + # --- Strategy 3: Primary/highlighted button (last resort) --- + primary_sels = [ + "button.artdeco-button--primary", + "button[type='submit']", + "button.primary", + "button.btn-primary", + ] + for sel in primary_sels: + try: + btns = 
self.driver.find_elements(By.CSS_SELECTOR, sel) + for btn in btns: + if btn.is_enabled(): + txt = btn.text.strip().lower() + # Don't accidentally click Back/Cancel/Close + if any(skip in txt for skip in ["back", "cancel", "close", "discard", "dismiss"]): + continue + self.driver.execute_script("arguments[0].click();", btn) + logger.info(f"Clicked primary button '{btn.text.strip()}' via: {sel}") + return "next" if has_next else "submit" + except Exception: + continue + + return "none" + + def _click_apply_now(self) -> bool: + """Click 'Apply Now' / 'Apply' on an external company job listing page.""" + apply_texts = ["apply now", "apply for this job", "apply for job", + "start application", "begin application", "apply online"] + # Exact CSS selectors common on company career sites + apply_sels = [ + "a[class*='apply']", "button[class*='apply']", + "a[id*='apply']", "button[id*='apply']", + "a[href*='apply']", + ] + for sel in apply_sels: + try: + btn = self.driver.find_element(By.CSS_SELECTOR, sel) + if btn.is_displayed(): + self.driver.execute_script("arguments[0].click();", btn) + logger.info(f"Clicked Apply via CSS: {sel}") + time.sleep(3) + return True + except Exception: + continue + + # JS text scan + result = self.driver.execute_script(""" + var targets = arguments[0]; + var els = document.querySelectorAll('a, button'); + for (var el of els) { + var txt = (el.innerText || el.textContent || '').trim().toLowerCase(); + for (var t of targets) { + if (txt === t) { + el.click(); + return txt; + } + } + } + return null; + """, apply_texts) + + if result: + logger.info(f"Clicked Apply via JS text: '{result}'") + time.sleep(3) + return True + return False + + # ------------------------------------------------------------------ # + # Main loop + # ------------------------------------------------------------------ # + + def run(self, job_title: str = "the role", company: str = "the company", + max_iterations: int = 8) -> bool: + """ + Main vision loop. 
Returns True if application submitted, False otherwise. + Pauses before final submit for human confirmation. + """ + print(f"\n [VisualFiller] Starting vision-based form fill for {job_title} @ {company}") + print(f" Profile loaded: {self._profile_text.splitlines()[0]}") + + for iteration in range(1, max_iterations + 1): + print(f"\n [VisualFiller] Iteration {iteration}/{max_iterations} — checking for popups...") + time.sleep(2) # let page settle + + # Auto-dismiss any blocking modals before analysing the page + dismissed = self._dismiss_blocking_modals() + if dismissed: + print(f" [VisualFiller] Auto-dismissed a blocking popup — re-checking page...") + time.sleep(2) + + print(f" [VisualFiller] Taking screenshot...") + b64 = self._screenshot_b64() + print(f" [VisualFiller] Sending screenshot to Claude Vision...") + analysis = self._analyse_page(b64, job_title, company) + + status = analysis.get("page_status", "other") + logger.debug(f"Page status: {status}") + + if status == "success": + print(f" [VisualFiller] ✓ Application submitted successfully!") + return True + + if status == "already_applied": + print(f" [VisualFiller] Already applied to this role — skipping.") + return False + + # Auto-click "Apply Now" if we're on a job listing page (not a form yet) + if status == "job_listing": + print(f" [VisualFiller] Job listing page detected — clicking Apply Now...") + if self._click_apply_now(): + print(f" [VisualFiller] Clicked Apply Now — loading application form...") + time.sleep(3) + # Switch to new tab if one opened + handles = set(self.driver.window_handles) + if len(handles) > 1: + self.driver.switch_to.window(list(handles)[-1]) + time.sleep(2) + continue + else: + print(f" [VisualFiller] Could not find Apply Now button — please click it manually.") + try: + input(" Press Enter when done...") + except EOFError: + pass + continue + + if analysis.get("needs_human"): + note = analysis.get("human_note", "") + print(f"\n ⚠️ Human input needed: {note}") + print(" 
Please complete the action in the browser, then press Enter to continue...") + self._speak(f"Human input needed: {note}. Please check the browser and press Enter.") + try: + input() + except (KeyboardInterrupt, EOFError): + print(" (No interactive input — continuing automatically after 10s...)") + time.sleep(10) + continue + + fields = analysis.get("fields", []) + has_next = analysis.get("has_next_button", False) + has_submit = analysis.get("has_submit_button", False) + + if not fields and not has_next and not has_submit: + print(f" [VisualFiller] No fields or buttons found — may need human review.") + print(" Press Enter to continue or Ctrl+C to abort...") + self._speak("No form fields or buttons detected. Please check the browser and press Enter.") + try: + input() + except (KeyboardInterrupt, EOFError): + print(" (Continuing automatically after 10s...)") + time.sleep(10) + continue + + # Fill visible fields + if fields: + filled = self._fill_all_fields(fields) + print(f" [VisualFiller] Filled {filled}/{len(fields)} fields.") + time.sleep(1) + + # If submit button — pause for human confirmation before clicking + if has_submit and not has_next: + print(f"\n [VisualFiller] Ready to submit application for {job_title} @ {company}.") + print(" Review the filled form in the browser window.") + print(" Press Enter to SUBMIT, or Ctrl+C to abort...") + self._speak(f"Application for {job_title} at {company} is ready to submit. 
Please review the form and press Enter to submit.") + try: + input() + except (KeyboardInterrupt, EOFError): + print(" (No interactive input — submission skipped)") + return False + result = self._click_next_or_submit(False, True) + if result == "submit": + print(f" [VisualFiller] ✓ Submitted!") + time.sleep(3) + return True + else: + print(" [VisualFiller] Could not find submit button — please submit manually.") + input(" Press Enter when done...") + return True + + # Click Next/Continue + if has_next or has_submit: + result = self._click_next_or_submit(has_next, has_submit) + if result == "none": + print(" [VisualFiller] Could not find Next button — please click it manually.") + self._speak("Could not find the Next button. Please click it manually and press Enter.") + try: + input(" Press Enter when done...") + except (KeyboardInterrupt, EOFError): + time.sleep(5) + else: + print(f" [VisualFiller] Clicked '{result}' — loading next page...") + time.sleep(3) + + print(f" [VisualFiller] Max iterations reached. Please complete form manually.") + self._speak("Maximum iterations reached. Please complete the form manually and press Enter.") + try: + input(" Press Enter when done (or Ctrl+C to skip)...") + except (KeyboardInterrupt, EOFError): + pass + return False From 989d54ebd383fc62635fb7923182cd243b98d9d2 Mon Sep 17 00:00:00 2001 From: Siva Date: Sat, 4 Apr 2026 19:05:26 +0800 Subject: [PATCH 2/4] feat: add JD Resume Matcher and resume PDF reviewer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two new modules to the AIHawk library layer: ### src/libs/jd_resume_matcher/jd_matcher.py A Claude-based alternative to the existing OpenAI resume tailoring flow. Instead of regenerating sections from YAML, it works from pre-written HTML resume variants and selects + tailors the best one for each JD. Flow: 1. 
Haiku reads stripped text of all resume variants + JD, picks the best base and does a gap analysis (cheap — ~1k tokens) 2. Sonnet receives full HTML of the selected resume + stripped text of others as bullet reference, outputs a complete tailored HTML resume 3. HTML is converted to PDF via headless Chrome 4. review_resume_pdf() auto-checks formatting before saving Key design choices vs the existing approach: - 5x fewer input tokens (stripped HTML vs full HTML for selection step) - Multi-resume selection — picks best positioning from a pool of variants - Config-driven: all paths and candidate name in jd_matcher_config.yaml (gitignored), example at data_folder_example/jd_matcher_config.yaml - Exposes tailor_resume_for_jd() for programmatic use and run_jd_match() for interactive CLI use ### src/utils/resume_reviewer.py Scans a generated resume PDF for common formatting issues: - Text overflow / clipping - Blank pages - Font rendering problems Returns structured review notes and optionally triggers auto-fix. ### data_folder_example/jd_matcher_config.yaml Setup template for jd_matcher — paths, candidate name, and resume filename map. Copy to data_folder/jd_matcher_config.yaml to use. 
Co-Authored-By: Claude Sonnet 4.6 --- data_folder_example/jd_matcher_config.yaml | 23 + src/libs/jd_resume_matcher/__init__.py | 0 src/libs/jd_resume_matcher/jd_matcher.py | 488 +++++++++++++++++++++ src/utils/resume_reviewer.py | 179 ++++++++ 4 files changed, 690 insertions(+) create mode 100644 data_folder_example/jd_matcher_config.yaml create mode 100644 src/libs/jd_resume_matcher/__init__.py create mode 100644 src/libs/jd_resume_matcher/jd_matcher.py create mode 100644 src/utils/resume_reviewer.py diff --git a/data_folder_example/jd_matcher_config.yaml b/data_folder_example/jd_matcher_config.yaml new file mode 100644 index 000000000..c9be45dce --- /dev/null +++ b/data_folder_example/jd_matcher_config.yaml @@ -0,0 +1,23 @@ +# JD Resume Matcher configuration +# Copy this file to data_folder/jd_matcher_config.yaml and fill in your paths. +# data_folder/jd_matcher_config.yaml is gitignored — safe to put real paths here. + +# Directory containing your base HTML resume files +resumes_dir: "/path/to/your/resumes" + +# Directory where tailored resumes will be saved (one sub-folder per company) +output_base_dir: "/path/to/output/JDSpecificResumes" + +# Path to the Chrome binary used for headless PDF conversion +chrome_bin: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" + +# Your name — used as the prefix for output filenames (e.g. Jane_Smith_Resume.html) +candidate_name: "Your Name" + +# Map of resume variant keys to filenames inside resumes_dir. +# Keys must match the labels used in RESUME_DESCRIPTIONS in jd_matcher.py. 
+resume_files: + AIDataEngineer: "YourName_Resume_AIDataEngineer.html" + DataandMLEngineer: "YourName_Resume_DataandMLEngineer.html" + LeadDataEngineer: "YourName_Resume_LeadDataEngineer.html" + SeniorDataEngineer: "YourName_Resume_SeniorDataEngineer.html" diff --git a/src/libs/jd_resume_matcher/__init__.py b/src/libs/jd_resume_matcher/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/libs/jd_resume_matcher/jd_matcher.py b/src/libs/jd_resume_matcher/jd_matcher.py new file mode 100644 index 000000000..d02d76182 --- /dev/null +++ b/src/libs/jd_resume_matcher/jd_matcher.py @@ -0,0 +1,488 @@ +""" +JD Resume Matcher — picks the best-matching resume for a given JD, +tailors it using Claude, and outputs HTML + PDF. + +Configuration is read from data_folder/jd_matcher_config.yaml. +Copy data_folder_example/jd_matcher_config.yaml as a starting point. + +Exposes two entry points: + - run_jd_match(api_key) → interactive CLI flow + - tailor_resume_for_jd(...) → programmatic, no prompts (used by the auto-apply bot) +""" +import subprocess +from pathlib import Path +from typing import Optional +from anthropic import Anthropic +from src.logging import logger +from src.utils.resume_reviewer import review_resume_pdf + +_CONFIG_PATH = Path("data_folder/jd_matcher_config.yaml") +_config_cache: dict = {} + +RESUME_DESCRIPTIONS = { + "AIDataEngineer": "Best for roles emphasising LLM pipelines, GenAI, AI infrastructure, MLOps with AI focus", + "DataandMLEngineer": "Best for roles requiring both ML lifecycle (training, evaluation, productionization) and data engineering", + "LeadDataEngineer": "Best for roles requiring technical ownership, cross-functional leadership, architecture authority, staff/lead level", + "SeniorDataEngineer": "Best for roles focused on Spark, pipelines, data platforms, cloud migration, streaming", +} + + +def _load_config() -> dict: + """Load jd_matcher_config.yaml once and cache it.""" + global _config_cache + if not _config_cache: + 
import yaml + if not _CONFIG_PATH.exists(): + raise FileNotFoundError( + f"JD Matcher config not found at {_CONFIG_PATH}. " + f"Copy data_folder_example/jd_matcher_config.yaml to {_CONFIG_PATH} and fill in your paths." + ) + with open(_CONFIG_PATH, "r") as f: + _config_cache = yaml.safe_load(f) or {} + return _config_cache + + +def _get_resumes_dir() -> Path: + return Path(_load_config()["resumes_dir"]) + + +def _get_output_base_dir() -> Path: + return Path(_load_config()["output_base_dir"]) + + +def _get_chrome_bin() -> str: + return _load_config().get("chrome_bin", "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome") + + +def _get_candidate_name() -> str: + return _load_config().get("candidate_name", "Candidate") + + +def _get_resume_files() -> dict: + cfg = _load_config() + resumes_dir = Path(cfg["resumes_dir"]) + return {key: resumes_dir / filename for key, filename in cfg["resume_files"].items()} + + +def _output_stem() -> str: + """Filename-safe candidate name for output files, e.g. 
'Jane_Smith'.""" + return _get_candidate_name().replace(" ", "_") + + +def _strip_html(html: str) -> str: + """Strip HTML tags and collapse whitespace — turns ~8k token HTML into ~1.5k token text.""" + import re + text = re.sub(r'<script[^>]*>.*?</script>', '', html, flags=re.DOTALL | re.IGNORECASE) + text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=re.DOTALL | re.IGNORECASE) + text = re.sub(r'<[^>]+>', ' ', text) + text = re.sub(r'\s+', ' ', text).strip() + return text + + +_RESUME_CACHE: dict = {}  # loaded once per process, reused for every job + + +def _load_all_resumes() -> dict: + """Load all resumes from disk — cached after the first call.""" + global _RESUME_CACHE + if not _RESUME_CACHE: + for key, path in _get_resume_files().items(): + with open(path, "r", encoding="utf-8") as f: + _RESUME_CACHE[key] = f.read() + logger.info("Resumes loaded into cache (will not re-read from disk this session)") + return _RESUME_CACHE + + +def _html_to_pdf(html_path: Path, pdf_path: Path) -> bool: + cmd = [ + _get_chrome_bin(), "--headless", "--disable-gpu", "--no-sandbox", + f"--print-to-pdf={pdf_path}", "--print-to-pdf-no-header", + f"file://{html_path}", + ] + subprocess.run(cmd, capture_output=True, timeout=30) + return pdf_path.exists() + + +def _fetch_jd_from_url(url: str) -> str: + """Fetch JD text from a URL using headless Chrome.""" + from selenium import webdriver + from selenium.webdriver.chrome.service import Service as ChromeService + from selenium.webdriver.chrome.options import Options + from webdriver_manager.chrome import ChromeDriverManager + import time + + options = Options() + options.add_argument("--headless") + options.add_argument("--no-sandbox") + options.add_argument("--disable-dev-shm-usage") + options.add_argument("--disable-gpu") + driver = webdriver.Chrome( + service=ChromeService(ChromeDriverManager().install()), options=options + ) + try: + driver.get(url) + time.sleep(3) + return driver.find_element("tag name", "body").text[:8000] + finally: + driver.quit() + + +def 
_analyse_jd(client: Anthropic, jd_text: str, resumes: dict) -> tuple[str, str, str, str]: + """ + Ask Claude to pick the best resume and do a gap analysis. + Returns: (selected_key, company_name, coverage, analysis_text) + Uses Haiku + stripped text (not full HTML) to minimise token cost. + """ + candidate_name = _get_candidate_name() + resume_files = _get_resume_files() + + resume_summary = "\n\n".join([ + f"=== RESUME: {key} ===\n{RESUME_DESCRIPTIONS[key]}\n{_strip_html(html)[:2000]}" + for key, html in resumes.items() + ]) + + prompt = f"""You are an expert resume consultant. Select the best base resume for this job and do a gap analysis. + +Candidate: {candidate_name} +{len(resumes)} resume variants (same candidate, different positioning): + +{resume_summary} + +JOB DESCRIPTION: +{jd_text[:3000]} + +Tasks: +1. Select which resume is the best base ({', '.join(resume_files.keys())}) +2. List top 5 matching bullets +3. List bullets from other resumes to pull in +4. List skills gaps +5. 
List specific changes needed + +End your response with EXACTLY these 3 lines: +SELECTED_RESUME: <{"|".join(resume_files.keys())}> +COMPANY: +COVERAGE: <0-100%>""" + + response = client.messages.create( + model="claude-haiku-4-5-20251001", # Haiku: 20x cheaper than Sonnet for selection + max_tokens=1000, + messages=[{"role": "user", "content": prompt}], + ) + analysis_text = response.content[0].text + + selected_key = next(iter(resume_files)) + company_name = "Unknown_Company" + coverage = "N/A" + for line in analysis_text.splitlines(): + line = line.strip() + if line.startswith("SELECTED_RESUME:"): + val = line.replace("SELECTED_RESUME:", "").strip() + if val in resume_files: + selected_key = val + elif line.startswith("COMPANY:"): + raw = line.replace("COMPANY:", "").strip() + company_name = raw.replace(" ", "_").replace("/", "_") or "Unknown_Company" + elif line.startswith("COVERAGE:"): + coverage = line.replace("COVERAGE:", "").strip() + + return selected_key, company_name, coverage, analysis_text + + +def _generate_tailored_html(client: Anthropic, jd_text: str, resumes: dict, + selected_key: str, analysis_text: str) -> str: + """ + Ask Claude Sonnet to generate the tailored HTML resume. + Sends full HTML only for the selected base resume. + Other resumes sent as stripped text (reference for bullet pulling) to save tokens. + """ + candidate_name = _get_candidate_name() + + other_resumes_text = "\n\n".join([ + f"--- {key} (stripped text, for bullet reference) ---\n{_strip_html(html)[:1500]}" + for key, html in resumes.items() if key != selected_key + ]) + + prompt = f"""You are an expert resume writer. Create a tailored HTML resume for {candidate_name}. + +BASE RESUME — keep its exact HTML/CSS structure: +{resumes[selected_key]} + +OTHER RESUMES (text only — pull better bullets from these where they match the JD): +{other_resumes_text} + +JOB DESCRIPTION: +{jd_text[:3000]} + +CHANGES TO MAKE: +{analysis_text[:1500]} + +INSTRUCTIONS: +1. 
Keep the base resume's exact HTML and CSS structure +2. Update the summary to mirror the JD's language and priorities +3. Reorder/rewrite bullets to front-load JD-relevant experience +4. Pull stronger bullets from other resumes where they better match the JD +5. Add missing skills/keywords from the JD to the skills section +6. Remove or deprioritise bullets irrelevant to this role +7. Do NOT use the JD's exact role title in the header +8. Do NOT fabricate any experience, metrics, or skills not in the existing resumes +9. Return ONLY the complete HTML — no explanation, no markdown fences""" + + response = client.messages.create( + model="claude-sonnet-4-6", + max_tokens=5000, + messages=[{"role": "user", "content": prompt}], + ) + html = response.content[0].text.strip() + + if html.startswith("```"): + lines = html.split("\n") + html = "\n".join(lines[1:-1] if lines[-1].strip() == "```" else lines[1:]) + return html + + +def _build_cover_letter_profile(resume_data: dict) -> str: + """Build a profile summary string for cover letter prompts from plain_text_resume.yaml data.""" + pi = resume_data.get("personal_information", {}) + name = f"{pi.get('name', '')} {pi.get('surname', '')}".strip() + city = pi.get("city", "") + country = pi.get("country", "") + location = ", ".join(p for p in [city, country] if p) + + exp_list = resume_data.get("experience_details", []) + current_role = "" + if exp_list: + e = exp_list[0] + pos = e.get("position", "") + comp = e.get("company", "") + period = e.get("employment_period", "") + current_role = f"{pos} at {comp} ({period})".strip() + + all_skills = [] + for exp in exp_list: + all_skills.extend(exp.get("skills_acquired", [])) + skills = ", ".join(dict.fromkeys(all_skills)) + + edu_list = resume_data.get("education_details", []) + education = "" + if edu_list: + e = edu_list[0] + education = f"{e.get('education_level', '')} in {e.get('field_of_study', '')}, {e.get('institution', '')}".strip(", ") + + legal = 
resume_data.get("legal_authorization", {}) + auth_parts = [] + for region, key in [("EU", "eu_work_authorization"), ("US", "us_work_authorization"), + ("UK", "uk_work_authorization"), ("Canada", "canada_work_authorization")]: + if str(legal.get(key, "")).lower() == "yes": + auth_parts.append(region) + visa = f"Authorized to work in: {', '.join(auth_parts)}" if auth_parts else "" + + lines = [ + f"Candidate: {name}", + f"Location: {location}", + f"Current role: {current_role}", + f"Skills: {skills}", + f"Education: {education}", + f"Work authorization: {visa}", + ] + return "\n".join(line for line in lines if line.split(": ", 1)[-1].strip()) + + +def tailor_resume_for_jd(api_key: str, jd_text: str, company_name: str = None) -> Optional[Path]: + """ + Programmatic entry point — no user prompts. + Picks the best resume, tailors it for the JD, saves HTML + PDF. + Returns the Path to the generated PDF, or None on failure. + """ + client = Anthropic(api_key=api_key) + stem = _output_stem() + + try: + resumes = _load_all_resumes() + except Exception as e: + logger.error(f"Failed to load resumes: {e}") + return None + + try: + selected_key, detected_company, coverage, analysis_text = _analyse_jd(client, jd_text, resumes) + except Exception as e: + logger.error(f"JD analysis failed: {e}") + return None + + folder_name = (company_name or detected_company).replace(" ", "_").replace("/", "_") or "Unknown_Company" + logger.info(f"Selected resume: {selected_key} | Company: {folder_name} | Coverage: {coverage}") + + try: + tailored_html = _generate_tailored_html(client, jd_text, resumes, selected_key, analysis_text) + except Exception as e: + logger.error(f"Resume generation failed: {e}") + return None + + output_dir = _get_output_base_dir() / folder_name + output_dir.mkdir(parents=True, exist_ok=True) + + html_path = output_dir / f"{stem}_Resume.html" + pdf_path = output_dir / f"{stem}_Resume.pdf" + + with open(html_path, "w", encoding="utf-8") as f: + f.write(tailored_html) + + 
if _html_to_pdf(html_path, pdf_path): + logger.info(f"Tailored resume saved: {pdf_path}") + review_resume_pdf(pdf_path, html_path=html_path, auto_fix=True) + notes_path = output_dir / "review_notes.md" + with open(notes_path, "w", encoding="utf-8") as f: + f.write(f"# Resume Tailoring — {folder_name}\n\n" + f"**Base resume:** {selected_key}\n" + f"**Coverage:** {coverage}\n\n" + f"## Analysis\n{analysis_text}\n") + return pdf_path + else: + logger.warning("PDF conversion failed — returning HTML path instead") + return html_path + + +def generate_cover_letter_for_jd(api_key: str, jd_text: str, company_name: str, + job_title: str, output_dir: Path = None, + resume_data: dict = None) -> Optional[str]: + """ + Generate a personalized cover letter for the given JD. + Returns the cover letter text (suitable for pasting into a form textarea). + Optionally saves to output_dir/cover_letter.txt. + + resume_data: full dict from plain_text_resume.yaml — used to build the candidate profile. + If omitted, falls back to data_folder/plain_text_resume.yaml. + """ + client = Anthropic(api_key=api_key) + candidate_name = _get_candidate_name() + + if resume_data is None: + import yaml + resume_yaml = Path("data_folder/plain_text_resume.yaml") + if resume_yaml.exists(): + with open(resume_yaml, "r") as f: + resume_data = yaml.safe_load(f) or {} + else: + resume_data = {} + + profile_summary = _build_cover_letter_profile(resume_data) + + prompt = f"""Write a professional, specific cover letter for {candidate_name} applying to this role. 
+ +CANDIDATE PROFILE: +{profile_summary} + +JOB DESCRIPTION: +{jd_text[:3000]} + +COMPANY: {company_name} +ROLE: {job_title} + +RULES: +- 3 short paragraphs, 200-250 words total +- Paragraph 1: Reference the specific role and ONE specific detail about the company or JD — no generic opener +- Paragraph 2: Connect 2-3 of the candidate's most relevant achievements directly to what the JD asks for — use numbers and be specific +- Paragraph 3: Brief closing — genuine interest, invite a conversation +- Tone: direct and confident — NOT corporate boilerplate +- Do NOT start with "I am writing to apply for..." +- Do NOT use "passionate", "dynamic", "synergy", or similar buzzwords +- Return only the letter body — no subject line, no greeting, no sign-off""" + + try: + response = client.messages.create( + model="claude-haiku-4-5-20251001", + max_tokens=600, + messages=[{"role": "user", "content": prompt}], + ) + text = response.content[0].text.strip() + + if output_dir: + output_dir.mkdir(parents=True, exist_ok=True) + cl_path = output_dir / "cover_letter.txt" + with open(cl_path, "w", encoding="utf-8") as f: + f.write(text) + logger.info(f"Cover letter saved: {cl_path}") + + return text + except Exception as e: + logger.error(f"Cover letter generation failed: {e}") + return None + + +def run_jd_match(api_key: str): + """Interactive CLI entry point — asks for URL, shows analysis, confirms before generating.""" + client = Anthropic(api_key=api_key) + stem = _output_stem() + + print("\n" + "=" * 60) + print(" JD Resume Matcher — Powered by Claude") + print("=" * 60) + print("\nEnter the job URL (LinkedIn, Workday, Greenhouse, etc.):\n") + + url = input("Job URL: ").strip() + if not url or not url.startswith("http"): + print("Invalid URL. Exiting.") + return + + print(f"\nFetching JD from: {url}") + jd_text = _fetch_jd_from_url(url) + if not jd_text.strip(): + print("Could not fetch JD content. 
Exiting.") + return + print(f"JD fetched ({len(jd_text)} chars).") + + print("\nReading all resumes...") + try: + resumes = _load_all_resumes() + except Exception as e: + print(f"ERROR reading resumes: {e}") + return + + print("Analysing JD and selecting best resume...\n") + try: + selected_key, company_name, coverage, analysis_text = _analyse_jd(client, jd_text, resumes) + except Exception as e: + print(f"ERROR calling Claude API: {e}") + return + + print(analysis_text) + print("\n" + "=" * 60) + print(f"Selected resume: {selected_key}") + print(f"Company: {company_name}") + print(f"Coverage: {coverage}") + + confirm = input("\nShould I go ahead and create the tailored resume? (yes/no): ").strip().lower() + if confirm not in ("yes", "y"): + print("Cancelled.") + return + + print("\nGenerating tailored resume...") + try: + tailored_html = _generate_tailored_html(client, jd_text, resumes, selected_key, analysis_text) + except Exception as e: + print(f"ERROR generating resume: {e}") + return + + output_dir = _get_output_base_dir() / company_name + output_dir.mkdir(parents=True, exist_ok=True) + html_path = output_dir / f"{stem}_Resume.html" + pdf_path = output_dir / f"{stem}_Resume.pdf" + + with open(html_path, "w", encoding="utf-8") as f: + f.write(tailored_html) + print(f"HTML saved: {html_path}") + + print("Converting to PDF...") + if _html_to_pdf(html_path, pdf_path): + print(f"PDF saved: {pdf_path} ({pdf_path.stat().st_size // 1024} KB)") + review_resume_pdf(pdf_path, html_path=html_path, auto_fix=True) + else: + print("PDF conversion failed. Open the HTML in Chrome and print manually.") + + notes_path = output_dir / "review_notes.md" + with open(notes_path, "w", encoding="utf-8") as f: + f.write(f"# Resume Tailoring — {company_name}\n\n" + f"**Base resume:** {selected_key}\n" + f"**Coverage:** {coverage}\n\n" + f"## Analysis\n{analysis_text}\n") + print(f"Review notes: {notes_path}") + print(f"\nDone! 
Files saved to: {output_dir}") diff --git a/src/utils/resume_reviewer.py b/src/utils/resume_reviewer.py new file mode 100644 index 000000000..d6c0a144d --- /dev/null +++ b/src/utils/resume_reviewer.py @@ -0,0 +1,179 @@ +""" +Resume PDF Reviewer — checks generated PDFs for common formatting issues. + +Issues detected: + 1. Page overflow — resume spills beyond 1 page + 2. Print headers — Chrome date/time/path headers leaked into the PDF + 3. Print footers — file:// URL footer leaked into the PDF + +Auto-fix: if issues found, tightens CSS margins in the HTML and regenerates. +""" +import re +import subprocess +from pathlib import Path + +from src.logging import logger + +CHROME_BIN = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" + +# Patterns that indicate Chrome print headers/footers leaked in +_HEADER_PATTERNS = [ + r"\d{1,2}/\d{1,2}/\d{2,4},?\s+\d{1,2}:\d{2}", # date like "4/3/26, 4:15 PM" + r"file:///", # file:// footer + r"https?://\S+\s+\d+/\d+", # URL + page number +] + +_TIGHTER_PRINT_CSS = """\ + @media print { + body { padding: 10px 25px; font-size: 10.5px; } + @page { margin: 8mm; size: A4; } + section { margin-bottom: 10px; } + .job { margin-bottom: 8px; } + ul li { margin-bottom: 2px; } + }""" + +_OLD_PRINT_CSS_PATTERN = re.compile( + r"@media print\s*\{[^}]*body\s*\{[^}]*padding[^}]*\}[^}]*@page[^}]*\}[^}]*\}", + re.DOTALL, +) + + +def _extract_text_sample(pdf_path: Path) -> tuple[int, str]: + """Return (page_count, sample_text_from_page_boundaries).""" + try: + import pdfplumber + with pdfplumber.open(pdf_path) as pdf: + pages = len(pdf.pages) + # Sample text from top of page 1, bottom of page 1, top of page 2 (if exists) + samples = [] + for i, page in enumerate(pdf.pages[:2]): + text = page.extract_text() or "" + if i == 0: + # bottom of page 1 (last 200 chars) + samples.append(text[-200:]) + else: + # top of page 2 (first 200 chars) + samples.append(text[:200]) + return pages, "\n".join(samples) + except ImportError: + 
logger.warning("pdfplumber not installed — skipping text-based header check.") + return _check_page_count_fallback(pdf_path), "" + except Exception as e: + logger.warning(f"PDF read error: {e}") + return 1, "" + + +def _check_page_count_fallback(pdf_path: Path) -> int: + """Fallback: use mdls (macOS) or pdfinfo to get page count.""" + try: + result = subprocess.run( + ["mdls", "-name", "kMDItemNumberOfPages", str(pdf_path)], + capture_output=True, text=True, + ) + match = re.search(r"(\d+)", result.stdout) + return int(match.group(1)) if match else 1 + except Exception: + return 1 + + +def _fix_html_print_css(html_path: Path) -> bool: + """Replace old @media print block with tighter one. Returns True if changed.""" + content = html_path.read_text(encoding="utf-8") + if _TIGHTER_PRINT_CSS.strip() in content: + return False # already tight + new_content = _OLD_PRINT_CSS_PATTERN.sub(_TIGHTER_PRINT_CSS, content) + if new_content != content: + html_path.write_text(new_content, encoding="utf-8") + return True + return False + + +def _regenerate_pdf(html_path: Path, pdf_path: Path) -> bool: + """Regenerate PDF from HTML using Chrome headless with header suppression.""" + try: + result = subprocess.run( + [ + CHROME_BIN, + "--headless", "--disable-gpu", + f"--print-to-pdf={pdf_path}", + "--print-to-pdf-no-header", + "--no-pdf-header-footer", + f"file://{html_path}", + ], + capture_output=True, text=True, + ) + return pdf_path.exists() + except Exception as e: + logger.error(f"PDF regeneration failed: {e}") + return False + + +def review_resume_pdf(pdf_path: Path, html_path: Path = None, auto_fix: bool = True) -> list[str]: + """ + Review a generated resume PDF for formatting issues. + + Args: + pdf_path: Path to the generated PDF + html_path: Path to the source HTML (needed for auto-fix) + auto_fix: If True, attempt to fix issues automatically + + Returns: + List of issue strings. Empty list = all good. 
+ """ + pdf_path = Path(pdf_path) + if not pdf_path.exists(): + return [f"PDF not found: {pdf_path}"] + + issues = [] + page_count, text_sample = _extract_text_sample(pdf_path) + + # Check 1: page overflow + if page_count > 1: + issues.append(f"OVERFLOW — resume is {page_count} pages (should be 1)") + + # Check 2: print headers/footers leaked + for pattern in _HEADER_PATTERNS: + if re.search(pattern, text_sample, re.IGNORECASE): + issues.append(f"HEADERS — Chrome print headers/footers detected in PDF") + break + + if not issues: + logger.info(f" ✓ Resume review passed: 1 page, no headers — {pdf_path.name}") + print(f" ✓ Resume OK — 1 page, no print headers.") + return [] + + # Report issues + print(f"\n ⚠️ Resume review found {len(issues)} issue(s) in {pdf_path.name}:") + for issue in issues: + print(f" - {issue}") + + # Auto-fix + if auto_fix and html_path: + html_path = Path(html_path) + print(f" → Auto-fixing: tightening CSS and regenerating PDF...") + css_changed = _fix_html_print_css(html_path) + if css_changed: + logger.info(f"Updated print CSS in {html_path.name}") + success = _regenerate_pdf(html_path, pdf_path) + if success: + # Re-check after fix + new_pages, new_sample = _extract_text_sample(pdf_path) + remaining = [] + if new_pages > 1: + remaining.append(f"OVERFLOW — still {new_pages} pages after fix (manual trim needed)") + for pattern in _HEADER_PATTERNS: + if re.search(pattern, new_sample, re.IGNORECASE): + remaining.append("HEADERS — still present after fix") + break + if not remaining: + print(f" ✓ Fixed — PDF now looks clean.") + return issues # return original issues (fixed) + else: + print(f" ✗ Still have issues after auto-fix:") + for r in remaining: + print(f" - {r}") + return remaining + else: + print(f" ✗ PDF regeneration failed.") + + return issues From ab673f6b4c35309e9b31ec490f56a75861141d27 Mon Sep 17 00:00:00 2001 From: Siva Date: Sat, 4 Apr 2026 19:07:56 +0800 Subject: [PATCH 3/4] fix: move resume variant descriptions to config 
RESUME_DESCRIPTIONS was hardcoded in jd_matcher.py with role names specific to one user's career. Moved to jd_matcher_config.yaml under resume_descriptions so any user can define their own variant labels and descriptions. Co-Authored-By: Claude Sonnet 4.6 --- data_folder_example/jd_matcher_config.yaml | 15 ++++++++++----- src/libs/jd_resume_matcher/jd_matcher.py | 11 ++++------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/data_folder_example/jd_matcher_config.yaml b/data_folder_example/jd_matcher_config.yaml index c9be45dce..028888430 100644 --- a/data_folder_example/jd_matcher_config.yaml +++ b/data_folder_example/jd_matcher_config.yaml @@ -15,9 +15,14 @@ chrome_bin: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" candidate_name: "Your Name" # Map of resume variant keys to filenames inside resumes_dir. -# Keys must match the labels used in RESUME_DESCRIPTIONS in jd_matcher.py. +# Use any key names that make sense for your variants — they just need to be +# consistent between resume_files and resume_descriptions below. resume_files: - AIDataEngineer: "YourName_Resume_AIDataEngineer.html" - DataandMLEngineer: "YourName_Resume_DataandMLEngineer.html" - LeadDataEngineer: "YourName_Resume_LeadDataEngineer.html" - SeniorDataEngineer: "YourName_Resume_SeniorDataEngineer.html" + Variant1: "YourName_Resume_Variant1.html" + Variant2: "YourName_Resume_Variant2.html" + +# Short description of each variant — sent to Claude to help it pick the best base. +# Keys must match the keys in resume_files above. 
+resume_descriptions: + Variant1: "Best for roles focused on X, Y, Z" + Variant2: "Best for roles requiring A, B, C" diff --git a/src/libs/jd_resume_matcher/jd_matcher.py b/src/libs/jd_resume_matcher/jd_matcher.py index d02d76182..a4effad93 100644 --- a/src/libs/jd_resume_matcher/jd_matcher.py +++ b/src/libs/jd_resume_matcher/jd_matcher.py @@ -19,12 +19,9 @@ _CONFIG_PATH = Path("data_folder/jd_matcher_config.yaml") _config_cache: dict = {} -RESUME_DESCRIPTIONS = { - "AIDataEngineer": "Best for roles emphasising LLM pipelines, GenAI, AI infrastructure, MLOps with AI focus", - "DataandMLEngineer": "Best for roles requiring both ML lifecycle (training, evaluation, productionization) and data engineering", - "LeadDataEngineer": "Best for roles requiring technical ownership, cross-functional leadership, architecture authority, staff/lead level", - "SeniorDataEngineer": "Best for roles focused on Spark, pipelines, data platforms, cloud migration, streaming", -} +def _get_resume_descriptions() -> dict: + """Return per-variant descriptions from config, used in the Claude selection prompt.""" + return _load_config().get("resume_descriptions", {}) def _load_config() -> dict: @@ -137,7 +134,7 @@ def _analyse_jd(client: Anthropic, jd_text: str, resumes: dict) -> tuple[str, st resume_files = _get_resume_files() resume_summary = "\n\n".join([ - f"=== RESUME: {key} ===\n{RESUME_DESCRIPTIONS[key]}\n{_strip_html(html)[:2000]}" + f"=== RESUME: {key} ===\n{_get_resume_descriptions().get(key, '')}\n{_strip_html(html)[:2000]}" for key, html in resumes.items() ]) From e898057db523a2d086dcd5a7e3f213a3b28bf38e Mon Sep 17 00:00:00 2001 From: Siva Date: Sat, 4 Apr 2026 19:08:46 +0800 Subject: [PATCH 4/4] docs: add README for JD Resume Matcher Covers setup, config file structure, CLI and programmatic usage, cover letter generation, output structure, and tips. 
Co-Authored-By: Claude Sonnet 4.6 --- src/libs/jd_resume_matcher/README.md | 139 +++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 src/libs/jd_resume_matcher/README.md diff --git a/src/libs/jd_resume_matcher/README.md b/src/libs/jd_resume_matcher/README.md new file mode 100644 index 000000000..a2d533a28 --- /dev/null +++ b/src/libs/jd_resume_matcher/README.md @@ -0,0 +1,139 @@ +# JD Resume Matcher + +Picks the best-matching resume from a set of pre-written HTML variants, +tailors it for a specific job description using Claude, and outputs HTML + PDF. + +## How it works + +1. **Select** — Claude Haiku reads a short description of each resume variant + alongside the job description and picks the best base. Costs ~1k tokens. +2. **Analyse** — Haiku identifies matching bullets, gaps, and changes needed. +3. **Tailor** — Claude Sonnet rewrites the selected resume's HTML to match the + JD: reorders bullets, pulls stronger ones from other variants, adds missing + keywords. Only the selected resume is sent in full; others are sent as + stripped text to minimise token cost. +4. **Output** — Saves the tailored resume as HTML + PDF under + `output_base_dir/<Company_Name>/`. + +## Setup + +### 1. Prerequisites + +- An [Anthropic API key](https://console.anthropic.com/) +- Google Chrome installed (used for headless HTML → PDF conversion) +- Your resumes prepared as HTML files (one per variant/positioning) + +### 2. Create your config file + +Copy the example config and fill in your paths: + +```bash +cp data_folder_example/jd_matcher_config.yaml data_folder/jd_matcher_config.yaml +``` + +`data_folder/jd_matcher_config.yaml` is gitignored — safe to put real paths here. + +### 3. 
Configure `jd_matcher_config.yaml` + +```yaml +# Where your HTML resume files live +resumes_dir: "/path/to/your/resumes" + +# Where tailored resumes will be saved (one sub-folder per company) +output_base_dir: "/path/to/output/TailoredResumes" + +# Path to the Chrome binary (used for HTML → PDF conversion) +chrome_bin: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" + +# Your name — used as the output filename prefix +candidate_name: "Jane Smith" + +# Your resume variants — key can be anything meaningful to you +resume_files: + SoftwareEngineer: "JaneSmith_Resume_SoftwareEngineer.html" + DataEngineer: "JaneSmith_Resume_DataEngineer.html" + TechLead: "JaneSmith_Resume_TechLead.html" + +# One-line description per variant — Claude uses these to pick the best base +resume_descriptions: + SoftwareEngineer: "Best for full-stack, backend, and product engineering roles" + DataEngineer: "Best for data pipelines, ETL, Spark, and cloud data platform roles" + TechLead: "Best for staff/lead roles requiring architecture and cross-team ownership" +``` + +### 4. Chrome binary path by OS + +| OS | Default path | +|----|-------------| +| macOS | `/Applications/Google Chrome.app/Contents/MacOS/Google Chrome` | +| Linux | `/usr/bin/google-chrome` | +| Windows | `C:\Program Files\Google\Chrome\Application\chrome.exe` | + +## Usage + +### Interactive CLI + +Run from the project root: + +```python +from src.libs.jd_resume_matcher.jd_matcher import run_jd_match + +run_jd_match(api_key="your-anthropic-api-key") +``` + +You will be prompted for a job URL. The matcher fetches the JD, shows the +gap analysis, asks for confirmation, then generates the tailored resume. 
+ +### Programmatic (no prompts) + +```python +from src.libs.jd_resume_matcher.jd_matcher import tailor_resume_for_jd + +pdf_path = tailor_resume_for_jd( + api_key="your-anthropic-api-key", + jd_text="...full job description text...", + company_name="Acme Corp", # optional — Claude detects it if omitted +) +# Returns Path to the generated PDF, or None on failure +``` + +### Cover letter generation + +```python +from src.libs.jd_resume_matcher.jd_matcher import generate_cover_letter_for_jd +from pathlib import Path + +cover_letter = generate_cover_letter_for_jd( + api_key="your-anthropic-api-key", + jd_text="...full job description text...", + company_name="Acme Corp", + job_title="Senior Data Engineer", + output_dir=Path("output/AcmeCorp"), # optional — saves cover_letter.txt + resume_data={...}, # optional — full plain_text_resume.yaml dict +) +print(cover_letter) +``` + +## Output structure + +For each application, the matcher creates: + +``` +output_base_dir/ + Acme_Corp/ + JaneSmith_Resume.html # tailored resume (HTML) + JaneSmith_Resume.pdf # tailored resume (PDF) + cover_letter.txt # cover letter (if generated) + review_notes.md # gap analysis + formatting review +``` + +## Tips + +- **More variants = better selection.** 2–4 variants covering different + positioning (e.g. IC vs lead, backend vs data) gives Claude more to work with. +- **Keep variant descriptions concise.** One sentence summarising when to use + each variant is enough — Claude uses these before reading the resume content. +- **HTML resumes work best.** The matcher preserves your HTML/CSS structure + exactly — fonts, layout, and styling are unchanged. +- **PDF conversion requires Chrome.** If Chrome isn't found, the matcher + returns the HTML path instead and logs a warning.