|
| 1 | +import base64 |
| 2 | +import os |
| 3 | +import re |
| 4 | +import sys |
| 5 | +import time |
| 6 | + |
| 7 | +import requests |
| 8 | + |
| 9 | + |
def handle_rate_limit(response, wait_time=60):
    """Sleep if *response* is a GitHub rate-limit reply and say whether to retry.

    A response counts as rate limited when its status is 403 or 429 and it
    carries either a ``Retry-After`` header or ``X-RateLimit-Remaining: 0``.

    Args:
        response: A ``requests.Response``-like object exposing ``status_code``
            and a ``headers`` mapping.
        wait_time: Fallback number of seconds to sleep when the rate-limit
            headers needed to compute the delay are missing or unparseable.

    Returns:
        True if the response was rate limited and this function has already
        slept (the caller should repeat the request); False otherwise.
    """
    if response.status_code not in (403, 429):
        return False

    headers = response.headers
    retry_after = headers.get("Retry-After")
    quota_exhausted = headers.get("X-RateLimit-Remaining") == "0"
    if retry_after is None and not quota_exhausted:
        # An ordinary 403 (e.g. missing permissions): retrying will not help.
        return False

    try:
        if retry_after is not None:
            sleep_time = int(retry_after)
        else:
            # The reset header is an epoch timestamp; +1 avoids waking up
            # a fraction of a second before the quota is actually restored.
            sleep_time = int(headers["X-RateLimit-Reset"]) - int(time.time()) + 1
    except (KeyError, ValueError):
        sleep_time = wait_time

    # time.sleep() rejects negative values (reset time already in the past).
    sleep_time = max(sleep_time, 0)
    print(f"Rate limit exceeded. Waiting for {sleep_time} seconds.")
    time.sleep(sleep_time)
    return True
| 12 | + |
| 13 | + |
def _rate_limit_delay(response):
    """Return the seconds to wait if *response* is a rate-limit reply, else None."""
    if response.status_code not in (403, 429):
        return None

    headers = response.headers
    retry_after = headers.get("Retry-After")
    if retry_after is None and headers.get("X-RateLimit-Remaining") != "0":
        return None

    try:
        if retry_after is not None:
            delay = int(retry_after)
        else:
            # Epoch timestamp of the quota reset; +1 to land just after it.
            delay = int(headers["X-RateLimit-Reset"]) - int(time.time()) + 1
    except (KeyError, ValueError):
        delay = 60

    # time.sleep() rejects negative values (reset time already in the past).
    return max(delay, 0)


def search_branches(repo_nwo, file_path, regex_pattern):
    """Report, for every branch of a GitHub repo, whether a file matches a regex.

    Lists all branches of ``repo_nwo`` through the GitHub REST API, fetches
    ``file_path`` on each branch, and prints one line per branch saying
    whether ``regex_pattern`` was found, not found, or the file is missing.

    Args:
        repo_nwo: Repository in ``owner/name`` form.
        file_path: Path of the file inside the repository.
        regex_pattern: Regular expression searched for in the file's text.

    Raises:
        SystemExit: If the ``GITHUB_TOKEN`` environment variable is not set.
        re.error: If ``regex_pattern`` is not a valid regular expression.
        requests.HTTPError: If the API answers with an error status that is
            neither "file not found" nor a rate limit.
    """
    # Local import so this function does not depend on the module's import block.
    from urllib.parse import quote

    # GitHub API base URL
    base_url = "https://api.github.com"
    # Without a timeout a stalled connection would hang the script forever.
    request_timeout = 30

    # Get GitHub token from environment variable
    github_token = os.environ.get("GITHUB_TOKEN")
    if not github_token:
        print("Error: GITHUB_TOKEN environment variable not set")
        sys.exit(1)

    # Compile up front so an invalid pattern fails before any API call is made.
    pattern = re.compile(regex_pattern)

    # Set up headers for authenticated requests
    headers = {
        "Authorization": f"token {github_token}",
        "Accept": "application/vnd.github.v3+json",
    }

    # Get all branches (with pagination); 100 per page keeps the call count low.
    branches_url = f"{base_url}/repos/{repo_nwo}/branches?per_page=100"
    branches = []
    while branches_url:
        branches_response = requests.get(
            branches_url, headers=headers, timeout=request_timeout
        )
        if handle_rate_limit(branches_response):
            continue
        branches_response.raise_for_status()
        branches.extend(branches_response.json())
        branches_url = branches_response.links.get("next", {}).get("url")

    # Escape the path once; the branch goes through `params` so characters
    # such as '#', '&' or '+' in either value cannot corrupt the URL.
    file_url = f"{base_url}/repos/{repo_nwo}/contents/{quote(file_path, safe='/')}"

    # Search file contents in each branch
    for branch in branches:
        branch_name = branch["name"]

        while True:
            file_response = requests.get(
                file_url,
                headers=headers,
                params={"ref": branch_name},
                timeout=request_timeout,
            )
            status = file_response.status_code

            if status == 200:
                payload = file_response.json()
                # A directory yields a JSON list, and entries the API does not
                # inline are not base64-encoded; neither can be searched.
                if not isinstance(payload, dict) or payload.get("encoding") != "base64":
                    print(f"Cannot read file contents in branch: {branch_name}")
                    break

                # errors="replace" keeps binary / non-UTF-8 files from aborting the scan.
                decoded_content = base64.b64decode(payload.get("content", "")).decode(
                    "utf-8", errors="replace"
                )

                if pattern.search(decoded_content):
                    print(f"Match found in branch: {branch_name}!!!!!")
                else:
                    print(f"No match found in branch: {branch_name}")
                break

            if status == 404:
                print(f"File not found in branch: {branch_name}")
                break

            delay = _rate_limit_delay(file_response)
            if delay is not None:
                print(f"Rate limit exceeded. Waiting for {delay} seconds.")
                time.sleep(delay)
                continue  # retry the same branch once the limit has reset

            # Any other status would otherwise be retried forever; surface it.
            file_response.raise_for_status()
            print(f"Unexpected status {status} in branch: {branch_name}")
            break
| 74 | + |
| 75 | + |
if __name__ == "__main__":
    # Exactly three positional arguments are expected after the script name.
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python search_branches.py <repo_nwo> <file_path> <regex_pattern>")
        sys.exit(1)

    repo_nwo, file_path, regex_pattern = cli_args

    # Echo the parameters before starting so the output is self-describing.
    banner = f"Searching branches in {repo_nwo} for {file_path} with pattern {regex_pattern}"
    print(banner)
    search_branches(repo_nwo, file_path, regex_pattern)
0 commit comments