diff --git a/collectoss/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py b/collectoss/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py index 4c8aec067..329166181 100644 --- a/collectoss/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py +++ b/collectoss/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py @@ -7,7 +7,6 @@ from collectoss.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException from collectoss.application.db.models import ContributorRepo from collectoss.application.db.lib import bulk_insert_dicts -from collectoss.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth ### This worker scans all the platform users in CollectOSS, and pulls their platform activity ### logs. Those are then used to analyze what repos each is working in (which will include repos not @@ -26,8 +25,6 @@ def contributor_breadth_model(self) -> None: tool_version = '0.0.1' data_source = 'GitHub API' - key_auth = GithubRandomKeyAuth(logger) - # This version of the query pulls contributors who have not had any data collected yet # To the top of the list cntrb_login_query = s.sql.text(""" @@ -83,7 +80,7 @@ def contributor_breadth_model(self) -> None: cntrb_newest_events_map[gh_login] = newest_event_date - github_data_access = GithubDataAccess(key_auth, logger) + github_data_access = GithubDataAccess(None, logger) index = 1 total = len(current_cntrb_logins) diff --git a/collectoss/tasks/frontend.py b/collectoss/tasks/frontend.py index d78fc1e1d..599198f89 100644 --- a/collectoss/tasks/frontend.py +++ b/collectoss/tasks/frontend.py @@ -152,7 +152,7 @@ def get_org_repo_data(orgs, session): def add_new_github_repos(repo_data, group_id, session, logger): # get data for repos to determine type, src id, and if they exist - data = get_github_repos_data(repo_data, session, logger) + data = get_github_repos_data(repo_data, None, logger) for url, repo_group_id in repo_data: @@ -200,7 +200,7 @@ def get_github_repos_data(repo_data, session, logger): repo_urls = [x[0] for x in repo_data] - github_graphql_data_access = GithubGraphQlDataAccess(session.oauths, logger, ingore_not_found_error=True) + github_graphql_data_access = GithubGraphQlDataAccess(None, logger, ingore_not_found_error=True) query_parts = [] repo_map = {} diff --git a/collectoss/tasks/github/contributors.py b/collectoss/tasks/github/contributors.py index f3eaaa802..0ae554d68 100644 --- a/collectoss/tasks/github/contributors.py +++ b/collectoss/tasks/github/contributors.py @@ -82,6 +82,7 @@ def process_contributors(): +@deprecated("This function is deprecated. Use the GithubDataAccess class instead") def retrieve_dict_data(url: str, key_auth, logger): num_attempts = 0 @@ -133,8 +134,7 @@ def grab_comitters(self, repo_git,platform="github"): logger = logging.getLogger(grab_comitters.__name__) try: - key_auth = GithubRandomKeyAuth(logger) - grab_committer_list(logger, key_auth, repo_git, tool_source, tool_version, data_source, platform) + grab_committer_list(logger, None, repo_git, tool_source, tool_version, data_source, platform) except Exception as e: logger.error(f"Could not grab committers from github endpoint!\n Reason: {e} \n Traceback: {''.join(traceback.format_exception(None, e, e.__traceback__))}") diff --git a/collectoss/tasks/github/events.py b/collectoss/tasks/github/events.py index 24b1e42ff..1882f92ac 100644 --- a/collectoss/tasks/github/events.py +++ b/collectoss/tasks/github/events.py @@ -39,7 +39,7 @@ def collect_events(repo_git: str, full_collection: bool): key_auth = GithubRandomKeyAuth(logger) - if bulk_events_collection_endpoint_contains_all_data(key_auth, logger, owner, repo): + if bulk_events_collection_endpoint_contains_all_data(None, logger, owner, repo): collection_strategy = BulkGithubEventCollection(logger) else: collection_strategy = ThoroughGithubEventCollection(logger) @@ -50,7 +50,7 @@ def bulk_events_collection_endpoint_contains_all_data(key_auth, logger, owner, r url = f"https://api.github.com/repos/{owner}/{repo}/issues/events?per_page=100" - github_data_access = GithubDataAccess(key_auth, logger) + github_data_access = GithubDataAccess(None, logger) page_count = github_data_access.get_resource_page_count(url) @@ -136,7 +136,7 @@ def _collect_events(self, repo_git: str, key_auth, since): url = f"https://api.github.com/repos/{owner}/{repo}/issues/events" - github_data_access = GithubDataAccess(key_auth, self._logger) + github_data_access = GithubDataAccess(None, self._logger) for event in github_data_access.paginate_resource(url): @@ -308,7 +308,7 @@ def _collect_and_process_issue_events(self, owner, repo, repo_id, key_auth, sinc events = [] contributors = [] - github_data_access = GithubDataAccess(key_auth, self._logger) + github_data_access = GithubDataAccess(None, self._logger) for db_issue in issue_result: issue = db_issue._asdict() @@ -371,7 +371,7 @@ def _collect_and_process_pr_events(self, owner, repo, repo_id, key_auth, since): events = [] contributors = [] - github_data_access = GithubDataAccess(key_auth, self._logger) + github_data_access = GithubDataAccess(None, self._logger) for db_pr in pr_result: pr = db_pr._asdict() diff --git a/collectoss/tasks/github/facade_github/contributor_interfaceable/contributor_interface.py b/collectoss/tasks/github/facade_github/contributor_interfaceable/contributor_interface.py index b1b163a2b..6b3d6341d 100644 --- a/collectoss/tasks/github/facade_github/contributor_interfaceable/contributor_interface.py +++ b/collectoss/tasks/github/facade_github/contributor_interfaceable/contributor_interface.py @@ -296,7 +296,7 @@ def fetch_username_from_email(logger, auth, commit) -> dict | None: return login_json try: - github_data_access = GithubDataAccess(auth, logger, feature="search") + github_data_access = GithubDataAccess(None, logger, feature="search") login_json = github_data_access.get_resource(url) except Exception as e: logger.error(f"Couldn't resolve email URL with given data. Reason: {e}") @@ -328,7 +328,7 @@ def get_login_with_supplemental_data(logger, auth, commit_data): # Try to get login from all possible emails # Is None upon failure. - login_json = fetch_username_from_email(logger,auth,commit_data) + login_json = fetch_username_from_email(logger,None,commit_data) # total_count is the count of username's found by the endpoint. # This Checks if the email result got anything. @@ -367,7 +367,7 @@ def get_login_with_commit_hash(logger, auth, commit_data, repo_id): #TODO: here. # Send api request - github_data_access = GithubDataAccess(auth, logger) + github_data_access = GithubDataAccess(None, logger) login_json = github_data_access.get_resource(url) # TODO: Why are we returning None if 'sha' is not in response if we aren't even using it? diff --git a/collectoss/tasks/github/facade_github/core.py b/collectoss/tasks/github/facade_github/core.py index 64b42e0d2..44d311a15 100644 --- a/collectoss/tasks/github/facade_github/core.py +++ b/collectoss/tasks/github/facade_github/core.py @@ -40,7 +40,7 @@ def query_github_contributors(logger, key_auth, github_url, tool_source:str, too update_col_map = {'cntrb_email': 'email'} duplicate_col_map = {'cntrb_login': 'login'} - github_data_access = GithubDataAccess(key_auth, logger) + github_data_access = GithubDataAccess(None, logger) contributor_count = github_data_access.get_resource_count(contributors_url) @@ -79,5 +79,5 @@ def query_github_contributors(logger, key_auth, github_url, tool_source:str, too def grab_committer_list(logger, key_auth, repo_git, tool_source: str, tool_version: str, data_source: str, platform="github" ): # Create API endpoint from repo_id - query_github_contributors(logger, key_auth, repo_git, tool_source, tool_version, data_source) + query_github_contributors(logger, None, repo_git, tool_source, tool_version, data_source) \ No newline at end of file diff --git a/collectoss/tasks/github/facade_github/tasks.py b/collectoss/tasks/github/facade_github/tasks.py index cc380d497..b3bd40a1d 100644 --- a/collectoss/tasks/github/facade_github/tasks.py +++ b/collectoss/tasks/github/facade_github/tasks.py @@ -17,7 +17,7 @@ def process_commit_metadata(logger, auth, contributorQueue, repo_id, platform_id, tool_source:str, tool_version:str, data_source:str): - github_data_access = GithubDataAccess(auth, logger) + github_data_access = GithubDataAccess(None, logger) for contributor in contributorQueue: # Get the email from the commit data @@ -63,12 +63,12 @@ def process_commit_metadata(logger, auth, contributorQueue, repo_id, platform_id # Try to get the login from the commit sha if login == None or login == "": - login = get_login_with_commit_hash(logger, auth, contributor, repo_id) + login = get_login_with_commit_hash(logger, None, contributor, repo_id) if login == None or login == "": logger.warning("Failed to get login from commit hash") # Try to get the login from supplemental data if not found with the commit hash - login = get_login_with_supplemental_data(logger, auth,contributor) + login = get_login_with_supplemental_data(logger, None,contributor) if login == None or login == "": logger.error("Failed to get login from supplemental data!") @@ -229,8 +229,6 @@ def insert_facade_contributors(self, repo_git): # 'repo_id': repo_id}).to_json(orient="records")) - key_auth = GithubRandomKeyAuth(logger) - facade_batch_size = get_batch_size() # Process results in batches to reduce memory usage @@ -240,12 +238,12 @@ def insert_facade_contributors(self, repo_git): batch.append(dict(row)) if len(batch) >= facade_batch_size: - process_commit_metadata(logger, key_auth, batch, repo_id, platform_id, tool_source, tool_version, data_source) + process_commit_metadata(logger, None, batch, repo_id, platform_id, tool_source, tool_version, data_source) batch.clear() # Process remaining items in batch if batch: - process_commit_metadata(logger, key_auth, batch, repo_id, platform_id, tool_source, tool_version, data_source) + process_commit_metadata(logger, None, batch, repo_id, platform_id, tool_source, tool_version, data_source) logger.debug("DEBUG: Got through the new_contribs") diff --git a/collectoss/tasks/github/issues.py b/collectoss/tasks/github/issues.py index 406718759..9d9846729 100644 --- a/collectoss/tasks/github/issues.py +++ b/collectoss/tasks/github/issues.py @@ -9,7 +9,6 @@ from collectoss.tasks.init.celery_app import CoreRepoCollectionTask from collectoss.application.db.data_parse import * from collectoss.tasks.github.util.github_data_access import GithubDataAccess -from collectoss.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth from collectoss.tasks.github.util.util import add_key_value_pair_to_dicts, get_owner_repo from collectoss.tasks.util.worker_util import remove_duplicate_dicts from collectoss.application.db.models import Issue, IssueLabel, IssueAssignee @@ -47,12 +46,8 @@ def collect_issues(repo_git: str, full_collection: bool) -> int: # Subtract 2 days to ensure all data is collected core_data_last_collected = (get_core_data_last_collected(repo_id) - timedelta(days=2)).replace(tzinfo=timezone.utc) - key_auth = GithubRandomKeyAuth(logger) - - logger.info(f'this is the manifest.key_auth value: {str(key_auth)}') - try: - issue_data_generator = retrieve_all_issue_data(repo_git, logger, key_auth, core_data_last_collected) + issue_data_generator = retrieve_all_issue_data(repo_git, logger, None, core_data_last_collected) issue_batch_size = get_batch_size() @@ -86,7 +81,7 @@ def collect_issues(repo_git: str, full_collection: bool) -> int: -def retrieve_all_issue_data(repo_git: str, logger: logging.Logger, key_auth: GithubRandomKeyAuth, since: datetime | None = None): +def retrieve_all_issue_data(repo_git: str, logger: logging.Logger, key_auth: None, since: datetime | None = None): """ Retrieve all issue data for a repository as a generator. @@ -96,7 +91,7 @@ def retrieve_all_issue_data(repo_git: str, logger: logging.Logger, key_auth: Git Args: repo_git (str): The GitHub repository in "owner/repo" format. logger (logging.Logger): Logger for logging messages. - key_auth (GithubRandomKeyAuth): Auth handler for GitHub API. + key_auth (GithubRandomKeyAuth): Auth handler for GitHub API. unused and deprecated, use KeyClient instead. since (datetime, optional): Only issues updated since this datetime will be retrieved. """ owner, repo = get_owner_repo(repo_git) @@ -108,7 +103,7 @@ def retrieve_all_issue_data(repo_git: str, logger: logging.Logger, key_auth: Git if since: url += f"&since={since.isoformat()}" - github_data_access = GithubDataAccess(key_auth, logger) + github_data_access = GithubDataAccess(None, logger) num_pages = github_data_access.get_resource_page_count(url) logger.info(f"{owner}/{repo}: Retrieving {num_pages} pages of issues") diff --git a/collectoss/tasks/github/messages.py b/collectoss/tasks/github/messages.py index 342eeb2ca..6d3c5af8d 100644 --- a/collectoss/tasks/github/messages.py +++ b/collectoss/tasks/github/messages.py @@ -40,7 +40,7 @@ def collect_github_messages(repo_git: str, full_collection: bool) -> None: if is_repo_small(repo_id): - message_data = fast_retrieve_all_pr_and_issue_messages(repo_git, logger, manifest.key_auth, task_name, core_data_last_collected) + message_data = fast_retrieve_all_pr_and_issue_messages(repo_git, logger, None, task_name, core_data_last_collected) if message_data: process_messages(message_data, task_name, repo_id, logger, db_session) @@ -49,7 +49,7 @@ def collect_github_messages(repo_git: str, full_collection: bool) -> None: logger.info(f"{owner}/{repo} has no messages") else: - process_large_issue_and_pr_message_collection(repo_id, repo_git, logger, manifest.key_auth, task_name, db_session, core_data_last_collected) + process_large_issue_and_pr_message_collection(repo_id, repo_git, logger, None, task_name, db_session, core_data_last_collected) def is_repo_small(repo_id): @@ -73,7 +73,7 @@ def fast_retrieve_all_pr_and_issue_messages(repo_git: str, logger, key_auth, tas # define logger for task logger.info(f"Collecting github comments for {owner}/{repo}") - github_data_access = GithubDataAccess(key_auth, logger) + github_data_access = GithubDataAccess(None, logger) message_count = github_data_access.get_resource_count(url) @@ -113,7 +113,7 @@ def process_large_issue_and_pr_message_collection(repo_id, repo_git: str, logger result = connection.execute(query).fetchall() comment_urls = [x[0] for x in result if x[0] is not None] - github_data_access = GithubDataAccess(key_auth, logger) + github_data_access = GithubDataAccess(None, logger) logger.info(f"{task_name}: Collecting github messages for {len(comment_urls)} prs/issues") diff --git a/collectoss/tasks/github/pull_requests/commits_model/core.py b/collectoss/tasks/github/pull_requests/commits_model/core.py index 9abadc2dd..ff0b2951d 100644 --- a/collectoss/tasks/github/pull_requests/commits_model/core.py +++ b/collectoss/tasks/github/pull_requests/commits_model/core.py @@ -46,7 +46,7 @@ def pull_request_commits_model(repo_id,logger, db_session, key_auth, full_collec logger.info(f"Getting pull request commits for repo: {repo.repo_git}") - github_data_access = GithubDataAccess(key_auth, logger) + github_data_access = GithubDataAccess(None, logger) pr_commits_natural_keys = ["pull_request_id", "repo_id", "pr_cmt_sha"] all_data = [] diff --git a/collectoss/tasks/github/pull_requests/commits_model/tasks.py b/collectoss/tasks/github/pull_requests/commits_model/tasks.py index ab96d3eb5..f9382ed71 100644 --- a/collectoss/tasks/github/pull_requests/commits_model/tasks.py +++ b/collectoss/tasks/github/pull_requests/commits_model/tasks.py @@ -16,4 +16,4 @@ def process_pull_request_commits(repo_git: str, full_collection: bool) -> None: with GithubTaskManifest(logger) as manifest: - pull_request_commits_model(repo.repo_id, logger, manifest.db_session, manifest.key_auth, full_collection) + pull_request_commits_model(repo.repo_id, logger, manifest.db_session, None, full_collection) diff --git a/collectoss/tasks/github/pull_requests/files_model/core.py b/collectoss/tasks/github/pull_requests/files_model/core.py index d07d43246..ab2309e15 100644 --- a/collectoss/tasks/github/pull_requests/files_model/core.py +++ b/collectoss/tasks/github/pull_requests/files_model/core.py @@ -42,7 +42,7 @@ def pull_request_files_model(repo_id,logger, db_session, key_auth, full_collecti task_name = f"{owner}/{name} Pr files" - github_graphql_data_access = GithubGraphQlDataAccess(key_auth, logger) + github_graphql_data_access = GithubGraphQlDataAccess(None, logger) pr_file_natural_keys = ["pull_request_id", "repo_id", "pr_file_path"] pr_file_rows = [] diff --git a/collectoss/tasks/github/pull_requests/files_model/tasks.py b/collectoss/tasks/github/pull_requests/files_model/tasks.py index 7caf27d1d..bf2fd0452 100644 --- a/collectoss/tasks/github/pull_requests/files_model/tasks.py +++ b/collectoss/tasks/github/pull_requests/files_model/tasks.py @@ -15,4 +15,4 @@ def process_pull_request_files(repo_git: str, full_collection: bool) -> None: query = db_session.session.query(Repo).filter(Repo.repo_git == repo_git) repo = execute_session_query(query, 'one') - pull_request_files_model(repo.repo_id, logger, db_session, manifest.key_auth, full_collection) \ No newline at end of file + pull_request_files_model(repo.repo_id, logger, db_session, None, full_collection) \ No newline at end of file diff --git a/collectoss/tasks/github/pull_requests/tasks.py b/collectoss/tasks/github/pull_requests/tasks.py index 3efaddf3b..13401f8bf 100644 --- a/collectoss/tasks/github/pull_requests/tasks.py +++ b/collectoss/tasks/github/pull_requests/tasks.py @@ -10,7 +10,6 @@ from collectoss.tasks.github.util.util import add_key_value_pair_to_dicts, get_owner_repo from collectoss.application.db.models import PullRequest, Message, PullRequestReview, PullRequestLabel, PullRequestReviewer, PullRequestMeta, PullRequestAssignee, PullRequestReviewMessageRef, Contributor, Repo from collectoss.tasks.github.util.github_task_session import GithubTaskManifest -from collectoss.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth from collectoss.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id, batch_insert_contributors, get_batch_size from collectoss.application.db.util import execute_session_query from ..messages import process_github_comment_contributors @@ -46,7 +45,7 @@ def collect_pull_requests(repo_git: str, full_collection: bool) -> int: total_count = 0 all_data = [] - for pr in retrieve_all_pr_data(repo_git, logger, manifest.key_auth, core_data_last_collected): + for pr in retrieve_all_pr_data(repo_git, logger, None, core_data_last_collected): all_data.append(pr) @@ -75,7 +74,7 @@ def retrieve_all_pr_data(repo_git: str, logger, key_auth, since): #-> Generator[ logger.debug(f"Collecting pull requests for {owner}/{repo}") - github_data_access = GithubDataAccess(key_auth, logger) + github_data_access = GithubDataAccess(None, logger) search_args = {"state": "all", "direction": "desc", "sort": "updated"} url = github_data_access.endpoint_url(f"repos/{owner}/{repo}/pulls", search_args) @@ -257,8 +256,7 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - tool_version = "2.0" data_source = "Github API" - key_auth = GithubRandomKeyAuth(logger) - github_data_access = GithubDataAccess(key_auth, logger) + github_data_access = GithubDataAccess(None, logger) pr_review_comment_batch_size = get_batch_size() @@ -495,7 +493,7 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: logger.info(f"{owner}/{repo}: Collecting reviews for {pr_count} PRs") - github_data_access = GithubDataAccess(manifest.key_auth, logger) + github_data_access = GithubDataAccess(None, logger) pr_review_batch_size = get_batch_size() diff --git a/collectoss/tasks/github/releases/core.py b/collectoss/tasks/github/releases/core.py index 643e1eb63..f1f94931b 100644 --- a/collectoss/tasks/github/releases/core.py +++ b/collectoss/tasks/github/releases/core.py @@ -6,7 +6,7 @@ from collectoss.tasks.github.util.gh_graphql_entities import request_graphql_dict from collectoss.application.db.util import execute_session_query from collectoss.application.db.lib import bulk_insert_dicts - +from typing_extensions import deprecated def get_release_inf(repo_id, release, tag_only): if not tag_only: @@ -153,7 +153,7 @@ def get_query(logger, owner, repo, tag_only): return query - +@deprecated("This function is deprecated. Use the GithubGraphQlDataAccess class instead") def fetch_data(key_auth, logger, github_url, repo_id, tag_only = False): logger.info("Beginning filling the releases model for repo: " + github_url + "\n") diff --git a/collectoss/tasks/github/repo_info/core.py b/collectoss/tasks/github/repo_info/core.py index 582a5ed45..beb85cd4e 100644 --- a/collectoss/tasks/github/repo_info/core.py +++ b/collectoss/tasks/github/repo_info/core.py @@ -20,7 +20,7 @@ def query_committers_count(key_auth, logger, owner, repo): ## If the repository is empty there are zero committers, and the API returns nothing at all. Response ## header of 200 along with an empty JSON. try: - github_data_access = GithubDataAccess(key_auth, logger) + github_data_access = GithubDataAccess(None, logger) try: data = github_data_access.get_resource_count(url) except Exception as e: @@ -176,7 +176,7 @@ def repo_info_model(key_auth, repo_orm_obj, logger): } """ - github_graphql_data_access = GithubGraphQlDataAccess(key_auth, logger) + github_graphql_data_access = GithubGraphQlDataAccess(None, logger) variables = { "owner": owner, @@ -188,7 +188,7 @@ def repo_info_model(key_auth, repo_orm_obj, logger): data = github_graphql_data_access.get_resource(query, variables, result_keys) # Get committers count info that requires seperate endpoint - committers_count = query_committers_count(key_auth, logger, owner, repo) + committers_count = query_committers_count(None, logger, owner, repo) # Put all data together in format of the table logger.info(f'Inserting repo info for repo with id:{repo_orm_obj.repo_id}, owner:{owner}, name:{repo}\n') diff --git a/collectoss/tasks/github/repo_info/tasks.py b/collectoss/tasks/github/repo_info/tasks.py index 66144dfc5..7542bb22d 100644 --- a/collectoss/tasks/github/repo_info/tasks.py +++ b/collectoss/tasks/github/repo_info/tasks.py @@ -5,7 +5,6 @@ from collectoss.tasks.init.celery_app import celery_app as celery from collectoss.tasks.init.celery_app import CoreRepoCollectionTask from collectoss.application.db.lib import get_repo_by_repo_git -from collectoss.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth from collectoss.application.db import get_engine @@ -17,9 +16,7 @@ def collect_repo_info(repo_git: str): repo = get_repo_by_repo_git(repo_git) - key_auth = GithubRandomKeyAuth(logger) - - repo_info_model(key_auth, repo, logger) + repo_info_model(None, repo, logger) #Task to get CII api data for linux badge info using github data. diff --git a/collectoss/tasks/github/traffic.py b/collectoss/tasks/github/traffic.py index 163b97ad5..ef736d3ef 100644 --- a/collectoss/tasks/github/traffic.py +++ b/collectoss/tasks/github/traffic.py @@ -6,8 +6,6 @@ from collectoss.tasks.github.util.util import get_owner_repo from collectoss.application.db.models import RepoClone from collectoss.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts -from collectoss.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth - @celery.task def collect_github_repo_clones_data(repo_git: str) -> None: @@ -21,40 +19,13 @@ def collect_github_repo_clones_data(repo_git: str) -> None: logger.info(f"Collecting Github repository clone data for {owner}/{repo}") - key_auth = GithubRandomKeyAuth(logger) - - clones_data = retrieve_all_clones_data(repo_git, logger, key_auth) + clones_data = [] if clones_data: process_clones_data(clones_data, f"{owner}/{repo}: Traffic task", repo_id) else: logger.info(f"{owner}/{repo} has no clones") -def retrieve_all_clones_data(repo_git: str, logger, key_auth): - # owner, repo = get_owner_repo(repo_git) - - # url = f"https://api.github.com/repos/{owner}/{repo}/traffic/clones" - - # clones = GithubPaginator(url, key_auth, logger) - - # num_pages = clones.get_num_pages() - all_data = [] - # for page_data, page in clones.iter_pages(): - - # if page_data is None: - # return all_data - - # elif len(page_data) == 0: - # logger.debug(f"{repo.capitalize()} Traffic Page {page} contains no data...returning") - # logger.info(f"Traffic Page {page} of {num_pages}") - # return all_data - - # logger.info(f"{repo} Traffic Page {page} of {num_pages}") - - # all_data += page_data - - return all_data - def process_clones_data(clones_data, task_name, repo_id, logger) -> None: clone_history_data = clones_data[0]['clones'] diff --git a/collectoss/tasks/github/util/gh_graphql_entities.py b/collectoss/tasks/github/util/gh_graphql_entities.py index bb5f95e98..4f0ba2c6f 100644 --- a/collectoss/tasks/github/util/gh_graphql_entities.py +++ b/collectoss/tasks/github/util/gh_graphql_entities.py @@ -7,6 +7,7 @@ import time import traceback from collectoss.tasks.github.util.github_paginator import GithubApiResult, process_dict_response +from typing_extensions import deprecated """ Should be designed on a per entity basis that has attributes that call @@ -21,6 +22,7 @@ PR_reviews, events, messages, pr_commits, pr_files(already done convert it) """ +@deprecated("This function is deprecated. Use the GithubGraphQlDataAccess class instead") def hit_api_graphql(keyAuth,url,logger,query,variables={},timeout=40): logger.debug(f"Sending query {query} to github graphql") @@ -65,6 +67,7 @@ def hit_api_graphql(keyAuth,url,logger,query,variables={},timeout=40): return response +@deprecated("This function is deprecated. Use the GithubGraphQlDataAccess class instead") def request_graphql_dict(key_auth, logger, url,query,variables={},timeout_wait=10): attempts = 0 response_data = None @@ -138,6 +141,7 @@ def request_graphql_dict(key_auth, logger, url,query,variables={},timeout_wait=1 #Get data extraction logic for nested nodes in return data. #Should keep track of embedded data that is incomplete. +@deprecated("This class is deprecated. Use the GithubGraphQlDataAccess class instead") class GraphQlPageCollection(collections.abc.Sequence): #Bind is needed for things like query by repo. Contains bind variables for the graphql query def __init__(self,query,keyAuth,logger,bind={},numPerPage=100,url="https://api.github.com/graphql",repaginateIfIncomplete=[]): @@ -401,6 +405,7 @@ def __iter__(self): #use httpx and pass random_key_auth +@deprecated("This class is deprecated. Use the GithubGraphQlDataAccess class instead") class GitHubRepo(): def __init__(self, logger, key_auth, owner, repo): @@ -534,6 +539,7 @@ def get_pull_requests_collection(self): +@deprecated("This class is deprecated. Use the GithubGraphQlDataAccess class instead") class PullRequest(): def __init__(self, logger, key_auth, owner, repo, number): diff --git a/collectoss/tasks/github/util/util.py b/collectoss/tasks/github/util/util.py index c25c738d9..58f5eaec3 100644 --- a/collectoss/tasks/github/util/util.py +++ b/collectoss/tasks/github/util/util.py @@ -20,9 +20,7 @@ def get_repo_src_id(owner, repo, logger): } """ - key_auth = GithubRandomKeyAuth(logger) - - github_graphql_data_access = GithubGraphQlDataAccess(key_auth, logger) + github_graphql_data_access = GithubGraphQlDataAccess(None, logger) variables = { "owner": owner, @@ -101,13 +99,13 @@ def get_repo_weight_by_issue(logger,repo_git): Sum of issues and prs for that repo """ - from collectoss.tasks.github.util.gh_graphql_entities import GitHubRepo as GitHubRepoGraphql + from collectoss.tasks.github.util.gh_graphql_entities import GitHubRepo owner,name = get_owner_repo(repo_git) key_auth = GithubRandomKeyAuth(logger) - repo_graphql = GitHubRepoGraphql(logger, key_auth, owner, name) + repo_graphql = GitHubRepo(logger, key_auth, owner, name) number_of_issues_and_prs = len(repo_graphql.get_issues_collection()) + len(repo_graphql.get_pull_requests_collection()) return number_of_issues_and_prs diff --git a/tests/test_tasks/test_task_utilities/test_key_auth/test_github_random_key_auth.py b/tests/test_tasks/test_task_utilities/test_key_auth/test_github_random_key_auth.py deleted file mode 100644 index 251545415..000000000 --- a/tests/test_tasks/test_task_utilities/test_key_auth/test_github_random_key_auth.py +++ /dev/null @@ -1,34 +0,0 @@ -import pytest -import httpx -import random -import time -import logging - -from collectoss.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth -from collectoss.application.db.session import DatabaseSession - -logger = logging.getLogger(__name__) - -@pytest.fixture -def github_random_key_auth(): - - session = DatabaseSession(logger) - - key_auth = GithubRandomKeyAuth(session) - - yield key_auth - - session.close() - - -def test_github_api_rate_limit_after_setting_key(github_random_key_auth): - - url = "https://api.github.com/rate_limit" - - with httpx.Client() as client: - - response = client.request(method="GET", url=url, auth=github_random_key_auth) - - rate_limit_data = response.json() - - assert rate_limit_data["resources"]["core"]["limit"] > 60 \ No newline at end of file