Skip to content

Commit fadc5be

Browse files
committed
Add caching for dependency info data collection
(may resolve timeouts)
1 parent cdbd4d3 commit fadc5be

3 files changed

Lines changed: 48 additions & 4 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
# --- CACHEABLE QUERIES ---
22
cache_repos_activitycommits
3+
cache_repos_dependencies

_visualize/scripts/cache_repos_activitycommits.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,6 @@
1313
repolist = sorted(inputLists.data["data"].keys())
1414
print("Repo list complete. Found %d repos." % (len(repolist)))
1515

16-
# Initialize data collector
17-
dataCollector = qm.DataManager()
18-
dataCollector.data = {"data": {}}
19-
2016
# Initialize query manager
2117
queryMan = qm.GitHubQueryManager(maxRetry=1, retryDelay=1)
2218

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
"""Run the paginated dependency query for every internal repository.

The repo list comes from the keys of ``intReposInfo.json`` in the GitHub data
directory.  Each repo is queried with ``repo-Dependencies.gql``; the response is
not stored or written out by this script.
NOTE(review): presumably the query manager caches responses as a side effect,
which is the point of running this script -- confirm against queryManager.
"""
from scraper.github import queryManager as qm
from os import environ as env

# The data directory can be overridden through GITHUB_DATA; the query file is fixed.
ghDataDir = env.get("GITHUB_DATA", "../github-data")
queryPath = "../queries/repo-Dependencies.gql"

# Load the repo info data file; the keys of its "data" mapping are the repo list.
# NOTE(review): the second argument presumably asks DataManager to load the
# file from disk -- confirm against queryManager.
inputLists = qm.DataManager("%s/intReposInfo.json" % ghDataDir, True)
print("Getting internal repos ...")
repolist = sorted(inputLists.data["data"].keys())
print("Repo list complete. Found %d repos." % (len(repolist)))

# A single query manager is reused for every request below.
queryMan = qm.GitHubQueryManager(maxRetry=1, retryDelay=1)

# Walk the internal repos one at a time.
print("Gathering data across multiple paginated queries...")
for repo in repolist:
    print("\n'%s'" % (repo))

    # Repo keys are expected to have the form "owner/name".
    parts = repo.split("/")
    try:
        # Built inside the try so that a key without a "/" is reported and
        # skipped by the handler below instead of aborting the whole run.
        queryVars = {
            "ownName": parts[0],
            "repoName": parts[1],
            "numManifests": 25,
            "numDependents": 100,
            "pgCursor": None,
        }
        outObj = queryMan.queryGitHubFromFile(
            queryPath,
            queryVars,
            paginate=True,
            cursorVar="pgCursor",
            keysToList=["data", "repository", "dependencyGraphManifests", "nodes"],
        )
    except Exception as err:
        # Best effort: report the failing repo and carry on with the rest.
        print("Warning: Could not complete '%s'" % (repo))
        print(err)
        continue

    print("'%s' Done!" % (repo))

print("\nCollective data gathering complete!")

print("\nDone!\n")

0 commit comments

Comments
 (0)