Skip to content

Commit 3353c3e

Browse files
committed
Add workflow to generate timeseries CSV coverage report
1 parent 4de4277 commit 3353c3e

4 files changed

Lines changed: 127 additions & 78 deletions

File tree

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
# Manually triggered workflow: runs the library-coverage time-series script
# against a second checkout of this repository and uploads the resulting
# per-language CSV files as a build artifact.
name: Build CSV timeseries flow coverage report

on:
  workflow_dispatch:

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
    # Checkout that provides the script itself; its HEAD is never moved.
    - name: Clone self (github/codeql)
      uses: actions/checkout@v2
      with:
        path: script
    # Checkout that the script moves from commit to commit while collecting stats.
    - name: Clone self (github/codeql) for analysis
      uses: actions/checkout@v2
      with:
        path: codeqlModels
        # The script walks back through parent commits, so the full history
        # must be available; a shallow clone only contains the tip commit.
        fetch-depth: 0
    - name: Set up Python 3.8
      uses: actions/setup-python@v2
      with:
        python-version: 3.8
    - name: Download CodeQL CLI
      uses: dsaltares/fetch-gh-release-asset@aa37ae5c44d3c9820bc12fe675e8670ecd93bd1c
      with:
        repo: "github/codeql-cli-binaries"
        version: "latest"
        file: "codeql-linux64.zip"
        token: ${{ secrets.GITHUB_TOKEN }}
    - name: Unzip CodeQL CLI
      run: unzip -d codeql-cli codeql-linux64.zip
    - name: Build modeled package list
      run: |
        # The script changes its working directory to `codeqlModels`, so the
        # CLI has to be put on PATH as an absolute path; a relative entry
        # would stop resolving after the directory change.
        CLI=$(realpath "codeql-cli/codeql")
        PATH="$PATH:$CLI" python script/misc/scripts/library-coverage/generate-time-series.py codeqlModels
    # The script writes the CSV files into the directory it was started from.
    - name: Upload timeseries CSV
      uses: actions/upload-artifact@v2
      with:
        name: csv-flow-model-coverage-timeseries
        path: timeseries-*.csv

misc/scripts/library-coverage/generate-report.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,6 @@ def add_package_stats_to_row(row, sorted_cwes, collect):
8484
return row, processed_packages
8585

8686

87-
class LanguageConfig:
88-
def __init__(self, lang, capitalized_lang, ext, ql_path):
89-
self.lang = lang
90-
self.capitalized_lang = capitalized_lang
91-
self.ext = ext
92-
self.ql_path = ql_path
93-
94-
9587
try: # Check for `codeql` on path
9688
utils.subprocess_run(["codeql", "--version"])
9789
except Exception as e:
@@ -121,7 +113,7 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
121113

122114
# Languages for which we want to generate coverage reports.
123115
configs = [
124-
LanguageConfig(
116+
utils.LanguageConfig(
125117
"java", "Java", ".java", query_prefix + "java/ql/src/meta/frameworks/Coverage.ql")
126118
]
127119

misc/scripts/library-coverage/generate-time-series.py

Lines changed: 77 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,6 @@
1414
# the distance between commits to include in the output
1515
day_distance = 1
1616

17-
# the directory where codeql is. This is the directory where we change the SHAs
18-
working_dir = sys.argv[1]
19-
20-
lang = "java"
21-
db = "empty-java"
22-
ql_output = "output-java.csv"
23-
csv_output = "timeseries-java.csv"
24-
2517

2618
def get_str_output(arr):
2719
r = subprocess.check_output(arr)
@@ -49,66 +41,82 @@ def get_previous_sha(sha, date):
4941
return (parent_sha, parent_date)
5042

5143

52-
def get_stats():
53-
if os.path.isdir(db):
54-
shutil.rmtree(db)
55-
utils.create_empty_database(lang, ".java", db)
56-
utils.run_codeql_query(
57-
"java/ql/src/meta/frameworks/Coverage.ql", db, ql_output)
58-
shutil.rmtree(db)
59-
60-
sources = 0
61-
sinks = 0
62-
summaries = 0
63-
64-
with open(ql_output) as csvfile:
65-
reader = csv.reader(csvfile)
66-
for row in reader:
67-
# row: "android.util",1,"remote","source",16
68-
if row[3] == "source":
69-
sources += int(row[4])
70-
if row[3] == "sink":
71-
sinks += int(row[4])
72-
if row[3] == "summary":
73-
summaries += int(row[4])
74-
75-
os.remove(ql_output)
76-
77-
return (sources, sinks, summaries)
78-
79-
80-
with open(csv_output, 'w', newline='') as csvfile:
81-
csvwriter = csv.writer(csvfile)
82-
csvwriter.writerow(["SHA", "Date", "Sources", "Sinks", "Summaries"])
83-
84-
os.chdir(working_dir)
def get_stats(lang, query, ext=".java"):
    """Run the coverage query for one language and total its result rows.

    A temporary database named ``empty_<lang>`` is created through
    ``utils.create_empty_database``, ``query`` is run against it through
    ``utils.run_codeql_query``, and the CSV it produces is summed up by row
    kind. Both the temporary database and the CSV file are relative to the
    current working directory and are removed before returning, whether or
    not the run succeeded.

    Args:
        lang: Language identifier passed to ``utils.create_empty_database``;
            also used to name the temporary database and output file.
        query: Query path passed to ``utils.run_codeql_query``.
        ext: File extension passed to ``utils.create_empty_database``.
            Defaults to ``".java"``, the value that used to be hard-coded.

    Returns:
        A ``(sources, sinks, summaries)`` tuple of ints.

    Raises:
        Exception: If any step fails. The underlying error is chained as the
            cause so that it is not lost.
    """
    # Computed before the `try` so the `finally` clause can always refer to them.
    db = "empty_" + lang
    ql_output = "output-" + lang + ".csv"

    try:
        # Start from a clean slate in case an earlier run left a database behind.
        if os.path.isdir(db):
            shutil.rmtree(db)
        utils.create_empty_database(lang, ext, db)
        utils.run_codeql_query(query, db, ql_output)

        totals = {"source": 0, "sink": 0, "summary": 0}

        with open(ql_output, newline='') as csvfile:
            for row in csv.reader(csvfile):
                # row: "android.util",1,"remote","source",16
                kind = row[3]
                if kind in totals:
                    totals[kind] += int(row[4])

        return (totals["source"], totals["sink"], totals["summary"])
    except Exception as err:
        print("Unexpected error:", type(err))
        raise Exception("Failed to collect stats for " + lang) from err
    finally:
        # Remove temporary artifacts on success and on failure alike.
        if os.path.isdir(db):
            shutil.rmtree(db)
        if os.path.isfile(ql_output):
            os.remove(ql_output)
77+
78+
# the directory where codeql is. This is the directory where we change the SHAs.
# When no command-line argument is given, the current directory is used.
working_dir = ""
if len(sys.argv) > 1:
    working_dir = sys.argv[1]

# Languages for which we want to generate a time series.
configs = [
    utils.LanguageConfig(
        "java", "Java", ".java", "java/ql/src/meta/frameworks/Coverage.ql")
]

# Remember where the script was started, so that every CSV file ends up in the
# same directory regardless of the directory change below.
output_dir = os.getcwd()

# Change directory exactly once, and only when a directory was given:
# `os.chdir("")` raises, and repeating a relative `chdir` for every language
# would fail from the second language onwards.
if working_dir:
    os.chdir(working_dir)

# todo: change this when we cover multiple languages. We should compute the SHAs
# only once and not per language
for config in configs:
    csv_output = os.path.join(output_dir, "timeseries-" + config.lang + ".csv")
    with open(csv_output, 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(["SHA", "Date", "Sources", "Sinks", "Summaries"])

        utils.subprocess_run(["git", "checkout", "main"])

        current_sha = get_str_output(["git", "rev-parse", "HEAD"])
        current_date = get_date(current_sha)

        # Walk back through the history until stats can no longer be collected.
        while True:
            print("Getting stats for " + current_sha)
            utils.subprocess_run(["git", "checkout", current_sha])

            try:
                stats = get_stats(config.lang, config.ql_path)

                csvwriter.writerow(
                    [current_sha, current_date, stats[0], stats[1], stats[2]])

                print("Collected stats for " + current_sha +
                      " at " + current_date.isoformat())
            except Exception:
                print("Error getting stats for " +
                      current_sha + ". Stopping iteration.")
                break

            current_sha, current_date = get_previous_sha(
                current_sha, current_date)

    # Leave the analysed checkout on `main` again once a language is finished.
    utils.subprocess_run(["git", "checkout", "main"])
87-
88-
current_sha = get_str_output(["git", "rev-parse", "HEAD"])
89-
current_date = get_date(current_sha)
90-
91-
while True:
92-
print("Getting stats for " + current_sha)
93-
utils.subprocess_run(["git", "checkout", current_sha])
94-
95-
try:
96-
stats = get_stats()
97-
98-
csvwriter.writerow(
99-
[current_sha, current_date, stats[0], stats[1], stats[2]])
100-
101-
print("Collected stats for " + current_sha +
102-
" at " + current_date.isoformat())
103-
except:
104-
print("Unexpected error:", sys.exc_info()[0])
105-
106-
if os.path.isdir(db):
107-
shutil.rmtree(db)
108-
print("Error getting stats for " +
109-
current_sha + ". Stopping iteration.")
110-
break
111-
112-
current_sha, current_date = get_previous_sha(current_sha, current_date)
113-
114-
utils.subprocess_run(["git", "checkout", "main"])

misc/scripts/library-coverage/utils.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,11 @@ def run_codeql_query(query, database, output):
2424
subprocess_run(["codeql", "bqrs", "decode", output + ".bqrs",
2525
"--format=csv", "--no-titles", "--output", output])
2626
os.remove(output + ".bqrs")
27+
28+
class LanguageConfig:
    """Per-language settings shared by the library-coverage scripts.

    The four constructor arguments are stored unchanged as attributes of the
    same name: `lang`, `capitalized_lang`, `ext` and `ql_path`.
    """

    def __init__(self, lang, capitalized_lang, ext, ql_path):
        (self.lang,
         self.capitalized_lang,
         self.ext,
         self.ql_path) = (lang, capitalized_lang, ext, ql_path)

0 commit comments

Comments
 (0)