Skip to content

Commit eab552c

Browse files
refactor: update metrics email (#1791)
Changed the build query to return n_issues, n_new_issues and total_incidents; added a subquery to return the top 3 issues for each origin; changed the lab query to return the builds related to the tests of each lab (instead of the builds directly related to the lab). Closes #1780
1 parent 36ddf41 commit eab552c

6 files changed

Lines changed: 253 additions & 136 deletions

File tree

backend/kernelCI_app/management/commands/notifications.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -824,13 +824,18 @@ def generate_metrics_report(
824824

825825
deltas = compute_metrics_deltas(data)
826826

827+
# Size the lab column from the longest lab name so it is neither too wide nor too narrow
828+
lab_spacing = max((len(lab_key) for lab_key in data.lab_maps.keys()), default=0)
829+
lab_spacing += 7 # add spacing for leading tab and possible * mark for new labs
830+
827831
report = {}
828832
template = setup_jinja_template("metrics_report.txt.j2")
829833
report["content"] = template.render(
830834
**data.model_dump(),
831835
start_datetime=start_datetime.strftime("%Y-%m-%d %H:%M %Z"),
832836
end_datetime=end_datetime.strftime("%Y-%m-%d %H:%M %Z"),
833837
deltas=deltas,
838+
lab_spacing=lab_spacing,
834839
)
835840

836841
report["title"] = "KernelCI Metrics Report - %s" % now.strftime("%Y-%m-%d %H:%M %Z")

backend/kernelCI_app/management/commands/templates/metrics_report.txt.j2

Lines changed: 51 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -20,27 +20,53 @@ Period: {{ start_datetime }} to {{ end_datetime }}
2020

2121
BUILD REGRESSIONS
2222
-----------------
23-
A "regression" is a newly detected failure (first occurrence).
24-
An "occurrence" is each subsequent hit of that same failure.
23+
A "regression" is defined as a reported problem affecting 0 or multiple builds.
2524

2625
{% if build_incidents_by_origin -%}
27-
{{ "{:<14}".format(" Origin") -}}
28-
{{ "{:<15}".format("Occurrences") -}}
29-
New regressions
30-
────────────────────────────────────────────
26+
27+
{{ "{:<12}".format(" Origin") -}}
28+
{{ "{:<18}".format("Regressions") -}}
29+
{{ "{:<19}".format("Affected") -}}
30+
Affected builds by top issues
31+
{{ "{:<12}".format("") -}}
32+
{{ "{:<18}".format("(known + new)") -}}
33+
{{ "{:<19}".format("Builds (total)") -}}
34+
{{ "{:<8}".format("#1") -}}{{ "{:<8}".format("#2") -}}#3
35+
─────────────────────────────────────────────────────────────────────────
3136
{% for origin, data in build_incidents_by_origin.items() | sort -%}
32-
{{ "{:<14}".format(" " + origin) -}}
33-
{{ "{:<15}".format(fmt(data.total)) -}}
34-
{{ fmt(data.new_regressions) }}
35-
{% endfor %} ────────────────────────────────────────────
36-
{{ "{:<14}".format(" Total") -}}
37-
{{ "{:<15}".format(fmt(build_incidents_by_origin.values() | map(attribute='total') | sum)) -}}
38-
{{ fmt(build_incidents_by_origin.values() | map(attribute='new_regressions') | sum) }}
37+
{{ "{:<12}".format(" " + origin) -}}
38+
{{ "{:<4}".format(fmt(data['n_existing_issues'])) -}} +{{" "}}
39+
{{- "{:<4}".format(fmt(data['n_new_issues'])) -}} ={{" "}}
40+
{{- "{:<6}".format(fmt(data['n_total_issues'])) -}}
41+
{{ "{:<19}".format(fmt(data['total_incidents'])) -}}
42+
{% for issue_key, data in top_issues_by_origin.get(origin, {}).items() -%}
43+
{{ "{:<8}".format(fmt(data['total_incidents'])) -}}
44+
{% endfor %}
45+
{% endfor %} ─────────────────────────────────────────────────────────────────────────
46+
{{ "{:<12}".format(" Total") -}}
47+
{{ "{:<4}".format(fmt(build_incidents_by_origin.values() | map(attribute='n_existing_issues') | sum)) -}} +{{" "}}
48+
{{- "{:<4}".format(fmt(build_incidents_by_origin.values() | map(attribute='n_new_issues') | sum)) -}} ={{" "}}
49+
{{- "{:<6}".format(fmt(build_incidents_by_origin.values() | map(attribute='n_total_issues') | sum)) -}}
50+
{{ fmt(build_incidents_by_origin.values() | map(attribute='total_incidents') | sum) }}
3951
{%- else %} No build regressions to show in this period. {%- endif %}
4052

4153

42-
LAB ACTIVITY
43-
------------
54+
TOP REGRESSIONS PER ORIGIN
55+
--------------------------
56+
{%- if top_issues_by_origin %}
57+
{%- for origin, data in top_issues_by_origin.items() | sort %}
58+
{{ origin }}:
59+
{%- for issue_key, issue_data in data.items() %}
60+
#{{ loop.index }} ({{ fmt(issue_data['total_incidents']) }} occurrences) - {{ issue_data['comment'] | truncate(70, True, '...', 0)}}
61+
Dashboard: https://d.kernelci.org/issue/{{issue_data['id']}}?iv={{issue_data['version']}}
62+
{%- endfor %}
63+
{% endfor -%}
64+
{%- else %}
65+
No regression details to show in this period.
66+
{%- endif %}
67+
68+
TEST LABS ACTIVITY
69+
------------------
4470
{%- set n_labs = lab_maps | length %}
4571
{%- set prev_n_labs = prev_lab_maps | length %}
4672
{%- set lab_diff = n_labs - prev_n_labs %}
@@ -53,36 +79,32 @@ New regressions
5379
{%- endif %}
5480

5581
Labs marked with an asterisk (*) are new.
56-
Labs that stopped reporting are shown with a -100% change.
5782

5883
{% if n_labs -%}
59-
{{ "{:<20}".format(" Origin") -}}
60-
{{ "{:<25}".format("Lab") -}}
61-
{{ "{:<9}".format("Builds") -}}
84+
{{ "{:<{width}}".format(" Lab", width=lab_spacing) -}}
85+
{{ "{:<16}".format("Covered builds") -}}
6286
{{ "{:<9}".format("Boots") -}}
6387
{{ "{:<15}".format("Tests") -}}
6488
Change (tests)
65-
──────────────────────────────────────────────────────────────────────────────────────────
89+
────────────────────────────────────────────────────────────────────────────────
6690
{% for lab_key, lab_values in lab_maps.items() | sort -%}
6791
{%- set display_name = lab_key + " *" if lab_key in deltas.new_lab_keys else lab_key -%}
68-
{{ "{:<20}".format(" " + lab_values["origin"]) -}}
69-
{{ "{:<25}".format(display_name) -}}
70-
{{ "{:<9}".format(fmt(lab_values["builds"])) -}}
92+
{{ "{:<{width}}".format(" " + display_name, width=lab_spacing) -}}
93+
{{ "{:<16}".format(fmt(lab_values["builds"])) -}}
7194
{{ "{:<9}".format(fmt(lab_values["boots"])) -}}
7295
{{ "{:<15}".format(fmt(lab_values["tests"])) -}}
7396
{{ deltas.labs.get(lab_key, "") }}
7497
{% endfor %}
7598
{%- for lab_key in deltas.extinct_lab_keys | sort -%}
7699
{%- set lab_values = prev_lab_maps[lab_key] -%}
77-
{{ "{:<20}".format(" " + lab_values["origin"]) -}}
78-
{{ "{:<25}".format(lab_key) -}}
79-
{{ "{:<9}".format(fmt(0)) -}}
100+
{{ "{:<{width}}".format(" " + lab_key, width=lab_spacing) -}}
101+
{{ "{:<16}".format(fmt(0)) -}}
80102
{{ "{:<9}".format(fmt(0)) -}}
81103
{{ "{:<15}".format(fmt(0)) -}}
82104
{{ deltas.labs.get(lab_key, "") }}
83-
{% endfor %} ──────────────────────────────────────────────────────────────────────────────────────────
84-
{{ "{:<45}".format(" Total") -}}
85-
{{ "{:<9}".format(fmt(lab_maps.values() | map(attribute='builds') | sum)) -}}
105+
{% endfor %} ────────────────────────────────────────────────────────────────────────────────
106+
{{ "{:<{width}}".format(" Total", width=lab_spacing) -}}
107+
{{ "{:<16}".format(fmt(lab_maps.values() | map(attribute='builds') | sum)) -}}
86108
{{ "{:<9}".format(fmt(lab_maps.values() | map(attribute='boots') | sum)) -}}
87109
{{ "{:<15}".format(fmt(lab_maps.values() | map(attribute='tests') | sum)) -}}
88110
{{ deltas.n_total_lab_activity }}

backend/kernelCI_app/queries/notifications.py

Lines changed: 108 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,10 @@
88
from kernelCI_app.helpers.logger import out
99
from kernelCI_app.queries.tree import get_tree_listing_query
1010
from kernelCI_app.typeModels.metrics_notifications import (
11-
BuildIncidentsByOrigin,
11+
BuildIncidentsCount,
1212
LabMetricsData,
1313
MetricsReportData,
14+
TopIssue,
1415
)
1516
from pydantic import ValidationError
1617

@@ -696,78 +697,95 @@ def get_metrics_data(
696697
"""
697698

698699
build_incidents_query = """
699-
WITH ranked AS (
700+
-- Ranks incidents of each issue by time to check which incident was the first incident of an issue
701+
WITH time_rank AS (
700702
SELECT
701703
_timestamp,
702704
origin,
703-
id,
705+
issue_id,
704706
ROW_NUMBER() OVER (PARTITION BY issue_id ORDER BY _timestamp) AS rn
705707
FROM incidents
706708
where build_id is not null
707-
)
708-
SELECT
709-
origin,
710-
COUNT(*) FILTER (
711-
WHERE _timestamp BETWEEN
712-
NOW() - INTERVAL %(start_days_ago)s
713-
AND NOW() - INTERVAL %(end_days_ago)s
714-
) AS total_incidents,
715-
COUNT(*) FILTER (
716-
WHERE rn = 1
717-
AND _timestamp BETWEEN
718-
NOW() - INTERVAL %(start_days_ago)s
719-
AND NOW() - INTERVAL %(end_days_ago)s
720-
) AS first_incidents_in_interval
721-
FROM ranked
722-
GROUP BY origin;
723-
"""
724-
725-
# For the lab query, we can't simply join the builds of the tests by lab,
726-
# because we want the builds made by a lab and tests made by a lab,
727-
# not the builds related to tests made by a lab, neither the tests related to builds made by a lab.
728-
lab_summary_query = """
729-
WITH unioned_results AS (
730-
(SELECT
731-
b.misc->>'lab' AS lab,
732-
b.origin,
733-
COUNT(DISTINCT b.id) AS n_builds,
734-
0 AS n_boots,
735-
0 AS n_tests
736-
FROM builds b
737-
WHERE
738-
b.misc->>'lab' IS NOT NULL
739-
AND b._timestamp BETWEEN
740-
NOW() - INTERVAL %(start_days_ago)s
741-
AND NOW() - INTERVAL %(end_days_ago)s
742-
GROUP BY lab, origin
743-
)
744-
UNION ALL
745-
(
709+
),
710+
-- counts total incidents in interval and how many were the first incident of an issue
711+
numbers AS (
746712
SELECT
747-
t.misc->>'runtime' AS lab,
748-
t.origin,
749-
0 AS n_builds,
750-
COUNT(CASE WHEN (t.path LIKE 'boot.%%' OR t.path = 'boot') THEN 1 END) AS n_boots,
751-
COUNT(CASE WHEN (t.path NOT LIKE 'boot.%%' AND t.path != 'boot') THEN 1 END) AS n_tests
752-
FROM tests t
713+
origin,
714+
COUNT(*) FILTER (
715+
WHERE _timestamp BETWEEN
716+
NOW() - INTERVAL %(start_days_ago)s
717+
AND NOW() - INTERVAL %(end_days_ago)s
718+
) AS total_incidents,
719+
COUNT(*) FILTER (
720+
WHERE _timestamp BETWEEN
721+
NOW() - INTERVAL %(start_days_ago)s
722+
AND NOW() - INTERVAL %(end_days_ago)s
723+
AND rn = 1
724+
) AS n_new_issues,
725+
COUNT(DISTINCT issue_id) FILTER (
726+
WHERE _timestamp BETWEEN
727+
NOW() - INTERVAL %(start_days_ago)s
728+
AND NOW() - INTERVAL %(end_days_ago)s
729+
) AS n_issues
730+
FROM time_rank
731+
GROUP BY origin
732+
),
733+
-- counts incidents by issue
734+
grouped_counted AS (
735+
SELECT
736+
inc.origin,
737+
inc.issue_id,
738+
inc.issue_version,
739+
i.comment,
740+
COUNT(inc.*) AS total
741+
FROM incidents inc
742+
JOIN issues i ON inc.issue_id = i.id AND inc.issue_version = i.version
753743
WHERE
754-
t.misc->>'runtime' IS NOT NULL
755-
AND t._timestamp BETWEEN
744+
inc.build_id is not null
745+
AND inc._timestamp BETWEEN
756746
NOW() - INTERVAL %(start_days_ago)s
757747
AND NOW() - INTERVAL %(end_days_ago)s
758-
GROUP BY lab, origin
759-
)
748+
GROUP BY inc.origin, inc.issue_id, inc.issue_version, i.comment
749+
ORDER BY inc.origin, total DESC
750+
),
751+
-- ranks issues by number of incidents
752+
ranked_counted AS (
753+
SELECT
754+
*,
755+
ROW_NUMBER() OVER (PARTITION BY origin ORDER BY total DESC) as ranked
756+
FROM grouped_counted
760757
)
758+
-- combines data into single output,
759+
-- repeating total incidents by origin and adding the top 3 issues per origin
760+
SELECT
761+
n.origin,
762+
n.total_incidents,
763+
n.n_new_issues,
764+
n.n_issues,
765+
r.issue_id,
766+
r.issue_version,
767+
r.comment,
768+
r.total
769+
FROM numbers n
770+
JOIN ranked_counted r
771+
ON n.origin = r.origin
772+
WHERE r.ranked <= 3 AND n.total_incidents > 0
773+
"""
774+
775+
lab_summary_query = """
776+
-- get count of tests of each lab and how many builds are related to those tests
761777
SELECT
762-
lab,
763-
origin,
764-
SUM(n_builds) AS n_builds,
765-
SUM(n_boots) AS n_boots,
766-
SUM(n_tests) AS n_tests
767-
FROM
768-
unioned_results u
769-
GROUP BY u.lab, u.origin
770-
ORDER BY u.lab, u.origin
778+
t.misc->>'runtime' AS lab,
779+
COUNT(DISTINCT t.build_id) AS n_builds,
780+
COUNT(*) FILTER (WHERE t.path LIKE 'boot.%%' OR t.path = 'boot') AS n_boots,
781+
COUNT(*) FILTER (WHERE t.path NOT LIKE 'boot.%%' AND t.path != 'boot') AS n_tests
782+
FROM tests t
783+
WHERE
784+
t.misc->>'runtime' IS NOT NULL
785+
AND t._timestamp BETWEEN
786+
NOW() - INTERVAL %(start_days_ago)s
787+
AND NOW() - INTERVAL %(end_days_ago)s
788+
GROUP BY lab
771789
"""
772790

773791
with connections["default"].cursor() as cursor:
@@ -787,27 +805,41 @@ def get_metrics_data(
787805
prev_lab_summary_results = cursor.fetchall()
788806

789807
try:
808+
build_incidents_by_origin: dict[str, BuildIncidentsCount] = {}
809+
top_issues_by_origin: dict[str, dict[tuple[str, int], TopIssue]] = {}
810+
for row in build_incidents_result:
811+
origin = row[0]
812+
issue_id = row[4]
813+
issue_version = row[5]
814+
build_incidents_by_origin[origin] = BuildIncidentsCount(
815+
total_incidents=row[1],
816+
n_new_issues=row[2],
817+
n_total_issues=row[3],
818+
n_existing_issues=row[3] - row[2],
819+
)
820+
if top_issues_by_origin.get(origin) is None:
821+
top_issues_by_origin[origin] = {}
822+
top_issues_by_origin[origin][(issue_id, issue_version)] = TopIssue(
823+
id=issue_id,
824+
version=issue_version,
825+
comment=row[6],
826+
total_incidents=row[7],
827+
)
828+
790829
data = MetricsReportData(
791830
n_trees=total_objects_result[0],
792831
n_checkouts=total_objects_result[1],
793832
n_builds=total_objects_result[2],
794833
n_tests=total_objects_result[3],
795834
n_issues=total_objects_result[4],
796835
n_incidents=total_objects_result[5],
797-
build_incidents_by_origin={
798-
row[0]: BuildIncidentsByOrigin(
799-
total=row[1],
800-
new_regressions=row[2],
801-
)
802-
for row in build_incidents_result
803-
if row[1] != 0 or row[2] != 0
804-
},
836+
build_incidents_by_origin=build_incidents_by_origin,
837+
top_issues_by_origin=top_issues_by_origin,
805838
lab_maps={
806839
row[0]: LabMetricsData(
807-
origin=row[1],
808-
builds=row[2],
809-
boots=row[3],
810-
tests=row[4],
840+
builds=row[1],
841+
boots=row[2],
842+
tests=row[3],
811843
)
812844
for row in lab_summary_results
813845
},
@@ -817,10 +849,9 @@ def get_metrics_data(
817849
prev_n_tests=prev_total_objects_result[3],
818850
prev_lab_maps={
819851
row[0]: LabMetricsData(
820-
origin=row[1],
821-
builds=row[2],
822-
boots=row[3],
823-
tests=row[4],
852+
builds=row[1],
853+
boots=row[2],
854+
tests=row[3],
824855
)
825856
for row in prev_lab_summary_results
826857
},

0 commit comments

Comments
 (0)