Skip to content

Commit 2cc7aa9

Browse files
captain5050acmel
authored and committed
perf stat: Refactor retry/skip/fatal error handling
For the sake of Intel topdown events commit 9eac561 ("perf stat: Don't skip failing group events") changed 'perf stat' error handling making it so that more errors were fatal and didn't report "<not supported>" events. The change outside of topdown events was unintentional. The notion of "fatal" error handling was introduced in commit e0e6a6c ("perf stat: Factor out open error handling") and refined in commits like commit cb5ef60 ("perf stat: Error out unsupported group leader immediately") to be an approach for avoiding later assertion failures in the code base. This change fixes those issues and removes the notion of a fatal error on an event. If all events fail to open then a fatal error occurs with the previous fatal error message. This seems to best match the notion of supported events and allowing some errors not to stop 'perf stat', while allowing the truly fatal no-event case to terminate the tool early. The evsel->errored flag is only used in the stat code but always just means !evsel->supported, although there is a comment about it being sticky. Force all evsels to be supported in evsel__init and then clear this when evsel__open fails. When an event is tried, the supported flag is set to true again. This simplifies the notion of whether an evsel is broken. In the get_group_fd code, fail to get a group fd when the evsel isn't supported. If the leader isn't supported then it is also expected that there is no group_fd as the leader will have been skipped. Therefore change the BUG_ON test to be on supported rather than skippable. This corrects the assertion errors that were the reason for the previous fatal error handling. 
Fixes: 9eac561 ("perf stat: Don't skip failing group events") Reviewed-by: James Clark <james.clark@linaro.org> Signed-off-by: Ian Rogers <irogers@google.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Chun-Tse Shao <ctshao@google.com> Cc: Dapeng Mi <dapeng1.mi@linux.intel.com> Cc: Howard Chu <howardchu95@gmail.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Falcon <thomas.falcon@intel.com> Link: https://lore.kernel.org/r/20251002220727.1889799-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
1 parent 6026ab6 commit 2cc7aa9

4 files changed

Lines changed: 76 additions & 90 deletions

File tree

tools/perf/builtin-record.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1408,8 +1408,6 @@ static int record__open(struct record *rec)
14081408
ui__error("%s\n", msg);
14091409
goto out;
14101410
}
1411-
1412-
pos->supported = true;
14131411
}
14141412

14151413
if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {

tools/perf/builtin-stat.c

Lines changed: 51 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -610,22 +610,13 @@ static int dispatch_events(bool forks, int timeout, int interval, int *times)
610610
enum counter_recovery {
611611
COUNTER_SKIP,
612612
COUNTER_RETRY,
613-
COUNTER_FATAL,
614613
};
615614

616615
static enum counter_recovery stat_handle_error(struct evsel *counter, int err)
617616
{
618617
char msg[BUFSIZ];
619618

620-
if (counter->skippable) {
621-
if (verbose > 0) {
622-
ui__warning("skipping event %s that kernel failed to open .\n",
623-
evsel__name(counter));
624-
}
625-
counter->supported = false;
626-
counter->errored = true;
627-
return COUNTER_SKIP;
628-
}
619+
assert(!counter->supported);
629620

630621
/*
631622
* PPC returns ENXIO for HW counters until 2.6.37
@@ -636,44 +627,34 @@ static enum counter_recovery stat_handle_error(struct evsel *counter, int err)
636627
ui__warning("%s event is not supported by the kernel.\n",
637628
evsel__name(counter));
638629
}
639-
counter->supported = false;
640-
/*
641-
* errored is a sticky flag that means one of the counter's
642-
* cpu event had a problem and needs to be reexamined.
643-
*/
644-
counter->errored = true;
645-
} else if (evsel__fallback(counter, &target, err, msg, sizeof(msg))) {
630+
return COUNTER_SKIP;
631+
}
632+
if (evsel__fallback(counter, &target, err, msg, sizeof(msg))) {
646633
if (verbose > 0)
647634
ui__warning("%s\n", msg);
635+
counter->supported = true;
648636
return COUNTER_RETRY;
649-
} else if (target__has_per_thread(&target) && err != EOPNOTSUPP &&
650-
evsel_list->core.threads &&
651-
evsel_list->core.threads->err_thread != -1) {
637+
}
638+
if (target__has_per_thread(&target) && err != EOPNOTSUPP &&
639+
evsel_list->core.threads && evsel_list->core.threads->err_thread != -1) {
652640
/*
653641
* For global --per-thread case, skip current
654642
* error thread.
655643
*/
656644
if (!thread_map__remove(evsel_list->core.threads,
657645
evsel_list->core.threads->err_thread)) {
658646
evsel_list->core.threads->err_thread = -1;
647+
counter->supported = true;
659648
return COUNTER_RETRY;
660649
}
661-
} else if (err == EOPNOTSUPP) {
662-
if (verbose > 0) {
663-
ui__warning("%s event is not supported by the kernel.\n",
664-
evsel__name(counter));
665-
}
666-
counter->supported = false;
667-
counter->errored = true;
668650
}
669-
670-
evsel__open_strerror(counter, &target, err, msg, sizeof(msg));
671-
ui__error("%s\n", msg);
672-
673-
if (child_pid != -1)
674-
kill(child_pid, SIGTERM);
675-
676-
return COUNTER_FATAL;
651+
if (verbose > 0) {
652+
ui__warning(err == EOPNOTSUPP
653+
? "%s event is not supported by the kernel.\n"
654+
: "skipping event %s that kernel failed to open.\n",
655+
evsel__name(counter));
656+
}
657+
return COUNTER_SKIP;
677658
}
678659

679660
static int create_perf_stat_counter(struct evsel *evsel,
@@ -746,8 +727,8 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
746727
bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
747728
struct evlist_cpu_iterator evlist_cpu_itr;
748729
struct affinity saved_affinity, *affinity = NULL;
749-
int err;
750-
bool second_pass = false;
730+
int err, open_err = 0;
731+
bool second_pass = false, has_supported_counters;
751732

752733
if (forks) {
753734
if (evlist__prepare_workload(evsel_list, &target, argv, is_pipe, workload_exec_failed_signal) < 0) {
@@ -787,44 +768,37 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
787768
if (target.use_bpf)
788769
break;
789770

790-
if (counter->reset_group || counter->errored)
771+
if (counter->reset_group || !counter->supported)
791772
continue;
792773
if (evsel__is_bperf(counter))
793774
continue;
794-
try_again:
795-
if (create_perf_stat_counter(counter, &stat_config,
796-
evlist_cpu_itr.cpu_map_idx) < 0) {
797775

776+
while (true) {
777+
if (create_perf_stat_counter(counter, &stat_config,
778+
evlist_cpu_itr.cpu_map_idx) == 0)
779+
break;
780+
781+
open_err = errno;
798782
/*
799783
* Weak group failed. We cannot just undo this here
800784
* because earlier CPUs might be in group mode, and the kernel
801785
* doesn't support mixing group and non group reads. Defer
802786
* it to later.
803787
* Don't close here because we're in the wrong affinity.
804788
*/
805-
if ((errno == EINVAL || errno == EBADF) &&
789+
if ((open_err == EINVAL || open_err == EBADF) &&
806790
evsel__leader(counter) != counter &&
807791
counter->weak_group) {
808792
evlist__reset_weak_group(evsel_list, counter, false);
809793
assert(counter->reset_group);
794+
counter->supported = true;
810795
second_pass = true;
811-
continue;
812-
}
813-
814-
switch (stat_handle_error(counter, errno)) {
815-
case COUNTER_FATAL:
816-
err = -1;
817-
goto err_out;
818-
case COUNTER_RETRY:
819-
goto try_again;
820-
case COUNTER_SKIP:
821-
continue;
822-
default:
823796
break;
824797
}
825798

799+
if (stat_handle_error(counter, open_err) != COUNTER_RETRY)
800+
break;
826801
}
827-
counter->supported = true;
828802
}
829803

830804
if (second_pass) {
@@ -837,7 +811,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
837811
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
838812
counter = evlist_cpu_itr.evsel;
839813

840-
if (!counter->reset_group && !counter->errored)
814+
if (!counter->reset_group && counter->supported)
841815
continue;
842816

843817
perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx);
@@ -848,34 +822,29 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
848822

849823
if (!counter->reset_group)
850824
continue;
851-
try_again_reset:
852-
pr_debug2("reopening weak %s\n", evsel__name(counter));
853-
if (create_perf_stat_counter(counter, &stat_config,
854-
evlist_cpu_itr.cpu_map_idx) < 0) {
855-
856-
switch (stat_handle_error(counter, errno)) {
857-
case COUNTER_FATAL:
858-
err = -1;
859-
goto err_out;
860-
case COUNTER_RETRY:
861-
goto try_again_reset;
862-
case COUNTER_SKIP:
863-
continue;
864-
default:
825+
826+
while (true) {
827+
pr_debug2("reopening weak %s\n", evsel__name(counter));
828+
if (create_perf_stat_counter(counter, &stat_config,
829+
evlist_cpu_itr.cpu_map_idx) == 0)
830+
break;
831+
832+
open_err = errno;
833+
if (stat_handle_error(counter, open_err) != COUNTER_RETRY)
865834
break;
866-
}
867835
}
868-
counter->supported = true;
869836
}
870837
}
871838
affinity__cleanup(affinity);
872839
affinity = NULL;
873840

841+
has_supported_counters = false;
874842
evlist__for_each_entry(evsel_list, counter) {
875843
if (!counter->supported) {
876844
perf_evsel__free_fd(&counter->core);
877845
continue;
878846
}
847+
has_supported_counters = true;
879848

880849
l = strlen(counter->unit);
881850
if (l > stat_config.unit_width)
@@ -887,6 +856,16 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
887856
goto err_out;
888857
}
889858
}
859+
if (!has_supported_counters) {
860+
evsel__open_strerror(evlist__first(evsel_list), &target, open_err,
861+
msg, sizeof(msg));
862+
ui__error("No supported events found.\n%s\n", msg);
863+
864+
if (child_pid != -1)
865+
kill(child_pid, SIGTERM);
866+
err = -1;
867+
goto err_out;
868+
}
890869

891870
if (evlist__apply_filters(evsel_list, &counter, &target)) {
892871
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",

tools/perf/util/evsel.c

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,7 @@ void evsel__init(struct evsel *evsel,
407407
evsel->collect_stat = false;
408408
evsel->group_pmu_name = NULL;
409409
evsel->skippable = false;
410+
evsel->supported = true;
410411
evsel->alternate_hw_config = PERF_COUNT_HW_MAX;
411412
evsel->script_output_type = -1; // FIXME: OUTPUT_TYPE_UNSET, see builtin-script.c
412413
}
@@ -1941,7 +1942,7 @@ static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread)
19411942
struct evsel *leader = evsel__leader(evsel);
19421943
int fd;
19431944

1944-
if (evsel__is_group_leader(evsel))
1945+
if (!evsel->supported || evsel__is_group_leader(evsel))
19451946
return -1;
19461947

19471948
/*
@@ -1955,7 +1956,7 @@ static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread)
19551956
return -1;
19561957

19571958
fd = FD(leader, cpu_map_idx, thread);
1958-
BUG_ON(fd == -1 && !leader->skippable);
1959+
BUG_ON(fd == -1 && leader->supported);
19591960

19601961
/*
19611962
* When the leader has been skipped, return -2 to distinguish from no
@@ -2573,12 +2574,14 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
25732574
enum rlimit_action set_rlimit = NO_CHANGE;
25742575
struct perf_cpu cpu;
25752576

2576-
if (evsel__is_retire_lat(evsel))
2577-
return evsel__tpebs_open(evsel);
2577+
if (evsel__is_retire_lat(evsel)) {
2578+
err = evsel__tpebs_open(evsel);
2579+
goto out;
2580+
}
25782581

25792582
err = __evsel__prepare_open(evsel, cpus, threads);
25802583
if (err)
2581-
return err;
2584+
goto out;
25822585

25832586
if (cpus == NULL)
25842587
cpus = empty_cpu_map;
@@ -2598,19 +2601,22 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
25982601
display_attr(&evsel->core.attr);
25992602

26002603
if (evsel__is_tool(evsel)) {
2601-
return evsel__tool_pmu_open(evsel, threads,
2602-
start_cpu_map_idx,
2603-
end_cpu_map_idx);
2604+
err = evsel__tool_pmu_open(evsel, threads,
2605+
start_cpu_map_idx,
2606+
end_cpu_map_idx);
2607+
goto out;
26042608
}
26052609
if (evsel__is_hwmon(evsel)) {
2606-
return evsel__hwmon_pmu_open(evsel, threads,
2607-
start_cpu_map_idx,
2608-
end_cpu_map_idx);
2610+
err = evsel__hwmon_pmu_open(evsel, threads,
2611+
start_cpu_map_idx,
2612+
end_cpu_map_idx);
2613+
goto out;
26092614
}
26102615
if (evsel__is_drm(evsel)) {
2611-
return evsel__drm_pmu_open(evsel, threads,
2612-
start_cpu_map_idx,
2613-
end_cpu_map_idx);
2616+
err = evsel__drm_pmu_open(evsel, threads,
2617+
start_cpu_map_idx,
2618+
end_cpu_map_idx);
2619+
goto out;
26142620
}
26152621

26162622
for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
@@ -2689,7 +2695,8 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
26892695
}
26902696
}
26912697

2692-
return 0;
2698+
err = 0;
2699+
goto out;
26932700

26942701
try_fallback:
26952702
if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus),
@@ -2728,6 +2735,9 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
27282735
thread = nthreads;
27292736
} while (--idx >= 0);
27302737
errno = old_errno;
2738+
out:
2739+
if (err)
2740+
evsel->supported = false;
27312741
return err;
27322742
}
27332743

tools/perf/util/evsel.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,6 @@ struct evsel {
121121
bool forced_leader;
122122
bool cmdline_group_boundary;
123123
bool reset_group;
124-
bool errored;
125124
bool needs_auxtrace_mmap;
126125
bool default_metricgroup; /* A member of the Default metricgroup */
127126
bool needs_uniquify;

0 commit comments

Comments
 (0)