From: Lionel Landwerlin Date: Tue, 2 Oct 2018 16:48:24 +0000 (+0100) Subject: intel/perf: compute number of passes for a set of counters X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d15369332ff400aabb008f9990f7b990b3c8643e;p=mesa.git intel/perf: compute number of passes for a set of counters We want to compute the number of passes required to gather performance data about a set of counters. Signed-off-by: Lionel Landwerlin Reviewed-by: Jason Ekstrand Part-of: --- diff --git a/src/intel/perf/gen_perf.c b/src/intel/perf/gen_perf.c index 36a9e83857f..30b87a02647 100644 --- a/src/intel/perf/gen_perf.c +++ b/src/intel/perf/gen_perf.c @@ -47,6 +47,7 @@ #include "perf/gen_perf_private.h" #include "util/bitscan.h" +#include "util/macros.h" #include "util/mesa-sha1.h" #include "util/u_math.h" @@ -768,6 +769,58 @@ gen_perf_store_configuration(struct gen_perf_config *perf_cfg, int fd, return i915_add_config(perf_cfg, fd, config, generated_guid); } +static uint64_t +get_passes_mask(struct gen_perf_config *perf, + const uint32_t *counter_indices, + uint32_t counter_indices_count) +{ + uint64_t queries_mask = 0; + + assert(perf->n_queries < 64); + + /* Compute the number of passes by going through all counters N times (with + * N the number of queries) to make sure we select the most constraining + * counters first and look at the more flexible ones (that could be + * obtained from multiple queries) later. That way we minimize the number + * of passes required. + */ + for (uint32_t q = 0; q < perf->n_queries; q++) { + for (uint32_t i = 0; i < counter_indices_count; i++) { + assert(counter_indices[i] < perf->n_counters); + + uint32_t idx = counter_indices[i]; + if (__builtin_popcount(perf->counters[idx]->query_mask) != (q + 1)) + continue; + + if (queries_mask & perf->counters[idx]->query_mask) + continue; + + queries_mask |= BITFIELD64_BIT(ffsll(perf->counters[idx]->query_mask) - 1); + } + } + + return queries_mask; +} + +uint32_t +gen_perf_get_n_passes(struct gen_perf_config *perf, + const uint32_t *counter_indices, + uint32_t counter_indices_count, + struct gen_perf_query_info **pass_queries) +{ + uint64_t queries_mask = get_passes_mask(perf, counter_indices, counter_indices_count); + + if (pass_queries) { + uint32_t pass = 0; + for (uint32_t q = 0; q < perf->n_queries; q++) { + if ((1ULL << q) & queries_mask) + pass_queries[pass++] = &perf->queries[q]; + } + } + + return __builtin_popcount(queries_mask); +} + /* Accumulate 32bits OA counters */ static inline void accumulate_uint32(const uint32_t *report0, diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h index 83c6c3b9e3a..d35246d2f10 100644 --- a/src/intel/perf/gen_perf.h +++ b/src/intel/perf/gen_perf.h @@ -321,4 +321,9 @@ gen_perf_new(void *ctx) return perf; } +uint32_t gen_perf_get_n_passes(struct gen_perf_config *perf, + const uint32_t *counter_indices, + uint32_t counter_indices_count, + struct gen_perf_query_info **pass_queries); + #endif /* GEN_PERF_H */