From 3144bc1d33692ed35c6431bca57d3b0d46330cde Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marcin=20=C5=9Alusarz?= Date: Tue, 16 Jun 2020 14:40:21 +0200 Subject: [PATCH] intel/perf: move query_mask and location out of gen_perf_query_counter MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Marcin Ślusarz Reviewed-by: Lionel Landwerlin Reviewed-by: Mark Janes Part-of: --- src/gallium/drivers/iris/iris_monitor.c | 11 +++-- src/intel/perf/gen_perf.c | 65 +++++++++++++++---------- src/intel/perf/gen_perf.h | 27 +++++----- src/intel/vulkan/anv_perf.c | 2 +- 4 files changed, 62 insertions(+), 43 deletions(-) diff --git a/src/gallium/drivers/iris/iris_monitor.c b/src/gallium/drivers/iris/iris_monitor.c index 80bddcc3b31..b615476b2cf 100644 --- a/src/gallium/drivers/iris/iris_monitor.c +++ b/src/gallium/drivers/iris/iris_monitor.c @@ -53,9 +53,10 @@ iris_get_monitor_info(struct pipe_screen *pscreen, unsigned index, return perf_cfg->n_counters; } - struct gen_perf_query_counter *counter = perf_cfg->counters[index]; + struct gen_perf_query_counter_info *counter_info = &perf_cfg->counter_infos[index]; + struct gen_perf_query_counter *counter = counter_info->counter; - info->group_id = counter->location.group_idx; + info->group_id = counter_info->location.group_idx; info->name = counter->name; info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index; @@ -179,7 +180,7 @@ iris_create_monitor_object(struct iris_context *ice, assert(num_queries > 0); int query_index = query_types[0] - PIPE_QUERY_DRIVER_SPECIFIC; assert(query_index <= perf_cfg->n_counters); - const int group = perf_cfg->counters[query_index]->location.group_idx; + const int group = perf_cfg->counter_infos[query_index].location.group_idx; struct iris_monitor_object *monitor = calloc(1, sizeof(struct iris_monitor_object)); @@ -197,9 +198,9 @@ iris_create_monitor_object(struct iris_context *ice, /* all queries must be in the same group */ assert(current_query_index <= perf_cfg->n_counters); - assert(perf_cfg->counters[current_query_index]->location.group_idx == group); + assert(perf_cfg->counter_infos[current_query_index].location.group_idx == group); monitor->active_counters[i] = - perf_cfg->counters[current_query_index]->location.counter_idx; + perf_cfg->counter_infos[current_query_index].location.counter_idx; } /* create the gen_perf_query */ diff --git a/src/intel/perf/gen_perf.c b/src/intel/perf/gen_perf.c index 4de459dfb93..26dc497e486 100644 --- a/src/intel/perf/gen_perf.c +++ b/src/intel/perf/gen_perf.c @@ -594,8 +594,8 @@ i915_get_sseu(int drm_fd, struct drm_i915_gem_context_param_sseu *sseu) static int compare_counters(const void *_c1, const void *_c2) { - const struct gen_perf_query_counter * const *c1 = _c1, * const *c2 = _c2; - return strcmp((*c1)->symbol_name, (*c2)->symbol_name); + const struct gen_perf_query_counter_info *c1 = _c1, *c2 = _c2; + return strcmp(c1->counter->symbol_name, c2->counter->symbol_name); } static void @@ -603,6 +603,21 @@ build_unique_counter_list(struct gen_perf_config *perf) { assert(perf->n_queries < 64); + size_t max_counters = 0; + + for (int q = 0; q < perf->n_queries; q++) + max_counters += perf->queries[q].n_counters; + + /* + * Allocate big enough array to hold maximum possible number of counters. + * We can't alloc it small and realloc when needed because the hash table + * below contains pointers to this array. + */ + struct gen_perf_query_counter_info *counter_infos = + ralloc_array_size(perf, sizeof(counter_infos[0]), max_counters); + + perf->n_counters = 0; + struct hash_table *counters_table = _mesa_hash_table_create(perf, _mesa_hash_string, @@ -612,39 +627,37 @@ build_unique_counter_list(struct gen_perf_config *perf) struct gen_perf_query_info *query = &perf->queries[q]; for (int c = 0; c < query->n_counters; c++) { - struct gen_perf_query_counter *counter, *unique_counter; + struct gen_perf_query_counter *counter; + struct gen_perf_query_counter_info *counter_info; counter = &query->counters[c]; entry = _mesa_hash_table_search(counters_table, counter->symbol_name); if (entry) { - unique_counter = entry->data; - unique_counter->query_mask |= BITFIELD64_BIT(q); + counter_info = entry->data; + counter_info->query_mask |= BITFIELD64_BIT(q); continue; } + assert(perf->n_counters < max_counters); - unique_counter = counter; - unique_counter->query_mask = BITFIELD64_BIT(q); + counter_info = &counter_infos[perf->n_counters++]; + counter_info->counter = counter; + counter_info->query_mask = BITFIELD64_BIT(q); - unique_counter->location.group_idx = q; - unique_counter->location.counter_idx = c; - _mesa_hash_table_insert(counters_table, unique_counter->symbol_name, unique_counter); - } - } - - perf->n_counters = _mesa_hash_table_num_entries(counters_table); - perf->counters = ralloc_array(perf, struct gen_perf_query_counter *, - perf->n_counters); + counter_info->location.group_idx = q; + counter_info->location.counter_idx = c; - int c = 0; - hash_table_foreach(counters_table, entry) { - struct gen_perf_query_counter *counter = entry->data; - perf->counters[c++] = counter; + _mesa_hash_table_insert(counters_table, counter->symbol_name, counter_info); + } } _mesa_hash_table_destroy(counters_table, NULL); - qsort(perf->counters, perf->n_counters, sizeof(perf->counters[0]), + /* Now we can realloc counter_infos array because hash table doesn't exist. */ + perf->counter_infos = reralloc_array_size(perf, counter_infos, + sizeof(counter_infos[0]), perf->n_counters); + + qsort(perf->counter_infos, perf->n_counters, sizeof(perf->counter_infos[0]), compare_counters); } @@ -814,13 +827,13 @@ get_passes_mask(struct gen_perf_config *perf, assert(counter_indices[i] < perf->n_counters); uint32_t idx = counter_indices[i]; - if (__builtin_popcount(perf->counters[idx]->query_mask) != (q + 1)) + if (__builtin_popcount(perf->counter_infos[idx].query_mask) != (q + 1)) continue; - if (queries_mask & perf->counters[idx]->query_mask) + if (queries_mask & perf->counter_infos[idx].query_mask) continue; - queries_mask |= BITFIELD64_BIT(ffsll(perf->counters[idx]->query_mask) - 1); + queries_mask |= BITFIELD64_BIT(ffsll(perf->counter_infos[idx].query_mask) - 1); } } @@ -859,9 +872,9 @@ gen_perf_get_counters_passes(struct gen_perf_config *perf, assert(counter_indices[i] < perf->n_counters); uint32_t idx = counter_indices[i]; - counter_pass[i].counter = perf->counters[idx]; + counter_pass[i].counter = perf->counter_infos[idx].counter; - uint32_t query_idx = ffsll(perf->counters[idx]->query_mask & queries_mask) - 1; + uint32_t query_idx = ffsll(perf->counter_infos[idx].query_mask & queries_mask) - 1; counter_pass[i].query = &perf->queries[query_idx]; uint32_t clear_bits = 63 - query_idx; diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h index ca4a66a2e9d..95aeab9182f 100644 --- a/src/intel/perf/gen_perf.h +++ b/src/intel/perf/gen_perf.h @@ -171,16 +171,6 @@ struct gen_perf_query_counter { enum gen_perf_counter_units units; uint64_t raw_max; size_t offset; - uint64_t query_mask; - - /** - * Each counter can be a part of many groups, each time at different index. - * This struct stores one of those locations. - */ - struct { - int group_idx; /* query/group number */ - int counter_idx; /* index inside of query/group */ - } location; union { uint64_t (*oa_counter_read_uint64)(struct gen_perf_config *perf, @@ -237,6 +227,21 @@ struct gen_perf_query_info { struct gen_perf_registers config; }; +struct gen_perf_query_counter_info { + struct gen_perf_query_counter *counter; + + uint64_t query_mask; + + /** + * Each counter can be a part of many groups, each time at different index. + * This struct stores one of those locations. + */ + struct { + int group_idx; /* query/group number */ + int counter_idx; /* index inside of query/group */ + } location; +}; + struct gen_perf_config { /* Whether i915 has DRM_I915_QUERY_PERF_CONFIG support. */ bool i915_query_supported; @@ -250,7 +255,7 @@ struct gen_perf_config { struct gen_perf_query_info *queries; int n_queries; - struct gen_perf_query_counter **counters; + struct gen_perf_query_counter_info *counter_infos; int n_counters; /* Variables referenced in the XML meta data for OA performance diff --git a/src/intel/vulkan/anv_perf.c b/src/intel/vulkan/anv_perf.c index ff47317fa68..0781ee775c3 100644 --- a/src/intel/vulkan/anv_perf.c +++ b/src/intel/vulkan/anv_perf.c @@ -297,7 +297,7 @@ VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( VK_OUTARRAY_MAKE(out_desc, pCounterDescriptions, &desc_count); for (int c = 0; c < (perf ? perf->n_counters : 0); c++) { - const struct gen_perf_query_counter *gen_counter = perf->counters[c]; + const struct gen_perf_query_counter *gen_counter = perf->counter_infos[c].counter; vk_outarray_append(&out, counter) { counter->unit = gen_perf_counter_unit_to_vk_unit[gen_counter->units]; -- 2.30.2