#include "perf/gen_perf_private.h"
#include "util/bitscan.h"
+#include "util/macros.h"
#include "util/mesa-sha1.h"
#include "util/u_math.h"
perf->sysfs_dev_dir[0] = '\0';
+ if (unlikely(INTEL_DEBUG & DEBUG_NO_OACONFIG))
+ return true;
+
if (fstat(fd, &sb)) {
DBG("Failed to stat DRM fd\n");
return false;
closedir(metricsdir);
}
+/* Register every metric set from the generated OA metrics table directly,
+ * with a dummy config id of 0. Used on the INTEL_DEBUG=no-oaconfig path
+ * (see load_oa_metrics) where the kernel is never asked for configurations.
+ */
+static void
+add_all_metrics(struct gen_perf_config *perf,
+ const struct gen_device_info *devinfo)
+{
+ hash_table_foreach(perf->oa_metrics_table, entry) {
+ const struct gen_perf_query_info *query = entry->data;
+ register_oa_config(perf, devinfo, query, 0);
+ }
+}
+
static bool
kernel_has_dynamic_config_support(struct gen_perf_config *perf, int fd)
{
memcpy(i915_config.uuid, guid, sizeof(i915_config.uuid));
i915_config.n_mux_regs = config->n_mux_regs;
- i915_config.mux_regs_ptr = to_user_pointer(config->mux_regs);
+ i915_config.mux_regs_ptr = to_const_user_pointer(config->mux_regs);
i915_config.n_boolean_regs = config->n_b_counter_regs;
- i915_config.boolean_regs_ptr = to_user_pointer(config->b_counter_regs);
+ i915_config.boolean_regs_ptr = to_const_user_pointer(config->b_counter_regs);
i915_config.n_flex_regs = config->n_flex_regs;
- i915_config.flex_regs_ptr = to_user_pointer(config->flex_regs);
+ i915_config.flex_regs_ptr = to_const_user_pointer(config->flex_regs);
int ret = gen_ioctl(fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &i915_config);
return ret > 0 ? ret : 0;
{
uint64_t min_freq_mhz = 0, max_freq_mhz = 0;
- if (!read_sysfs_drm_device_file_uint64(perf, "gt_min_freq_mhz", &min_freq_mhz))
- return false;
+ if (likely(!(INTEL_DEBUG & DEBUG_NO_OACONFIG))) {
+ if (!read_sysfs_drm_device_file_uint64(perf, "gt_min_freq_mhz", &min_freq_mhz))
+ return false;
- if (!read_sysfs_drm_device_file_uint64(perf, "gt_max_freq_mhz", &max_freq_mhz))
- return false;
+ if (!read_sysfs_drm_device_file_uint64(perf, "gt_max_freq_mhz", &max_freq_mhz))
+ return false;
+ } else {
+ min_freq_mhz = 300;
+ max_freq_mhz = 1000;
+ }
memset(&perf->sys_vars, 0, sizeof(perf->sys_vars));
perf->sys_vars.gt_min_freq = min_freq_mhz * 1000000;
return NULL;
}
+/* qsort() comparator: orders two gen_perf_query_counter entries by their
+ * name string.
+ */
+static int
+gen_perf_compare_counter_names(const void *v1, const void *v2)
+{
+ const struct gen_perf_query_counter *c1 = v1;
+ const struct gen_perf_query_counter *c2 = v2;
+
+ return strcmp(c1->name, c2->name);
+}
+
+/* Sort a query's counter array in place by counter name. */
+static void
+sort_query(struct gen_perf_query_info *q)
+{
+ qsort(q->counters, q->n_counters, sizeof(q->counters[0]),
+ gen_perf_compare_counter_names);
+}
+
static void
load_pipeline_statistic_metrics(struct gen_perf_config *perf_cfg,
const struct gen_device_info *devinfo)
}
query->data_size = sizeof(uint64_t) * query->n_counters;
+
+ sort_query(query);
}
static int
gen_ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg);
}
+/* strcmp() that tolerates NULL: NULL compares equal to NULL and orders
+ * before any non-NULL string.
+ */
+static inline int
+compare_str_or_null(const char *s1, const char *s2)
+{
+ if (s1 == NULL && s2 == NULL)
+ return 0;
+ if (s1 == NULL)
+ return -1;
+ if (s2 == NULL)
+ return 1;
+
+ return strcmp(s1, s2);
+}
+
+/* qsort() comparator: orders two gen_perf_query_counter_info entries first
+ * by category (a NULL category sorts before any non-NULL one), then by
+ * counter name.
+ */
static int
-compare_counters(const void *_c1, const void *_c2)
+compare_counter_categories_and_names(const void *_c1, const void *_c2)
{
- const struct gen_perf_query_counter * const *c1 = _c1, * const *c2 = _c2;
- return strcmp((*c1)->symbol_name, (*c2)->symbol_name);
+ const struct gen_perf_query_counter_info *c1 = (const struct gen_perf_query_counter_info *)_c1;
+ const struct gen_perf_query_counter_info *c2 = (const struct gen_perf_query_counter_info *)_c2;
+
+ /* pipeline counters don't have an assigned category */
+ int r = compare_str_or_null(c1->counter->category, c2->counter->category);
+ if (r)
+ return r;
+
+ return strcmp(c1->counter->name, c2->counter->name);
}
static void
{
assert(perf->n_queries < 64);
+ size_t max_counters = 0;
+
+ for (int q = 0; q < perf->n_queries; q++)
+ max_counters += perf->queries[q].n_counters;
+
+ /*
+ * Allocate big enough array to hold maximum possible number of counters.
+ * We can't alloc it small and realloc when needed because the hash table
+ * below contains pointers to this array.
+ */
+ struct gen_perf_query_counter_info *counter_infos =
+ ralloc_array_size(perf, sizeof(counter_infos[0]), max_counters);
+
+ perf->n_counters = 0;
+
struct hash_table *counters_table =
_mesa_hash_table_create(perf,
_mesa_hash_string,
struct gen_perf_query_info *query = &perf->queries[q];
for (int c = 0; c < query->n_counters; c++) {
- struct gen_perf_query_counter *counter, *unique_counter;
+ struct gen_perf_query_counter *counter;
+ struct gen_perf_query_counter_info *counter_info;
counter = &query->counters[c];
entry = _mesa_hash_table_search(counters_table, counter->symbol_name);
if (entry) {
- unique_counter = entry->data;
- unique_counter->query_mask |= BITFIELD64_BIT(q);
+ counter_info = entry->data;
+ counter_info->query_mask |= BITFIELD64_BIT(q);
continue;
}
+ assert(perf->n_counters < max_counters);
- unique_counter = counter;
- unique_counter->query_mask = BITFIELD64_BIT(q);
+ counter_info = &counter_infos[perf->n_counters++];
+ counter_info->counter = counter;
+ counter_info->query_mask = BITFIELD64_BIT(q);
- _mesa_hash_table_insert(counters_table, unique_counter->symbol_name, unique_counter);
- }
- }
-
- perf->n_counters = _mesa_hash_table_num_entries(counters_table);
- perf->counters = ralloc_array(perf, struct gen_perf_query_counter *,
- perf->n_counters);
+ counter_info->location.group_idx = q;
+ counter_info->location.counter_idx = c;
- int c = 0;
- hash_table_foreach(counters_table, entry) {
- struct gen_perf_query_counter *counter = entry->data;
- perf->counters[c++] = counter;
+ _mesa_hash_table_insert(counters_table, counter->symbol_name, counter_info);
+ }
}
_mesa_hash_table_destroy(counters_table, NULL);
- qsort(perf->counters, perf->n_counters, sizeof(perf->counters[0]),
- compare_counters);
+ /* Now we can shrink the counter_infos array: the hash table that pointed into it has been destroyed. */
+ perf->counter_infos = reralloc_array_size(perf, counter_infos,
+ sizeof(counter_infos[0]), perf->n_counters);
+
+ qsort(perf->counter_infos, perf->n_counters, sizeof(perf->counter_infos[0]),
+ compare_counter_categories_and_names);
}
static bool
-load_oa_metrics(struct gen_perf_config *perf, int fd,
- const struct gen_device_info *devinfo)
+oa_metrics_available(struct gen_perf_config *perf, int fd,
+ const struct gen_device_info *devinfo)
{
perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
bool i915_perf_oa_available = false;
if (paranoid == 0 || geteuid() == 0)
i915_perf_oa_available = true;
}
+
+ perf->platform_supported = oa_register != NULL;
}
- if (!i915_perf_oa_available ||
- !oa_register ||
- !get_sysfs_dev_dir(perf, fd) ||
- !init_oa_sys_vars(perf, devinfo))
- return false;
+ return i915_perf_oa_available &&
+ oa_register &&
+ get_sysfs_dev_dir(perf, fd) &&
+ init_oa_sys_vars(perf, devinfo);
+}
+
+static void
+load_oa_metrics(struct gen_perf_config *perf, int fd,
+ const struct gen_device_info *devinfo)
+{
+ int existing_queries = perf->n_queries;
+
+ perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
perf->oa_metrics_table =
_mesa_hash_table_create(perf, _mesa_hash_string,
*/
oa_register(perf);
- if (likely((INTEL_DEBUG & DEBUG_NO_OACONFIG) == 0) &&
- kernel_has_dynamic_config_support(perf, fd))
- init_oa_configs(perf, fd, devinfo);
- else
- enumerate_sysfs_metrics(perf, devinfo);
+ if (likely(!(INTEL_DEBUG & DEBUG_NO_OACONFIG))) {
+ if (kernel_has_dynamic_config_support(perf, fd))
+ init_oa_configs(perf, fd, devinfo);
+ else
+ enumerate_sysfs_metrics(perf, devinfo);
+ } else {
+ add_all_metrics(perf, devinfo);
+ }
- build_unique_counter_list(perf);
+ /* sort counters in each individual group created by this function by name */
+ for (int i = existing_queries; i < perf->n_queries; ++i)
+ sort_query(&perf->queries[i]);
- return true;
+ /* Select a fallback OA metric. Look for the TestOa metric or use the last
+ * one if none is present (on HSW).
+ */
+ for (int i = existing_queries; i < perf->n_queries; i++) {
+ if (perf->queries[i].symbol_name &&
+ strcmp(perf->queries[i].symbol_name, "TestOa") == 0) {
+ perf->fallback_raw_oa_metric = perf->queries[i].oa_metrics_set_id;
+ break;
+ }
+ }
+ if (perf->fallback_raw_oa_metric == 0)
+ perf->fallback_raw_oa_metric = perf->queries[perf->n_queries - 1].oa_metrics_set_id;
}
struct gen_perf_registers *
* struct gen_perf_query_register_prog maps exactly to the tuple of
* (register offset, register value) returned by the i915.
*/
- i915_config.flex_regs_ptr = to_user_pointer(config->flex_regs);
- i915_config.mux_regs_ptr = to_user_pointer(config->mux_regs);
- i915_config.boolean_regs_ptr = to_user_pointer(config->b_counter_regs);
+ i915_config.flex_regs_ptr = to_const_user_pointer(config->flex_regs);
+ i915_config.mux_regs_ptr = to_const_user_pointer(config->mux_regs);
+ i915_config.boolean_regs_ptr = to_const_user_pointer(config->b_counter_regs);
if (!i915_query_perf_config_data(perf_cfg, fd, guid, &i915_config)) {
ralloc_free(config);
return NULL;
return i915_add_config(perf_cfg, fd, config, generated_guid);
}
+/* Compute the bitmask of queries (one bit per pass) needed to sample all of
+ * the counters in counter_indices.
+ *
+ * perf:                  configuration holding queries/counter_infos
+ * counter_indices:       indices into perf->counter_infos
+ * counter_indices_count: number of entries in counter_indices
+ */
+static uint64_t
+get_passes_mask(struct gen_perf_config *perf,
+                const uint32_t *counter_indices,
+                uint32_t counter_indices_count)
+{
+   uint64_t queries_mask = 0;
+
+   assert(perf->n_queries < 64);
+
+   /* Compute the number of passes by going through all counters N times (with
+    * N the number of queries) to make sure we select the most constraining
+    * counters first and look at the more flexible ones (that could be
+    * obtained from multiple queries) later. That way we minimize the number
+    * of passes required.
+    */
+   for (uint32_t q = 0; q < perf->n_queries; q++) {
+      for (uint32_t i = 0; i < counter_indices_count; i++) {
+         assert(counter_indices[i] < perf->n_counters);
+
+         uint32_t idx = counter_indices[i];
+         /* query_mask is 64bit wide: use the ll popcount variant so that
+          * queries with index >= 32 are not silently dropped.
+          */
+         if (__builtin_popcountll(perf->counter_infos[idx].query_mask) != (q + 1))
+            continue;
+
+         if (queries_mask & perf->counter_infos[idx].query_mask)
+            continue;
+
+         queries_mask |= BITFIELD64_BIT(ffsll(perf->counter_infos[idx].query_mask) - 1);
+      }
+   }
+
+   return queries_mask;
+}
+
+/* Return the number of passes (i.e. OA queries) required to sample all of
+ * the counters in counter_indices. If pass_queries is non-NULL, fill it
+ * with a pointer to the query used for each pass.
+ */
+uint32_t
+gen_perf_get_n_passes(struct gen_perf_config *perf,
+                      const uint32_t *counter_indices,
+                      uint32_t counter_indices_count,
+                      struct gen_perf_query_info **pass_queries)
+{
+   uint64_t queries_mask = get_passes_mask(perf, counter_indices, counter_indices_count);
+
+   if (pass_queries) {
+      uint32_t pass = 0;
+      for (uint32_t q = 0; q < perf->n_queries; q++) {
+         if ((1ULL << q) & queries_mask)
+            pass_queries[pass++] = &perf->queries[q];
+      }
+   }
+
+   /* queries_mask is 64bit wide: use the ll popcount variant so that query
+    * indices >= 32 are counted.
+    */
+   return __builtin_popcountll(queries_mask);
+}
+
+/* For each counter in counter_indices, fill counter_pass[i] with the
+ * counter, the query used to read it, and the 0-based pass in which that
+ * query runs (passes ordered by ascending query index).
+ */
+void
+gen_perf_get_counters_passes(struct gen_perf_config *perf,
+                             const uint32_t *counter_indices,
+                             uint32_t counter_indices_count,
+                             struct gen_perf_counter_pass *counter_pass)
+{
+   uint64_t queries_mask = get_passes_mask(perf, counter_indices, counter_indices_count);
+   /* 64bit mask: use the ll popcount variant to avoid truncating to the low
+    * 32 query bits.
+    */
+   ASSERTED uint32_t n_passes = __builtin_popcountll(queries_mask);
+
+   for (uint32_t i = 0; i < counter_indices_count; i++) {
+      assert(counter_indices[i] < perf->n_counters);
+
+      uint32_t idx = counter_indices[i];
+      counter_pass[i].counter = perf->counter_infos[idx].counter;
+
+      /* First scheduled query able to provide this counter. */
+      uint32_t query_idx = ffsll(perf->counter_infos[idx].query_mask & queries_mask) - 1;
+      counter_pass[i].query = &perf->queries[query_idx];
+
+      /* Pass number = number of scheduled queries at or below query_idx,
+       * minus one; mask off the higher bits before counting.
+       */
+      uint32_t clear_bits = 63 - query_idx;
+      counter_pass[i].pass = __builtin_popcountll((queries_mask << clear_bits) >> clear_bits) - 1;
+      assert(counter_pass[i].pass < n_passes);
+   }
+}
+
/* Accumulate 32bits OA counters */
static inline void
accumulate_uint32(const uint32_t *report0,
const uint32_t *start,
const uint32_t *end)
{
- int i, idx = 0;
+ int i;
if (result->hw_id == OA_REPORT_INVALID_CTX_ID &&
start[2] != OA_REPORT_INVALID_CTX_ID)
switch (query->oa_format) {
case I915_OA_FORMAT_A32u40_A4u32_B8_C8:
- accumulate_uint32(start + 1, end + 1, result->accumulator + idx++); /* timestamp */
- accumulate_uint32(start + 3, end + 3, result->accumulator + idx++); /* clock */
+ accumulate_uint32(start + 1, end + 1,
+ result->accumulator + query->gpu_time_offset); /* timestamp */
+ accumulate_uint32(start + 3, end + 3,
+ result->accumulator + query->gpu_clock_offset); /* clock */
/* 32x 40bit A counters... */
- for (i = 0; i < 32; i++)
- accumulate_uint40(i, start, end, result->accumulator + idx++);
+ for (i = 0; i < 32; i++) {
+ accumulate_uint40(i, start, end,
+ result->accumulator + query->a_offset + i);
+ }
/* 4x 32bit A counters... */
- for (i = 0; i < 4; i++)
- accumulate_uint32(start + 36 + i, end + 36 + i, result->accumulator + idx++);
+ for (i = 0; i < 4; i++) {
+ accumulate_uint32(start + 36 + i, end + 36 + i,
+ result->accumulator + query->a_offset + 32 + i);
+ }
- /* 8x 32bit B counters + 8x 32bit C counters... */
- for (i = 0; i < 16; i++)
- accumulate_uint32(start + 48 + i, end + 48 + i, result->accumulator + idx++);
+ /* 8x 32bit B counters */
+ for (i = 0; i < 8; i++) {
+ accumulate_uint32(start + 48 + i, end + 48 + i,
+ result->accumulator + query->b_offset + i);
+ }
+
+ /* 8x 32bit C counters... */
+ for (i = 0; i < 8; i++) {
+ accumulate_uint32(start + 56 + i, end + 56 + i,
+ result->accumulator + query->c_offset + i);
+ }
break;
case I915_OA_FORMAT_A45_B8_C8:
accumulate_uint32(start + 1, end + 1, result->accumulator); /* timestamp */
- for (i = 0; i < 61; i++)
- accumulate_uint32(start + 3 + i, end + 3 + i, result->accumulator + 1 + i);
+ for (i = 0; i < 61; i++) {
+ accumulate_uint32(start + 3 + i, end + 3 + i,
+ result->accumulator + query->a_offset + i);
+ }
break;
default:
result->hw_id = OA_REPORT_INVALID_CTX_ID; /* invalid */
}
+/* qsort() comparator: orders two query groups by their name string. */
+static int
+gen_perf_compare_query_names(const void *v1, const void *v2)
+{
+ const struct gen_perf_query_info *q1 = v1;
+ const struct gen_perf_query_info *q2 = v2;
+
+ return strcmp(q1->name, q2->name);
+}
+
void
gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
const struct gen_device_info *devinfo,
load_pipeline_statistic_metrics(perf_cfg, devinfo);
gen_perf_register_mdapi_statistic_query(perf_cfg, devinfo);
}
- if (load_oa_metrics(perf_cfg, drm_fd, devinfo))
+
+ bool oa_metrics = oa_metrics_available(perf_cfg, drm_fd, devinfo);
+ if (oa_metrics)
+ load_oa_metrics(perf_cfg, drm_fd, devinfo);
+
+ /* sort query groups by name */
+ qsort(perf_cfg->queries, perf_cfg->n_queries,
+ sizeof(perf_cfg->queries[0]), gen_perf_compare_query_names);
+
+ build_unique_counter_list(perf_cfg);
+
+ if (oa_metrics)
gen_perf_register_mdapi_oa_query(perf_cfg, devinfo);
}