#include "perf/gen_perf_private.h"
#include "util/bitscan.h"
+#include "util/macros.h"
#include "util/mesa-sha1.h"
#include "util/u_math.h"
perf->sysfs_dev_dir[0] = '\0';
+ if (unlikely(INTEL_DEBUG & DEBUG_NO_OACONFIG))
+ return true;
+
if (fstat(fd, &sb)) {
DBG("Failed to stat DRM fd\n");
return false;
static void
register_oa_config(struct gen_perf_config *perf,
+ const struct gen_device_info *devinfo,
const struct gen_perf_query_info *query,
uint64_t config_id)
{
gen_perf_append_query_info(perf, 0);
*registered_query = *query;
+ registered_query->oa_format = devinfo->gen >= 8 ?
+ I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_A45_B8_C8;
registered_query->oa_metrics_set_id = config_id;
DBG("metric set registered: id = %" PRIu64", guid = %s\n",
registered_query->oa_metrics_set_id, query->guid);
}
static void
-enumerate_sysfs_metrics(struct gen_perf_config *perf)
+enumerate_sysfs_metrics(struct gen_perf_config *perf,
+ const struct gen_device_info *devinfo)
{
DIR *metricsdir = NULL;
struct dirent *metric_entry;
continue;
}
- register_oa_config(perf, (const struct gen_perf_query_info *)entry->data, id);
+ register_oa_config(perf, devinfo,
+ (const struct gen_perf_query_info *)entry->data, id);
} else
DBG("metric set not known by mesa (skipping)\n");
}
closedir(metricsdir);
}
+static void
+add_all_metrics(struct gen_perf_config *perf,
+ const struct gen_device_info *devinfo)
+{
+ hash_table_foreach(perf->oa_metrics_table, entry) {
+ const struct gen_perf_query_info *query = entry->data;
+ register_oa_config(perf, devinfo, query, 0);
+ }
+}
+
static bool
kernel_has_dynamic_config_support(struct gen_perf_config *perf, int fd)
{
}
static void
-init_oa_configs(struct gen_perf_config *perf, int fd)
+init_oa_configs(struct gen_perf_config *perf, int fd,
+ const struct gen_device_info *devinfo)
{
hash_table_foreach(perf->oa_metrics_table, entry) {
const struct gen_perf_query_info *query = entry->data;
if (gen_perf_load_metric_id(perf, query->guid, &config_id)) {
DBG("metric set: %s (already loaded)\n", query->guid);
- register_oa_config(perf, query, config_id);
+ register_oa_config(perf, devinfo, query, config_id);
continue;
}
continue;
}
- register_oa_config(perf, query, ret);
+ register_oa_config(perf, devinfo, query, ret);
DBG("metric set: %s (added)\n", query->guid);
}
}
{
uint64_t min_freq_mhz = 0, max_freq_mhz = 0;
- if (!read_sysfs_drm_device_file_uint64(perf, "gt_min_freq_mhz", &min_freq_mhz))
- return false;
+ if (likely(!(INTEL_DEBUG & DEBUG_NO_OACONFIG))) {
+ if (!read_sysfs_drm_device_file_uint64(perf, "gt_min_freq_mhz", &min_freq_mhz))
+ return false;
- if (!read_sysfs_drm_device_file_uint64(perf, "gt_max_freq_mhz", &max_freq_mhz))
- return false;
+ if (!read_sysfs_drm_device_file_uint64(perf, "gt_max_freq_mhz", &max_freq_mhz))
+ return false;
+ } else {
+ min_freq_mhz = 300;
+ max_freq_mhz = 1000;
+ }
memset(&perf->sys_vars, 0, sizeof(perf->sys_vars));
perf->sys_vars.gt_min_freq = min_freq_mhz * 1000000;
gen_ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg);
}
+static int
+compare_counters(const void *_c1, const void *_c2)
+{
+ const struct gen_perf_query_counter * const *c1 = _c1, * const *c2 = _c2;
+ return strcmp((*c1)->symbol_name, (*c2)->symbol_name);
+}
+
+static void
+build_unique_counter_list(struct gen_perf_config *perf)
+{
+ assert(perf->n_queries < 64);
+
+ struct hash_table *counters_table =
+ _mesa_hash_table_create(perf,
+ _mesa_hash_string,
+ _mesa_key_string_equal);
+ struct hash_entry *entry;
+ for (int q = 0; q < perf->n_queries ; q++) {
+ struct gen_perf_query_info *query = &perf->queries[q];
+
+ for (int c = 0; c < query->n_counters; c++) {
+ struct gen_perf_query_counter *counter, *unique_counter;
+
+ counter = &query->counters[c];
+ entry = _mesa_hash_table_search(counters_table, counter->symbol_name);
+
+ if (entry) {
+ unique_counter = entry->data;
+ unique_counter->query_mask |= BITFIELD64_BIT(q);
+ continue;
+ }
+
+ unique_counter = counter;
+ unique_counter->query_mask = BITFIELD64_BIT(q);
+
+ _mesa_hash_table_insert(counters_table, unique_counter->symbol_name, unique_counter);
+ }
+ }
+
+ perf->n_counters = _mesa_hash_table_num_entries(counters_table);
+ perf->counters = ralloc_array(perf, struct gen_perf_query_counter *,
+ perf->n_counters);
+
+ int c = 0;
+ hash_table_foreach(counters_table, entry) {
+ struct gen_perf_query_counter *counter = entry->data;
+ perf->counters[c++] = counter;
+ }
+
+ _mesa_hash_table_destroy(counters_table, NULL);
+
+ qsort(perf->counters, perf->n_counters, sizeof(perf->counters[0]),
+ compare_counters);
+}
+
static bool
load_oa_metrics(struct gen_perf_config *perf, int fd,
- const struct gen_device_info *devinfo)
+ const struct gen_device_info *devinfo)
{
perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
bool i915_perf_oa_available = false;
}
}
+ perf->platform_supported = oa_register != NULL;
+
if (!i915_perf_oa_available ||
!oa_register ||
!get_sysfs_dev_dir(perf, fd) ||
*/
oa_register(perf);
- if (likely((INTEL_DEBUG & DEBUG_NO_OACONFIG) == 0) &&
- kernel_has_dynamic_config_support(perf, fd))
- init_oa_configs(perf, fd);
- else
- enumerate_sysfs_metrics(perf);
+ if (likely(!(INTEL_DEBUG & DEBUG_NO_OACONFIG))) {
+ if (kernel_has_dynamic_config_support(perf, fd))
+ init_oa_configs(perf, fd, devinfo);
+ else
+ enumerate_sysfs_metrics(perf, devinfo);
+ } else {
+ add_all_metrics(perf, devinfo);
+ }
+
+ build_unique_counter_list(perf);
return true;
}
return i915_add_config(perf_cfg, fd, config, generated_guid);
}
+static uint64_t
+get_passes_mask(struct gen_perf_config *perf,
+ const uint32_t *counter_indices,
+ uint32_t counter_indices_count)
+{
+ uint64_t queries_mask = 0;
+
+ assert(perf->n_queries < 64);
+
+ /* Compute the number of passes by going through all counters N times (with
+ * N the number of queries) to make sure we select the most constraining
+ * counters first and look at the more flexible ones (that could be
+ * obtained from multiple queries) later. That way we minimize the number
+ * of passes required.
+ */
+ for (uint32_t q = 0; q < perf->n_queries; q++) {
+ for (uint32_t i = 0; i < counter_indices_count; i++) {
+ assert(counter_indices[i] < perf->n_counters);
+
+ uint32_t idx = counter_indices[i];
+ if (__builtin_popcount(perf->counters[idx]->query_mask) != (q + 1))
+ continue;
+
+ if (queries_mask & perf->counters[idx]->query_mask)
+ continue;
+
+ queries_mask |= BITFIELD64_BIT(ffsll(perf->counters[idx]->query_mask) - 1);
+ }
+ }
+
+ return queries_mask;
+}
+
+uint32_t
+gen_perf_get_n_passes(struct gen_perf_config *perf,
+ const uint32_t *counter_indices,
+ uint32_t counter_indices_count,
+ struct gen_perf_query_info **pass_queries)
+{
+ uint64_t queries_mask = get_passes_mask(perf, counter_indices, counter_indices_count);
+
+ if (pass_queries) {
+ uint32_t pass = 0;
+ for (uint32_t q = 0; q < perf->n_queries; q++) {
+ if ((1ULL << q) & queries_mask)
+ pass_queries[pass++] = &perf->queries[q];
+ }
+ }
+
+ return __builtin_popcount(queries_mask);
+}
+
+void
+gen_perf_get_counters_passes(struct gen_perf_config *perf,
+ const uint32_t *counter_indices,
+ uint32_t counter_indices_count,
+ struct gen_perf_counter_pass *counter_pass)
+{
+ uint64_t queries_mask = get_passes_mask(perf, counter_indices, counter_indices_count);
+ uint32_t n_passes = __builtin_popcount(queries_mask);
+
+ for (uint32_t i = 0; i < counter_indices_count; i++) {
+ assert(counter_indices[i] < perf->n_counters);
+
+ uint32_t idx = counter_indices[i];
+ counter_pass[i].counter = perf->counters[idx];
+
+ uint32_t query_idx = ffsll(perf->counters[idx]->query_mask & queries_mask) - 1;
+ counter_pass[i].query = &perf->queries[query_idx];
+
+ uint32_t clear_bits = 63 - query_idx;
+ counter_pass[i].pass = __builtin_popcount((queries_mask << clear_bits) >> clear_bits) - 1;
+ assert(counter_pass[i].pass < n_passes);
+ }
+}
+
/* Accumulate 32bits OA counters */
static inline void
accumulate_uint32(const uint32_t *report0,
const uint32_t *start,
const uint32_t *end)
{
- int i, idx = 0;
+ int i;
if (result->hw_id == OA_REPORT_INVALID_CTX_ID &&
start[2] != OA_REPORT_INVALID_CTX_ID)
switch (query->oa_format) {
case I915_OA_FORMAT_A32u40_A4u32_B8_C8:
- accumulate_uint32(start + 1, end + 1, result->accumulator + idx++); /* timestamp */
- accumulate_uint32(start + 3, end + 3, result->accumulator + idx++); /* clock */
+ accumulate_uint32(start + 1, end + 1,
+ result->accumulator + query->gpu_time_offset); /* timestamp */
+ accumulate_uint32(start + 3, end + 3,
+ result->accumulator + query->gpu_clock_offset); /* clock */
/* 32x 40bit A counters... */
- for (i = 0; i < 32; i++)
- accumulate_uint40(i, start, end, result->accumulator + idx++);
+ for (i = 0; i < 32; i++) {
+ accumulate_uint40(i, start, end,
+ result->accumulator + query->a_offset + i);
+ }
/* 4x 32bit A counters... */
- for (i = 0; i < 4; i++)
- accumulate_uint32(start + 36 + i, end + 36 + i, result->accumulator + idx++);
+ for (i = 0; i < 4; i++) {
+ accumulate_uint32(start + 36 + i, end + 36 + i,
+ result->accumulator + query->a_offset + 32 + i);
+ }
- /* 8x 32bit B counters + 8x 32bit C counters... */
- for (i = 0; i < 16; i++)
- accumulate_uint32(start + 48 + i, end + 48 + i, result->accumulator + idx++);
+ /* 8x 32bit B counters */
+ for (i = 0; i < 8; i++) {
+ accumulate_uint32(start + 48 + i, end + 48 + i,
+ result->accumulator + query->b_offset + i);
+ }
+
+ /* 8x 32bit C counters... */
+ for (i = 0; i < 8; i++) {
+ accumulate_uint32(start + 56 + i, end + 56 + i,
+ result->accumulator + query->c_offset + i);
+ }
break;
case I915_OA_FORMAT_A45_B8_C8:
accumulate_uint32(start + 1, end + 1, result->accumulator); /* timestamp */
- for (i = 0; i < 61; i++)
- accumulate_uint32(start + 3 + i, end + 3 + i, result->accumulator + 1 + i);
+ for (i = 0; i < 61; i++) {
+ accumulate_uint32(start + 3 + i, end + 3 + i,
+ result->accumulator + query->a_offset + i);
+ }
break;
default: