#include "perf/gen_perf_regs.h"
#include "perf/gen_perf_mdapi.h"
#include "perf/gen_perf_metrics.h"
+#include "perf/gen_perf_private.h"
#include "util/bitscan.h"
+#include "util/macros.h"
#include "util/mesa-sha1.h"
#include "util/u_math.h"
#define OA_REPORT_INVALID_CTX_ID (0xffffffff)
-static inline uint64_t to_user_pointer(void *ptr)
-{
- return (uintptr_t) ptr;
-}
-
static bool
is_dir_or_link(const struct dirent *entry, const char *parent_dir)
{
perf->sysfs_dev_dir[0] = '\0';
+ if (unlikely(INTEL_DEBUG & DEBUG_NO_OACONFIG))
+ return true;
+
if (fstat(fd, &sb)) {
DBG("Failed to stat DRM fd\n");
return false;
return read_file_uint64(buf, value);
}
-static inline struct gen_perf_query_info *
-append_query_info(struct gen_perf_config *perf, int max_counters)
-{
- struct gen_perf_query_info *query;
-
- perf->queries = reralloc(perf, perf->queries,
- struct gen_perf_query_info,
- ++perf->n_queries);
- query = &perf->queries[perf->n_queries - 1];
- memset(query, 0, sizeof(*query));
-
- if (max_counters > 0) {
- query->max_counters = max_counters;
- query->counters =
- rzalloc_array(perf, struct gen_perf_query_counter, max_counters);
- }
-
- return query;
-}
-
static void
register_oa_config(struct gen_perf_config *perf,
+                   const struct gen_device_info *devinfo,
                   const struct gen_perf_query_info *query,
                   uint64_t config_id)
{
-   struct gen_perf_query_info *registered_query = append_query_info(perf, 0);
+   struct gen_perf_query_info *registered_query =
+      gen_perf_append_query_info(perf, 0);
   *registered_query = *query;
+   /* Gen8+ introduced the A32u40_A4u32_B8_C8 OA report format; earlier
+    * generations only support A45_B8_C8.
+    */
+   registered_query->oa_format = devinfo->gen >= 8 ?
+      I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_A45_B8_C8;
   registered_query->oa_metrics_set_id = config_id;
   DBG("metric set registered: id = %" PRIu64", guid = %s\n",
       registered_query->oa_metrics_set_id, query->guid);
}
static void
-enumerate_sysfs_metrics(struct gen_perf_config *perf)
+enumerate_sysfs_metrics(struct gen_perf_config *perf,
+ const struct gen_device_info *devinfo)
{
DIR *metricsdir = NULL;
struct dirent *metric_entry;
continue;
}
- register_oa_config(perf, (const struct gen_perf_query_info *)entry->data, id);
+ register_oa_config(perf, devinfo,
+ (const struct gen_perf_query_info *)entry->data, id);
} else
DBG("metric set not known by mesa (skipping)\n");
}
closedir(metricsdir);
}
+/* Register every metric set from the oa_metrics_table with a dummy kernel
+ * config id of 0.  Used on the INTEL_DEBUG=no-oaconfig path where we never
+ * talk to the kernel (see load_oa_metrics()).
+ */
+static void
+add_all_metrics(struct gen_perf_config *perf,
+                const struct gen_device_info *devinfo)
+{
+   hash_table_foreach(perf->oa_metrics_table, entry) {
+      const struct gen_perf_query_info *query = entry->data;
+      register_oa_config(perf, devinfo, query, 0);
+   }
+}
+
static bool
kernel_has_dynamic_config_support(struct gen_perf_config *perf, int fd)
{
memcpy(i915_config.uuid, guid, sizeof(i915_config.uuid));
i915_config.n_mux_regs = config->n_mux_regs;
- i915_config.mux_regs_ptr = to_user_pointer(config->mux_regs);
+ i915_config.mux_regs_ptr = to_const_user_pointer(config->mux_regs);
i915_config.n_boolean_regs = config->n_b_counter_regs;
- i915_config.boolean_regs_ptr = to_user_pointer(config->b_counter_regs);
+ i915_config.boolean_regs_ptr = to_const_user_pointer(config->b_counter_regs);
i915_config.n_flex_regs = config->n_flex_regs;
- i915_config.flex_regs_ptr = to_user_pointer(config->flex_regs);
+ i915_config.flex_regs_ptr = to_const_user_pointer(config->flex_regs);
int ret = gen_ioctl(fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &i915_config);
return ret > 0 ? ret : 0;
}
static void
-init_oa_configs(struct gen_perf_config *perf, int fd)
+init_oa_configs(struct gen_perf_config *perf, int fd,
+ const struct gen_device_info *devinfo)
{
hash_table_foreach(perf->oa_metrics_table, entry) {
const struct gen_perf_query_info *query = entry->data;
if (gen_perf_load_metric_id(perf, query->guid, &config_id)) {
DBG("metric set: %s (already loaded)\n", query->guid);
- register_oa_config(perf, query, config_id);
+ register_oa_config(perf, devinfo, query, config_id);
continue;
}
continue;
}
- register_oa_config(perf, query, ret);
+ register_oa_config(perf, devinfo, query, ret);
DBG("metric set: %s (added)\n", query->guid);
}
}
{
uint64_t min_freq_mhz = 0, max_freq_mhz = 0;
- if (!read_sysfs_drm_device_file_uint64(perf, "gt_min_freq_mhz", &min_freq_mhz))
- return false;
+ if (likely(!(INTEL_DEBUG & DEBUG_NO_OACONFIG))) {
+ if (!read_sysfs_drm_device_file_uint64(perf, "gt_min_freq_mhz", &min_freq_mhz))
+ return false;
- if (!read_sysfs_drm_device_file_uint64(perf, "gt_max_freq_mhz", &max_freq_mhz))
- return false;
+ if (!read_sysfs_drm_device_file_uint64(perf, "gt_max_freq_mhz", &max_freq_mhz))
+ return false;
+ } else {
+ min_freq_mhz = 300;
+ max_freq_mhz = 1000;
+ }
memset(&perf->sys_vars, 0, sizeof(perf->sys_vars));
perf->sys_vars.gt_min_freq = min_freq_mhz * 1000000;
return NULL;
}
-static inline void
-add_stat_reg(struct gen_perf_query_info *query, uint32_t reg,
- uint32_t numerator, uint32_t denominator,
- const char *name, const char *description)
-{
- struct gen_perf_query_counter *counter;
-
- assert(query->n_counters < query->max_counters);
-
- counter = &query->counters[query->n_counters];
- counter->name = name;
- counter->desc = description;
- counter->type = GEN_PERF_COUNTER_TYPE_RAW;
- counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64;
- counter->offset = sizeof(uint64_t) * query->n_counters;
- counter->pipeline_stat.reg = reg;
- counter->pipeline_stat.numerator = numerator;
- counter->pipeline_stat.denominator = denominator;
-
- query->n_counters++;
-}
-
-static inline void
-add_basic_stat_reg(struct gen_perf_query_info *query,
- uint32_t reg, const char *name)
-{
- add_stat_reg(query, reg, 1, 1, name, name);
-}
-
+/* Build the "Pipeline Statistics Registers" query out of the HW pipeline
+ * statistic registers (vertices/primitives/shader-invocation counts).  The
+ * exact register set depends on the generation (gen6 vs gen7+ stream-out,
+ * PS invocation divisor on HSW/gen8, CS counters on gen7+).
+ */
static void
load_pipeline_statistic_metrics(struct gen_perf_config *perf_cfg,
-                                const struct gen_device_info *devinfo)
+                                 const struct gen_device_info *devinfo)
{
   struct gen_perf_query_info *query =
-      append_query_info(perf_cfg, MAX_STAT_COUNTERS)
+      gen_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);
   query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
   query->name = "Pipeline Statistics Registers";
-   add_basic_stat_reg(query, IA_VERTICES_COUNT,
-                      "N vertices submitted");
-   add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
-                      "N primitives submitted");
-   add_basic_stat_reg(query, VS_INVOCATION_COUNT,
-                      "N vertex shader invocations");
+   gen_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
+                                     "N vertices submitted");
+   gen_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
+                                     "N primitives submitted");
+   gen_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
+                                     "N vertex shader invocations");
   if (devinfo->gen == 6) {
-      add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1,
-                   "SO_PRIM_STORAGE_NEEDED",
-                   "N geometry shader stream-out primitives (total)");
-      add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1,
-                   "SO_NUM_PRIMS_WRITTEN",
-                   "N geometry shader stream-out primitives (written)");
+      gen_perf_query_add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1,
+                                  "SO_PRIM_STORAGE_NEEDED",
+                                  "N geometry shader stream-out primitives (total)");
+      gen_perf_query_add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1,
+                                  "SO_NUM_PRIMS_WRITTEN",
+                                  "N geometry shader stream-out primitives (written)");
   } else {
-      add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1,
-                   "SO_PRIM_STORAGE_NEEDED (Stream 0)",
-                   "N stream-out (stream 0) primitives (total)");
-      add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1,
-                   "SO_PRIM_STORAGE_NEEDED (Stream 1)",
-                   "N stream-out (stream 1) primitives (total)");
-      add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1,
-                   "SO_PRIM_STORAGE_NEEDED (Stream 2)",
-                   "N stream-out (stream 2) primitives (total)");
-      add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1,
-                   "SO_PRIM_STORAGE_NEEDED (Stream 3)",
-                   "N stream-out (stream 3) primitives (total)");
-      add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1,
-                   "SO_NUM_PRIMS_WRITTEN (Stream 0)",
-                   "N stream-out (stream 0) primitives (written)");
-      add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1,
-                   "SO_NUM_PRIMS_WRITTEN (Stream 1)",
-                   "N stream-out (stream 1) primitives (written)");
-      add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1,
-                   "SO_NUM_PRIMS_WRITTEN (Stream 2)",
-                   "N stream-out (stream 2) primitives (written)");
-      add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1,
-                   "SO_NUM_PRIMS_WRITTEN (Stream 3)",
-                   "N stream-out (stream 3) primitives (written)");
+      gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1,
+                                  "SO_PRIM_STORAGE_NEEDED (Stream 0)",
+                                  "N stream-out (stream 0) primitives (total)");
+      gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1,
+                                  "SO_PRIM_STORAGE_NEEDED (Stream 1)",
+                                  "N stream-out (stream 1) primitives (total)");
+      gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1,
+                                  "SO_PRIM_STORAGE_NEEDED (Stream 2)",
+                                  "N stream-out (stream 2) primitives (total)");
+      gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1,
+                                  "SO_PRIM_STORAGE_NEEDED (Stream 3)",
+                                  "N stream-out (stream 3) primitives (total)");
+      gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1,
+                                  "SO_NUM_PRIMS_WRITTEN (Stream 0)",
+                                  "N stream-out (stream 0) primitives (written)");
+      gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1,
+                                  "SO_NUM_PRIMS_WRITTEN (Stream 1)",
+                                  "N stream-out (stream 1) primitives (written)");
+      gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1,
+                                  "SO_NUM_PRIMS_WRITTEN (Stream 2)",
+                                  "N stream-out (stream 2) primitives (written)");
+      gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1,
+                                  "SO_NUM_PRIMS_WRITTEN (Stream 3)",
+                                  "N stream-out (stream 3) primitives (written)");
   }
-   add_basic_stat_reg(query, HS_INVOCATION_COUNT,
-                      "N TCS shader invocations");
-   add_basic_stat_reg(query, DS_INVOCATION_COUNT,
-                      "N TES shader invocations");
+   gen_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
+                                     "N TCS shader invocations");
+   gen_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
+                                     "N TES shader invocations");
-   add_basic_stat_reg(query, GS_INVOCATION_COUNT,
-                      "N geometry shader invocations");
-   add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
-                      "N geometry shader primitives emitted");
+   gen_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
+                                     "N geometry shader invocations");
+   gen_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
+                                     "N geometry shader primitives emitted");
-   add_basic_stat_reg(query, CL_INVOCATION_COUNT,
-                      "N primitives entering clipping");
-   add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
-                      "N primitives leaving clipping");
+   gen_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
+                                     "N primitives entering clipping");
+   gen_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
+                                     "N primitives leaving clipping");
   if (devinfo->is_haswell || devinfo->gen == 8) {
+      /* PS_INVOCATION_COUNT counts at sample granularity on these parts,
+       * hence the 1/4 divisor (numerator 1, denominator 4).
+       */
-      add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
-                   "N fragment shader invocations",
-                   "N fragment shader invocations");
+      gen_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
+                                  "N fragment shader invocations",
+                                  "N fragment shader invocations");
   } else {
-      add_basic_stat_reg(query, PS_INVOCATION_COUNT,
-                         "N fragment shader invocations");
+      gen_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
+                                        "N fragment shader invocations");
   }
-   add_basic_stat_reg(query, PS_DEPTH_COUNT,
-                      "N z-pass fragments");
+   gen_perf_query_add_basic_stat_reg(query, PS_DEPTH_COUNT,
+                                     "N z-pass fragments");
   if (devinfo->gen >= 7) {
-      add_basic_stat_reg(query, CS_INVOCATION_COUNT,
-                         "N compute shader invocations");
+      gen_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
+                                        "N compute shader invocations");
   }
   query->data_size = sizeof(uint64_t) * query->n_counters;
}
+/* Query the kernel for the i915-perf interface revision via
+ * I915_PARAM_PERF_REVISION; returns 0 when the getparam is not supported.
+ */
+static int
+i915_perf_version(int drm_fd)
+{
+   int tmp;
+   drm_i915_getparam_t gp = {
+      .param = I915_PARAM_PERF_REVISION,
+      .value = &tmp,
+   };
+
+   int ret = gen_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
+
+   /* Return 0 if this getparam is not supported, the first version supported
+    * is 1.
+    */
+   return ret < 0 ? 0 : tmp;
+}
+
+/* Read the context's slice/subslice/EU configuration through the
+ * I915_CONTEXT_PARAM_SSEU context getparam.  NOTE(review): the ioctl return
+ * value is ignored; on failure *sseu appears to be left as the caller set
+ * it — confirm callers zero-initialize it.
+ */
+static void
+i915_get_sseu(int drm_fd, struct drm_i915_gem_context_param_sseu *sseu)
+{
+   struct drm_i915_gem_context_param arg = {
+      .param = I915_CONTEXT_PARAM_SSEU,
+      .size = sizeof(*sseu),
+      .value = to_user_pointer(sseu)
+   };
+
+   gen_ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg);
+}
+
+/* qsort() comparator: order counters by their symbol name (strcmp). */
+static int
+compare_counters(const void *_c1, const void *_c2)
+{
+   const struct gen_perf_query_counter_info *c1 = _c1, *c2 = _c2;
+   return strcmp(c1->counter->symbol_name, c2->counter->symbol_name);
+}
+
+/* Build perf->counter_infos: the deduplicated, name-sorted list of all
+ * counters across all queries.  A counter exposed by several queries appears
+ * once, with query_mask recording (one bit per query index) which queries
+ * provide it.
+ */
+static void
+build_unique_counter_list(struct gen_perf_config *perf)
+{
+   assert(perf->n_queries < 64);
+
+   size_t max_counters = 0;
+
+   for (int q = 0; q < perf->n_queries; q++)
+      max_counters += perf->queries[q].n_counters;
+
+   /*
+    * Allocate big enough array to hold maximum possible number of counters.
+    * We can't alloc it small and realloc when needed because the hash table
+    * below contains pointers to this array.
+    */
+   struct gen_perf_query_counter_info *counter_infos =
+      ralloc_array_size(perf, sizeof(counter_infos[0]), max_counters);
+
+   perf->n_counters = 0;
+
+   /* Deduplication table keyed by counter symbol_name. */
+   struct hash_table *counters_table =
+      _mesa_hash_table_create(perf,
+                              _mesa_hash_string,
+                              _mesa_key_string_equal);
+   struct hash_entry *entry;
+   for (int q = 0; q < perf->n_queries ; q++) {
+      struct gen_perf_query_info *query = &perf->queries[q];
+
+      for (int c = 0; c < query->n_counters; c++) {
+         struct gen_perf_query_counter *counter;
+         struct gen_perf_query_counter_info *counter_info;
+
+         counter = &query->counters[c];
+         entry = _mesa_hash_table_search(counters_table, counter->symbol_name);
+
+         if (entry) {
+            /* Counter already seen in an earlier query: just tag this one. */
+            counter_info = entry->data;
+            counter_info->query_mask |= BITFIELD64_BIT(q);
+            continue;
+         }
+         assert(perf->n_counters < max_counters);
+
+         counter_info = &counter_infos[perf->n_counters++];
+         counter_info->counter = counter;
+         counter_info->query_mask = BITFIELD64_BIT(q);
+
+         counter_info->location.group_idx = q;
+         counter_info->location.counter_idx = c;
+
+         _mesa_hash_table_insert(counters_table, counter->symbol_name, counter_info);
+      }
+   }
+
+   _mesa_hash_table_destroy(counters_table, NULL);
+
+   /* Now we can realloc counter_infos array because hash table doesn't exist. */
+   perf->counter_infos = reralloc_array_size(perf, counter_infos,
+                                             sizeof(counter_infos[0]), perf->n_counters);
+
+   /* Sort by symbol name so the exposed list is stable and alphabetical. */
+   qsort(perf->counter_infos, perf->n_counters, sizeof(perf->counter_infos[0]),
+         compare_counters);
+}
+
static bool
-load_oa_metrics(struct gen_perf_config *perf, int fd,
- const struct gen_device_info *devinfo)
+oa_metrics_available(struct gen_perf_config *perf, int fd,
+ const struct gen_device_info *devinfo)
{
perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
bool i915_perf_oa_available = false;
struct stat sb;
perf->i915_query_supported = i915_query_perf_config_supported(perf, fd);
+ perf->i915_perf_version = i915_perf_version(fd);
+
+ /* Record the default SSEU configuration. */
+ i915_get_sseu(fd, &perf->sseu);
/* The existence of this sysctl parameter implies the kernel supports
* the i915 perf interface.
if (paranoid == 0 || geteuid() == 0)
i915_perf_oa_available = true;
}
+
+ perf->platform_supported = oa_register != NULL;
}
- if (!i915_perf_oa_available ||
- !oa_register ||
- !get_sysfs_dev_dir(perf, fd) ||
- !init_oa_sys_vars(perf, devinfo))
- return false;
+ return i915_perf_oa_available &&
+ oa_register &&
+ get_sysfs_dev_dir(perf, fd) &&
+ init_oa_sys_vars(perf, devinfo);
+}
+
+static void
+load_oa_metrics(struct gen_perf_config *perf, int fd,
+ const struct gen_device_info *devinfo)
+{
+ perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
perf->oa_metrics_table =
_mesa_hash_table_create(perf, _mesa_hash_string,
*/
oa_register(perf);
- if (likely((INTEL_DEBUG & DEBUG_NO_OACONFIG) == 0) &&
- kernel_has_dynamic_config_support(perf, fd))
- init_oa_configs(perf, fd);
- else
- enumerate_sysfs_metrics(perf);
-
- return true;
+ if (likely(!(INTEL_DEBUG & DEBUG_NO_OACONFIG))) {
+ if (kernel_has_dynamic_config_support(perf, fd))
+ init_oa_configs(perf, fd, devinfo);
+ else
+ enumerate_sysfs_metrics(perf, devinfo);
+ } else {
+ add_all_metrics(perf, devinfo);
+ }
}
struct gen_perf_registers *
* struct gen_perf_query_register_prog maps exactly to the tuple of
* (register offset, register value) returned by the i915.
*/
- i915_config.flex_regs_ptr = to_user_pointer(config->flex_regs);
- i915_config.mux_regs_ptr = to_user_pointer(config->mux_regs);
- i915_config.boolean_regs_ptr = to_user_pointer(config->b_counter_regs);
+ i915_config.flex_regs_ptr = to_const_user_pointer(config->flex_regs);
+ i915_config.mux_regs_ptr = to_const_user_pointer(config->mux_regs);
+ i915_config.boolean_regs_ptr = to_const_user_pointer(config->b_counter_regs);
if (!i915_query_perf_config_data(perf_cfg, fd, guid, &i915_config)) {
ralloc_free(config);
return NULL;
return i915_add_config(perf_cfg, fd, config, generated_guid);
}
+/* Compute the bitmask (one bit per query index) of queries needed to read
+ * all the counters in counter_indices.  Most-constrained counters (those
+ * available from the fewest queries) are considered first to minimize the
+ * number of selected queries, i.e. the number of passes.
+ */
+static uint64_t
+get_passes_mask(struct gen_perf_config *perf,
+                const uint32_t *counter_indices,
+                uint32_t counter_indices_count)
+{
+   uint64_t queries_mask = 0;
+
+   assert(perf->n_queries < 64);
+
+   /* Compute the number of passes by going through all counters N times (with
+    * N the number of queries) to make sure we select the most constraining
+    * counters first and look at the more flexible ones (that could be
+    * obtained from multiple queries) later. That way we minimize the number
+    * of passes required.
+    */
+   for (uint32_t q = 0; q < perf->n_queries; q++) {
+      for (uint32_t i = 0; i < counter_indices_count; i++) {
+         assert(counter_indices[i] < perf->n_counters);
+
+         uint32_t idx = counter_indices[i];
+         /* query_mask is 64bit wide, use the ll popcount variant so that
+          * queries with index >= 32 are not ignored.
+          */
+         if (__builtin_popcountll(perf->counter_infos[idx].query_mask) != (q + 1))
+            continue;
+
+         /* Already covered by one of the selected queries? */
+         if (queries_mask & perf->counter_infos[idx].query_mask)
+            continue;
+
+         queries_mask |= BITFIELD64_BIT(ffsll(perf->counter_infos[idx].query_mask) - 1);
+      }
+   }
+
+   return queries_mask;
+}
+
+/* Return the number of passes (distinct queries that must be run) required
+ * to read all the given counters.  When pass_queries is non-NULL, it is
+ * filled with a pointer to the query used for each pass.
+ */
+uint32_t
+gen_perf_get_n_passes(struct gen_perf_config *perf,
+                      const uint32_t *counter_indices,
+                      uint32_t counter_indices_count,
+                      struct gen_perf_query_info **pass_queries)
+{
+   uint64_t queries_mask = get_passes_mask(perf, counter_indices, counter_indices_count);
+
+   if (pass_queries) {
+      uint32_t pass = 0;
+      for (uint32_t q = 0; q < perf->n_queries; q++) {
+         if ((1ULL << q) & queries_mask)
+            pass_queries[pass++] = &perf->queries[q];
+      }
+   }
+
+   /* queries_mask is 64bit wide: use popcountll so query bits >= 32 are
+    * counted (n_queries may be up to 63).
+    */
+   return __builtin_popcountll(queries_mask);
+}
+
+/* For each requested counter, fill counter_pass[i] with the counter, the
+ * query it will be read from, and the pass (index within the selected
+ * queries) during which it will be read.
+ */
+void
+gen_perf_get_counters_passes(struct gen_perf_config *perf,
+                             const uint32_t *counter_indices,
+                             uint32_t counter_indices_count,
+                             struct gen_perf_counter_pass *counter_pass)
+{
+   uint64_t queries_mask = get_passes_mask(perf, counter_indices, counter_indices_count);
+   /* 64bit mask: popcountll, not popcount, so queries >= 32 are counted. */
+   ASSERTED uint32_t n_passes = __builtin_popcountll(queries_mask);
+
+   for (uint32_t i = 0; i < counter_indices_count; i++) {
+      assert(counter_indices[i] < perf->n_counters);
+
+      uint32_t idx = counter_indices[i];
+      counter_pass[i].counter = perf->counter_infos[idx].counter;
+
+      /* First selected query (in pass order) providing this counter;
+       * get_passes_mask() guarantees the intersection is non-empty.
+       */
+      uint32_t query_idx = ffsll(perf->counter_infos[idx].query_mask & queries_mask) - 1;
+      counter_pass[i].query = &perf->queries[query_idx];
+
+      /* Pass index = number of selected queries at or below query_idx - 1. */
+      uint32_t clear_bits = 63 - query_idx;
+      counter_pass[i].pass = __builtin_popcountll((queries_mask << clear_bits) >> clear_bits) - 1;
+      assert(counter_pass[i].pass < n_passes);
+   }
+}
+
/* Accumulate 32bits OA counters */
static inline void
accumulate_uint32(const uint32_t *report0,
const uint32_t *start,
const uint32_t *end)
{
- int i, idx = 0;
+ int i;
if (result->hw_id == OA_REPORT_INVALID_CTX_ID &&
start[2] != OA_REPORT_INVALID_CTX_ID)
switch (query->oa_format) {
case I915_OA_FORMAT_A32u40_A4u32_B8_C8:
- accumulate_uint32(start + 1, end + 1, result->accumulator + idx++); /* timestamp */
- accumulate_uint32(start + 3, end + 3, result->accumulator + idx++); /* clock */
+ accumulate_uint32(start + 1, end + 1,
+ result->accumulator + query->gpu_time_offset); /* timestamp */
+ accumulate_uint32(start + 3, end + 3,
+ result->accumulator + query->gpu_clock_offset); /* clock */
/* 32x 40bit A counters... */
- for (i = 0; i < 32; i++)
- accumulate_uint40(i, start, end, result->accumulator + idx++);
+ for (i = 0; i < 32; i++) {
+ accumulate_uint40(i, start, end,
+ result->accumulator + query->a_offset + i);
+ }
/* 4x 32bit A counters... */
- for (i = 0; i < 4; i++)
- accumulate_uint32(start + 36 + i, end + 36 + i, result->accumulator + idx++);
+ for (i = 0; i < 4; i++) {
+ accumulate_uint32(start + 36 + i, end + 36 + i,
+ result->accumulator + query->a_offset + 32 + i);
+ }
+
+ /* 8x 32bit B counters */
+ for (i = 0; i < 8; i++) {
+ accumulate_uint32(start + 48 + i, end + 48 + i,
+ result->accumulator + query->b_offset + i);
+ }
- /* 8x 32bit B counters + 8x 32bit C counters... */
- for (i = 0; i < 16; i++)
- accumulate_uint32(start + 48 + i, end + 48 + i, result->accumulator + idx++);
+ /* 8x 32bit C counters... */
+ for (i = 0; i < 8; i++) {
+ accumulate_uint32(start + 56 + i, end + 56 + i,
+ result->accumulator + query->c_offset + i);
+ }
break;
case I915_OA_FORMAT_A45_B8_C8:
accumulate_uint32(start + 1, end + 1, result->accumulator); /* timestamp */
- for (i = 0; i < 61; i++)
- accumulate_uint32(start + 3 + i, end + 3 + i, result->accumulator + 1 + i);
+ for (i = 0; i < 61; i++) {
+ accumulate_uint32(start + 3 + i, end + 3 + i,
+ result->accumulator + query->a_offset + i);
+ }
break;
default:
result->hw_id = OA_REPORT_INVALID_CTX_ID; /* invalid */
}
-static void
-register_mdapi_statistic_query(struct gen_perf_config *perf_cfg,
- const struct gen_device_info *devinfo)
-{
- if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
- return;
-
- struct gen_perf_query_info *query =
- append_query_info(perf_cfg, MAX_STAT_COUNTERS);
-
- query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
- query->name = "Intel_Raw_Pipeline_Statistics_Query";
-
- /* The order has to match mdapi_pipeline_metrics. */
- add_basic_stat_reg(query, IA_VERTICES_COUNT,
- "N vertices submitted");
- add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
- "N primitives submitted");
- add_basic_stat_reg(query, VS_INVOCATION_COUNT,
- "N vertex shader invocations");
- add_basic_stat_reg(query, GS_INVOCATION_COUNT,
- "N geometry shader invocations");
- add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
- "N geometry shader primitives emitted");
- add_basic_stat_reg(query, CL_INVOCATION_COUNT,
- "N primitives entering clipping");
- add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
- "N primitives leaving clipping");
- if (devinfo->is_haswell || devinfo->gen == 8) {
- add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
- "N fragment shader invocations",
- "N fragment shader invocations");
- } else {
- add_basic_stat_reg(query, PS_INVOCATION_COUNT,
- "N fragment shader invocations");
- }
- add_basic_stat_reg(query, HS_INVOCATION_COUNT,
- "N TCS shader invocations");
- add_basic_stat_reg(query, DS_INVOCATION_COUNT,
- "N TES shader invocations");
- if (devinfo->gen >= 7) {
- add_basic_stat_reg(query, CS_INVOCATION_COUNT,
- "N compute shader invocations");
- }
-
- if (devinfo->gen >= 10) {
- /* Reuse existing CS invocation register until we can expose this new
- * one.
- */
- add_basic_stat_reg(query, CS_INVOCATION_COUNT,
- "Reserved1");
- }
-
- query->data_size = sizeof(uint64_t) * query->n_counters;
-}
-
-static void
-fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
- const char *name,
- uint32_t data_offset,
- uint32_t data_size,
- enum gen_perf_counter_data_type data_type)
-{
- struct gen_perf_query_counter *counter = &query->counters[query->n_counters];
-
- assert(query->n_counters <= query->max_counters);
-
- counter->name = name;
- counter->desc = "Raw counter value";
- counter->type = GEN_PERF_COUNTER_TYPE_RAW;
- counter->data_type = data_type;
- counter->offset = data_offset;
-
- query->n_counters++;
-
- assert(counter->offset + gen_perf_query_counter_get_size(counter) <= query->data_size);
-}
-
-#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \
- fill_mdapi_perf_query_counter(query, #field_name, \
- (uint8_t *) &struct_name.field_name - \
- (uint8_t *) &struct_name, \
- sizeof(struct_name.field_name), \
- GEN_PERF_COUNTER_DATA_TYPE_##type_name)
-#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
- fill_mdapi_perf_query_counter(query, \
- ralloc_asprintf(ctx, "%s%i", #field_name, idx), \
- (uint8_t *) &struct_name.field_name[idx] - \
- (uint8_t *) &struct_name, \
- sizeof(struct_name.field_name[0]), \
- GEN_PERF_COUNTER_DATA_TYPE_##type_name)
-
-static void
-register_mdapi_oa_query(const struct gen_device_info *devinfo,
- struct gen_perf_config *perf)
-{
- struct gen_perf_query_info *query = NULL;
-
- /* MDAPI requires different structures for pretty much every generation
- * (right now we have definitions for gen 7 to 11).
- */
- if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
- return;
-
- switch (devinfo->gen) {
- case 7: {
- query = append_query_info(perf, 1 + 45 + 16 + 7);
- query->oa_format = I915_OA_FORMAT_A45_B8_C8;
-
- struct gen7_mdapi_metrics metric_data;
- query->data_size = sizeof(metric_data);
-
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
- for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {
- MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
- metric_data, ACounters, i, UINT64);
- }
- for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {
- MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
- metric_data, NOACounters, i, UINT64);
- }
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
- break;
- }
- case 8: {
- query = append_query_info(perf, 2 + 36 + 16 + 16);
- query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
-
- struct gen8_mdapi_metrics metric_data;
- query->data_size = sizeof(metric_data);
-
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
- for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
- MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
- metric_data, OaCntr, i, UINT64);
- }
- for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
- MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
- metric_data, NoaCntr, i, UINT64);
- }
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
- break;
- }
- case 9:
- case 10:
- case 11: {
- query = append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
- query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
-
- struct gen9_mdapi_metrics metric_data;
- query->data_size = sizeof(metric_data);
-
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
- for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
- MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
- metric_data, OaCntr, i, UINT64);
- }
- for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
- MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
- metric_data, NoaCntr, i, UINT64);
- }
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
- for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {
- MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
- metric_data, UserCntr, i, UINT64);
- }
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);
- break;
- }
- default:
- unreachable("Unsupported gen");
- break;
- }
-
- query->kind = GEN_PERF_QUERY_TYPE_RAW;
- query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
- query->guid = GEN_PERF_QUERY_GUID_MDAPI;
-
- {
- /* Accumulation buffer offsets copied from an actual query... */
- const struct gen_perf_query_info *copy_query =
- &perf->queries[0];
-
- query->gpu_time_offset = copy_query->gpu_time_offset;
- query->gpu_clock_offset = copy_query->gpu_clock_offset;
- query->a_offset = copy_query->a_offset;
- query->b_offset = copy_query->b_offset;
- query->c_offset = copy_query->c_offset;
- }
-}
-
void
gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
                      const struct gen_device_info *devinfo,
-                      int drm_fd)
+                      int drm_fd,
+                      bool include_pipeline_statistics)
{
-   load_pipeline_statistic_metrics(perf_cfg, devinfo);
-   register_mdapi_statistic_query(perf_cfg, devinfo);
-   if (load_oa_metrics(perf_cfg, drm_fd, devinfo))
-      register_mdapi_oa_query(devinfo, perf_cfg);
+   /* Pipeline statistics queries are optional (not all APIs expose them). */
+   if (include_pipeline_statistics) {
+      load_pipeline_statistic_metrics(perf_cfg, devinfo);
+      gen_perf_register_mdapi_statistic_query(perf_cfg, devinfo);
+   }
+
+   /* OA metrics (and the MDAPI raw query) are only registered when the
+    * kernel/platform supports them.
+    */
+   if (oa_metrics_available(perf_cfg, drm_fd, devinfo)) {
+      load_oa_metrics(perf_cfg, drm_fd, devinfo);
+      build_unique_counter_list(perf_cfg);
+      gen_perf_register_mdapi_oa_query(perf_cfg, devinfo);
+   }
}