#include <unistd.h>
#include <errno.h>
+#ifndef HAVE_DIRENT_D_TYPE
+#include <limits.h> // PATH_MAX
+#endif
+
#include <drm-uapi/i915_drm.h>
-#include "gen_perf.h"
-#include "perf/gen_perf_metrics.h"
+#include "common/gen_gem.h"
#include "dev/gen_debug.h"
#include "dev/gen_device_info.h"
+
+#include "perf/gen_perf.h"
+#include "perf/gen_perf_regs.h"
+#include "perf/gen_perf_mdapi.h"
+#include "perf/gen_perf_metrics.h"
+#include "perf/gen_perf_private.h"
+
#include "util/bitscan.h"
+#include "util/mesa-sha1.h"
+#include "util/u_math.h"
#define FILE_DEBUG_FLAG DEBUG_PERFMON
+#define OA_REPORT_INVALID_CTX_ID (0xffffffff)
+
+/**
+ * Widen a user-space address to the 64-bit integer form stored in the
+ * pointer fields of the ioctl structures filled in by this file.
+ *
+ * The parameter is const-qualified because nothing is written through it
+ * here; read-only buffers can therefore be passed without a cast.
+ *
+ * \param ptr  address to convert
+ * \return     the address as an unsigned 64-bit integer
+ */
+static inline uint64_t to_user_pointer(const void *ptr)
+{
+   return (uintptr_t) ptr;
+}
+
+/**
+ * Tell whether a directory entry is a directory or a symbolic link.
+ *
+ * When HAVE_DIRENT_D_TYPE is defined the answer comes straight from
+ * entry->d_type. Otherwise the entry is lstat()ed at
+ * "<parent_dir>/<d_name>".
+ *
+ * \param entry       directory entry to classify
+ * \param parent_dir  directory used to build the path for the lstat()
+ *                    fallback; unused when d_type is available
+ * \return true for a directory or symlink; false otherwise, including when
+ *         the fallback cannot build the path or lstat() fails
+ */
+static bool
+is_dir_or_link(const struct dirent *entry, const char *parent_dir)
+{
+#ifdef HAVE_DIRENT_D_TYPE
+   return entry->d_type == DT_DIR || entry->d_type == DT_LNK;
+#else
+   struct stat st;
+   char path[PATH_MAX + 1];
+   int len = snprintf(path, sizeof(path), "%s/%s", parent_dir, entry->d_name);
+
+   /* A truncated path would name some other file, so give up instead. */
+   if (len < 0 || (size_t) len >= sizeof(path))
+      return false;
+
+   /* When lstat() fails st is not filled in; st_mode must not be read. */
+   if (lstat(path, &st) != 0)
+      return false;
+
+   return S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode);
+#endif
+}
+
static bool
-get_sysfs_dev_dir(struct gen_perf *perf, int fd)
+get_sysfs_dev_dir(struct gen_perf_config *perf, int fd)
{
struct stat sb;
int min, maj;
}
while ((drm_entry = readdir(drmdir))) {
- if ((drm_entry->d_type == DT_DIR ||
- drm_entry->d_type == DT_LNK) &&
+ if (is_dir_or_link(drm_entry, perf->sysfs_dev_dir) &&
strncmp(drm_entry->d_name, "card", 4) == 0)
{
len = snprintf(perf->sysfs_dev_dir,
}
static bool
-read_sysfs_drm_device_file_uint64(struct gen_perf *perf,
+read_sysfs_drm_device_file_uint64(struct gen_perf_config *perf,
const char *file,
uint64_t *value)
{
}
+/* Register an OA metric set under the id the kernel knows it by.
+ *
+ * Takes the entry returned by gen_perf_append_query_info(perf, 0), copies
+ * `query` into it wholesale and then overrides the copy's
+ * oa_metrics_set_id with `config_id`. The result is reported through DBG().
+ */
static void
-register_oa_config(struct gen_perf *perf,
+register_oa_config(struct gen_perf_config *perf,
const struct gen_perf_query_info *query,
uint64_t config_id)
{
- struct gen_perf_query_info *registred_query =
- gen_perf_query_append_query_info(perf, 0);
+ struct gen_perf_query_info *registered_query =
+ gen_perf_append_query_info(perf, 0);
- *registred_query = *query;
- registred_query->oa_metrics_set_id = config_id;
- DBG("metric set registred: id = %" PRIu64", guid = %s\n",
- registred_query->oa_metrics_set_id, query->guid);
+ /* Struct copy first; the id is patched afterwards so it is not lost. */
+ *registered_query = *query;
+ registered_query->oa_metrics_set_id = config_id;
+ DBG("metric set registered: id = %" PRIu64", guid = %s\n",
+ registered_query->oa_metrics_set_id, query->guid);
}
static void
-enumerate_sysfs_metrics(struct gen_perf *perf)
+enumerate_sysfs_metrics(struct gen_perf_config *perf)
{
DIR *metricsdir = NULL;
struct dirent *metric_entry;
while ((metric_entry = readdir(metricsdir))) {
struct hash_entry *entry;
-
- if ((metric_entry->d_type != DT_DIR &&
- metric_entry->d_type != DT_LNK) ||
+ if (!is_dir_or_link(metric_entry, buf) ||
metric_entry->d_name[0] == '.')
continue;
metric_entry->d_name);
if (entry) {
uint64_t id;
-
- len = snprintf(buf, sizeof(buf), "%s/metrics/%s/id",
- perf->sysfs_dev_dir, metric_entry->d_name);
- if (len < 0 || len >= sizeof(buf)) {
- DBG("Failed to concatenate path to sysfs metric id file\n");
- continue;
- }
-
- if (!read_file_uint64(buf, &id)) {
+ if (!gen_perf_load_metric_id(perf, metric_entry->d_name, &id)) {
DBG("Failed to read metric set id from %s: %m", buf);
continue;
}
}
+/* Probe the kernel for dynamic OA config support.
+ *
+ * Issues DRM_IOCTL_I915_PERF_REMOVE_CONFIG for config id UINT64_MAX and
+ * returns true only when the ioctl fails with errno == ENOENT. `perf` is
+ * not used by the new implementation.
+ * NOTE(review): presumably ENOENT means the ioctl is implemented but the id
+ * is unknown, and any other outcome means no support — confirm against the
+ * i915 uAPI.
+ */
static bool
-kernel_has_dynamic_config_support(struct gen_perf *perf, int fd)
+kernel_has_dynamic_config_support(struct gen_perf_config *perf, int fd)
{
- hash_table_foreach(perf->oa_metrics_table, entry) {
- struct gen_perf_query_info *query = entry->data;
- char config_path[280];
- uint64_t config_id;
+ uint64_t invalid_config_id = UINT64_MAX;
- snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id",
- perf->sysfs_dev_dir, query->guid);
+ return gen_ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG,
+ &invalid_config_id) < 0 && errno == ENOENT;
+}
- /* Look for the test config, which we know we can't replace. */
- if (read_file_uint64(config_path, &config_id) && config_id == 1) {
- return perf->ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG,
- &config_id) < 0 && errno == ENOENT;
- }
- }
+/* Hand `n_items` query items to the kernel in one DRM_IOCTL_I915_QUERY
+ * call and return the ioctl result unchanged. `perf` is not used.
+ */
+static int
+i915_query_items(struct gen_perf_config *perf, int fd,
+                 struct drm_i915_query_item *items, uint32_t n_items)
+{
+   struct drm_i915_query request = { 0 };
+
+   request.num_items = n_items;
+   request.items_ptr = to_user_pointer(items);
+
+   return gen_ioctl(fd, DRM_IOCTL_I915_QUERY, &request);
+}
- return false;
+/* Report whether the DRM_I915_QUERY_PERF_CONFIG query is usable: true when
+ * a single PERF_CONFIG_LIST item is accepted by the query ioctl and comes
+ * back with a positive length.
+ */
+static bool
+i915_query_perf_config_supported(struct gen_perf_config *perf, int fd)
+{
+   struct drm_i915_query_item list_query = {
+      .query_id = DRM_I915_QUERY_PERF_CONFIG,
+      .flags = DRM_I915_QUERY_PERF_CONFIG_LIST,
+   };
+
+   if (i915_query_items(perf, fd, &list_query, 1) != 0)
+      return false;
+
+   return list_query.length > 0;
+}
+
+/* Query the kernel for the OA config identified by `guid`.
+ *
+ * `*config` is sent to the kernel as part of the request and, on success,
+ * overwritten with what the kernel wrote back. Returns false (leaving
+ * `*config` untouched) when the query ioctl fails or reports a
+ * non-positive length.
+ */
+static bool
+i915_query_perf_config_data(struct gen_perf_config *perf,
+                            int fd, const char *guid,
+                            struct drm_i915_perf_oa_config *config)
+{
+   /* Request buffer: the query header followed by the config. */
+   struct {
+      struct drm_i915_query_perf_config query;
+      struct drm_i915_perf_oa_config config;
+   } payload;
+
+   memset(&payload, 0, sizeof(payload));
+   memcpy(payload.query.uuid, guid, sizeof(payload.query.uuid));
+   memcpy(&payload.config, config, sizeof(payload.config));
+
+   struct drm_i915_query_item item = {
+      .query_id = DRM_I915_QUERY_PERF_CONFIG,
+      .flags = DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID,
+      .length = sizeof(payload),
+      .data_ptr = to_user_pointer(&payload),
+   };
+
+   if (i915_query_items(perf, fd, &item, 1) != 0 || item.length <= 0)
+      return false;
+
+   memcpy(config, &payload.config, sizeof(payload.config));
+   return true;
}
+/* Look up the kernel id of the metric set named `guid`.
+ *
+ * Builds "<sysfs_dev_dir>/metrics/<guid>/id" in a 280-byte buffer and
+ * passes it to read_file_uint64(), whose result is returned as-is.
+ * NOTE(review): presumably that is true when the file exists and its value
+ * was stored in *metric_id — confirm against read_file_uint64(). The
+ * snprintf() result is not checked for truncation.
+ */
bool
-gen_perf_load_metric_id(struct gen_perf *perf, const char *guid,
+gen_perf_load_metric_id(struct gen_perf_config *perf_cfg,
+ const char *guid,
uint64_t *metric_id)
{
char config_path[280];
snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id",
- perf->sysfs_dev_dir, guid);
+ perf_cfg->sysfs_dev_dir, guid);
/* Don't recreate already loaded configs. */
return read_file_uint64(config_path, metric_id);
}
+/* Submit a register configuration to the kernel under `guid` via
+ * DRM_IOCTL_I915_PERF_ADD_CONFIG.
+ *
+ * Returns the ioctl's result when it is positive and 0 otherwise, so a
+ * failure is reported as 0 and never as a negative value. `perf` is not
+ * used.
+ */
+static uint64_t
+i915_add_config(struct gen_perf_config *perf, int fd,
+                const struct gen_perf_registers *config,
+                const char *guid)
+{
+   struct drm_i915_perf_oa_config i915_config = {
+      .n_mux_regs = config->n_mux_regs,
+      .mux_regs_ptr = to_user_pointer(config->mux_regs),
+
+      .n_boolean_regs = config->n_b_counter_regs,
+      .boolean_regs_ptr = to_user_pointer(config->b_counter_regs),
+
+      .n_flex_regs = config->n_flex_regs,
+      .flex_regs_ptr = to_user_pointer(config->flex_regs),
+   };
+
+   memcpy(i915_config.uuid, guid, sizeof(i915_config.uuid));
+
+   const int ret = gen_ioctl(fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &i915_config);
+   if (ret <= 0)
+      return 0;
+
+   return ret;
+}
+
static void
-init_oa_configs(struct gen_perf *perf, int fd)
+init_oa_configs(struct gen_perf_config *perf, int fd)
{
hash_table_foreach(perf->oa_metrics_table, entry) {
const struct gen_perf_query_info *query = entry->data;
- struct drm_i915_perf_oa_config config;
uint64_t config_id;
- int ret;
if (gen_perf_load_metric_id(perf, query->guid, &config_id)) {
DBG("metric set: %s (already loaded)\n", query->guid);
continue;
}
- memset(&config, 0, sizeof(config));
-
- memcpy(config.uuid, query->guid, sizeof(config.uuid));
-
- config.n_mux_regs = query->n_mux_regs;
- config.mux_regs_ptr = (uintptr_t) query->mux_regs;
-
- config.n_boolean_regs = query->n_b_counter_regs;
- config.boolean_regs_ptr = (uintptr_t) query->b_counter_regs;
-
- config.n_flex_regs = query->n_flex_regs;
- config.flex_regs_ptr = (uintptr_t) query->flex_regs;
-
- ret = perf->ioctl(fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);
+ int ret = i915_add_config(perf, fd, &query->config, query->guid);
if (ret < 0) {
DBG("Failed to load \"%s\" (%s) metrics set in kernel: %s\n",
query->name, query->guid, strerror(errno));
}
static void
-compute_topology_builtins(struct gen_perf *perf,
+compute_topology_builtins(struct gen_perf_config *perf,
const struct gen_device_info *devinfo)
{
perf->sys_vars.slice_mask = devinfo->slice_masks;
for (int i = 0; i < sizeof(devinfo->eu_masks); i++)
perf->sys_vars.n_eus += __builtin_popcount(devinfo->eu_masks[i]);
- perf->sys_vars.eu_threads_count =
- perf->sys_vars.n_eus * devinfo->num_thread_per_eu;
+ perf->sys_vars.eu_threads_count = devinfo->num_thread_per_eu;
/* The subslice mask builtin contains bits for all slices. Prior to Gen11
* it had groups of 3bits for each slice, on Gen11 it's 8bits for each
}
static bool
-init_oa_sys_vars(struct gen_perf *perf, const struct gen_device_info *devinfo)
+init_oa_sys_vars(struct gen_perf_config *perf, const struct gen_device_info *devinfo)
{
uint64_t min_freq_mhz = 0, max_freq_mhz = 0;
return true;
}
-typedef void (*perf_register_oa_queries_t)(struct gen_perf *);
+typedef void (*perf_register_oa_queries_t)(struct gen_perf_config *);
static perf_register_oa_queries_t
get_register_queries_function(const struct gen_device_info *devinfo)
}
if (devinfo->is_cannonlake)
return gen_oa_register_queries_cnl;
+ if (devinfo->gen == 11) {
+ if (devinfo->is_elkhartlake)
+ return gen_oa_register_queries_lkf;
+ return gen_oa_register_queries_icl;
+ }
+ if (devinfo->gen == 12)
+ return gen_oa_register_queries_tgl;
return NULL;
}
-bool
-gen_perf_load_oa_metrics(struct gen_perf *perf, int fd,
+/* Register the "Pipeline Statistics Registers" query on perf_cfg.
+ *
+ * A query entry is obtained from gen_perf_append_query_info() and one
+ * counter is added per statistics register, in the order of the calls
+ * below. Which stream-out, fragment-shader and compute entries are added
+ * depends on devinfo->gen and devinfo->is_haswell.
+ */
+static void
+load_pipeline_statistic_metrics(struct gen_perf_config *perf_cfg,
+ const struct gen_device_info *devinfo)
+{
+ /* NOTE(review): MAX_STAT_COUNTERS is presumably the counter capacity
+ * reserved for this query — confirm against gen_perf_append_query_info().
+ */
+ struct gen_perf_query_info *query =
+ gen_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);
+
+ query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
+ query->name = "Pipeline Statistics Registers";
+
+ gen_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
+ "N vertices submitted");
+ gen_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
+ "N primitives submitted");
+ gen_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
+ "N vertex shader invocations");
+
+ /* Gen6 registers a single pair of stream-out counters; every other gen
+ * registers one needed/written pair for each of streams 0-3.
+ */
+ if (devinfo->gen == 6) {
+ gen_perf_query_add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1,
+ "SO_PRIM_STORAGE_NEEDED",
+ "N geometry shader stream-out primitives (total)");
+ gen_perf_query_add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1,
+ "SO_NUM_PRIMS_WRITTEN",
+ "N geometry shader stream-out primitives (written)");
+ } else {
+ gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1,
+ "SO_PRIM_STORAGE_NEEDED (Stream 0)",
+ "N stream-out (stream 0) primitives (total)");
+ gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1,
+ "SO_PRIM_STORAGE_NEEDED (Stream 1)",
+ "N stream-out (stream 1) primitives (total)");
+ gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1,
+ "SO_PRIM_STORAGE_NEEDED (Stream 2)",
+ "N stream-out (stream 2) primitives (total)");
+ gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1,
+ "SO_PRIM_STORAGE_NEEDED (Stream 3)",
+ "N stream-out (stream 3) primitives (total)");
+ gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1,
+ "SO_NUM_PRIMS_WRITTEN (Stream 0)",
+ "N stream-out (stream 0) primitives (written)");
+ gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1,
+ "SO_NUM_PRIMS_WRITTEN (Stream 1)",
+ "N stream-out (stream 1) primitives (written)");
+ gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1,
+ "SO_NUM_PRIMS_WRITTEN (Stream 2)",
+ "N stream-out (stream 2) primitives (written)");
+ gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1,
+ "SO_NUM_PRIMS_WRITTEN (Stream 3)",
+ "N stream-out (stream 3) primitives (written)");
+ }
+
+ gen_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
+ "N TCS shader invocations");
+ gen_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
+ "N TES shader invocations");
+
+ gen_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
+ "N geometry shader invocations");
+ gen_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
+ "N geometry shader primitives emitted");
+
+ gen_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
+ "N primitives entering clipping");
+ gen_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
+ "N primitives leaving clipping");
+
+ /* Haswell and Gen8 pass (1, 4) instead of using the basic helper.
+ * NOTE(review): presumably a numerator/denominator scale applied to the
+ * raw count — confirm against gen_perf_query_add_stat_reg().
+ */
+ if (devinfo->is_haswell || devinfo->gen == 8) {
+ gen_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
+ "N fragment shader invocations",
+ "N fragment shader invocations");
+ } else {
+ gen_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
+ "N fragment shader invocations");
+ }
+
+ gen_perf_query_add_basic_stat_reg(query, PS_DEPTH_COUNT,
+ "N z-pass fragments");
+
+ /* The compute counter is only registered from Gen7 onwards. */
+ if (devinfo->gen >= 7) {
+ gen_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
+ "N compute shader invocations");
+ }
+
+ /* One uint64_t of result storage per registered counter. */
+ query->data_size = sizeof(uint64_t) * query->n_counters;
+}
+
+/* Fetch I915_PARAM_PERF_REVISION from the kernel.
+ *
+ * Returns the value written by the getparam ioctl, or 0 when the ioctl
+ * fails.
+ */
+static int
+i915_perf_version(int drm_fd)
+{
+   int revision;
+   drm_i915_getparam_t getparam = {
+      .param = I915_PARAM_PERF_REVISION,
+      .value = &revision,
+   };
+
+   /* Return 0 if this getparam is not supported, the first version
+    * supported is 1.
+    */
+   if (gen_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &getparam) < 0)
+      return 0;
+
+   return revision;
+}
+
+/* Ask the kernel to fill `*sseu` through the I915_CONTEXT_PARAM_SSEU
+ * context getparam.
+ *
+ * Every request field other than param/size/value is left at zero. The
+ * ioctl result is discarded, so callers cannot tell whether `*sseu` was
+ * actually written.
+ */
+static void
+i915_get_sseu(int drm_fd, struct drm_i915_gem_context_param_sseu *sseu)
+{
+   struct drm_i915_gem_context_param request = { 0 };
+
+   request.param = I915_CONTEXT_PARAM_SSEU;
+   request.size = sizeof(*sseu);
+   request.value = to_user_pointer(sseu);
+
+   gen_ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &request);
+}
+
+static bool
+load_oa_metrics(struct gen_perf_config *perf, int fd,
const struct gen_device_info *devinfo)
{
perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
bool i915_perf_oa_available = false;
struct stat sb;
+ perf->i915_query_supported = i915_query_perf_config_supported(perf, fd);
+ perf->i915_perf_version = i915_perf_version(fd);
+
+ /* Record the default SSEU configuration. */
+ i915_get_sseu(fd, &perf->sseu);
+
/* The existence of this sysctl parameter implies the kernel supports
* the i915 perf interface.
*/
return false;
perf->oa_metrics_table =
- _mesa_hash_table_create(perf, _mesa_key_hash_string,
+ _mesa_hash_table_create(perf, _mesa_hash_string,
_mesa_key_string_equal);
/* Index all the metric sets mesa knows about before looking to see what
return true;
}
+/**
+ * Read back from the kernel the register programming of the OA config
+ * named `guid`.
+ *
+ * The kernel is queried twice: once with no register buffers attached so
+ * the register counts come back in i915_config, then again with buffers
+ * of those sizes attached so they get filled in.
+ *
+ * \param perf_cfg  perf configuration; i915_query_supported must be set
+ * \param fd        DRM file descriptor
+ * \param guid      identifier of the config to load
+ * \return a gen_perf_registers allocated with rzalloc(NULL, ...), with its
+ *         three register arrays allocated as its children, or NULL when
+ *         the query interface is unsupported, either query fails, or an
+ *         allocation fails
+ */
+struct gen_perf_registers *
+gen_perf_load_configuration(struct gen_perf_config *perf_cfg, int fd, const char *guid)
+{
+   if (!perf_cfg->i915_query_supported)
+      return NULL;
+
+   struct drm_i915_perf_oa_config i915_config = { 0, };
+   if (!i915_query_perf_config_data(perf_cfg, fd, guid, &i915_config))
+      return NULL;
+
+   struct gen_perf_registers *config = rzalloc(NULL, struct gen_perf_registers);
+   if (config == NULL)
+      return NULL;
+
+   config->n_flex_regs = i915_config.n_flex_regs;
+   config->flex_regs =
+      rzalloc_array(config, struct gen_perf_query_register_prog,
+                    config->n_flex_regs);
+   config->n_mux_regs = i915_config.n_mux_regs;
+   config->mux_regs =
+      rzalloc_array(config, struct gen_perf_query_register_prog,
+                    config->n_mux_regs);
+   config->n_b_counter_regs = i915_config.n_boolean_regs;
+   config->b_counter_regs =
+      rzalloc_array(config, struct gen_perf_query_register_prog,
+                    config->n_b_counter_regs);
+
+   /* Only a non-empty array that failed to allocate counts as an error. */
+   if ((config->n_flex_regs > 0 && config->flex_regs == NULL) ||
+       (config->n_mux_regs > 0 && config->mux_regs == NULL) ||
+       (config->n_b_counter_regs > 0 && config->b_counter_regs == NULL)) {
+      ralloc_free(config);
+      return NULL;
+   }
+
+   /*
+    * struct gen_perf_query_register_prog maps exactly to the tuple of
+    * (register offset, register value) returned by the i915.
+    */
+   i915_config.flex_regs_ptr = to_user_pointer(config->flex_regs);
+   i915_config.mux_regs_ptr = to_user_pointer(config->mux_regs);
+   i915_config.boolean_regs_ptr = to_user_pointer(config->b_counter_regs);
+   if (!i915_query_perf_config_data(perf_cfg, fd, guid, &i915_config)) {
+      ralloc_free(config);
+      return NULL;
+   }
+
+   return config;
+}
+
+/* Store a register configuration in the kernel and return its id.
+ *
+ * With a non-NULL `guid` the config is added under that name directly.
+ * With a NULL `guid` a name is derived from the SHA-1 of the register
+ * arrays (flex, then mux, then boolean counters; NULL arrays are skipped),
+ * formatted as 8-4-4-4-12 hex digits. If a metric set with that name
+ * already has an id, that id is returned instead of adding the config
+ * again.
+ *
+ * The return value is whatever i915_add_config() or
+ * gen_perf_load_metric_id() produced.
+ */
+uint64_t
+gen_perf_store_configuration(struct gen_perf_config *perf_cfg, int fd,
+                             const struct gen_perf_registers *config,
+                             const char *guid)
+{
+   if (guid != NULL)
+      return i915_add_config(perf_cfg, fd, config, guid);
+
+   struct mesa_sha1 hasher;
+   _mesa_sha1_init(&hasher);
+
+   if (config->flex_regs != NULL) {
+      _mesa_sha1_update(&hasher, config->flex_regs,
+                        sizeof(config->flex_regs[0]) * config->n_flex_regs);
+   }
+   if (config->mux_regs != NULL) {
+      _mesa_sha1_update(&hasher, config->mux_regs,
+                        sizeof(config->mux_regs[0]) * config->n_mux_regs);
+   }
+   if (config->b_counter_regs != NULL) {
+      _mesa_sha1_update(&hasher, config->b_counter_regs,
+                        sizeof(config->b_counter_regs[0]) *
+                        config->n_b_counter_regs);
+   }
+
+   uint8_t digest[20];
+   _mesa_sha1_final(&hasher, digest);
+
+   char digest_hex[41];
+   _mesa_sha1_format(digest_hex, digest);
+
+   /* 36 characters plus the terminating NUL. */
+   char generated_guid[37];
+   snprintf(generated_guid, sizeof(generated_guid),
+            "%.8s-%.4s-%.4s-%.4s-%.12s",
+            digest_hex, digest_hex + 8, digest_hex + 12,
+            digest_hex + 16, digest_hex + 20);
+
+   /* Check if already present. */
+   uint64_t existing_id;
+   if (gen_perf_load_metric_id(perf_cfg, generated_guid, &existing_id))
+      return existing_id;
+
+   return i915_add_config(perf_cfg, fd, config, generated_guid);
+}
+
/* Accumulate 32bits OA counters */
static inline void
accumulate_uint32(const uint32_t *report0,
{
int i, idx = 0;
- result->hw_id = start[2];
+ if (result->hw_id == OA_REPORT_INVALID_CTX_ID &&
+ start[2] != OA_REPORT_INVALID_CTX_ID)
+ result->hw_id = start[2];
+ if (result->reports_accumulated == 0)
+ result->begin_timestamp = start[1];
result->reports_accumulated++;
switch (query->oa_format) {
gen_perf_query_result_clear(struct gen_perf_query_result *result)
{
memset(result, 0, sizeof(*result));
- result->hw_id = 0xffffffff; /* invalid */
+ result->hw_id = OA_REPORT_INVALID_CTX_ID; /* invalid */
+}
+
+/* Populate perf_cfg with the metrics available on this device.
+ *
+ * The pipeline statistics query and the MDAPI statistic query are always
+ * registered, in that order. The MDAPI OA query is registered only when
+ * load_oa_metrics() reports success.
+ */
+void
+gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
+                      const struct gen_device_info *devinfo,
+                      int drm_fd)
+{
+   load_pipeline_statistic_metrics(perf_cfg, devinfo);
+   gen_perf_register_mdapi_statistic_query(perf_cfg, devinfo);
+
+   const bool oa_loaded = load_oa_metrics(perf_cfg, drm_fd, devinfo);
+   if (oa_loaded)
+      gen_perf_register_mdapi_oa_query(perf_cfg, devinfo);
}