#include <drm-uapi/i915_drm.h>
+#include "common/gen_gem.h"
#include "gen_perf.h"
+#include "perf/gen_perf_mdapi.h"
#include "perf/gen_perf_metrics.h"
#include "dev/gen_debug.h"
#define FILE_DEBUG_FLAG DEBUG_PERFMON
static bool
-get_sysfs_dev_dir(struct gen_perf *perf, int fd)
+get_sysfs_dev_dir(struct gen_perf_config *perf, int fd)
{
struct stat sb;
int min, maj;
}
static bool
-read_sysfs_drm_device_file_uint64(struct gen_perf *perf,
+read_sysfs_drm_device_file_uint64(struct gen_perf_config *perf,
const char *file,
uint64_t *value)
{
}
static void
-register_oa_config(struct gen_perf *perf,
+register_oa_config(struct gen_perf_config *perf,
const struct gen_perf_query_info *query,
uint64_t config_id)
{
}
static void
-enumerate_sysfs_metrics(struct gen_perf *perf)
+enumerate_sysfs_metrics(struct gen_perf_config *perf)
{
DIR *metricsdir = NULL;
struct dirent *metric_entry;
}
static bool
-kernel_has_dynamic_config_support(struct gen_perf *perf, int fd)
+kernel_has_dynamic_config_support(struct gen_perf_config *perf, int fd)
{
uint64_t invalid_config_id = UINT64_MAX;
- return perf->ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG,
- &invalid_config_id) < 0 && errno == ENOENT;
+ return gen_ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG,
+ &invalid_config_id) < 0 && errno == ENOENT;
}
bool
-gen_perf_load_metric_id(struct gen_perf *perf, const char *guid,
+gen_perf_load_metric_id(struct gen_perf_config *perf, const char *guid,
uint64_t *metric_id)
{
char config_path[280];
}
static void
-init_oa_configs(struct gen_perf *perf, int fd)
+init_oa_configs(struct gen_perf_config *perf, int fd)
{
hash_table_foreach(perf->oa_metrics_table, entry) {
const struct gen_perf_query_info *query = entry->data;
config.n_flex_regs = query->n_flex_regs;
config.flex_regs_ptr = (uintptr_t) query->flex_regs;
- ret = perf->ioctl(fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);
+ ret = gen_ioctl(fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);
if (ret < 0) {
DBG("Failed to load \"%s\" (%s) metrics set in kernel: %s\n",
query->name, query->guid, strerror(errno));
}
static void
-compute_topology_builtins(struct gen_perf *perf,
+compute_topology_builtins(struct gen_perf_config *perf,
const struct gen_device_info *devinfo)
{
perf->sys_vars.slice_mask = devinfo->slice_masks;
}
static bool
-init_oa_sys_vars(struct gen_perf *perf, const struct gen_device_info *devinfo)
+init_oa_sys_vars(struct gen_perf_config *perf, const struct gen_device_info *devinfo)
{
uint64_t min_freq_mhz = 0, max_freq_mhz = 0;
return true;
}
-typedef void (*perf_register_oa_queries_t)(struct gen_perf *);
+typedef void (*perf_register_oa_queries_t)(struct gen_perf_config *);
static perf_register_oa_queries_t
get_register_queries_function(const struct gen_device_info *devinfo)
}
bool
-gen_perf_load_oa_metrics(struct gen_perf *perf, int fd,
+gen_perf_load_oa_metrics(struct gen_perf_config *perf, int fd,
const struct gen_device_info *devinfo)
{
perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
memset(result, 0, sizeof(*result));
result->hw_id = 0xffffffff; /* invalid */
}
+
+static void
+fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
+ const char *name,
+ uint32_t data_offset,
+ uint32_t data_size,
+ enum gen_perf_counter_data_type data_type)
+{
+ struct gen_perf_query_counter *counter = &query->counters[query->n_counters];
+
+ assert(query->n_counters <= query->max_counters);
+
+ counter->name = name;
+ counter->desc = "Raw counter value";
+ counter->type = GEN_PERF_COUNTER_TYPE_RAW;
+ counter->data_type = data_type;
+ counter->offset = data_offset;
+
+ query->n_counters++;
+
+ assert(counter->offset + gen_perf_query_counter_get_size(counter) <= query->data_size);
+}
+
+#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \
+ fill_mdapi_perf_query_counter(query, #field_name, \
+ (uint8_t *) &struct_name.field_name - \
+ (uint8_t *) &struct_name, \
+ sizeof(struct_name.field_name), \
+ GEN_PERF_COUNTER_DATA_TYPE_##type_name)
+#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
+ fill_mdapi_perf_query_counter(query, \
+ ralloc_asprintf(ctx, "%s%i", #field_name, idx), \
+ (uint8_t *) &struct_name.field_name[idx] - \
+ (uint8_t *) &struct_name, \
+ sizeof(struct_name.field_name[0]), \
+ GEN_PERF_COUNTER_DATA_TYPE_##type_name)
+
+void
+gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo,
+ struct gen_perf_config *perf)
+{
+ struct gen_perf_query_info *query = NULL;
+
+ /* MDAPI requires different structures for pretty much every generation
+ * (right now we have definitions for gen 7 to 11).
+ */
+ if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
+ return;
+
+ switch (devinfo->gen) {
+ case 7: {
+ query = gen_perf_query_append_query_info(perf, 1 + 45 + 16 + 7);
+ query->oa_format = I915_OA_FORMAT_A45_B8_C8;
+
+ struct gen7_mdapi_metrics metric_data;
+ query->data_size = sizeof(metric_data);
+
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
+ for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {
+ MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+ metric_data, ACounters, i, UINT64);
+ }
+ for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {
+ MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+ metric_data, NOACounters, i, UINT64);
+ }
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
+ break;
+ }
+ case 8: {
+ query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16);
+ query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
+
+ struct gen8_mdapi_metrics metric_data;
+ query->data_size = sizeof(metric_data);
+
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
+ for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
+ MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+ metric_data, OaCntr, i, UINT64);
+ }
+ for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
+ MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+ metric_data, NoaCntr, i, UINT64);
+ }
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
+ break;
+ }
+ case 9:
+ case 10:
+ case 11: {
+ query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
+ query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
+
+ struct gen9_mdapi_metrics metric_data;
+ query->data_size = sizeof(metric_data);
+
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
+ for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
+ MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+ metric_data, OaCntr, i, UINT64);
+ }
+ for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
+ MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+ metric_data, NoaCntr, i, UINT64);
+ }
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
+ for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {
+ MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+ metric_data, UserCntr, i, UINT64);
+ }
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);
+ break;
+ }
+ default:
+ unreachable("Unsupported gen");
+ break;
+ }
+
+ query->kind = GEN_PERF_QUERY_TYPE_RAW;
+ query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
+ query->guid = GEN_PERF_QUERY_GUID_MDAPI;
+
+ {
+ /* Accumulation buffer offsets copied from an actual query... */
+ const struct gen_perf_query_info *copy_query =
+ &perf->queries[0];
+
+ query->gpu_time_offset = copy_query->gpu_time_offset;
+ query->gpu_clock_offset = copy_query->gpu_clock_offset;
+ query->a_offset = copy_query->a_offset;
+ query->b_offset = copy_query->b_offset;
+ query->c_offset = copy_query->c_offset;
+ }
+}
+
+void
+gen_perf_query_register_mdapi_statistic_query(const struct gen_device_info *devinfo,
+ struct gen_perf_config *perf)
+{
+ if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
+ return;
+
+ struct gen_perf_query_info *query =
+ gen_perf_query_append_query_info(perf, MAX_STAT_COUNTERS);
+
+ query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
+ query->name = "Intel_Raw_Pipeline_Statistics_Query";
+
+ /* The order has to match mdapi_pipeline_metrics. */
+ gen_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT,
+ "N vertices submitted");
+ gen_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
+ "N primitives submitted");
+ gen_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
+ "N vertex shader invocations");
+ gen_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
+ "N geometry shader invocations");
+ gen_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
+ "N geometry shader primitives emitted");
+ gen_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
+ "N primitives entering clipping");
+ gen_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
+ "N primitives leaving clipping");
+ if (devinfo->is_haswell || devinfo->gen == 8) {
+ gen_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
+ "N fragment shader invocations",
+ "N fragment shader invocations");
+ } else {
+ gen_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
+ "N fragment shader invocations");
+ }
+ gen_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
+ "N TCS shader invocations");
+ gen_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
+ "N TES shader invocations");
+ if (devinfo->gen >= 7) {
+ gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
+ "N compute shader invocations");
+ }
+
+ if (devinfo->gen >= 10) {
+ /* Reuse existing CS invocation register until we can expose this new
+ * one.
+ */
+ gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
+ "Reserved1");
+ }
+
+ query->data_size = sizeof(uint64_t) * query->n_counters;
+}
+
+uint64_t
+gen_perf_query_get_metric_id(struct gen_perf_config *perf,
+ const struct gen_perf_query_info *query)
+{
+ /* These queries are know not to ever change, their config ID has been
+ * loaded upon the first query creation. No need to look them up again.
+ */
+ if (query->kind == GEN_PERF_QUERY_TYPE_OA)
+ return query->oa_metrics_set_id;
+
+ assert(query->kind == GEN_PERF_QUERY_TYPE_RAW);
+
+ /* Raw queries can be reprogrammed up by an external application/library.
+ * When a raw query is used for the first time it's id is set to a value !=
+ * 0. When it stops being used the id returns to 0. No need to reload the
+ * ID when it's already loaded.
+ */
+ if (query->oa_metrics_set_id != 0) {
+ DBG("Raw query '%s' guid=%s using cached ID: %"PRIu64"\n",
+ query->name, query->guid, query->oa_metrics_set_id);
+ return query->oa_metrics_set_id;
+ }
+
+ struct gen_perf_query_info *raw_query = (struct gen_perf_query_info *)query;
+ if (!gen_perf_load_metric_id(perf, query->guid,
+ &raw_query->oa_metrics_set_id)) {
+ DBG("Unable to read query guid=%s ID, falling back to test config\n", query->guid);
+ raw_query->oa_metrics_set_id = 1ULL;
+ } else {
+ DBG("Raw query '%s'guid=%s loaded ID: %"PRIu64"\n",
+ query->name, query->guid, query->oa_metrics_set_id);
+ }
+ return query->oa_metrics_set_id;
+}
+
+struct oa_sample_buf *
+gen_perf_get_free_sample_buf(struct gen_perf_context *perf_ctx)
+{
+ struct exec_node *node = exec_list_pop_head(&perf_ctx->free_sample_buffers);
+ struct oa_sample_buf *buf;
+
+ if (node)
+ buf = exec_node_data(struct oa_sample_buf, node, link);
+ else {
+ buf = ralloc_size(perf_ctx->perf, sizeof(*buf));
+
+ exec_node_init(&buf->link);
+ buf->refcount = 0;
+ buf->len = 0;
+ }
+
+ return buf;
+}