#include "perf/gen_perf_regs.h"
#include "perf/gen_perf_mdapi.h"
#include "perf/gen_perf_metrics.h"
+#include "perf/gen_perf_private.h"
#include "util/bitscan.h"
#include "util/mesa-sha1.h"
return read_file_uint64(buf, value);
}
-static inline struct gen_perf_query_info *
-append_query_info(struct gen_perf_config *perf, int max_counters)
-{
- struct gen_perf_query_info *query;
-
- perf->queries = reralloc(perf, perf->queries,
- struct gen_perf_query_info,
- ++perf->n_queries);
- query = &perf->queries[perf->n_queries - 1];
- memset(query, 0, sizeof(*query));
-
- if (max_counters > 0) {
- query->max_counters = max_counters;
- query->counters =
- rzalloc_array(perf, struct gen_perf_query_counter, max_counters);
- }
-
- return query;
-}
-
static void
register_oa_config(struct gen_perf_config *perf,
const struct gen_perf_query_info *query,
uint64_t config_id)
{
- struct gen_perf_query_info *registered_query = append_query_info(perf, 0);
+ struct gen_perf_query_info *registered_query =
+ gen_perf_append_query_info(perf, 0);
*registered_query = *query;
registered_query->oa_metrics_set_id = config_id;
return NULL;
}
-static inline void
-add_stat_reg(struct gen_perf_query_info *query, uint32_t reg,
- uint32_t numerator, uint32_t denominator,
- const char *name, const char *description)
-{
- struct gen_perf_query_counter *counter;
-
- assert(query->n_counters < query->max_counters);
-
- counter = &query->counters[query->n_counters];
- counter->name = name;
- counter->desc = description;
- counter->type = GEN_PERF_COUNTER_TYPE_RAW;
- counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64;
- counter->offset = sizeof(uint64_t) * query->n_counters;
- counter->pipeline_stat.reg = reg;
- counter->pipeline_stat.numerator = numerator;
- counter->pipeline_stat.denominator = denominator;
-
- query->n_counters++;
-}
-
-static inline void
-add_basic_stat_reg(struct gen_perf_query_info *query,
- uint32_t reg, const char *name)
-{
- add_stat_reg(query, reg, 1, 1, name, name);
-}
-
static void
load_pipeline_statistic_metrics(struct gen_perf_config *perf_cfg,
- const struct gen_device_info *devinfo)
+ const struct gen_device_info *devinfo)
{
struct gen_perf_query_info *query =
- append_query_info(perf_cfg, MAX_STAT_COUNTERS);
+ gen_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);
query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
query->name = "Pipeline Statistics Registers";
- add_basic_stat_reg(query, IA_VERTICES_COUNT,
- "N vertices submitted");
- add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
- "N primitives submitted");
- add_basic_stat_reg(query, VS_INVOCATION_COUNT,
- "N vertex shader invocations");
+ gen_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
+ "N vertices submitted");
+ gen_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
+ "N primitives submitted");
+ gen_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
+ "N vertex shader invocations");
if (devinfo->gen == 6) {
- add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1,
- "SO_PRIM_STORAGE_NEEDED",
- "N geometry shader stream-out primitives (total)");
- add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1,
- "SO_NUM_PRIMS_WRITTEN",
- "N geometry shader stream-out primitives (written)");
+ gen_perf_query_add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1,
+ "SO_PRIM_STORAGE_NEEDED",
+ "N geometry shader stream-out primitives (total)");
+ gen_perf_query_add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1,
+ "SO_NUM_PRIMS_WRITTEN",
+ "N geometry shader stream-out primitives (written)");
} else {
- add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1,
- "SO_PRIM_STORAGE_NEEDED (Stream 0)",
- "N stream-out (stream 0) primitives (total)");
- add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1,
- "SO_PRIM_STORAGE_NEEDED (Stream 1)",
- "N stream-out (stream 1) primitives (total)");
- add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1,
- "SO_PRIM_STORAGE_NEEDED (Stream 2)",
- "N stream-out (stream 2) primitives (total)");
- add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1,
- "SO_PRIM_STORAGE_NEEDED (Stream 3)",
- "N stream-out (stream 3) primitives (total)");
- add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1,
- "SO_NUM_PRIMS_WRITTEN (Stream 0)",
- "N stream-out (stream 0) primitives (written)");
- add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1,
- "SO_NUM_PRIMS_WRITTEN (Stream 1)",
- "N stream-out (stream 1) primitives (written)");
- add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1,
- "SO_NUM_PRIMS_WRITTEN (Stream 2)",
- "N stream-out (stream 2) primitives (written)");
- add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1,
- "SO_NUM_PRIMS_WRITTEN (Stream 3)",
- "N stream-out (stream 3) primitives (written)");
- }
-
- add_basic_stat_reg(query, HS_INVOCATION_COUNT,
- "N TCS shader invocations");
- add_basic_stat_reg(query, DS_INVOCATION_COUNT,
- "N TES shader invocations");
-
- add_basic_stat_reg(query, GS_INVOCATION_COUNT,
- "N geometry shader invocations");
- add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
- "N geometry shader primitives emitted");
-
- add_basic_stat_reg(query, CL_INVOCATION_COUNT,
- "N primitives entering clipping");
- add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
- "N primitives leaving clipping");
+ gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1,
+ "SO_PRIM_STORAGE_NEEDED (Stream 0)",
+ "N stream-out (stream 0) primitives (total)");
+ gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1,
+ "SO_PRIM_STORAGE_NEEDED (Stream 1)",
+ "N stream-out (stream 1) primitives (total)");
+ gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1,
+ "SO_PRIM_STORAGE_NEEDED (Stream 2)",
+ "N stream-out (stream 2) primitives (total)");
+ gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1,
+ "SO_PRIM_STORAGE_NEEDED (Stream 3)",
+ "N stream-out (stream 3) primitives (total)");
+ gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1,
+ "SO_NUM_PRIMS_WRITTEN (Stream 0)",
+ "N stream-out (stream 0) primitives (written)");
+ gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1,
+ "SO_NUM_PRIMS_WRITTEN (Stream 1)",
+ "N stream-out (stream 1) primitives (written)");
+ gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1,
+ "SO_NUM_PRIMS_WRITTEN (Stream 2)",
+ "N stream-out (stream 2) primitives (written)");
+ gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1,
+ "SO_NUM_PRIMS_WRITTEN (Stream 3)",
+ "N stream-out (stream 3) primitives (written)");
+ }
+
+ gen_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
+ "N TCS shader invocations");
+ gen_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
+ "N TES shader invocations");
+
+ gen_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
+ "N geometry shader invocations");
+ gen_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
+ "N geometry shader primitives emitted");
+
+ gen_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
+ "N primitives entering clipping");
+ gen_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
+ "N primitives leaving clipping");
if (devinfo->is_haswell || devinfo->gen == 8) {
- add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
- "N fragment shader invocations",
- "N fragment shader invocations");
+ gen_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
+ "N fragment shader invocations",
+ "N fragment shader invocations");
} else {
- add_basic_stat_reg(query, PS_INVOCATION_COUNT,
- "N fragment shader invocations");
+ gen_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
+ "N fragment shader invocations");
}
- add_basic_stat_reg(query, PS_DEPTH_COUNT,
- "N z-pass fragments");
+ gen_perf_query_add_basic_stat_reg(query, PS_DEPTH_COUNT,
+ "N z-pass fragments");
if (devinfo->gen >= 7) {
- add_basic_stat_reg(query, CS_INVOCATION_COUNT,
- "N compute shader invocations");
+ gen_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
+ "N compute shader invocations");
}
query->data_size = sizeof(uint64_t) * query->n_counters;
result->hw_id = OA_REPORT_INVALID_CTX_ID; /* invalid */
}
-static void
-register_mdapi_statistic_query(struct gen_perf_config *perf_cfg,
- const struct gen_device_info *devinfo)
-{
- if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
- return;
-
- struct gen_perf_query_info *query =
- append_query_info(perf_cfg, MAX_STAT_COUNTERS);
-
- query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
- query->name = "Intel_Raw_Pipeline_Statistics_Query";
-
- /* The order has to match mdapi_pipeline_metrics. */
- add_basic_stat_reg(query, IA_VERTICES_COUNT,
- "N vertices submitted");
- add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
- "N primitives submitted");
- add_basic_stat_reg(query, VS_INVOCATION_COUNT,
- "N vertex shader invocations");
- add_basic_stat_reg(query, GS_INVOCATION_COUNT,
- "N geometry shader invocations");
- add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
- "N geometry shader primitives emitted");
- add_basic_stat_reg(query, CL_INVOCATION_COUNT,
- "N primitives entering clipping");
- add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
- "N primitives leaving clipping");
- if (devinfo->is_haswell || devinfo->gen == 8) {
- add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
- "N fragment shader invocations",
- "N fragment shader invocations");
- } else {
- add_basic_stat_reg(query, PS_INVOCATION_COUNT,
- "N fragment shader invocations");
- }
- add_basic_stat_reg(query, HS_INVOCATION_COUNT,
- "N TCS shader invocations");
- add_basic_stat_reg(query, DS_INVOCATION_COUNT,
- "N TES shader invocations");
- if (devinfo->gen >= 7) {
- add_basic_stat_reg(query, CS_INVOCATION_COUNT,
- "N compute shader invocations");
- }
-
- if (devinfo->gen >= 10) {
- /* Reuse existing CS invocation register until we can expose this new
- * one.
- */
- add_basic_stat_reg(query, CS_INVOCATION_COUNT,
- "Reserved1");
- }
-
- query->data_size = sizeof(uint64_t) * query->n_counters;
-}
-
-static void
-fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
- const char *name,
- uint32_t data_offset,
- uint32_t data_size,
- enum gen_perf_counter_data_type data_type)
-{
- struct gen_perf_query_counter *counter = &query->counters[query->n_counters];
-
- assert(query->n_counters <= query->max_counters);
-
- counter->name = name;
- counter->desc = "Raw counter value";
- counter->type = GEN_PERF_COUNTER_TYPE_RAW;
- counter->data_type = data_type;
- counter->offset = data_offset;
-
- query->n_counters++;
-
- assert(counter->offset + gen_perf_query_counter_get_size(counter) <= query->data_size);
-}
-
-#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \
- fill_mdapi_perf_query_counter(query, #field_name, \
- (uint8_t *) &struct_name.field_name - \
- (uint8_t *) &struct_name, \
- sizeof(struct_name.field_name), \
- GEN_PERF_COUNTER_DATA_TYPE_##type_name)
-#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
- fill_mdapi_perf_query_counter(query, \
- ralloc_asprintf(ctx, "%s%i", #field_name, idx), \
- (uint8_t *) &struct_name.field_name[idx] - \
- (uint8_t *) &struct_name, \
- sizeof(struct_name.field_name[0]), \
- GEN_PERF_COUNTER_DATA_TYPE_##type_name)
-
-static void
-register_mdapi_oa_query(const struct gen_device_info *devinfo,
- struct gen_perf_config *perf)
-{
- struct gen_perf_query_info *query = NULL;
-
- /* MDAPI requires different structures for pretty much every generation
- * (right now we have definitions for gen 7 to 11).
- */
- if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
- return;
-
- switch (devinfo->gen) {
- case 7: {
- query = append_query_info(perf, 1 + 45 + 16 + 7);
- query->oa_format = I915_OA_FORMAT_A45_B8_C8;
-
- struct gen7_mdapi_metrics metric_data;
- query->data_size = sizeof(metric_data);
-
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
- for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {
- MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
- metric_data, ACounters, i, UINT64);
- }
- for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {
- MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
- metric_data, NOACounters, i, UINT64);
- }
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
- break;
- }
- case 8: {
- query = append_query_info(perf, 2 + 36 + 16 + 16);
- query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
-
- struct gen8_mdapi_metrics metric_data;
- query->data_size = sizeof(metric_data);
-
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
- for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
- MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
- metric_data, OaCntr, i, UINT64);
- }
- for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
- MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
- metric_data, NoaCntr, i, UINT64);
- }
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
- break;
- }
- case 9:
- case 10:
- case 11: {
- query = append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
- query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
-
- struct gen9_mdapi_metrics metric_data;
- query->data_size = sizeof(metric_data);
-
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
- for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
- MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
- metric_data, OaCntr, i, UINT64);
- }
- for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
- MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
- metric_data, NoaCntr, i, UINT64);
- }
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
- for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {
- MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
- metric_data, UserCntr, i, UINT64);
- }
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);
- MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);
- break;
- }
- default:
- unreachable("Unsupported gen");
- break;
- }
-
- query->kind = GEN_PERF_QUERY_TYPE_RAW;
- query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
- query->guid = GEN_PERF_QUERY_GUID_MDAPI;
-
- {
- /* Accumulation buffer offsets copied from an actual query... */
- const struct gen_perf_query_info *copy_query =
- &perf->queries[0];
-
- query->gpu_time_offset = copy_query->gpu_time_offset;
- query->gpu_clock_offset = copy_query->gpu_clock_offset;
- query->a_offset = copy_query->a_offset;
- query->b_offset = copy_query->b_offset;
- query->c_offset = copy_query->c_offset;
- }
-}
-
void
gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
const struct gen_device_info *devinfo,
int drm_fd)
{
load_pipeline_statistic_metrics(perf_cfg, devinfo);
- register_mdapi_statistic_query(perf_cfg, devinfo);
+ gen_perf_register_mdapi_statistic_query(perf_cfg, devinfo);
if (load_oa_metrics(perf_cfg, drm_fd, devinfo))
- register_mdapi_oa_query(devinfo, perf_cfg);
+ gen_perf_register_mdapi_oa_query(perf_cfg, devinfo);
}
#include "gen_perf.h"
#include "gen_perf_mdapi.h"
+#include "gen_perf_private.h"
+#include "gen_perf_regs.h"
#include "dev/gen_device_info.h"
+#include <drm-uapi/i915_drm.h>
+
+
int
gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
const struct gen_device_info *devinfo,
unreachable("unexpected gen");
}
}
+/**
+ * Register the "Intel_Raw_Pipeline_Statistics_Query" pipeline-statistics
+ * query on perf_cfg.
+ *
+ * Returns without registering anything unless devinfo->gen is in [7, 11].
+ * Otherwise a query with room for MAX_STAT_COUNTERS counters is appended,
+ * one counter is added per statistics register listed below, and
+ * query->data_size is set to one uint64_t per counter actually added.
+ *
+ * The registration order is significant (see the comment in the body), so
+ * statements here must not be reordered.
+ */
+void
+gen_perf_register_mdapi_statistic_query(struct gen_perf_config *perf_cfg,
+ const struct gen_device_info *devinfo)
+{
+ if (!(devinfo->gen >= 7 && devinfo->gen <= 11)) /* only gens 7..11 get this query */
+ return;
+
+ struct gen_perf_query_info *query =
+ gen_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);
+
+ query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
+ query->name = "Intel_Raw_Pipeline_Statistics_Query";
+
+ /* The order has to match mdapi_pipeline_metrics. */
+ gen_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
+ "N vertices submitted");
+ gen_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
+ "N primitives submitted");
+ gen_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
+ "N vertex shader invocations");
+ gen_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
+ "N geometry shader invocations");
+ gen_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
+ "N geometry shader primitives emitted");
+ gen_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
+ "N primitives entering clipping");
+ gen_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
+ "N primitives leaving clipping");
+ if (devinfo->is_haswell || devinfo->gen == 8) { /* NOTE(review): the 1, 4 arguments are presumably numerator/denominator - confirm in gen_perf_private.h */
+ gen_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
+ "N fragment shader invocations",
+ "N fragment shader invocations");
+ } else {
+ gen_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
+ "N fragment shader invocations");
+ }
+ gen_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
+ "N TCS shader invocations");
+ gen_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
+ "N TES shader invocations");
+ if (devinfo->gen >= 7) { /* always true here: gen < 7 already returned at the top */
+ gen_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
+ "N compute shader invocations");
+ }
+
+ if (devinfo->gen >= 10) {
+ /* Reuse existing CS invocation register until we can expose this new
+ * one.
+ */
+ gen_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
+ "Reserved1");
+ }
+
+ query->data_size = sizeof(uint64_t) * query->n_counters; /* one uint64_t per counter added above */
+}
+
+/**
+ * Append one raw counter description to \p query.
+ *
+ * The next free slot of query->counters is filled with \p name, the fixed
+ * description "Raw counter value", type GEN_PERF_COUNTER_TYPE_RAW,
+ * \p data_type and \p data_offset, and query->n_counters is incremented.
+ *
+ * \param query       query being populated; query->data_size must already be
+ *                    set because the counter's end offset is checked
+ *                    against it.
+ * \param name        counter name; the pointer is stored, not copied.
+ * \param data_offset byte offset of the counter inside the query result.
+ * \param data_size   size of the source field; currently not used by the
+ *                    body (the size check relies on
+ *                    gen_perf_query_counter_get_size() instead).
+ * \param data_type   GEN_PERF_COUNTER_DATA_TYPE_* value for the counter.
+ */
+static void
+fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
+                              const char *name,
+                              uint32_t data_offset,
+                              uint32_t data_size,
+                              enum gen_perf_counter_data_type data_type)
+{
+   /* The slot about to be written is counters[n_counters], so the index has
+    * to be strictly below max_counters.  Check this before forming the slot
+    * pointer; "<=" would let a write one element past the array through.
+    */
+   assert(query->n_counters < query->max_counters);
+
+   struct gen_perf_query_counter *counter = &query->counters[query->n_counters];
+
+   counter->name = name;
+   counter->desc = "Raw counter value";
+   counter->type = GEN_PERF_COUNTER_TYPE_RAW;
+   counter->data_type = data_type;
+   counter->offset = data_offset;
+
+   query->n_counters++;
+
+   /* The counter must lie entirely inside the query's result buffer. */
+   assert(counter->offset + gen_perf_query_counter_get_size(counter) <= query->data_size);
+}
+/*
+ * Helpers that describe one field of an MDAPI layout struct as a counter by
+ * forwarding to fill_mdapi_perf_query_counter():
+ *
+ *  - name:      the stringified field name; the ARRAY variant builds
+ *               "<field_name><idx>" with ralloc_asprintf(ctx, ...).
+ *  - offset:    byte distance from the start of struct_name to the field
+ *               (or to element idx of the field for the ARRAY variant).
+ *  - size:      sizeof the field (or of one element for the ARRAY variant).
+ *  - data type: GEN_PERF_COUNTER_DATA_TYPE_<type_name>.
+ *
+ * struct_name must be an addressable object because its address is taken;
+ * its contents are never read.  struct_name, field_name and idx are expanded
+ * more than once, so arguments with side effects must be avoided.
+ */
+#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \
+ fill_mdapi_perf_query_counter(query, #field_name, \
+ (uint8_t *) &struct_name.field_name - \
+ (uint8_t *) &struct_name, \
+ sizeof(struct_name.field_name), \
+ GEN_PERF_COUNTER_DATA_TYPE_##type_name)
+#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
+ fill_mdapi_perf_query_counter(query, \
+ ralloc_asprintf(ctx, "%s%i", #field_name, idx), \
+ (uint8_t *) &struct_name.field_name[idx] - \
+ (uint8_t *) &struct_name, \
+ sizeof(struct_name.field_name[0]), \
+ GEN_PERF_COUNTER_DATA_TYPE_##type_name)
+/**
+ * Register the raw MDAPI OA query "Intel_Raw_Hardware_Counters_Set_0_Query"
+ * on perf.
+ *
+ * Returns without registering anything unless devinfo->gen is in [7, 11].
+ * Otherwise, per generation, the function:
+ *  - appends a query sized for the number of fields of that generation's
+ *    gen7/gen8/gen9_mdapi_metrics layout and picks the matching oa_format,
+ *  - sets query->data_size to the size of that layout struct,
+ *  - adds one counter per layout field, in the order written below, with
+ *    offsets computed from the layout struct,
+ *  - marks the query as GEN_PERF_QUERY_TYPE_RAW with the MDAPI guid and
+ *    copies the accumulation-buffer offsets from perf->queries[0].
+ *
+ * Counters are registered in a fixed order; statements here must not be
+ * reordered.
+ */
+void
+gen_perf_register_mdapi_oa_query(struct gen_perf_config *perf,
+ const struct gen_device_info *devinfo)
+{
+ struct gen_perf_query_info *query = NULL;
+
+ /* MDAPI requires different structures for pretty much every generation
+ * (right now we have definitions for gen 7 to 11).
+ */
+ if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
+ return;
+
+ switch (devinfo->gen) {
+ case 7: {
+ query = gen_perf_append_query_info(perf, 1 + 45 + 16 + 7); /* counter capacity; NOTE(review): terms presumably mirror the field groups added below - confirm against gen7_mdapi_metrics */
+ query->oa_format = I915_OA_FORMAT_A45_B8_C8;
+
+ struct gen7_mdapi_metrics metric_data; /* layout template: only its size and field offsets are used, never its contents */
+ query->data_size = sizeof(metric_data);
+
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
+ for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {
+ MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, /* perf->queries is the ralloc context for the generated counter name */
+ metric_data, ACounters, i, UINT64);
+ }
+ for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {
+ MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+ metric_data, NOACounters, i, UINT64);
+ }
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
+ break;
+ }
+ case 8: {
+ query = gen_perf_append_query_info(perf, 2 + 36 + 16 + 16);
+ query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
+
+ struct gen8_mdapi_metrics metric_data; /* layout template, as in the gen 7 case */
+ query->data_size = sizeof(metric_data);
+
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
+ for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
+ MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+ metric_data, OaCntr, i, UINT64);
+ }
+ for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
+ MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+ metric_data, NoaCntr, i, UINT64);
+ }
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
+ break;
+ }
+ case 9:
+ case 10:
+ case 11: { /* gens 9, 10 and 11 share the gen9 layout */
+ query = gen_perf_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
+ query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
+
+ struct gen9_mdapi_metrics metric_data; /* layout template, as in the gen 7 case */
+ query->data_size = sizeof(metric_data);
+
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
+ for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
+ MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+ metric_data, OaCntr, i, UINT64);
+ }
+ for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
+ MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+ metric_data, NoaCntr, i, UINT64);
+ }
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
+ for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {
+ MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+ metric_data, UserCntr, i, UINT64);
+ }
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);
+ MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);
+ break;
+ }
+ default: /* excluded by the gen 7..11 range check above */
+ unreachable("Unsupported gen");
+ break;
+ }
+
+ query->kind = GEN_PERF_QUERY_TYPE_RAW;
+ query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
+ query->guid = GEN_PERF_QUERY_GUID_MDAPI;
+
+ {
+ /* Accumulation buffer offsets copied from an actual query... */
+ const struct gen_perf_query_info *copy_query =
+ &perf->queries[0]; /* NOTE(review): assumes index 0 holds an OA query with meaningful offsets; gen_perf_init_metrics registers the pipeline-statistics query first - confirm which query ends up at index 0 */
+
+ query->gpu_time_offset = copy_query->gpu_time_offset;
+ query->gpu_clock_offset = copy_query->gpu_clock_offset;
+ query->a_offset = copy_query->a_offset;
+ query->b_offset = copy_query->b_offset;
+ query->c_offset = copy_query->c_offset;
+ }
+}