From: Lionel Landwerlin Date: Wed, 4 Sep 2019 11:10:22 +0000 (+0300) Subject: intel/perf: move mdapi query definitions to their own file X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=dde96d31b75f7895900405ab771fb3dd0dd78069;p=mesa.git intel/perf: move mdapi query definitions to their own file Where they belong. Signed-off-by: Lionel Landwerlin Acked-by: Jason Ekstrand Acked-by: Tapani Pälli Reviewed-by: Rafael Antognolli Reviewed-by: Mark Janes Part-of: --- diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources index 195ce3e67d8..b9d72ea5b30 100644 --- a/src/intel/Makefile.sources +++ b/src/intel/Makefile.sources @@ -359,6 +359,7 @@ GEN_PERF_FILES = \ perf/gen_perf.h \ perf/gen_perf_mdapi.c \ perf/gen_perf_mdapi.h \ + perf/gen_perf_private.h \ perf/gen_perf_query.h \ perf/gen_perf_query.c diff --git a/src/intel/perf/gen_perf.c b/src/intel/perf/gen_perf.c index c3c7f2e891a..2c5d541feed 100644 --- a/src/intel/perf/gen_perf.c +++ b/src/intel/perf/gen_perf.c @@ -44,6 +44,7 @@ #include "perf/gen_perf_regs.h" #include "perf/gen_perf_mdapi.h" #include "perf/gen_perf_metrics.h" +#include "perf/gen_perf_private.h" #include "util/bitscan.h" #include "util/mesa-sha1.h" @@ -172,32 +173,13 @@ read_sysfs_drm_device_file_uint64(struct gen_perf_config *perf, return read_file_uint64(buf, value); } -static inline struct gen_perf_query_info * -append_query_info(struct gen_perf_config *perf, int max_counters) -{ - struct gen_perf_query_info *query; - - perf->queries = reralloc(perf, perf->queries, - struct gen_perf_query_info, - ++perf->n_queries); - query = &perf->queries[perf->n_queries - 1]; - memset(query, 0, sizeof(*query)); - - if (max_counters > 0) { - query->max_counters = max_counters; - query->counters = - rzalloc_array(perf, struct gen_perf_query_counter, max_counters); - } - - return query; -} - static void register_oa_config(struct gen_perf_config *perf, const struct gen_perf_query_info *query, uint64_t config_id) { - struct gen_perf_query_info 
*registered_query = append_query_info(perf, 0); + struct gen_perf_query_info *registered_query = + gen_perf_append_query_info(perf, 0); *registered_query = *query; registered_query->oa_metrics_set_id = config_id; @@ -474,116 +456,87 @@ get_register_queries_function(const struct gen_device_info *devinfo) return NULL; } -static inline void -add_stat_reg(struct gen_perf_query_info *query, uint32_t reg, - uint32_t numerator, uint32_t denominator, - const char *name, const char *description) -{ - struct gen_perf_query_counter *counter; - - assert(query->n_counters < query->max_counters); - - counter = &query->counters[query->n_counters]; - counter->name = name; - counter->desc = description; - counter->type = GEN_PERF_COUNTER_TYPE_RAW; - counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64; - counter->offset = sizeof(uint64_t) * query->n_counters; - counter->pipeline_stat.reg = reg; - counter->pipeline_stat.numerator = numerator; - counter->pipeline_stat.denominator = denominator; - - query->n_counters++; -} - -static inline void -add_basic_stat_reg(struct gen_perf_query_info *query, - uint32_t reg, const char *name) -{ - add_stat_reg(query, reg, 1, 1, name, name); -} - static void load_pipeline_statistic_metrics(struct gen_perf_config *perf_cfg, - const struct gen_device_info *devinfo) + const struct gen_device_info *devinfo) { struct gen_perf_query_info *query = - append_query_info(perf_cfg, MAX_STAT_COUNTERS); + gen_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS); query->kind = GEN_PERF_QUERY_TYPE_PIPELINE; query->name = "Pipeline Statistics Registers"; - add_basic_stat_reg(query, IA_VERTICES_COUNT, - "N vertices submitted"); - add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, - "N primitives submitted"); - add_basic_stat_reg(query, VS_INVOCATION_COUNT, - "N vertex shader invocations"); + gen_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT, + "N vertices submitted"); + gen_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, + "N primitives 
submitted"); + gen_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT, + "N vertex shader invocations"); if (devinfo->gen == 6) { - add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1, - "SO_PRIM_STORAGE_NEEDED", - "N geometry shader stream-out primitives (total)"); - add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1, - "SO_NUM_PRIMS_WRITTEN", - "N geometry shader stream-out primitives (written)"); + gen_perf_query_add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1, + "SO_PRIM_STORAGE_NEEDED", + "N geometry shader stream-out primitives (total)"); + gen_perf_query_add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1, + "SO_NUM_PRIMS_WRITTEN", + "N geometry shader stream-out primitives (written)"); } else { - add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1, - "SO_PRIM_STORAGE_NEEDED (Stream 0)", - "N stream-out (stream 0) primitives (total)"); - add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1, - "SO_PRIM_STORAGE_NEEDED (Stream 1)", - "N stream-out (stream 1) primitives (total)"); - add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1, - "SO_PRIM_STORAGE_NEEDED (Stream 2)", - "N stream-out (stream 2) primitives (total)"); - add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1, - "SO_PRIM_STORAGE_NEEDED (Stream 3)", - "N stream-out (stream 3) primitives (total)"); - add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1, - "SO_NUM_PRIMS_WRITTEN (Stream 0)", - "N stream-out (stream 0) primitives (written)"); - add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1, - "SO_NUM_PRIMS_WRITTEN (Stream 1)", - "N stream-out (stream 1) primitives (written)"); - add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1, - "SO_NUM_PRIMS_WRITTEN (Stream 2)", - "N stream-out (stream 2) primitives (written)"); - add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1, - "SO_NUM_PRIMS_WRITTEN (Stream 3)", - "N stream-out (stream 3) primitives (written)"); - } - - add_basic_stat_reg(query, HS_INVOCATION_COUNT, - "N TCS shader invocations"); - 
add_basic_stat_reg(query, DS_INVOCATION_COUNT, - "N TES shader invocations"); - - add_basic_stat_reg(query, GS_INVOCATION_COUNT, - "N geometry shader invocations"); - add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, - "N geometry shader primitives emitted"); - - add_basic_stat_reg(query, CL_INVOCATION_COUNT, - "N primitives entering clipping"); - add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, - "N primitives leaving clipping"); + gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1, + "SO_PRIM_STORAGE_NEEDED (Stream 0)", + "N stream-out (stream 0) primitives (total)"); + gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1, + "SO_PRIM_STORAGE_NEEDED (Stream 1)", + "N stream-out (stream 1) primitives (total)"); + gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1, + "SO_PRIM_STORAGE_NEEDED (Stream 2)", + "N stream-out (stream 2) primitives (total)"); + gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1, + "SO_PRIM_STORAGE_NEEDED (Stream 3)", + "N stream-out (stream 3) primitives (total)"); + gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1, + "SO_NUM_PRIMS_WRITTEN (Stream 0)", + "N stream-out (stream 0) primitives (written)"); + gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1, + "SO_NUM_PRIMS_WRITTEN (Stream 1)", + "N stream-out (stream 1) primitives (written)"); + gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1, + "SO_NUM_PRIMS_WRITTEN (Stream 2)", + "N stream-out (stream 2) primitives (written)"); + gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1, + "SO_NUM_PRIMS_WRITTEN (Stream 3)", + "N stream-out (stream 3) primitives (written)"); + } + + gen_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT, + "N TCS shader invocations"); + gen_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT, + "N TES shader invocations"); + + gen_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT, + "N 
geometry shader invocations"); + gen_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, + "N geometry shader primitives emitted"); + + gen_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT, + "N primitives entering clipping"); + gen_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, + "N primitives leaving clipping"); if (devinfo->is_haswell || devinfo->gen == 8) { - add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, - "N fragment shader invocations", - "N fragment shader invocations"); + gen_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, + "N fragment shader invocations", + "N fragment shader invocations"); } else { - add_basic_stat_reg(query, PS_INVOCATION_COUNT, - "N fragment shader invocations"); + gen_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT, + "N fragment shader invocations"); } - add_basic_stat_reg(query, PS_DEPTH_COUNT, - "N z-pass fragments"); + gen_perf_query_add_basic_stat_reg(query, PS_DEPTH_COUNT, + "N z-pass fragments"); if (devinfo->gen >= 7) { - add_basic_stat_reg(query, CS_INVOCATION_COUNT, - "N compute shader invocations"); + gen_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT, + "N compute shader invocations"); } query->data_size = sizeof(uint64_t) * query->n_counters; @@ -863,243 +816,13 @@ gen_perf_query_result_clear(struct gen_perf_query_result *result) result->hw_id = OA_REPORT_INVALID_CTX_ID; /* invalid */ } -static void -register_mdapi_statistic_query(struct gen_perf_config *perf_cfg, - const struct gen_device_info *devinfo) -{ - if (!(devinfo->gen >= 7 && devinfo->gen <= 11)) - return; - - struct gen_perf_query_info *query = - append_query_info(perf_cfg, MAX_STAT_COUNTERS); - - query->kind = GEN_PERF_QUERY_TYPE_PIPELINE; - query->name = "Intel_Raw_Pipeline_Statistics_Query"; - - /* The order has to match mdapi_pipeline_metrics. 
*/ - add_basic_stat_reg(query, IA_VERTICES_COUNT, - "N vertices submitted"); - add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, - "N primitives submitted"); - add_basic_stat_reg(query, VS_INVOCATION_COUNT, - "N vertex shader invocations"); - add_basic_stat_reg(query, GS_INVOCATION_COUNT, - "N geometry shader invocations"); - add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, - "N geometry shader primitives emitted"); - add_basic_stat_reg(query, CL_INVOCATION_COUNT, - "N primitives entering clipping"); - add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, - "N primitives leaving clipping"); - if (devinfo->is_haswell || devinfo->gen == 8) { - add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, - "N fragment shader invocations", - "N fragment shader invocations"); - } else { - add_basic_stat_reg(query, PS_INVOCATION_COUNT, - "N fragment shader invocations"); - } - add_basic_stat_reg(query, HS_INVOCATION_COUNT, - "N TCS shader invocations"); - add_basic_stat_reg(query, DS_INVOCATION_COUNT, - "N TES shader invocations"); - if (devinfo->gen >= 7) { - add_basic_stat_reg(query, CS_INVOCATION_COUNT, - "N compute shader invocations"); - } - - if (devinfo->gen >= 10) { - /* Reuse existing CS invocation register until we can expose this new - * one. 
- */ - add_basic_stat_reg(query, CS_INVOCATION_COUNT, - "Reserved1"); - } - - query->data_size = sizeof(uint64_t) * query->n_counters; -} - -static void -fill_mdapi_perf_query_counter(struct gen_perf_query_info *query, - const char *name, - uint32_t data_offset, - uint32_t data_size, - enum gen_perf_counter_data_type data_type) -{ - struct gen_perf_query_counter *counter = &query->counters[query->n_counters]; - - assert(query->n_counters <= query->max_counters); - - counter->name = name; - counter->desc = "Raw counter value"; - counter->type = GEN_PERF_COUNTER_TYPE_RAW; - counter->data_type = data_type; - counter->offset = data_offset; - - query->n_counters++; - - assert(counter->offset + gen_perf_query_counter_get_size(counter) <= query->data_size); -} - -#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \ - fill_mdapi_perf_query_counter(query, #field_name, \ - (uint8_t *) &struct_name.field_name - \ - (uint8_t *) &struct_name, \ - sizeof(struct_name.field_name), \ - GEN_PERF_COUNTER_DATA_TYPE_##type_name) -#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \ - fill_mdapi_perf_query_counter(query, \ - ralloc_asprintf(ctx, "%s%i", #field_name, idx), \ - (uint8_t *) &struct_name.field_name[idx] - \ - (uint8_t *) &struct_name, \ - sizeof(struct_name.field_name[0]), \ - GEN_PERF_COUNTER_DATA_TYPE_##type_name) - -static void -register_mdapi_oa_query(const struct gen_device_info *devinfo, - struct gen_perf_config *perf) -{ - struct gen_perf_query_info *query = NULL; - - /* MDAPI requires different structures for pretty much every generation - * (right now we have definitions for gen 7 to 11). 
- */ - if (!(devinfo->gen >= 7 && devinfo->gen <= 11)) - return; - - switch (devinfo->gen) { - case 7: { - query = append_query_info(perf, 1 + 45 + 16 + 7); - query->oa_format = I915_OA_FORMAT_A45_B8_C8; - - struct gen7_mdapi_metrics metric_data; - query->data_size = sizeof(metric_data); - - MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); - for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, - metric_data, ACounters, i, UINT64); - } - for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, - metric_data, NOACounters, i, UINT64); - } - MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32); - break; - } - case 8: { - query = append_query_info(perf, 2 + 36 + 16 + 16); - query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8; - - struct gen8_mdapi_metrics metric_data; - query->data_size = sizeof(metric_data); - - MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64); - for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, - metric_data, OaCntr, i, UINT64); - } - for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, - metric_data, NoaCntr, i, UINT64); - } - MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64); - 
MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32); - break; - } - case 9: - case 10: - case 11: { - query = append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2); - query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8; - - struct gen9_mdapi_metrics metric_data; - query->data_size = sizeof(metric_data); - - MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64); - for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, - metric_data, OaCntr, i, UINT64); - } - for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, - metric_data, NoaCntr, i, UINT64); - } - MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, 
BOOL32); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32); - for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, - metric_data, UserCntr, i, UINT64); - } - MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32); - break; - } - default: - unreachable("Unsupported gen"); - break; - } - - query->kind = GEN_PERF_QUERY_TYPE_RAW; - query->name = "Intel_Raw_Hardware_Counters_Set_0_Query"; - query->guid = GEN_PERF_QUERY_GUID_MDAPI; - - { - /* Accumulation buffer offsets copied from an actual query... 
*/ - const struct gen_perf_query_info *copy_query = - &perf->queries[0]; - - query->gpu_time_offset = copy_query->gpu_time_offset; - query->gpu_clock_offset = copy_query->gpu_clock_offset; - query->a_offset = copy_query->a_offset; - query->b_offset = copy_query->b_offset; - query->c_offset = copy_query->c_offset; - } -} - void gen_perf_init_metrics(struct gen_perf_config *perf_cfg, const struct gen_device_info *devinfo, int drm_fd) { load_pipeline_statistic_metrics(perf_cfg, devinfo); - register_mdapi_statistic_query(perf_cfg, devinfo); + gen_perf_register_mdapi_statistic_query(perf_cfg, devinfo); if (load_oa_metrics(perf_cfg, drm_fd, devinfo)) - register_mdapi_oa_query(devinfo, perf_cfg); + gen_perf_register_mdapi_oa_query(perf_cfg, devinfo); } diff --git a/src/intel/perf/gen_perf_mdapi.c b/src/intel/perf/gen_perf_mdapi.c index 4c70d1f0207..33d05f07443 100644 --- a/src/intel/perf/gen_perf_mdapi.c +++ b/src/intel/perf/gen_perf_mdapi.c @@ -23,9 +23,14 @@ #include "gen_perf.h" #include "gen_perf_mdapi.h" +#include "gen_perf_private.h" +#include "gen_perf_regs.h" #include "dev/gen_device_info.h" +#include <drm-uapi/i915_drm.h> + + int gen_perf_query_result_write_mdapi(void *data, uint32_t data_size, const struct gen_device_info *devinfo, @@ -121,3 +126,233 @@ gen_perf_query_result_write_mdapi(void *data, uint32_t data_size, unreachable("unexpected gen"); } } + +void +gen_perf_register_mdapi_statistic_query(struct gen_perf_config *perf_cfg, + const struct gen_device_info *devinfo) +{ + if (!(devinfo->gen >= 7 && devinfo->gen <= 11)) + return; + + struct gen_perf_query_info *query = + gen_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS); + + query->kind = GEN_PERF_QUERY_TYPE_PIPELINE; + query->name = "Intel_Raw_Pipeline_Statistics_Query"; + + /* The order has to match mdapi_pipeline_metrics. 
*/ + gen_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT, + "N vertices submitted"); + gen_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, + "N primitives submitted"); + gen_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT, + "N vertex shader invocations"); + gen_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT, + "N geometry shader invocations"); + gen_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, + "N geometry shader primitives emitted"); + gen_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT, + "N primitives entering clipping"); + gen_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, + "N primitives leaving clipping"); + if (devinfo->is_haswell || devinfo->gen == 8) { + gen_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, + "N fragment shader invocations", + "N fragment shader invocations"); + } else { + gen_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT, + "N fragment shader invocations"); + } + gen_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT, + "N TCS shader invocations"); + gen_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT, + "N TES shader invocations"); + if (devinfo->gen >= 7) { + gen_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT, + "N compute shader invocations"); + } + + if (devinfo->gen >= 10) { + /* Reuse existing CS invocation register until we can expose this new + * one. 
+ */ + gen_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT, + "Reserved1"); + } + + query->data_size = sizeof(uint64_t) * query->n_counters; +} + +static void +fill_mdapi_perf_query_counter(struct gen_perf_query_info *query, + const char *name, + uint32_t data_offset, + uint32_t data_size, + enum gen_perf_counter_data_type data_type) +{ + struct gen_perf_query_counter *counter = &query->counters[query->n_counters]; + + assert(query->n_counters <= query->max_counters); + + counter->name = name; + counter->desc = "Raw counter value"; + counter->type = GEN_PERF_COUNTER_TYPE_RAW; + counter->data_type = data_type; + counter->offset = data_offset; + + query->n_counters++; + + assert(counter->offset + gen_perf_query_counter_get_size(counter) <= query->data_size); +} + +#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \ + fill_mdapi_perf_query_counter(query, #field_name, \ + (uint8_t *) &struct_name.field_name - \ + (uint8_t *) &struct_name, \ + sizeof(struct_name.field_name), \ + GEN_PERF_COUNTER_DATA_TYPE_##type_name) +#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \ + fill_mdapi_perf_query_counter(query, \ + ralloc_asprintf(ctx, "%s%i", #field_name, idx), \ + (uint8_t *) &struct_name.field_name[idx] - \ + (uint8_t *) &struct_name, \ + sizeof(struct_name.field_name[0]), \ + GEN_PERF_COUNTER_DATA_TYPE_##type_name) + +void +gen_perf_register_mdapi_oa_query(struct gen_perf_config *perf, + const struct gen_device_info *devinfo) +{ + struct gen_perf_query_info *query = NULL; + + /* MDAPI requires different structures for pretty much every generation + * (right now we have definitions for gen 7 to 11). 
+ */ + if (!(devinfo->gen >= 7 && devinfo->gen <= 11)) + return; + + switch (devinfo->gen) { + case 7: { + query = gen_perf_append_query_info(perf, 1 + 45 + 16 + 7); + query->oa_format = I915_OA_FORMAT_A45_B8_C8; + + struct gen7_mdapi_metrics metric_data; + query->data_size = sizeof(metric_data); + + MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); + for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) { + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, ACounters, i, UINT64); + } + for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) { + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, NOACounters, i, UINT64); + } + MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32); + break; + } + case 8: { + query = gen_perf_append_query_info(perf, 2 + 36 + 16 + 16); + query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8; + + struct gen8_mdapi_metrics metric_data; + query->data_size = sizeof(metric_data); + + MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64); + for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) { + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, OaCntr, i, UINT64); + } + for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) { + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, NoaCntr, i, UINT64); + } + MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64); + 
MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32); + break; + } + case 9: + case 10: + case 11: { + query = gen_perf_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2); + query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8; + + struct gen9_mdapi_metrics metric_data; + query->data_size = sizeof(metric_data); + + MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64); + for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) { + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, OaCntr, i, UINT64); + } + for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) { + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, NoaCntr, i, UINT64); + } + MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, 
OverrunOccured, BOOL32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32); + for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) { + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, UserCntr, i, UINT64); + } + MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32); + break; + } + default: + unreachable("Unsupported gen"); + break; + } + + query->kind = GEN_PERF_QUERY_TYPE_RAW; + query->name = "Intel_Raw_Hardware_Counters_Set_0_Query"; + query->guid = GEN_PERF_QUERY_GUID_MDAPI; + + { + /* Accumulation buffer offsets copied from an actual query... 
*/ + const struct gen_perf_query_info *copy_query = + &perf->queries[0]; + + query->gpu_time_offset = copy_query->gpu_time_offset; + query->gpu_clock_offset = copy_query->gpu_clock_offset; + query->a_offset = copy_query->a_offset; + query->b_offset = copy_query->b_offset; + query->c_offset = copy_query->c_offset; + } +} diff --git a/src/intel/perf/gen_perf_mdapi.h b/src/intel/perf/gen_perf_mdapi.h index c0e8d811c1d..2e326fa7721 100644 --- a/src/intel/perf/gen_perf_mdapi.h +++ b/src/intel/perf/gen_perf_mdapi.h @@ -28,7 +28,6 @@ #include "dev/gen_device_info.h" -struct gen_device_info; struct gen_perf_query_result; /* Guid has to matches with MDAPI's. */ diff --git a/src/intel/perf/gen_perf_private.h b/src/intel/perf/gen_perf_private.h new file mode 100644 index 00000000000..3dc3461cbf3 --- /dev/null +++ b/src/intel/perf/gen_perf_private.h @@ -0,0 +1,84 @@ +/* + * Copyright © 2019 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef GEN_PERF_PRIVATE_H +#define GEN_PERF_PRIVATE_H + +#include "gen_perf.h" + +static inline void +gen_perf_query_add_stat_reg(struct gen_perf_query_info *query, uint32_t reg, + uint32_t numerator, uint32_t denominator, + const char *name, const char *description) +{ + struct gen_perf_query_counter *counter; + + assert(query->n_counters < query->max_counters); + + counter = &query->counters[query->n_counters]; + counter->name = name; + counter->desc = description; + counter->type = GEN_PERF_COUNTER_TYPE_RAW; + counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64; + counter->offset = sizeof(uint64_t) * query->n_counters; + counter->pipeline_stat.reg = reg; + counter->pipeline_stat.numerator = numerator; + counter->pipeline_stat.denominator = denominator; + + query->n_counters++; +} + +static inline void +gen_perf_query_add_basic_stat_reg(struct gen_perf_query_info *query, + uint32_t reg, const char *name) +{ + gen_perf_query_add_stat_reg(query, reg, 1, 1, name, name); +} + +static inline struct gen_perf_query_info * +gen_perf_append_query_info(struct gen_perf_config *perf, int max_counters) +{ + struct gen_perf_query_info *query; + + perf->queries = reralloc(perf, perf->queries, + struct gen_perf_query_info, + ++perf->n_queries); + query = &perf->queries[perf->n_queries - 1]; + memset(query, 0, sizeof(*query)); + + if (max_counters > 0) { + query->max_counters = max_counters; + query->counters = + rzalloc_array(perf, struct gen_perf_query_counter, max_counters); + } + + return query; +} + +void gen_perf_register_mdapi_statistic_query(struct gen_perf_config *perf_cfg, + const struct gen_device_info *devinfo); +void gen_perf_register_mdapi_oa_query(struct gen_perf_config *perf, + 
const struct gen_device_info *devinfo); + + +#endif /* GEN_PERF_PRIVATE_H */