i965/program_cache: Cast the key to char * before adding key_size
[mesa.git] / src / mesa / drivers / dri / i965 / brw_performance_query_mdapi.c
index 70f69debe987a2f5cfeadb50243ff62bbe1fbc61..379515d328f21e7e7f56e1d00775fe9dd77adbf0 100644 (file)
 #include "brw_defines.h"
 #include "brw_performance_query.h"
 
-/**
- * Data format expected by MDAPI.
- */
-
-struct mdapi_gen7_metrics {
-   uint64_t TotalTime;
-
-   uint64_t ACounters[45];
-   uint64_t NOACounters[16];
-
-   uint64_t PerfCounter1;
-   uint64_t PerfCounter2;
-   uint32_t SplitOccured;
-   uint32_t CoreFrequencyChanged;
-   uint64_t CoreFrequency;
-   uint32_t ReportId;
-   uint32_t ReportsCount;
-};
-
-#define GTDI_QUERY_BDW_METRICS_OA_COUNT         36
-#define GTDI_QUERY_BDW_METRICS_OA_40b_COUNT     32
-#define GTDI_QUERY_BDW_METRICS_NOA_COUNT        16
-struct mdapi_gen8_metrics {
-   uint64_t TotalTime;
-   uint64_t GPUTicks;
-   uint64_t OaCntr[GTDI_QUERY_BDW_METRICS_OA_COUNT];
-   uint64_t NoaCntr[GTDI_QUERY_BDW_METRICS_NOA_COUNT];
-   uint64_t BeginTimestamp;
-   uint64_t Reserved1;
-   uint64_t Reserved2;
-   uint32_t Reserved3;
-   uint32_t OverrunOccured;
-   uint64_t MarkerUser;
-   uint64_t MarkerDriver;
-
-   uint64_t SliceFrequency;
-   uint64_t UnsliceFrequency;
-   uint64_t PerfCounter1;
-   uint64_t PerfCounter2;
-   uint32_t SplitOccured;
-   uint32_t CoreFrequencyChanged;
-   uint64_t CoreFrequency;
-   uint32_t ReportId;
-   uint32_t ReportsCount;
-};
-
-#define GTDI_MAX_READ_REGS 16
-
-struct mdapi_gen9_metrics {
-   uint64_t TotalTime;
-   uint64_t GPUTicks;
-   uint64_t OaCntr[GTDI_QUERY_BDW_METRICS_OA_COUNT];
-   uint64_t NoaCntr[GTDI_QUERY_BDW_METRICS_NOA_COUNT];
-   uint64_t BeginTimestamp;
-   uint64_t Reserved1;
-   uint64_t Reserved2;
-   uint32_t Reserved3;
-   uint32_t OverrunOccured;
-   uint64_t MarkerUser;
-   uint64_t MarkerDriver;
-
-   uint64_t SliceFrequency;
-   uint64_t UnsliceFrequency;
-   uint64_t PerfCounter1;
-   uint64_t PerfCounter2;
-   uint32_t SplitOccured;
-   uint32_t CoreFrequencyChanged;
-   uint64_t CoreFrequency;
-   uint32_t ReportId;
-   uint32_t ReportsCount;
-
-   uint64_t UserCntr[GTDI_MAX_READ_REGS];
-   uint32_t UserCntrCfgId;
-   uint32_t Reserved4;
-};
-
-struct mdapi_pipeline_metrics {
-   uint64_t IAVertices;
-   uint64_t IAPrimitives;
-   uint64_t VSInvocations;
-   uint64_t GSInvocations;
-   uint64_t GSPrimitives;
-   uint64_t CInvocations;
-   uint64_t CPrimitives;
-   uint64_t PSInvocations;
-   uint64_t HSInvocations;
-   uint64_t DSInvocations;
-   uint64_t CSInvocations;
-};
-
-int
-brw_perf_query_get_mdapi_oa_data(struct brw_context *brw,
-                                 struct brw_perf_query_object *obj,
-                                 size_t data_size,
-                                 uint8_t *data)
-{
-   const struct gen_device_info *devinfo = &brw->screen->devinfo;
-
-   switch (devinfo->gen) {
-   case 7: {
-      struct mdapi_gen7_metrics *mdapi_data = (struct mdapi_gen7_metrics *) data;
-
-      if (data_size < sizeof(*mdapi_data))
-         return 0;
-
-      assert(devinfo->is_haswell);
-
-      for (int i = 0; i < ARRAY_SIZE(mdapi_data->ACounters); i++)
-         mdapi_data->ACounters[i] = obj->oa.accumulator[1 + i];
-
-      for (int i = 0; i < ARRAY_SIZE(mdapi_data->NOACounters); i++) {
-         mdapi_data->NOACounters[i] =
-            obj->oa.accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i];
-      }
-
-      mdapi_data->ReportsCount = obj->oa.reports_accumulated;
-      mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
-      mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
-      mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
-      return sizeof(*mdapi_data);
-   }
-   case 8: {
-      struct mdapi_gen8_metrics *mdapi_data = (struct mdapi_gen8_metrics *) data;
-
-      if (data_size < sizeof(*mdapi_data))
-         return 0;
-
-      for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
-         mdapi_data->OaCntr[i] = obj->oa.accumulator[2 + i];
-      for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
-         mdapi_data->NoaCntr[i] =
-            obj->oa.accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
-      }
-
-      mdapi_data->ReportId = obj->oa.hw_id;
-      mdapi_data->ReportsCount = obj->oa.reports_accumulated;
-      mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
-      mdapi_data->GPUTicks = obj->oa.accumulator[1];
-      mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
-      mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
-      mdapi_data->SliceFrequency = (obj->oa.slice_frequency[0] + obj->oa.slice_frequency[1]) / 2ULL;
-      mdapi_data->UnsliceFrequency = (obj->oa.unslice_frequency[0] + obj->oa.unslice_frequency[1]) / 2ULL;
-
-      return sizeof(*mdapi_data);
-   }
-   case 9:
-   case 10:
-   case 11: {
-      struct mdapi_gen9_metrics *mdapi_data = (struct mdapi_gen9_metrics *) data;
-
-      if (data_size < sizeof(*mdapi_data))
-         return 0;
-
-      for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
-         mdapi_data->OaCntr[i] = obj->oa.accumulator[2 + i];
-      for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
-         mdapi_data->NoaCntr[i] =
-            obj->oa.accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
-      }
-
-      mdapi_data->ReportId = obj->oa.hw_id;
-      mdapi_data->ReportsCount = obj->oa.reports_accumulated;
-      mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
-      mdapi_data->GPUTicks = obj->oa.accumulator[1];
-      mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
-      mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
-      mdapi_data->SliceFrequency = (obj->oa.slice_frequency[0] + obj->oa.slice_frequency[1]) / 2ULL;
-      mdapi_data->UnsliceFrequency = (obj->oa.unslice_frequency[0] + obj->oa.unslice_frequency[1]) / 2ULL;
-
-      return sizeof(*mdapi_data);
-   }
-   default:
-      unreachable("unexpected gen");
-   }
-
-   return 0;
-}
+#include "perf/gen_perf.h"
+#include "perf/gen_perf_mdapi.h"
 
 static void
-fill_mdapi_perf_query_counter(struct brw_perf_query_info *query,
+fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
                               const char *name,
                               uint32_t data_offset,
                               uint32_t data_size,
-                              GLenum data_type)
+                              enum gen_perf_counter_data_type data_type)
 {
-   struct brw_perf_query_counter *counter = &query->counters[query->n_counters];
+   struct gen_perf_query_counter *counter = &query->counters[query->n_counters];
+
+   assert(query->n_counters <= query->max_counters);
 
    counter->name = name;
    counter->desc = "Raw counter value";
+   counter->type = GEN_PERF_COUNTER_TYPE_RAW;
    counter->data_type = data_type;
    counter->offset = data_offset;
-   counter->size = data_size;
-   assert(counter->offset + counter->size <= query->data_size);
 
    query->n_counters++;
+
+   assert(counter->offset + gen_perf_query_counter_get_size(counter) <= query->data_size);
 }
 
 #define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \
@@ -226,19 +54,21 @@ fill_mdapi_perf_query_counter(struct brw_perf_query_info *query,
                                  (uint8_t *) &struct_name.field_name -  \
                                  (uint8_t *) &struct_name,              \
                                  sizeof(struct_name.field_name),        \
-                                 GL_PERFQUERY_COUNTER_DATA_##type_name##_INTEL)
+                                 GEN_PERF_COUNTER_DATA_TYPE_##type_name)
 #define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
    fill_mdapi_perf_query_counter(query,                                 \
                                  ralloc_asprintf(ctx, "%s%i", #field_name, idx), \
                                  (uint8_t *) &struct_name.field_name[idx] - \
                                  (uint8_t *) &struct_name,              \
                                  sizeof(struct_name.field_name[0]),     \
-                                 GL_PERFQUERY_COUNTER_DATA_##type_name##_INTEL)
+                                 GEN_PERF_COUNTER_DATA_TYPE_##type_name)
 
 void
 brw_perf_query_register_mdapi_oa_query(struct brw_context *brw)
 {
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   struct gen_perf *perf = brw->perfquery.perf;
+   struct gen_perf_query_info *query = NULL;
 
    /* MDAPI requires different structures for pretty much every generation
     * (right now we have definitions for gen 7 to 11).
@@ -246,36 +76,22 @@ brw_perf_query_register_mdapi_oa_query(struct brw_context *brw)
    if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
       return;
 
-   struct brw_perf_query_info *query = brw_perf_query_append_query_info(brw);
-
-   query->kind = OA_COUNTERS_RAW;
-   query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
-   /* Guid has to matches with MDAPI's. */
-   query->guid = "2f01b241-7014-42a7-9eb6-a925cad3daba";
-   query->n_counters = 0;
-   query->oa_metrics_set_id = 0; /* Set by MDAPI */
-
-   int n_counters;
    switch (devinfo->gen) {
    case 7: {
+      query = gen_perf_query_append_query_info(perf, 1 + 45 + 16 + 7);
       query->oa_format = I915_OA_FORMAT_A45_B8_C8;
 
-      struct mdapi_gen7_metrics metric_data;
+      struct gen7_mdapi_metrics metric_data;
       query->data_size = sizeof(metric_data);
 
-      n_counters = 1 + 45 + 16 + 7;
-      query->counters =
-         rzalloc_array_size(brw->perfquery.queries,
-                            sizeof(*query->counters), n_counters);
-
       MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
       for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {
-         MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
-                                       query, metric_data, ACounters, i, UINT64);
+         MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+                                       metric_data, ACounters, i, UINT64);
       }
       for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {
-         MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
-                                       query, metric_data, NOACounters, i, UINT64);
+         MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+                                       metric_data, NOACounters, i, UINT64);
       }
       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
@@ -287,25 +103,21 @@ brw_perf_query_register_mdapi_oa_query(struct brw_context *brw)
       break;
    }
    case 8: {
+      query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16);
       query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
 
-      struct mdapi_gen8_metrics metric_data;
+      struct gen8_mdapi_metrics metric_data;
       query->data_size = sizeof(metric_data);
 
-      n_counters = 2 + 36 + 16 + 16;
-      query->counters =
-         rzalloc_array_size(brw->perfquery.queries,
-                            sizeof(*query->counters), n_counters);
-
       MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
       MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
       for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
-         MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
-                                       query, metric_data, OaCntr, i, UINT64);
+         MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+                                       metric_data, OaCntr, i, UINT64);
       }
       for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
-         MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
-                                       query, metric_data, NoaCntr, i, UINT64);
+         MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+                                       metric_data, NoaCntr, i, UINT64);
       }
       MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
@@ -328,25 +140,21 @@ brw_perf_query_register_mdapi_oa_query(struct brw_context *brw)
    case 9:
    case 10:
    case 11: {
+      query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
       query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
 
-      struct mdapi_gen9_metrics metric_data;
+      struct gen9_mdapi_metrics metric_data;
       query->data_size = sizeof(metric_data);
 
-      n_counters = 2 + 36 + 16 + 16 + 16 + 2;
-      query->counters =
-         rzalloc_array_size(brw->perfquery.queries,
-                            sizeof(*query->counters), n_counters);
-
       MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
       MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
       for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
-         MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
-                                       query, metric_data, OaCntr, i, UINT64);
+         MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+                                       metric_data, OaCntr, i, UINT64);
       }
       for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
-         MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
-                                       query, metric_data, NoaCntr, i, UINT64);
+         MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+                                       metric_data, NoaCntr, i, UINT64);
       }
       MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
@@ -365,8 +173,8 @@ brw_perf_query_register_mdapi_oa_query(struct brw_context *brw)
       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
       for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {
-         MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
-                                       query, metric_data, UserCntr, i, UINT64);
+         MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
+                                       metric_data, UserCntr, i, UINT64);
       }
       MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);
       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);
@@ -377,12 +185,14 @@ brw_perf_query_register_mdapi_oa_query(struct brw_context *brw)
       break;
    }
 
-   assert(query->n_counters <= n_counters);
+   query->kind = GEN_PERF_QUERY_TYPE_RAW;
+   query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
+   query->guid = GEN_PERF_QUERY_GUID_MDAPI;
 
    {
       /* Accumulation buffer offsets copied from an actual query... */
-      const struct brw_perf_query_info *copy_query =
-         &brw->perfquery.queries[0];
+      const struct gen_perf_query_info *copy_query =
+         &brw->perfquery.perf->queries[0];
 
       query->gpu_time_offset = copy_query->gpu_time_offset;
       query->gpu_clock_offset = copy_query->gpu_clock_offset;
@@ -397,48 +207,54 @@ brw_perf_query_register_mdapi_statistic_query(struct brw_context *brw)
 {
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
-   if (!(devinfo->gen >= 7 && devinfo->gen <= 9))
+   if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
       return;
 
-   struct brw_perf_query_info *query = brw_perf_query_append_query_info(brw);
+   struct gen_perf_query_info *query =
+      gen_perf_query_append_query_info(brw->perfquery.perf, MAX_STAT_COUNTERS);
 
-   query->kind = PIPELINE_STATS;
+   query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
    query->name = "Intel_Raw_Pipeline_Statistics_Query";
-   query->n_counters = 0;
-   query->counters =
-      rzalloc_array(brw, struct brw_perf_query_counter, MAX_STAT_COUNTERS);
 
    /* The order has to match mdapi_pipeline_metrics. */
-   brw_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT,
+   gen_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT,
                                           "N vertices submitted");
-   brw_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
+   gen_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
                                           "N primitives submitted");
-   brw_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
+   gen_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
                                           "N vertex shader invocations");
-   brw_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
+   gen_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
                                           "N geometry shader invocations");
-   brw_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
+   gen_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
                                           "N geometry shader primitives emitted");
-   brw_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
+   gen_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
                                           "N primitives entering clipping");
-   brw_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
+   gen_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
                                           "N primitives leaving clipping");
    if (devinfo->is_haswell || devinfo->gen == 8) {
-      brw_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
+      gen_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
                                        "N fragment shader invocations",
                                        "N fragment shader invocations");
    } else {
-      brw_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
+      gen_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
                                              "N fragment shader invocations");
    }
-   brw_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
+   gen_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
                                           "N TCS shader invocations");
-   brw_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
+   gen_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
                                           "N TES shader invocations");
    if (devinfo->gen >= 7) {
-      brw_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
+      gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
                                              "N compute shader invocations");
    }
 
+   if (devinfo->gen >= 10) {
+      /* Reuse existing CS invocation register until we can expose this new
+       * one.
+       */
+      gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
+                                             "Reserved1");
+   }
+
    query->data_size = sizeof(uint64_t) * query->n_counters;
 }