i965: perf: add support for raw queries
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>
Wed, 7 Mar 2018 14:28:41 +0000 (14:28 +0000)
committerLionel Landwerlin <lionel.g.landwerlin@intel.com>
Mon, 23 Apr 2018 17:30:10 +0000 (18:30 +0100)
The INTEL_performance_query extension provides a list of queries that
a user can select to monitor a particular workload. Each query reports
different sets of counters (roughly looking at different parts of the
hardware, i.e. caches/fixed functions/etc...).

Each query has an associated configuration that we need to program
into the hardware before using the query. Up to now, we provided
predefined queries. This change allows the user to build their own query
(and associated configuration) externally, and have the i965 driver
use that configuration through a new query named:

   Intel_Raw_Hardware_Counters_Set_0_Query

When this query is selected, the i965 driver will report raw counter
deltas (meaning their values need to be interpreted by the user, as
opposed to existing queries that provide human-readable values).

This change is also useful for debug purposes for building new
pre-defined queries and verifying the underlying numbers make sense
before writing equations for user readable output.

This change's purpose is also to enable GPA. GPA uses a library called
MDAPI that processes raw counter data. MDAPI expects raw data to have
a certain layout (per generation which is a bit unfortunate...). This
change also embeds the expected data layouts.

v2: Enable raw queries on gen 7->11, v1 had 7->9 (Lionel)

v3: Don't assert on cherryview for gen7... (Ken)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/Makefile.sources
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_performance_query.c
src/mesa/drivers/dri/i965/brw_performance_query.h
src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c [new file with mode: 0644]
src/mesa/drivers/dri/i965/meson.build

index 31ecbe6d30eae38c2161ae03add1843b1af11883..5e53d874d88ebdae3738a07307a5e444b0e17da5 100644 (file)
@@ -37,6 +37,7 @@ i965_FILES = \
        brw_pipe_control.h \
        brw_performance_query.h \
        brw_performance_query.c \
+       brw_performance_query_mdapi.c \
        brw_performance_query_metrics.h \
        brw_program.c \
        brw_program.h \
index cd7636454299d7eaec34dbfae00f096ecb84aad2..1e6a45eee1fa8f48f6882907a83e3d9a54da252f 100644 (file)
@@ -681,7 +681,8 @@ struct gen_l3_config;
 
 enum brw_query_kind {
    OA_COUNTERS,
-   PIPELINE_STATS
+   OA_COUNTERS_RAW,
+   PIPELINE_STATS,
 };
 
 struct brw_perf_query_register_prog {
index 9052f6cf1906e31e48fa16dda73dd47ec9d69ecd..ece2ff0ab69effda7c661a689bc07540223a9b7a 100644 (file)
@@ -266,6 +266,44 @@ static bool
 brw_is_perf_query_ready(struct gl_context *ctx,
                         struct gl_perf_query_object *o);
 
+/**
+ * Return the OA metrics set (config) ID to use for \p query.
+ *
+ * For OA_COUNTERS queries this is the ID already stored in the query. For
+ * OA_COUNTERS_RAW queries the ID is read from
+ * "<sysfs_dev_dir>/metrics/<guid>/id" the first time it is needed and then
+ * cached in the query until close_perf() resets it to 0. If the ID cannot be
+ * read, the query falls back to ID 1 (the test config).
+ */
+static uint64_t
+brw_perf_query_get_metric_id(struct brw_context *brw,
+                             const struct brw_perf_query_info *query)
+{
+   /* These queries are known not to ever change, their config ID has been
+    * loaded upon the first query creation. No need to look them up again.
+    */
+   if (query->kind == OA_COUNTERS)
+      return query->oa_metrics_set_id;
+
+   assert(query->kind == OA_COUNTERS_RAW);
+
+   /* Raw queries can be reprogrammed by an external application/library.
+    * When a raw query is used for the first time its ID is set to a value !=
+    * 0. When it stops being used the ID returns to 0. No need to reload the
+    * ID when it's already loaded.
+    */
+   if (query->oa_metrics_set_id != 0) {
+      DBG("Raw query '%s' guid=%s using cached ID: %"PRIu64"\n",
+          query->name, query->guid, query->oa_metrics_set_id);
+      return query->oa_metrics_set_id;
+   }
+
+   /* The cached ID lives in the query itself, so a writable alias of the
+    * const pointer is needed to update it.
+    */
+   struct brw_perf_query_info *raw_query = (struct brw_perf_query_info *)query;
+
+   char metric_id_file[280];
+   int len = snprintf(metric_id_file, sizeof(metric_id_file),
+                      "%s/metrics/%s/id",
+                      brw->perfquery.sysfs_dev_dir, query->guid);
+
+   /* A truncated path would name a different file, so never try to read it
+    * and take the fallback below instead.
+    */
+   bool path_ok = len >= 0 && (size_t)len < sizeof(metric_id_file);
+
+   if (!path_ok ||
+       !read_file_uint64(metric_id_file, &raw_query->oa_metrics_set_id)) {
+      DBG("Unable to read query guid=%s ID, falling back to test config\n", query->guid);
+      raw_query->oa_metrics_set_id = 1ULL;
+   } else {
+      DBG("Raw query '%s' guid=%s loaded ID: %"PRIu64"\n",
+          query->name, query->guid, query->oa_metrics_set_id);
+   }
+   return query->oa_metrics_set_id;
+}
+
 static void
 dump_perf_query_callback(GLuint id, void *query_void, void *brw_void)
 {
@@ -275,6 +313,7 @@ dump_perf_query_callback(GLuint id, void *query_void, void *brw_void)
 
    switch (obj->query->kind) {
    case OA_COUNTERS:
+   case OA_COUNTERS_RAW:
       DBG("%4d: %-6s %-8s BO: %-4s OA data: %-10s %-15s\n",
           id,
           o->Used ? "Dirty," : "New,",
@@ -383,6 +422,7 @@ brw_get_perf_query_info(struct gl_context *ctx,
 
    switch (query->kind) {
    case OA_COUNTERS:
+   case OA_COUNTERS_RAW:
       *n_active = brw->perfquery.n_active_oa_queries;
       break;
 
@@ -940,12 +980,18 @@ open_i915_perf_oa_stream(struct brw_context *brw,
 }
 
 static void
-close_perf(struct brw_context *brw)
+close_perf(struct brw_context *brw,
+           const struct brw_perf_query_info *query)
 {
    if (brw->perfquery.oa_stream_fd != -1) {
       close(brw->perfquery.oa_stream_fd);
       brw->perfquery.oa_stream_fd = -1;
    }
+   if (query->kind == OA_COUNTERS_RAW) {
+      struct brw_perf_query_info *raw_query =
+         (struct brw_perf_query_info *) query;
+      raw_query->oa_metrics_set_id = 0;
+   }
 }
 
 static void
@@ -1033,6 +1079,7 @@ brw_begin_perf_query(struct gl_context *ctx,
 
    switch (query->kind) {
    case OA_COUNTERS:
+   case OA_COUNTERS_RAW: {
 
       /* Opening an i915 perf stream implies exclusive access to the OA unit
        * which will generate counter reports for a specific counter set with a
@@ -1040,14 +1087,17 @@ brw_begin_perf_query(struct gl_context *ctx,
        * require a different counter set or format unless we get an opportunity
        * to close the stream and open a new one...
        */
+      uint64_t metric_id = brw_perf_query_get_metric_id(brw, query);
+
       if (brw->perfquery.oa_stream_fd != -1 &&
-          brw->perfquery.current_oa_metrics_set_id !=
-          query->oa_metrics_set_id) {
+          brw->perfquery.current_oa_metrics_set_id != metric_id) {
 
-         if (brw->perfquery.n_oa_users != 0)
+         if (brw->perfquery.n_oa_users != 0) {
+            DBG("WARNING: Begin(%d) failed already using perf config=%i/%"PRIu64"\n",
+                o->Id, brw->perfquery.current_oa_metrics_set_id, metric_id);
             return false;
-         else
-            close_perf(brw);
+         else
+            close_perf(brw, query);
       }
 
       /* If the OA counters aren't already on, enable them. */
@@ -1109,17 +1159,15 @@ brw_begin_perf_query(struct gl_context *ctx,
              prev_sample_period / 1000000ul);
 
          if (!open_i915_perf_oa_stream(brw,
-                                       query->oa_metrics_set_id,
+                                       metric_id,
                                        query->oa_format,
                                        period_exponent,
                                        screen->fd, /* drm fd */
                                        brw->hw_ctx))
             return false;
       } else {
-         assert(brw->perfquery.current_oa_metrics_set_id ==
-                query->oa_metrics_set_id &&
-                brw->perfquery.current_oa_format ==
-                query->oa_format);
+         assert(brw->perfquery.current_oa_metrics_set_id == metric_id &&
+                brw->perfquery.current_oa_format == query->oa_format);
       }
 
       if (!inc_n_oa_users(brw)) {
@@ -1182,6 +1230,7 @@ brw_begin_perf_query(struct gl_context *ctx,
 
       add_to_unaccumulated_query_list(brw, obj);
       break;
+   }
 
    case PIPELINE_STATS:
       if (obj->pipeline_stats.bo) {
@@ -1232,6 +1281,7 @@ brw_end_perf_query(struct gl_context *ctx,
 
    switch (obj->query->kind) {
    case OA_COUNTERS:
+   case OA_COUNTERS_RAW:
 
       /* NB: It's possible that the query will have already been marked
        * as 'accumulated' if an error was seen while reading samples
@@ -1277,6 +1327,7 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o)
 
    switch (obj->query->kind) {
    case OA_COUNTERS:
+   case OA_COUNTERS_RAW:
       bo = obj->oa.bo;
       break;
 
@@ -1305,7 +1356,8 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o)
     * we need to wait for all the reports to come in before we can
     * read them.
     */
-   if (obj->query->kind == OA_COUNTERS) {
+   if (obj->query->kind == OA_COUNTERS ||
+       obj->query->kind == OA_COUNTERS_RAW) {
       while (!read_oa_samples_for_query(brw, obj))
          ;
    }
@@ -1323,6 +1375,7 @@ brw_is_perf_query_ready(struct gl_context *ctx,
 
    switch (obj->query->kind) {
    case OA_COUNTERS:
+   case OA_COUNTERS_RAW:
       return (obj->oa.results_accumulated ||
               (obj->oa.bo &&
                !brw_batch_references(&brw->batch, obj->oa.bo) &&
@@ -1438,16 +1491,6 @@ get_oa_counter_data(struct brw_context *brw,
    int n_counters = query->n_counters;
    int written = 0;
 
-   if (!obj->oa.results_accumulated) {
-      read_gt_frequency(brw, obj);
-      read_slice_unslice_frequencies(brw, obj);
-      accumulate_oa_reports(brw, obj);
-      assert(obj->oa.results_accumulated);
-
-      brw_bo_unmap(obj->oa.bo);
-      obj->oa.map = NULL;
-   }
-
    for (int i = 0; i < n_counters; i++) {
       const struct brw_perf_query_counter *counter = &query->counters[i];
       uint64_t *out_uint64;
@@ -1537,7 +1580,20 @@ brw_get_perf_query_data(struct gl_context *ctx,
 
    switch (obj->query->kind) {
    case OA_COUNTERS:
-      written = get_oa_counter_data(brw, obj, data_size, (uint8_t *)data);
+   case OA_COUNTERS_RAW:
+      if (!obj->oa.results_accumulated) {
+         read_gt_frequency(brw, obj);
+         read_slice_unslice_frequencies(brw, obj);
+         accumulate_oa_reports(brw, obj);
+         assert(obj->oa.results_accumulated);
+
+         brw_bo_unmap(obj->oa.bo);
+         obj->oa.map = NULL;
+      }
+      if (obj->query->kind == OA_COUNTERS)
+         written = get_oa_counter_data(brw, obj, data_size, (uint8_t *)data);
+      else
+         written = brw_perf_query_get_mdapi_oa_data(brw, obj, data_size, (uint8_t *)data);
       break;
 
    case PIPELINE_STATS:
@@ -1593,6 +1649,7 @@ brw_delete_perf_query(struct gl_context *ctx,
 
    switch (obj->query->kind) {
    case OA_COUNTERS:
+   case OA_COUNTERS_RAW:
       if (obj->oa.bo) {
          if (!obj->oa.results_accumulated) {
             drop_from_unaccumulated_query_list(brw, obj);
@@ -1618,16 +1675,16 @@ brw_delete_perf_query(struct gl_context *ctx,
       break;
    }
 
-   free(obj);
-
    /* As an indication that the INTEL_performance_query extension is no
     * longer in use, it's a good time to free our cache of sample
     * buffers and close any current i915-perf stream.
     */
    if (--brw->perfquery.n_query_instances == 0) {
       free_sample_bufs(brw);
-      close_perf(brw);
+      close_perf(brw, obj->query);
    }
+
+   free(obj);
 }
 
 /******************************************************************************/
@@ -2150,6 +2207,8 @@ brw_init_perf_query_info(struct gl_context *ctx)
          init_oa_configs(brw);
       else
          enumerate_sysfs_metrics(brw);
+
+      brw_perf_query_register_mdapi_oa_query(brw);
    }
 
    brw->perfquery.unaccumulated =
index a6604fb89f6ec9324f884879f4f9349930ce9d22..20fdbc0473f51786a0365a82e470e4ef119540d1 100644 (file)
@@ -217,4 +217,11 @@ brw_perf_query_accumulate_uint40(int a_index,
    *accumulator += delta;
 }
 
+int brw_perf_query_get_mdapi_oa_data(struct brw_context *brw,
+                                     struct brw_perf_query_object *obj,
+                                     size_t data_size,
+                                     uint8_t *data);
+void brw_perf_query_register_mdapi_oa_query(struct brw_context *brw);
+
+
 #endif /* BRW_PERFORMANCE_QUERY_H */
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c b/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c
new file mode 100644 (file)
index 0000000..f98918b
--- /dev/null
@@ -0,0 +1,378 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_performance_query.h"
+
+/**
+ * Data format expected by MDAPI.
+ */
+
+struct mdapi_gen7_metrics {
+   uint64_t TotalTime;
+
+   uint64_t ACounters[45];
+   uint64_t NOACounters[16];
+
+   uint64_t PerfCounter1;
+   uint64_t PerfCounter2;
+   uint32_t SplitOccured;
+   uint32_t CoreFrequencyChanged;
+   uint64_t CoreFrequency;
+   uint32_t ReportId;
+   uint32_t ReportsCount;
+};
+
+#define GTDI_QUERY_BDW_METRICS_OA_COUNT         36
+#define GTDI_QUERY_BDW_METRICS_OA_40b_COUNT     32
+#define GTDI_QUERY_BDW_METRICS_NOA_COUNT        16
+struct mdapi_gen8_metrics {
+   uint64_t TotalTime;
+   uint64_t GPUTicks;
+   uint64_t OaCntr[GTDI_QUERY_BDW_METRICS_OA_COUNT];
+   uint64_t NoaCntr[GTDI_QUERY_BDW_METRICS_NOA_COUNT];
+   uint64_t BeginTimestamp;
+   uint64_t Reserved1;
+   uint64_t Reserved2;
+   uint32_t Reserved3;
+   uint32_t OverrunOccured;
+   uint64_t MarkerUser;
+   uint64_t MarkerDriver;
+
+   uint64_t SliceFrequency;
+   uint64_t UnsliceFrequency;
+   uint64_t PerfCounter1;
+   uint64_t PerfCounter2;
+   uint32_t SplitOccured;
+   uint32_t CoreFrequencyChanged;
+   uint64_t CoreFrequency;
+   uint32_t ReportId;
+   uint32_t ReportsCount;
+};
+
+#define GTDI_MAX_READ_REGS 16
+
+struct mdapi_gen9_metrics {
+   uint64_t TotalTime;
+   uint64_t GPUTicks;
+   uint64_t OaCntr[GTDI_QUERY_BDW_METRICS_OA_COUNT];
+   uint64_t NoaCntr[GTDI_QUERY_BDW_METRICS_NOA_COUNT];
+   uint64_t BeginTimestamp;
+   uint64_t Reserved1;
+   uint64_t Reserved2;
+   uint32_t Reserved3;
+   uint32_t OverrunOccured;
+   uint64_t MarkerUser;
+   uint64_t MarkerDriver;
+
+   uint64_t SliceFrequency;
+   uint64_t UnsliceFrequency;
+   uint64_t PerfCounter1;
+   uint64_t PerfCounter2;
+   uint32_t SplitOccured;
+   uint32_t CoreFrequencyChanged;
+   uint64_t CoreFrequency;
+   uint32_t ReportId;
+   uint32_t ReportsCount;
+
+   uint64_t UserCntr[GTDI_MAX_READ_REGS];
+   uint32_t UserCntrCfgId;
+   uint32_t Reserved4;
+};
+
+/**
+ * Write the accumulated OA results of \p obj into \p data, using the MDAPI
+ * structure layout that matches the device generation.
+ *
+ * The whole structure is zeroed before being filled, so fields the driver
+ * has no value for read back as 0 rather than as whatever the caller's
+ * buffer previously contained.
+ *
+ * \param data_size  size in bytes of the buffer pointed to by \p data
+ * \return the number of bytes written (the size of the generation's MDAPI
+ *         structure), or 0 if \p data_size is too small to hold it.
+ */
+int
+brw_perf_query_get_mdapi_oa_data(struct brw_context *brw,
+                                 struct brw_perf_query_object *obj,
+                                 size_t data_size,
+                                 uint8_t *data)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+   switch (devinfo->gen) {
+   case 7: {
+      struct mdapi_gen7_metrics *mdapi_data = (struct mdapi_gen7_metrics *) data;
+
+      if (data_size < sizeof(*mdapi_data))
+         return 0;
+
+      assert(devinfo->is_haswell);
+
+      /* We report the full structure as written, don't leave holes in it. */
+      *mdapi_data = (struct mdapi_gen7_metrics) { 0 };
+
+      /* accumulator[0] holds the time delta, the A counters follow it and
+       * the NOA counters come right after the A counters.
+       */
+      for (int i = 0; i < ARRAY_SIZE(mdapi_data->ACounters); i++)
+         mdapi_data->ACounters[i] = obj->oa.accumulator[1 + i];
+
+      for (int i = 0; i < ARRAY_SIZE(mdapi_data->NOACounters); i++) {
+         mdapi_data->NOACounters[i] =
+            obj->oa.accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i];
+      }
+
+      mdapi_data->ReportsCount = obj->oa.reports_accumulated;
+      mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
+      mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
+      mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
+      return sizeof(*mdapi_data);
+   }
+   case 8: {
+      struct mdapi_gen8_metrics *mdapi_data = (struct mdapi_gen8_metrics *) data;
+
+      if (data_size < sizeof(*mdapi_data))
+         return 0;
+
+      /* We report the full structure as written, don't leave holes in it. */
+      *mdapi_data = (struct mdapi_gen8_metrics) { 0 };
+
+      /* accumulator[0] is the time delta and accumulator[1] the GPU ticks,
+       * the OA counters follow and the NOA counters come after those.
+       */
+      for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
+         mdapi_data->OaCntr[i] = obj->oa.accumulator[2 + i];
+      for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
+         mdapi_data->NoaCntr[i] =
+            obj->oa.accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
+      }
+
+      mdapi_data->ReportId = obj->oa.hw_id;
+      mdapi_data->ReportsCount = obj->oa.reports_accumulated;
+      mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
+      mdapi_data->GPUTicks = obj->oa.accumulator[1];
+      mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
+      mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
+      /* Slice/unslice frequencies are the average of the two samples. */
+      mdapi_data->SliceFrequency = (obj->oa.slice_frequency[0] + obj->oa.slice_frequency[1]) / 2ULL;
+      mdapi_data->UnsliceFrequency = (obj->oa.unslice_frequency[0] + obj->oa.unslice_frequency[1]) / 2ULL;
+
+      return sizeof(*mdapi_data);
+   }
+   case 9:
+   case 10:
+   case 11: {
+      struct mdapi_gen9_metrics *mdapi_data = (struct mdapi_gen9_metrics *) data;
+
+      if (data_size < sizeof(*mdapi_data))
+         return 0;
+
+      /* We report the full structure as written, don't leave holes in it. */
+      *mdapi_data = (struct mdapi_gen9_metrics) { 0 };
+
+      /* Same accumulator layout as gen8. */
+      for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
+         mdapi_data->OaCntr[i] = obj->oa.accumulator[2 + i];
+      for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
+         mdapi_data->NoaCntr[i] =
+            obj->oa.accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
+      }
+
+      mdapi_data->ReportId = obj->oa.hw_id;
+      mdapi_data->ReportsCount = obj->oa.reports_accumulated;
+      mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
+      mdapi_data->GPUTicks = obj->oa.accumulator[1];
+      mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
+      mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
+      /* Slice/unslice frequencies are the average of the two samples. */
+      mdapi_data->SliceFrequency = (obj->oa.slice_frequency[0] + obj->oa.slice_frequency[1]) / 2ULL;
+      mdapi_data->UnsliceFrequency = (obj->oa.unslice_frequency[0] + obj->oa.unslice_frequency[1]) / 2ULL;
+
+      return sizeof(*mdapi_data);
+   }
+   default:
+      unreachable("unexpected gen");
+   }
+
+   return 0;
+}
+
+/* Describe the next counter of a raw query: the value named `name` lives
+ * at byte `data_offset` of the query's result data, is `data_size` bytes
+ * wide and is reported with the GL type `data_type`.
+ *
+ * The query's counter count is bumped by one; the caller must have
+ * allocated query->counters large enough.
+ */
+static void
+fill_mdapi_perf_query_counter(struct brw_perf_query_info *query,
+                              const char *name,
+                              uint32_t data_offset,
+                              uint32_t data_size,
+                              GLenum data_type)
+{
+   struct brw_perf_query_counter *entry = query->counters + query->n_counters;
+
+   entry->offset = data_offset;
+   entry->size = data_size;
+   entry->data_type = data_type;
+   entry->name = name;
+   entry->desc = "Raw counter value";
+
+   /* The described value has to fit inside the query's result data. */
+   assert(entry->offset + entry->size <= query->data_size);
+
+   query->n_counters += 1;
+}
+
+#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \
+   fill_mdapi_perf_query_counter(query, #field_name,                    \
+                                 (uint8_t *) &struct_name.field_name -  \
+                                 (uint8_t *) &struct_name,              \
+                                 sizeof(struct_name.field_name),        \
+                                 GL_PERFQUERY_COUNTER_DATA_##type_name##_INTEL)
+#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
+   fill_mdapi_perf_query_counter(query,                                 \
+                                 ralloc_asprintf(ctx, "%s%i", #field_name, idx), \
+                                 (uint8_t *) &struct_name.field_name[idx] - \
+                                 (uint8_t *) &struct_name,              \
+                                 sizeof(struct_name.field_name[0]),     \
+                                 GL_PERFQUERY_COUNTER_DATA_##type_name##_INTEL)
+
+/**
+ * Append the raw query "Intel_Raw_Hardware_Counters_Set_0_Query" to the list
+ * of queries of \p brw.
+ *
+ * The query exposes one counter per member (and per array element) of the
+ * MDAPI structure matching the device generation, in structure order. Its
+ * metrics set ID is left at 0 here. Nothing is registered on generations
+ * other than 7 to 11.
+ */
+void
+brw_perf_query_register_mdapi_oa_query(struct brw_context *brw)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+   /* MDAPI requires different structures for pretty much every generation
+    * (right now we have definitions for gen 7 to 11).
+    */
+   if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
+      return;
+
+   struct brw_perf_query_info *query = brw_perf_query_append_query_info(brw);
+
+   query->kind = OA_COUNTERS_RAW;
+   query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
+   /* The GUID has to match MDAPI's. */
+   query->guid = "2f01b241-7014-42a7-9eb6-a925cad3daba";
+   query->n_counters = 0;
+   query->oa_metrics_set_id = 0; /* Set by MDAPI */
+
+   /* Capacity of query->counters, checked against the number of counters
+    * actually added once the switch below is done.
+    */
+   int n_counters;
+   switch (devinfo->gen) {
+   case 7: {
+      query->oa_format = I915_OA_FORMAT_A45_B8_C8;
+
+      /* Only used by the macros to compute member offsets and sizes. */
+      struct mdapi_gen7_metrics metric_data;
+      query->data_size = sizeof(metric_data);
+
+      /* TotalTime + ACounters + NOACounters + the 7 trailing members. */
+      n_counters = 1 + 45 + 16 + 7;
+      query->counters =
+         rzalloc_array_size(brw->perfquery.queries,
+                            sizeof(*query->counters), n_counters);
+
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
+      for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {
+         MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
+                                       query, metric_data, ACounters, i, UINT64);
+      }
+      for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {
+         MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
+                                       query, metric_data, NOACounters, i, UINT64);
+      }
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
+      break;
+   }
+   case 8: {
+      query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
+
+      /* Only used by the macros to compute member offsets and sizes. */
+      struct mdapi_gen8_metrics metric_data;
+      query->data_size = sizeof(metric_data);
+
+      /* TotalTime/GPUTicks + OaCntr + NoaCntr + the 16 trailing members. */
+      n_counters = 2 + 36 + 16 + 16;
+      query->counters =
+         rzalloc_array_size(brw->perfquery.queries,
+                            sizeof(*query->counters), n_counters);
+
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
+      for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
+         MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
+                                       query, metric_data, OaCntr, i, UINT64);
+      }
+      for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
+         MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
+                                       query, metric_data, NoaCntr, i, UINT64);
+      }
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
+      break;
+   }
+   case 9:
+   case 10:
+   case 11: {
+      query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
+
+      /* Only used by the macros to compute member offsets and sizes. */
+      struct mdapi_gen9_metrics metric_data;
+      query->data_size = sizeof(metric_data);
+
+      /* Same as gen8 + UserCntr + UserCntrCfgId/Reserved4. */
+      n_counters = 2 + 36 + 16 + 16 + 16 + 2;
+      query->counters =
+         rzalloc_array_size(brw->perfquery.queries,
+                            sizeof(*query->counters), n_counters);
+
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
+      for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
+         MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
+                                       query, metric_data, OaCntr, i, UINT64);
+      }
+      for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
+         MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
+                                       query, metric_data, NoaCntr, i, UINT64);
+      }
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
+      for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {
+         MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
+                                       query, metric_data, UserCntr, i, UINT64);
+      }
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);
+      MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);
+      break;
+   }
+   default:
+      unreachable("Unsupported gen");
+      break;
+   }
+
+   /* Catches a mismatch between the capacity computed above and the number
+    * of counters registered (only after the fact, in builds with asserts).
+    */
+   assert(query->n_counters <= n_counters);
+
+   {
+      /* Accumulation buffer offsets copied from an actual query... */
+      /* NOTE(review): relies on queries[0] being a pre-defined OA query,
+       * i.e. on at least one query having been registered before this one
+       * — confirm against the caller.
+       */
+      const struct brw_perf_query_info *copy_query =
+         &brw->perfquery.queries[0];
+
+      query->gpu_time_offset = copy_query->gpu_time_offset;
+      query->gpu_clock_offset = copy_query->gpu_clock_offset;
+      query->a_offset = copy_query->a_offset;
+      query->b_offset = copy_query->b_offset;
+      query->c_offset = copy_query->c_offset;
+   }
+}
index a73ca7807fecf2a42cab8fc7dcad1d775ecf6ddc..a2c1896fecbeafabdaff2440283b590cefd08fb3 100644 (file)
@@ -56,6 +56,7 @@ files_i965 = files(
   'brw_pipe_control.c',
   'brw_performance_query.h',
   'brw_performance_query.c',
+  'brw_performance_query_mdapi.c',
   'brw_performance_query_metrics.h',
   'brw_program.c',
   'brw_program.h',