iris/perf: implement routines to return counter info
author Mark Janes <mark.a.janes@intel.com>
Mon, 22 Apr 2019 18:43:12 +0000 (11:43 -0700)
committer Mark Janes <mark.a.janes@intel.com>
Sat, 10 Aug 2019 02:28:03 +0000 (19:28 -0700)
With this commit, Iris will report that AMD_performance_monitor is
supported, and will allow the caller to query the available metrics.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/gallium/drivers/iris/iris_context.h
src/gallium/drivers/iris/iris_monitor.c [new file with mode: 0644]
src/gallium/drivers/iris/iris_monitor.h [new file with mode: 0644]
src/gallium/drivers/iris/iris_screen.c
src/gallium/drivers/iris/iris_screen.h
src/gallium/drivers/iris/iris_state.c
src/gallium/drivers/iris/meson.build

index f25c91fb317042e2bfd7848a65e7afbf48d6f445..158dfe70d715085056cd71be08a2eb553cdcf80b 100644 (file)
@@ -460,6 +460,11 @@ struct iris_vtable {
                                  struct iris_bo *bo, uint32_t offset,
                                  uint64_t imm);
 
+   void (*emit_mi_report_perf_count)(struct iris_batch *batch,
+                                     struct iris_bo *bo,
+                                     uint32_t offset_in_bytes,
+                                     uint32_t report_id);
+
    unsigned (*derived_program_state_size)(enum iris_program_cache_id id);
    void (*store_derived_program_state)(struct iris_context *ice,
                                        enum iris_program_cache_id cache_id,
@@ -886,6 +891,11 @@ void iris_render_cache_add_bo(struct iris_batch *batch,
                               enum isl_aux_usage aux_usage);
 void iris_cache_flush_for_depth(struct iris_batch *batch, struct iris_bo *bo);
 void iris_depth_cache_add_bo(struct iris_batch *batch, struct iris_bo *bo);
+/* Driver-query enumeration entry points.
+ * NOTE(review): iris_monitor.h declares iris_get_monitor_info() and
+ * iris_get_monitor_group_info() with the same parameter lists -- confirm
+ * that definitions exist for the two names declared here.
+ */
+int iris_get_driver_query_info(struct pipe_screen *pscreen,
+                               unsigned index,
+                               struct pipe_driver_query_info *info);
+int iris_get_driver_query_group_info(
+   struct pipe_screen *pscreen,
+   unsigned index,
+   struct pipe_driver_query_group_info *info);
 
 /* iris_state.c */
 void gen9_toggle_preemption(struct iris_context *ice,
diff --git a/src/gallium/drivers/iris/iris_monitor.c b/src/gallium/drivers/iris/iris_monitor.c
new file mode 100644 (file)
index 0000000..07045dd
--- /dev/null
@@ -0,0 +1,281 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "iris_monitor.h"
+
+#include <xf86drm.h>
+
+#include "iris_screen.h"
+#include "iris_context.h"
+
+#include "perf/gen_perf.h"
+
+/**
+ * Describe one of the counters listed in the screen's monitor configuration.
+ *
+ * \param pscreen  the iris screen (cast to struct iris_screen)
+ * \param index    position in monitor_cfg->counters
+ * \param info     output description, or NULL to ask for the counter count
+ *
+ * \return monitor_cfg->num_counters when \p info is NULL; otherwise 1 if
+ *         \p info was filled in, or 0 if there is no monitor configuration
+ *         or \p index is out of range.
+ */
+int iris_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
+                          struct pipe_driver_query_info *info)
+{
+   const struct iris_screen *screen = (struct iris_screen *)pscreen;
+   assert(screen->monitor_cfg);
+   if (!screen->monitor_cfg)
+      return 0;
+
+   const struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
+   if (!info) {
+      /* return the number of metrics */
+      return monitor_cfg->num_counters;
+   }
+
+   /* index is chosen by the caller: never read past the counters array */
+   if (monitor_cfg->num_counters <= 0 ||
+       index >= (unsigned)monitor_cfg->num_counters)
+      return 0;
+
+   const struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
+   const int group = monitor_cfg->counters[index].group;
+   const int counter_index = monitor_cfg->counters[index].counter;
+   struct gen_perf_query_counter *counter =
+      &perf_cfg->queries[group].counters[counter_index];
+
+   info->group_id = group;
+   info->name = counter->name;
+   info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
+
+   if (counter->type == GEN_PERF_COUNTER_TYPE_THROUGHPUT)
+      info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
+   else
+      info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
+
+   switch (counter->data_type) {
+   case GEN_PERF_COUNTER_DATA_TYPE_BOOL32:
+   case GEN_PERF_COUNTER_DATA_TYPE_UINT32:
+      info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
+      info->max_value.u32 = 0;
+      break;
+   case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
+      info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
+      info->max_value.u64 = 0;
+      break;
+   case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
+   case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE:
+      info->type = PIPE_DRIVER_QUERY_TYPE_FLOAT;
+      info->max_value.u64 = -1;
+      break;
+   default:
+      /* unknown data type: type and max_value are left untouched */
+      assert(false);
+      break;
+   }
+
+   /* indicates that this is an OA query, not a pipeline statistics query */
+   info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
+   return 1;
+}
+
+/* Function-pointer shapes used to cast the iris helpers when they are
+ * stored in perf_cfg->vtbl; driver objects are passed as opaque void *.
+ */
+typedef void (*bo_unreference_t)(void *);
+typedef void *(*bo_map_t)(void *, void *, unsigned flags);
+typedef void (*bo_unmap_t)(void *);
+typedef void (*emit_mi_report_t)(void *, void *, uint32_t, uint32_t);
+typedef void (*emit_mi_flush_t)(void *);
+typedef void (*capture_frequency_stat_register_t)(void *, void *, uint32_t);
+typedef void (*store_register_mem64_t)(void *ctx, void *bo,
+                                       uint32_t reg, uint32_t offset);
+typedef bool (*batch_references_t)(void *batch, void *bo);
+typedef void (*bo_wait_rendering_t)(void *bo);
+typedef int (*bo_busy_t)(void *bo);
+
+/* Allocation callback installed in perf_cfg->vtbl: forwards to
+ * iris_bo_alloc(), always requesting IRIS_MEMZONE_OTHER.
+ */
+static void *
+iris_oa_bo_alloc(void *bufmgr, const char *name, uint64_t size)
+{
+   void *oa_bo = iris_bo_alloc(bufmgr, name, size, IRIS_MEMZONE_OTHER);
+
+   return oa_bo;
+}
+
+/**
+ * Flush callback installed in perf_cfg->vtbl: emits a PIPE_CONTROL with
+ * the flush/invalidate bits below plus a CS stall on the render batch.
+ *
+ * \param c  the iris_context, passed as an opaque pointer.  The parameter
+ *           is void * so that the definition has the same type as
+ *           emit_mi_flush_t, the pointer type it is called through.
+ */
+static void
+iris_monitor_emit_mi_flush(void *c)
+{
+   struct iris_context *ice = c;
+   const int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                     PIPE_CONTROL_INSTRUCTION_INVALIDATE |
+                     PIPE_CONTROL_CONST_CACHE_INVALIDATE |
+                     PIPE_CONTROL_DATA_CACHE_FLUSH |
+                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                     PIPE_CONTROL_VF_CACHE_INVALIDATE |
+                     PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+                     PIPE_CONTROL_CS_STALL;
+   iris_emit_pipe_control_flush(&ice->batches[IRIS_BATCH_RENDER],
+                                "OA metrics",
+                                flags);
+}
+
+/* Perf vtable callback: hands the report request to the context's
+ * emit_mi_report_perf_count hook, targeting the render batch.
+ */
+static void
+iris_monitor_emit_mi_report_perf_count(void *c,
+                                       void *bo,
+                                       uint32_t offset_in_bytes,
+                                       uint32_t report_id)
+{
+   struct iris_context *ice = c;
+   struct iris_batch *render_batch = &ice->batches[IRIS_BATCH_RENDER];
+
+   ice->vtbl.emit_mi_report_perf_count(render_batch, bo,
+                                       offset_in_bytes, report_id);
+}
+
+/**
+ * Perf vtable callback: flush the render batch.
+ *
+ * \param c     the iris_context, passed as an opaque pointer
+ * \param file  source file of the code requesting the flush
+ * \param line  source line of the code requesting the flush
+ *
+ * The caller's location is forwarded to _iris_batch_flush() so the flush is
+ * attributed to the requester rather than to this wrapper.
+ */
+static void
+iris_monitor_batchbuffer_flush(void *c, const char *file, int line)
+{
+   struct iris_context *ice = c;
+   _iris_batch_flush(&ice->batches[IRIS_BATCH_RENDER], file, line);
+}
+
+/* Perf vtable callback: store the 32-bit GEN9_RPSTAT0 register into bo at
+ * bo_offset, using the render batch.
+ */
+static void
+iris_monitor_capture_frequency_stat_register(void *ctx,
+                                             void *bo,
+                                             uint32_t bo_offset)
+{
+   struct iris_context *ice = ctx;
+   struct iris_batch *render_batch = &ice->batches[IRIS_BATCH_RENDER];
+
+   ice->vtbl.store_register_mem32(render_batch, GEN9_RPSTAT0,
+                                  bo, bo_offset, false);
+}
+
+/* Perf vtable callback: store the 64-bit register reg into bo at offset,
+ * using the render batch.
+ */
+static void
+iris_monitor_store_register_mem64(void *ctx, void *bo,
+                                  uint32_t reg, uint32_t offset)
+{
+   struct iris_context *ice = ctx;
+   struct iris_batch *render_batch = &ice->batches[IRIS_BATCH_RENDER];
+
+   ice->vtbl.store_register_mem64(render_batch, reg, bo, offset, false);
+}
+
+
+/* Return true when the name of counter `counter` in query `group` already
+ * occurs in a query with a lower index than `group`.
+ */
+static bool
+iris_monitor_counter_is_duplicate(const struct gen_perf_config *perf_cfg,
+                                  int group, int counter)
+{
+   const char *current_name =
+      perf_cfg->queries[group].counters[counter].name;
+
+   for (int existing_group = 0; existing_group < group; ++existing_group) {
+      const struct gen_perf_query_info *existing =
+         &perf_cfg->queries[existing_group];
+
+      for (int i = 0; i < existing->n_counters; ++i) {
+         if (strcmp(current_name, existing->counters[i].name) == 0)
+            return true;
+      }
+   }
+   return false;
+}
+
+/**
+ * Build the screen's monitor configuration: create the gen_perf_config,
+ * install the iris callbacks in its vtable, let gen_perf_init_metrics()
+ * populate it, and build the flat list of counters with duplicate names
+ * removed.
+ *
+ * screen->monitor_cfg is only set once every allocation has succeeded, so
+ * an allocation failure leaves it untouched.
+ *
+ * \return true if at least one counter is available; false on allocation
+ *         failure or when no counter was found.
+ */
+static bool
+iris_monitor_init_metrics(struct iris_screen *screen)
+{
+   struct iris_monitor_config *monitor_cfg =
+      rzalloc(screen, struct iris_monitor_config);
+   if (unlikely(!monitor_cfg))
+      return false;
+
+   struct gen_perf_config *perf_cfg = gen_perf_new(monitor_cfg);
+   if (unlikely(!perf_cfg))
+      goto allocation_error;
+
+   monitor_cfg->perf_cfg = perf_cfg;
+
+   perf_cfg->vtbl.bo_alloc = iris_oa_bo_alloc;
+   perf_cfg->vtbl.bo_unreference = (bo_unreference_t)iris_bo_unreference;
+   perf_cfg->vtbl.bo_map = (bo_map_t)iris_bo_map;
+   perf_cfg->vtbl.bo_unmap = (bo_unmap_t)iris_bo_unmap;
+   perf_cfg->vtbl.emit_mi_flush = (emit_mi_flush_t)iris_monitor_emit_mi_flush;
+
+   perf_cfg->vtbl.emit_mi_report_perf_count =
+      (emit_mi_report_t)iris_monitor_emit_mi_report_perf_count;
+   perf_cfg->vtbl.batchbuffer_flush = iris_monitor_batchbuffer_flush;
+   perf_cfg->vtbl.capture_frequency_stat_register =
+      (capture_frequency_stat_register_t) iris_monitor_capture_frequency_stat_register;
+   perf_cfg->vtbl.store_register_mem64 =
+      (store_register_mem64_t) iris_monitor_store_register_mem64;
+   perf_cfg->vtbl.batch_references = (batch_references_t)iris_batch_references;
+   perf_cfg->vtbl.bo_wait_rendering =
+      (bo_wait_rendering_t)iris_bo_wait_rendering;
+   perf_cfg->vtbl.bo_busy = (bo_busy_t)iris_bo_busy;
+
+   gen_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd);
+
+   /* a gallium "group" is equivalent to a gen "query"
+    * a gallium "query" is equivalent to a gen "query_counter"
+    *
+    * Each gen_query supports a specific number of query_counters.  To
+    * allocate the array of iris_monitor_counter, we need an upper bound
+    * (ignoring duplicate query_counters).
+    */
+   int gen_query_counters_count = 0;
+   for (int gen_query_id = 0;
+        gen_query_id < perf_cfg->n_queries;
+        ++gen_query_id) {
+      gen_query_counters_count += perf_cfg->queries[gen_query_id].n_counters;
+   }
+
+   monitor_cfg->counters = rzalloc_size(monitor_cfg,
+                                        sizeof(struct iris_monitor_counter) *
+                                        gen_query_counters_count);
+   if (unlikely(!monitor_cfg->counters))
+      goto allocation_error;
+
+   int iris_monitor_id = 0;
+   for (int group = 0; group < perf_cfg->n_queries; ++group) {
+      for (int counter = 0;
+           counter < perf_cfg->queries[group].n_counters;
+           ++counter) {
+         /* Check previously identified metrics to filter out duplicates. The
+          * user is not helped by having the same metric available in several
+          * groups. (n^2 algorithm).
+          */
+         if (iris_monitor_counter_is_duplicate(perf_cfg, group, counter))
+            continue;
+
+         monitor_cfg->counters[iris_monitor_id].group = group;
+         monitor_cfg->counters[iris_monitor_id].counter = counter;
+         ++iris_monitor_id;
+      }
+   }
+   monitor_cfg->num_counters = iris_monitor_id;
+
+   /* publish the configuration only now that it is complete */
+   screen->monitor_cfg = monitor_cfg;
+   return monitor_cfg->num_counters > 0;
+
+allocation_error:
+   /* monitor_cfg came from rzalloc(), so it has to be released with
+    * ralloc_free(), not free().  That also releases everything allocated
+    * with monitor_cfg as its ralloc context: the counters array, and
+    * presumably perf_cfg (gen_perf_new() was given monitor_cfg as its
+    * context -- TODO confirm).
+    */
+   ralloc_free(monitor_cfg);
+   return false;
+}
+
+/**
+ * Describe one metric group; each group corresponds to one entry of
+ * perf_cfg->queries.  The monitor configuration is created on the first
+ * call.
+ *
+ * \return the number of groups when \p info is NULL; otherwise 1 if
+ *         \p info was filled in, or 0 if initialisation failed or
+ *         \p group_index is out of range.
+ */
+int iris_get_monitor_group_info(struct pipe_screen *pscreen,
+                                unsigned group_index,
+                                struct pipe_driver_query_group_info *info)
+{
+   struct iris_screen *screen = (struct iris_screen *)pscreen;
+
+   /* lazily build the configuration the first time it is needed */
+   if (!screen->monitor_cfg && !iris_monitor_init_metrics(screen))
+      return 0;
+
+   const struct gen_perf_config *perf = screen->monitor_cfg->perf_cfg;
+
+   if (!info) {
+      /* return the count that can be queried */
+      return perf->n_queries;
+   }
+
+   if (group_index >= perf->n_queries) {
+      /* out of range */
+      return 0;
+   }
+
+   struct gen_perf_query_info *group = &perf->queries[group_index];
+
+   info->name = group->name;
+   info->num_queries = group->n_counters;
+   info->max_active_queries = group->n_counters;
+   return 1;
+}
diff --git a/src/gallium/drivers/iris/iris_monitor.h b/src/gallium/drivers/iris/iris_monitor.h
new file mode 100644 (file)
index 0000000..2a7a72e
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef IRIS_MONITOR_H
+#define IRIS_MONITOR_H
+
+#include "pipe/p_screen.h"
+
+/* Locates a single counter inside a gen_perf_config. */
+struct iris_monitor_counter {
+   int group;     /* index into perf_cfg->queries */
+   int counter;   /* index into that query's counters array */
+};
+
+/* Per-screen description of the counters reported by the monitor code. */
+struct iris_monitor_config {
+   struct gen_perf_config *perf_cfg;
+
+   /* gallium requires an index for each counter */
+   int num_counters;                        /* entries used in counters[] */
+   struct iris_monitor_counter *counters;
+};
+
+/* With info == NULL, returns the number of counters; otherwise describes
+ * counter `index` in *info.
+ */
+int iris_get_monitor_info(struct pipe_screen *pscreen,
+                          unsigned index,
+                          struct pipe_driver_query_info *info);
+
+/* With info == NULL, returns the number of groups; otherwise describes
+ * group `index` in *info.
+ */
+int iris_get_monitor_group_info(
+   struct pipe_screen *pscreen,
+   unsigned index,
+   struct pipe_driver_query_group_info *info);
+
+
+#endif
index a2b64ba9a143c632ea1be12569dd75744db21689..e92685d4ae67abc8606446892ed503fb2feb84b7 100644 (file)
@@ -53,6 +53,7 @@
 #include "iris_screen.h"
 #include "intel/compiler/brw_compiler.h"
 #include "intel/common/gen_gem.h"
+#include "iris_monitor.h"
 
 static void
 iris_flush_frontbuffer(struct pipe_screen *_screen,
@@ -683,6 +684,8 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
    pscreen->flush_frontbuffer = iris_flush_frontbuffer;
    pscreen->get_timestamp = iris_get_timestamp;
    pscreen->query_memory_info = iris_query_memory_info;
+   pscreen->get_driver_query_group_info = iris_get_monitor_group_info;
+   pscreen->get_driver_query_info = iris_get_monitor_info;
 
    return pscreen;
 }
index c83e300a9377b1475bc28187c1aff46f1588db5d..e8eeac07f093530f430797effa34fe2c31ececf0 100644 (file)
@@ -33,6 +33,7 @@
 #include "iris_bufmgr.h"
 
 struct iris_bo;
+struct iris_monitor_config;
 
 #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
 #define WRITE_ONCE(x, v) *(volatile __typeof__(x) *)&(x) = (v)
@@ -75,6 +76,7 @@ struct iris_screen {
    struct isl_device isl_dev;
    struct iris_bufmgr *bufmgr;
    struct brw_compiler *compiler;
+   struct iris_monitor_config *monitor_cfg;
 
    /**
     * A buffer containing nothing useful, for hardware workarounds that
index 7932df23e3da1665f7034563075990b52ecde30a..b6c9d1885d9ba26e11235b42be299b4eb3feb6ba 100644 (file)
@@ -6450,6 +6450,18 @@ iris_lost_genx_state(struct iris_context *ice, struct iris_batch *batch)
    memset(genx->last_index_buffer, 0, sizeof(genx->last_index_buffer));
 }
 
+/**
+ * Emit an MI_REPORT_PERF_COUNT command into \p batch, with its memory
+ * address set to \p offset_in_bytes within \p bo and its report ID set to
+ * \p report_id.
+ */
+static void
+iris_emit_mi_report_perf_count(struct iris_batch *batch, struct iris_bo *bo,
+                               uint32_t offset_in_bytes, uint32_t report_id)
+{
+   iris_emit_cmd(batch, GENX(MI_REPORT_PERF_COUNT), rpc) {
+      rpc.ReportID = report_id;
+      rpc.MemoryAddress = rw_bo(bo, offset_in_bytes);
+   }
+}
+
 void
 genX(init_state)(struct iris_context *ice)
 {
@@ -6502,6 +6514,7 @@ genX(init_state)(struct iris_context *ice)
    ice->vtbl.update_surface_base_address = iris_update_surface_base_address;
    ice->vtbl.upload_compute_state = iris_upload_compute_state;
    ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
+   ice->vtbl.emit_mi_report_perf_count = iris_emit_mi_report_perf_count;
    ice->vtbl.rebind_buffer = iris_rebind_buffer;
    ice->vtbl.load_register_reg32 = iris_load_register_reg32;
    ice->vtbl.load_register_reg64 = iris_load_register_reg64;
index a953524a643430f844be61f7008d432fddcd6d72..3f611c2b5698be71ba08833f162fba42d51e2985 100644 (file)
@@ -37,6 +37,7 @@ files_libiris = files(
   'iris_formats.c',
   'iris_genx_macros.h',
   'iris_genx_protos.h',
+  'iris_monitor.c',
   'iris_pipe.h',
   'iris_pipe_control.c',
   'iris_program.c',
@@ -89,7 +90,7 @@ libiris = static_library(
   dependencies : [dep_libdrm, dep_valgrind, idep_genxml, idep_libintel_common],
   link_with : [
     iris_gen_libs, libintel_compiler, libintel_dev, libisl,
-    libblorp
+    libblorp, libintel_perf
   ],
 )