intel/perf: fix raw query kernel metric selection
[mesa.git] / src / intel / perf / gen_perf.h
index c79db7cfa44538f8d3d6a070661792da21b98980..790719ccbadad3960acea2d5f1d7e1af2d3f39ec 100644 (file)
@@ -25,6 +25,7 @@
 #define GEN_PERF_H
 
 #include <stdio.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <string.h>
 
 #include "compiler/glsl/list.h"
 #include "util/ralloc.h"
 
+#include "drm-uapi/i915_drm.h"
+
 struct gen_device_info;
 
 struct gen_perf_config;
 struct gen_perf_query_info;
 
-#define GEN7_RPSTAT1                       0xA01C
-#define  GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT   7
-#define  GEN7_RPSTAT1_CURR_GT_FREQ_MASK    INTEL_MASK(13, 7)
-#define  GEN7_RPSTAT1_PREV_GT_FREQ_SHIFT   0
-#define  GEN7_RPSTAT1_PREV_GT_FREQ_MASK    INTEL_MASK(6, 0)
-
-#define GEN9_RPSTAT0                       0xA01C
-#define  GEN9_RPSTAT0_CURR_GT_FREQ_SHIFT   23
-#define  GEN9_RPSTAT0_CURR_GT_FREQ_MASK    INTEL_MASK(31, 23)
-#define  GEN9_RPSTAT0_PREV_GT_FREQ_SHIFT   0
-#define  GEN9_RPSTAT0_PREV_GT_FREQ_MASK    INTEL_MASK(8, 0)
-
 enum gen_perf_counter_type {
    GEN_PERF_COUNTER_TYPE_EVENT,
    GEN_PERF_COUNTER_TYPE_DURATION_NORM,
@@ -72,6 +63,39 @@ enum gen_perf_counter_data_type {
    GEN_PERF_COUNTER_DATA_TYPE_DOUBLE,
 };
 
+enum gen_perf_counter_units {
+   /* size */
+   GEN_PERF_COUNTER_UNITS_BYTES,
+
+   /* frequency */
+   GEN_PERF_COUNTER_UNITS_HZ,
+
+   /* time */
+   GEN_PERF_COUNTER_UNITS_NS,
+   GEN_PERF_COUNTER_UNITS_US,
+
+   /**/
+   GEN_PERF_COUNTER_UNITS_PIXELS,
+   GEN_PERF_COUNTER_UNITS_TEXELS,
+   GEN_PERF_COUNTER_UNITS_THREADS,
+   GEN_PERF_COUNTER_UNITS_PERCENT,
+
+   /* events */
+   GEN_PERF_COUNTER_UNITS_MESSAGES,
+   GEN_PERF_COUNTER_UNITS_NUMBER,
+   GEN_PERF_COUNTER_UNITS_CYCLES,
+   GEN_PERF_COUNTER_UNITS_EVENTS,
+   GEN_PERF_COUNTER_UNITS_UTILIZATION,
+
+   /**/
+   GEN_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES,
+   GEN_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES,
+   GEN_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES,
+   GEN_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE,
+
+   GEN_PERF_COUNTER_UNITS_MAX
+};
+
 struct gen_pipeline_stat {
    uint32_t reg;
    uint32_t numerator;
@@ -87,19 +111,6 @@ struct gen_pipeline_stat {
  */
 #define MAX_OA_REPORT_COUNTERS 62
 
-#define IA_VERTICES_COUNT          0x2310
-#define IA_PRIMITIVES_COUNT        0x2318
-#define VS_INVOCATION_COUNT        0x2320
-#define HS_INVOCATION_COUNT        0x2300
-#define DS_INVOCATION_COUNT        0x2308
-#define GS_INVOCATION_COUNT        0x2328
-#define GS_PRIMITIVES_COUNT        0x2330
-#define CL_INVOCATION_COUNT        0x2338
-#define CL_PRIMITIVES_COUNT        0x2340
-#define PS_INVOCATION_COUNT        0x2348
-#define CS_INVOCATION_COUNT        0x2290
-#define PS_DEPTH_COUNT             0x2350
-
 /*
  * When currently allocate only one page for pipeline statistics queries. Here
  * we derived the maximum number of counters for that amount.
@@ -138,13 +149,26 @@ struct gen_perf_query_result {
     * query.
     */
    uint64_t unslice_frequency[2];
+
+   /**
+    * Timestamp of the query.
+    */
+   uint64_t begin_timestamp;
+
+   /**
+    * Whether the query was interrupted by another workload (aka preemption).
+    */
+   bool query_disjoint;
 };
 
 struct gen_perf_query_counter {
    const char *name;
    const char *desc;
+   const char *symbol_name;
+   const char *category;
    enum gen_perf_counter_type type;
    enum gen_perf_counter_data_type data_type;
+   enum gen_perf_counter_units units;
    uint64_t raw_max;
    size_t offset;
 
@@ -164,6 +188,18 @@ struct gen_perf_query_register_prog {
    uint32_t val;
 };
 
+/* Register programming for a given query */
+struct gen_perf_registers {
+   const struct gen_perf_query_register_prog *flex_regs;
+   uint32_t n_flex_regs;
+
+   const struct gen_perf_query_register_prog *mux_regs;
+   uint32_t n_mux_regs;
+
+   const struct gen_perf_query_register_prog *b_counter_regs;
+   uint32_t n_b_counter_regs;
+};
+
 struct gen_perf_query_info {
    enum gen_perf_query_type {
       GEN_PERF_QUERY_TYPE_OA,
@@ -171,6 +207,7 @@ struct gen_perf_query_info {
       GEN_PERF_QUERY_TYPE_PIPELINE,
    } kind;
    const char *name;
+   const char *symbol_name;
    const char *guid;
    struct gen_perf_query_counter *counters;
    int n_counters;
@@ -188,21 +225,40 @@ struct gen_perf_query_info {
    int b_offset;
    int c_offset;
 
-   /* Register programming for a given query */
-   struct gen_perf_query_register_prog *flex_regs;
-   uint32_t n_flex_regs;
+   struct gen_perf_registers config;
+};
 
-   struct gen_perf_query_register_prog *mux_regs;
-   uint32_t n_mux_regs;
+struct gen_perf_query_counter_info {
+   struct gen_perf_query_counter *counter;
 
-   struct gen_perf_query_register_prog *b_counter_regs;
-   uint32_t n_b_counter_regs;
+   uint64_t query_mask;
+
+   /**
+    * Each counter can be a part of many groups, each time at different index.
+    * This struct stores one of those locations.
+    */
+   struct {
+      int group_idx; /* query/group number */
+      int counter_idx; /* index inside of query/group */
+   } location;
 };
 
 struct gen_perf_config {
+   /* Whether i915 has DRM_I915_QUERY_PERF_CONFIG support. */
+   bool i915_query_supported;
+
+   /* Version of the i915-perf subsystem, refer to i915_drm.h. */
+   int i915_perf_version;
+
+   /* Powergating configuration for the running the query. */
+   struct drm_i915_gem_context_param_sseu sseu;
+
    struct gen_perf_query_info *queries;
    int n_queries;
 
+   struct gen_perf_query_counter_info *counter_infos;
+   int n_counters;
+
    /* Variables referenced in the XML meta data for OA performance
     * counters, e.g in the normalization equations.
     *
@@ -227,6 +283,17 @@ struct gen_perf_config {
     */
    struct hash_table *oa_metrics_table;
 
+   /* When MDAPI hasn't configured the metric we need to use by the time the
+    * query begins, this OA metric is used as a fallback.
+    */
+   uint64_t fallback_raw_oa_metric;
+
+   /* Whether we have support for this platform. If true && n_queries == 0,
+    * this means we will not be able to use i915-perf because of it is in
+    * paranoid mode.
+    */
+   bool platform_supported;
+
    /* Location of the device's sysfs entry. */
    char sysfs_dev_dir[256];
 
@@ -238,26 +305,28 @@ struct gen_perf_config {
       bool (*batch_references)(void *batch, void *bo);
       void (*bo_wait_rendering)(void *bo);
       int (*bo_busy)(void *bo);
-      void (*emit_mi_flush)(void *ctx);
+      void (*emit_stall_at_pixel_scoreboard)(void *ctx);
       void (*emit_mi_report_perf_count)(void *ctx,
                                         void *bo,
                                         uint32_t offset_in_bytes,
                                         uint32_t report_id);
       void (*batchbuffer_flush)(void *ctx,
                                 const char *file, int line);
-      void (*capture_frequency_stat_register)(void *ctx, void *bo,
-                                              uint32_t bo_offset);
-      void (*store_register_mem64)(void *ctx, void *bo, uint32_t reg, uint32_t offset);
+      void (*store_register_mem)(void *ctx, void *bo, uint32_t reg, uint32_t reg_size, uint32_t offset);
 
    } vtbl;
 };
 
-struct gen_perf_query_object;
-const struct gen_perf_query_info* gen_perf_query_info(const struct gen_perf_query_object *);
+struct gen_perf_counter_pass {
+   struct gen_perf_query_info *query;
+   struct gen_perf_query_counter *counter;
+   uint32_t pass;
+};
 
 void gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
                            const struct gen_device_info *devinfo,
-                           int drm_fd);
+                           int drm_fd,
+                           bool include_pipeline_statistics);
 
 /** Query i915 for a metric id using guid.
  */
@@ -265,6 +334,20 @@ bool gen_perf_load_metric_id(struct gen_perf_config *perf_cfg,
                              const char *guid,
                              uint64_t *metric_id);
 
+/** Load a configuation's content from i915 using a guid.
+ */
+struct gen_perf_registers *gen_perf_load_configuration(struct gen_perf_config *perf_cfg,
+                                                      int fd, const char *guid);
+
+/** Store a configuration into i915 using guid and return a new metric id.
+ *
+ * If guid is NULL, then a generated one will be provided by hashing the
+ * content of the configuration.
+ */
+uint64_t gen_perf_store_configuration(struct gen_perf_config *perf_cfg, int fd,
+                                      const struct gen_perf_registers *config,
+                                      const char *guid);
+
 /** Read the slice/unslice frequency from 2 OA reports and store then into
  *  result.
  */
@@ -280,22 +363,6 @@ void gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
                                       const uint32_t *end);
 void gen_perf_query_result_clear(struct gen_perf_query_result *result);
 
-struct gen_perf_context;
-struct gen_perf_context *gen_perf_new_context(void *parent);
-
-void gen_perf_init_context(struct gen_perf_context *perf_ctx,
-                           struct gen_perf_config *perf_cfg,
-                           void * ctx,  /* driver context (eg, brw_context) */
-                           void * bufmgr,  /* eg brw_bufmgr */
-                           const struct gen_device_info *devinfo,
-                           uint32_t hw_ctx,
-                           int drm_fd);
-
-struct gen_perf_config *gen_perf_config(struct gen_perf_context *ctx);
-
-int gen_perf_active_queries(struct gen_perf_context *perf_ctx,
-                            const struct gen_perf_query_info *query);
-
 static inline size_t
 gen_perf_query_counter_get_size(const struct gen_perf_query_counter *counter)
 {
@@ -322,31 +389,13 @@ gen_perf_new(void *ctx)
    return perf;
 }
 
-struct gen_perf_query_object *
-gen_perf_new_query(struct gen_perf_context *, unsigned query_index);
-
-
-bool gen_perf_begin_query(struct gen_perf_context *perf_ctx,
-                          struct gen_perf_query_object *query);
-void gen_perf_end_query(struct gen_perf_context *perf_ctx,
-                        struct gen_perf_query_object *query);
-void gen_perf_wait_query(struct gen_perf_context *perf_ctx,
-                         struct gen_perf_query_object *query,
-                         void *current_batch);
-bool gen_perf_is_query_ready(struct gen_perf_context *perf_ctx,
-                             struct gen_perf_query_object *query,
-                             void *current_batch);
-void gen_perf_delete_query(struct gen_perf_context *perf_ctx,
-                           struct gen_perf_query_object *query);
-void gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
-                             struct gen_perf_query_object *query,
-                             int data_size,
-                             unsigned *data,
-                             unsigned *bytes_written);
-
-void gen_perf_dump_query_count(struct gen_perf_context *perf_ctx);
-void gen_perf_dump_query(struct gen_perf_context *perf_ctx,
-                         struct gen_perf_query_object *obj,
-                         void *current_batch);
+uint32_t gen_perf_get_n_passes(struct gen_perf_config *perf,
+                               const uint32_t *counter_indices,
+                               uint32_t counter_indices_count,
+                               struct gen_perf_query_info **pass_queries);
+void gen_perf_get_counters_passes(struct gen_perf_config *perf,
+                                  const uint32_t *counter_indices,
+                                  uint32_t counter_indices_count,
+                                  struct gen_perf_counter_pass *counter_pass);
 
 #endif /* GEN_PERF_H */