intel/perf: move is_query_ready to gen_perf

[mesa.git] / src / intel / perf / gen_perf.h
diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h

index a41414a662c111f7b18291ea6d730423d9e8b146..efb884afe36f770f2359db7ebae581b698484f6b 100644 (file)
--- a/src/intel/perf/gen_perf.h
+++ b/src/intel/perf/gen_perf.h
@@ -217,6 +217,12 @@ struct gen_perf_config {
     struct {
        void *(*bo_alloc)(void *bufmgr, const char *name, uint64_t size);
        void (*bo_unreference)(void *bo);
+      void *(*bo_map)(void *ctx, void *bo, unsigned flags);
+      void (*bo_unmap)(void *bo);
+      bool (*batch_references)(void *batch, void *bo);
+      void (*bo_wait_rendering)(void *bo);
+      int (*bo_busy)(void *bo);
+      void (*emit_mi_flush)(void *ctx);
        void (*emit_mi_report_perf_count)(void *ctx,
                                          void *bo,
                                          uint32_t offset_in_bytes,
@@ -225,6 +231,8 @@ struct gen_perf_config {
                                  const char *file, int line);
        void (*capture_frequency_stat_register)(void *ctx, void *bo,
                                                uint32_t bo_offset);
+      void (*store_register_mem64)(void *ctx, void *bo, uint32_t reg, uint32_t offset);
+
     } vtbl;
  };
  
@@ -348,9 +356,88 @@ struct oa_sample_buf {
     uint32_t last_timestamp;
  };
  
+/**
+ * gen representation of a performance query object.
+ *
+ * NB: We want to keep this structure relatively lean considering that
+ * applications may expect to allocate enough objects to be able to
+ * query around all draw calls in a frame.
+ */
+struct gen_perf_query_object
+{
+   const struct gen_perf_query_info *queryinfo;
+
+   /* See queryinfo->kind to know which state below is in use... */
+   union {
+      struct {
+
+         /**
+          * BO containing OA counter snapshots at query Begin/End time.
+          */
+         void *bo;
+
+         /**
+          * Address at which @bo is mapped
+          */
+         void *map;
+
+         /**
+          * The MI_REPORT_PERF_COUNT command lets us specify a unique
+          * ID that will be reflected in the resulting OA report
+          * that's written by the GPU. This is the ID we're expecting
+          * in the begin report and the end report should be
+          * @begin_report_id + 1.
+          */
+         int begin_report_id;
+
+         /**
+          * Reference the head of the brw->perfquery.sample_buffers
+          * list at the time that the query started (so we only need
+          * to look at nodes after this point when looking for samples
+          * related to this query)
+          *
+          * (See struct oa_sample_buf description for more details)
+          */
+         struct exec_node *samples_head;
+
+         /**
+          * false while in the unaccumulated array, and set to
+          * true when the final, end MI_RPC snapshot has been
+          * accumulated.
+          */
+         bool results_accumulated;
+
+         /**
+          * Frequency of the GT at begin and end of the query.
+          */
+         uint64_t gt_frequency[2];
+
+         /**
+          * Accumulated OA results between begin and end of the query.
+          */
+         struct gen_perf_query_result result;
+      } oa;
+
+      struct {
+         /**
+          * BO containing starting and ending snapshots for the
+          * statistics counters.
+          */
+         void *bo;
+      } pipeline_stats;
+   };
+};
+
  struct gen_perf_context {
     struct gen_perf_config *perf;
  
+   void * ctx;  /* driver context (eg, brw_context) */
+   void * bufmgr;
+   const struct gen_device_info *devinfo;
+
+   uint32_t hw_ctx;
+   int drm_fd;
+
     /* The i915 perf stream we open to setup + enable the OA counters */
     int oa_stream_fd;
  
@@ -394,7 +481,7 @@ struct gen_perf_context {
      * These may be active, or have already ended.  However, the
      * results have not been requested.
      */
-   struct brw_perf_query_object **unaccumulated;
+   struct gen_perf_query_object **unaccumulated;
     int unaccumulated_elements;
     int unaccumulated_array_size;
  
@@ -406,6 +493,14 @@ struct gen_perf_context {
     int n_query_instances;
  };
  
+void gen_perf_init_context(struct gen_perf_context *perf_ctx,
+                           struct gen_perf_config *perf_cfg,
+                           void * ctx,  /* driver context (eg, brw_context) */
+                           void * bufmgr,  /* eg brw_bufmgr */
+                           const struct gen_device_info *devinfo,
+                           uint32_t hw_ctx,
+                           int drm_fd);
+
  static inline size_t
  gen_perf_query_counter_get_size(const struct gen_perf_query_counter *counter)
  {
@@ -506,6 +601,34 @@ uint64_t gen_perf_query_get_metric_id(struct gen_perf_config *perf,
                                        const struct gen_perf_query_info *query);
  struct oa_sample_buf * gen_perf_get_free_sample_buf(struct gen_perf_context *perf);
  void gen_perf_reap_old_sample_buffers(struct gen_perf_context *perf_ctx);
-
+void gen_perf_free_sample_bufs(struct gen_perf_context *perf_ctx);
+
+void gen_perf_snapshot_statistics_registers(void *context,
+                                            struct gen_perf_config *perf,
+                                            struct gen_perf_query_object *obj,
+                                            uint32_t offset_in_bytes);
+
+void gen_perf_close(struct gen_perf_context *perfquery,
+                    const struct gen_perf_query_info *query);
+bool gen_perf_open(struct gen_perf_context *perfquery,
+                   int metrics_set_id,
+                   int report_format,
+                   int period_exponent,
+                   int drm_fd,
+                   uint32_t ctx_id);
+
+bool gen_perf_inc_n_users(struct gen_perf_context *perfquery);
+void gen_perf_dec_n_users(struct gen_perf_context *perfquery);
+
+bool gen_perf_begin_query(struct gen_perf_context *perf_ctx,
+                          struct gen_perf_query_object *query);
+void gen_perf_end_query(struct gen_perf_context *perf_ctx,
+                        struct gen_perf_query_object *query);
+void gen_perf_wait_query(struct gen_perf_context *perf_ctx,
+                         struct gen_perf_query_object *query,
+                         void *current_batch);
+bool gen_perf_is_query_ready(struct gen_perf_context *perf_ctx,
+                             struct gen_perf_query_object *query,
+                             void *current_batch);
  
  #endif /* GEN_PERF_H */