struct {
void *(*bo_alloc)(void *bufmgr, const char *name, uint64_t size);
void (*bo_unreference)(void *bo);
+ void *(*bo_map)(void *ctx, void *bo, unsigned flags);
+ void (*bo_unmap)(void *bo);
+ bool (*batch_references)(void *batch, void *bo);
+ void (*bo_wait_rendering)(void *bo);
+ int (*bo_busy)(void *bo);
+ void (*emit_mi_flush)(void *ctx);
void (*emit_mi_report_perf_count)(void *ctx,
void *bo,
uint32_t offset_in_bytes,
const char *file, int line);
void (*capture_frequency_stat_register)(void *ctx, void *bo,
uint32_t bo_offset);
+ void (*store_register_mem64)(void *ctx, void *bo, uint32_t reg, uint32_t offset);
+
} vtbl;
};
uint32_t last_timestamp;
};
+/**
+ * gen representation of a performance query object.
+ *
+ * Pairs a pointer to the query's description (queryinfo) with per-query
+ * state held in an anonymous union: OA state in .oa, or pipeline
+ * statistics state in .pipeline_stats.
+ *
+ * NB: We want to keep this structure relatively lean considering that
+ * applications may expect to allocate enough objects to be able to
+ * query around all draw calls in a frame.
+ */
+struct gen_perf_query_object
+{
+ const struct gen_perf_query_info *queryinfo;
+
+ /* See query->kind to know which state below is in use...
+ * NOTE(review): presumably this refers to queryinfo->kind -- confirm. */
+ union {
+ struct {
+
+ /**
+ * BO containing OA counter snapshots at query Begin/End time.
+ * Held as an opaque pointer (void *).
+ */
+ void *bo;
+
+ /**
+ * Address at which @bo is mapped.
+ */
+ void *map;
+
+ /**
+ * The MI_REPORT_PERF_COUNT command lets us specify a unique
+ * ID that will be reflected in the resulting OA report
+ * that's written by the GPU. This is the ID we're expecting
+ * in the begin report; the end report should carry
+ * @begin_report_id + 1.
+ */
+ int begin_report_id;
+
+ /**
+ * Reference the head of the brw->perfquery.sample_buffers
+ * list at the time that the query started (so we only need
+ * to look at nodes after this point when looking for samples
+ * related to this query)
+ *
+ * (See struct brw_oa_sample_buf description for more details)
+ *
+ * NOTE(review): the brw-prefixed names above look stale; the
+ * prototypes in this header use struct oa_sample_buf --
+ * confirm the current names.
+ */
+ struct exec_node *samples_head;
+
+ /**
+ * false while the query is still in the unaccumulated list,
+ * and set to true once the final (end) MI_RPC snapshot has
+ * been accumulated.
+ */
+ bool results_accumulated;
+
+ /**
+ * Frequency of the GT at begin and end of the query.
+ * NOTE(review): presumably [0] = begin and [1] = end -- confirm.
+ */
+ uint64_t gt_frequency[2];
+
+ /**
+ * Accumulated OA results between begin and end of the query.
+ */
+ struct gen_perf_query_result result;
+ } oa;
+
+ struct {
+ /**
+ * BO containing starting and ending snapshots for the
+ * statistics counters.
+ */
+ void *bo;
+ } pipeline_stats;
+ };
+};
+
struct gen_perf_context { /* NOTE(review): only part of this struct is visible in this diff */
struct gen_perf_config *perf;
+ void * ctx; /* driver context (eg, brw_context) */
+ void * bufmgr; /* driver buffer manager (eg brw_bufmgr) */
+ const struct gen_device_info *devinfo; /* same type as the devinfo argument of gen_perf_init_context() */
+
+ uint32_t hw_ctx; /* same type/name as the hw_ctx argument of gen_perf_init_context() */
+ int drm_fd; /* NOTE(review): presumably the DRM device fd -- confirm */
+
/* The i915 perf stream we open to setup + enable the OA counters */
int oa_stream_fd;
* These may be active, or have already ended. However, the
* results have not been requested.
*/
- struct brw_perf_query_object **unaccumulated;
+ struct gen_perf_query_object **unaccumulated; /* NOTE(review): presumably sized by the two counters below -- confirm */
int unaccumulated_elements;
int unaccumulated_array_size;
int n_query_instances;
};
+void gen_perf_init_context(struct gen_perf_context *perf_ctx, /* context being initialized */
+ struct gen_perf_config *perf_cfg, /* NOTE(review): presumably stored as perf_ctx->perf -- confirm */
+ void * ctx, /* driver context (eg, brw_context) */
+ void * bufmgr, /* eg brw_bufmgr */
+ const struct gen_device_info *devinfo, /* matches the devinfo field of struct gen_perf_context */
+ uint32_t hw_ctx, /* matches the hw_ctx field of struct gen_perf_context */
+ int drm_fd); /* matches the drm_fd field of struct gen_perf_context */
+
static inline size_t
gen_perf_query_counter_get_size(const struct gen_perf_query_counter *counter)
{
struct gen_perf_config *perf);
uint64_t gen_perf_query_get_metric_id(struct gen_perf_config *perf,
const struct gen_perf_query_info *query);
-
+struct oa_sample_buf * gen_perf_get_free_sample_buf(struct gen_perf_context *perf); /* NOTE(review): parameter is named perf here but perf_ctx in the two prototypes below */
+void gen_perf_reap_old_sample_buffers(struct gen_perf_context *perf_ctx); /* NOTE(review): reaping policy is not visible in this header -- see the implementation */
+void gen_perf_free_sample_bufs(struct gen_perf_context *perf_ctx); /* NOTE(review): presumably releases every sample buffer owned by perf_ctx -- confirm */
+
+void gen_perf_snapshot_statistics_registers(void *context, /* opaque pointer; NOTE(review): presumably the driver context handed to vtbl callbacks -- confirm */
+ struct gen_perf_config *perf,
+ struct gen_perf_query_object *obj, /* NOTE(review): presumably uses obj->pipeline_stats.bo as the destination -- confirm */
+ uint32_t offset_in_bytes); /* byte offset; NOTE(review): presumably into the destination BO -- confirm */
+
+void gen_perf_close(struct gen_perf_context *perfquery, /* NOTE(review): named perfquery here, perf_ctx in other prototypes of the same type */
+ const struct gen_perf_query_info *query);
+bool gen_perf_open(struct gen_perf_context *perfquery,
+ int metrics_set_id,
+ int report_format,
+ int period_exponent,
+ int drm_fd, /* NOTE(review): struct gen_perf_context also carries a drm_fd field -- confirm whether both are needed */
+ uint32_t ctx_id); /* NOTE(review): possibly the same value as gen_perf_context.hw_ctx -- confirm */
+
+bool gen_perf_inc_n_users(struct gen_perf_context *perfquery); /* returns bool; NOTE(review): meaning of false is not visible here -- confirm */
+void gen_perf_dec_n_users(struct gen_perf_context *perfquery);
+
+bool gen_perf_begin_query(struct gen_perf_context *perf_ctx,
+ struct gen_perf_query_object *query); /* returns bool; NOTE(review): presumably false on failure to start -- confirm */
+void gen_perf_end_query(struct gen_perf_context *perf_ctx,
+ struct gen_perf_query_object *query);
+void gen_perf_wait_query(struct gen_perf_context *perf_ctx,
+ struct gen_perf_query_object *query,
+ void *current_batch); /* opaque batch handle; NOTE(review): presumably what vtbl.batch_references expects -- confirm */
+bool gen_perf_is_query_ready(struct gen_perf_context *perf_ctx,
+ struct gen_perf_query_object *query,
+ void *current_batch); /* opaque batch handle, same type as in gen_perf_wait_query() */
#endif /* GEN_PERF_H */