From 52f7a0bff7d4065de1ce4b91a767555dee811751 Mon Sep 17 00:00:00 2001
From: Mark Janes <mark.a.janes@intel.com>
Date: Wed, 26 Jun 2019 11:01:48 -0700
Subject: [PATCH] intel/perf: use temporary pointers to simplify access to perf
 state

Most accesses to perf state were made through repeated dereferences of
brw_context members.  Prefering temporary variables of perf_ctx and
perf_cfg has the following advantages:

 - more concise implementation
 - easier refactor when moving subsequent methods to perf

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 .../drivers/dri/i965/brw_performance_query.c  | 170 ++++++++++--------
 1 file changed, 92 insertions(+), 78 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 11c2111e2ba..63752e5eaca 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -161,8 +161,9 @@ brw_get_perf_query_info(struct gl_context *ctx,
                         GLuint *n_active)
 {
    struct brw_context *brw = brw_context(ctx);
+   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
    const struct gen_perf_query_info *query =
-      &brw->perf_ctx.perf->queries[query_index];
+      &perf_ctx->perf->queries[query_index];
 
    *name = query->name;
    *data_size = query->data_size;
@@ -171,11 +172,11 @@ brw_get_perf_query_info(struct gl_context *ctx,
    switch (query->kind) {
    case GEN_PERF_QUERY_TYPE_OA:
    case GEN_PERF_QUERY_TYPE_RAW:
-      *n_active = brw->perf_ctx.n_active_oa_queries;
+      *n_active = perf_ctx->n_active_oa_queries;
       break;
 
    case GEN_PERF_QUERY_TYPE_PIPELINE:
-      *n_active = brw->perf_ctx.n_active_pipeline_stats_queries;
+      *n_active = perf_ctx->n_active_pipeline_stats_queries;
       break;
 
    default:
@@ -254,17 +255,18 @@ static void
 add_to_unaccumulated_query_list(struct brw_context *brw,
                                 struct gen_perf_query_object *obj)
 {
-   if (brw->perf_ctx.unaccumulated_elements >=
-       brw->perf_ctx.unaccumulated_array_size)
+   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
+   if (perf_ctx->unaccumulated_elements >=
+       perf_ctx->unaccumulated_array_size)
    {
-      brw->perf_ctx.unaccumulated_array_size *= 1.5;
-      brw->perf_ctx.unaccumulated =
-         reralloc(brw, brw->perf_ctx.unaccumulated,
+      perf_ctx->unaccumulated_array_size *= 1.5;
+      perf_ctx->unaccumulated =
+         reralloc(brw, perf_ctx->unaccumulated,
                   struct gen_perf_query_object *,
-                  brw->perf_ctx.unaccumulated_array_size);
+                  perf_ctx->unaccumulated_array_size);
    }
 
-   brw->perf_ctx.unaccumulated[brw->perf_ctx.unaccumulated_elements++] = obj;
+   perf_ctx->unaccumulated[perf_ctx->unaccumulated_elements++] = obj;
 }
 
 /**
@@ -277,15 +279,16 @@ static void
 drop_from_unaccumulated_query_list(struct brw_context *brw,
                                    struct gen_perf_query_object *obj)
 {
-   for (int i = 0; i < brw->perf_ctx.unaccumulated_elements; i++) {
-      if (brw->perf_ctx.unaccumulated[i] == obj) {
-         int last_elt = --brw->perf_ctx.unaccumulated_elements;
+   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
+   for (int i = 0; i < perf_ctx->unaccumulated_elements; i++) {
+      if (perf_ctx->unaccumulated[i] == obj) {
+         int last_elt = --perf_ctx->unaccumulated_elements;
 
          if (i == last_elt)
-            brw->perf_ctx.unaccumulated[i] = NULL;
+            perf_ctx->unaccumulated[i] = NULL;
          else {
-            brw->perf_ctx.unaccumulated[i] =
-               brw->perf_ctx.unaccumulated[last_elt];
+            perf_ctx->unaccumulated[i] =
+               perf_ctx->unaccumulated[last_elt];
          }
 
          break;
@@ -311,13 +314,14 @@ drop_from_unaccumulated_query_list(struct brw_context *brw,
 static bool
 inc_n_oa_users(struct brw_context *brw)
 {
-   if (brw->perf_ctx.n_oa_users == 0 &&
-       drmIoctl(brw->perf_ctx.oa_stream_fd,
+   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
+   if (perf_ctx->n_oa_users == 0 &&
+       drmIoctl(perf_ctx->oa_stream_fd,
                 I915_PERF_IOCTL_ENABLE, 0) < 0)
    {
       return false;
    }
-   ++brw->perf_ctx.n_oa_users;
+   ++perf_ctx->n_oa_users;
 
    return true;
 }
@@ -330,9 +334,10 @@ dec_n_oa_users(struct brw_context *brw)
     * MI_RPC commands at this point since they could stall the CS
     * indefinitely once OACONTROL is disabled.
     */
-   --brw->perf_ctx.n_oa_users;
-   if (brw->perf_ctx.n_oa_users == 0 &&
-       drmIoctl(brw->perf_ctx.oa_stream_fd, I915_PERF_IOCTL_DISABLE, 0) < 0)
+   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
+   --perf_ctx->n_oa_users;
+   if (perf_ctx->n_oa_users == 0 &&
+       drmIoctl(perf_ctx->oa_stream_fd, I915_PERF_IOCTL_DISABLE, 0) < 0)
    {
       DBG("WARNING: Error disabling i915 perf stream: %m\n");
    }
@@ -346,11 +351,12 @@ dec_n_oa_users(struct brw_context *brw)
 static void
 discard_all_queries(struct brw_context *brw)
 {
-   while (brw->perf_ctx.unaccumulated_elements) {
-      struct gen_perf_query_object *obj = brw->perf_ctx.unaccumulated[0];
+   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
+   while (perf_ctx->unaccumulated_elements) {
+      struct gen_perf_query_object *obj = perf_ctx->unaccumulated[0];
 
       obj->oa.results_accumulated = true;
-      drop_from_unaccumulated_query_list(brw, brw->perf_ctx.unaccumulated[0]);
+      drop_from_unaccumulated_query_list(brw, perf_ctx->unaccumulated[0]);
 
       dec_n_oa_users(brw);
    }
@@ -367,23 +373,24 @@ read_oa_samples_until(struct brw_context *brw,
                       uint32_t start_timestamp,
                       uint32_t end_timestamp)
 {
+   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
    struct exec_node *tail_node =
-      exec_list_get_tail(&brw->perf_ctx.sample_buffers);
+      exec_list_get_tail(&perf_ctx->sample_buffers);
    struct oa_sample_buf *tail_buf =
       exec_node_data(struct oa_sample_buf, tail_node, link);
    uint32_t last_timestamp = tail_buf->last_timestamp;
 
    while (1) {
-      struct oa_sample_buf *buf = gen_perf_get_free_sample_buf(&brw->perf_ctx);
+      struct oa_sample_buf *buf = gen_perf_get_free_sample_buf(perf_ctx);
       uint32_t offset;
       int len;
 
-      while ((len = read(brw->perf_ctx.oa_stream_fd, buf->buf,
+      while ((len = read(perf_ctx->oa_stream_fd, buf->buf,
                          sizeof(buf->buf))) < 0 && errno == EINTR)
          ;
 
       if (len <= 0) {
-         exec_list_push_tail(&brw->perf_ctx.free_sample_buffers, &buf->link);
+         exec_list_push_tail(&perf_ctx->free_sample_buffers, &buf->link);
 
          if (len < 0) {
             if (errno == EAGAIN)
@@ -401,7 +408,7 @@ read_oa_samples_until(struct brw_context *brw,
       }
 
       buf->len = len;
-      exec_list_push_tail(&brw->perf_ctx.sample_buffers, &buf->link);
+      exec_list_push_tail(&perf_ctx->sample_buffers, &buf->link);
 
       /* Go through the reports and update the last timestamp. */
       offset = 0;
@@ -521,7 +528,7 @@ accumulate_oa_reports(struct brw_context *brw,
    /* See if we have any periodic reports to accumulate too... */
 
    /* N.B. The oa.samples_head was set when the query began and
-    * pointed to the tail of the brw->perf_ctx.sample_buffers list at
+    * pointed to the tail of the perf_ctx->sample_buffers list at
     * the time the query started. Since the buffer existed before the
     * first MI_REPORT_PERF_COUNT command was emitted we therefore know
     * that no data in this particular node's buffer can possibly be
@@ -679,10 +686,11 @@ open_i915_perf_oa_stream(struct brw_context *brw,
       return false;
    }
 
-   brw->perf_ctx.oa_stream_fd = fd;
+   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
+   perf_ctx->oa_stream_fd = fd;
 
-   brw->perf_ctx.current_oa_metrics_set_id = metrics_set_id;
-   brw->perf_ctx.current_oa_format = report_format;
+   perf_ctx->current_oa_metrics_set_id = metrics_set_id;
+   perf_ctx->current_oa_format = report_format;
 
    return true;
 }
@@ -691,9 +699,10 @@ static void
 close_perf(struct brw_context *brw,
            const struct gen_perf_query_info *query)
 {
-   if (brw->perf_ctx.oa_stream_fd != -1) {
-      close(brw->perf_ctx.oa_stream_fd);
-      brw->perf_ctx.oa_stream_fd = -1;
+   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
+   if (perf_ctx->oa_stream_fd != -1) {
+      close(perf_ctx->oa_stream_fd);
+      perf_ctx->oa_stream_fd = -1;
    }
    if (query->kind == GEN_PERF_QUERY_TYPE_RAW) {
       struct gen_perf_query_info *raw_query =
@@ -728,7 +737,8 @@ brw_begin_perf_query(struct gl_context *ctx,
    struct brw_perf_query_object *brw_query = brw_perf_query(o);
    struct gen_perf_query_object *obj = brw_query->query;
    const struct gen_perf_query_info *query = obj->queryinfo;
-   struct gen_perf_config *perf_cfg = brw->perf_ctx.perf;
+   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
+   struct gen_perf_config *perf_cfg = perf_ctx->perf;
 
    /* We can assume the frontend hides mistaken attempts to Begin a
     * query object multiple times before its End. Similarly if an
@@ -797,21 +807,22 @@ brw_begin_perf_query(struct gl_context *ctx,
        * require a different counter set or format unless we get an opportunity
        * to close the stream and open a new one...
        */
-      uint64_t metric_id = gen_perf_query_get_metric_id(brw->perf_ctx.perf, query);
+      uint64_t metric_id = gen_perf_query_get_metric_id(perf_ctx->perf, query);
 
-      if (brw->perf_ctx.oa_stream_fd != -1 &&
-          brw->perf_ctx.current_oa_metrics_set_id != metric_id) {
+      if (perf_ctx->oa_stream_fd != -1 &&
+          perf_ctx->current_oa_metrics_set_id != metric_id) {
 
-         if (brw->perf_ctx.n_oa_users != 0) {
+         if (perf_ctx->n_oa_users != 0) {
             DBG("WARNING: Begin(%d) failed already using perf config=%i/%"PRIu64"\n",
-                o->Id, brw->perf_ctx.current_oa_metrics_set_id, metric_id);
+                o->Id, perf_ctx->current_oa_metrics_set_id, metric_id);
             return false;
          } else
             close_perf(brw, query);
       }
 
       /* If the OA counters aren't already on, enable them. */
-      if (brw->perf_ctx.oa_stream_fd == -1) {
+
+      if (perf_ctx->oa_stream_fd == -1) {
          __DRIscreen *screen = brw->screen->driScrnPriv;
          const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
@@ -839,12 +850,12 @@ brw_begin_perf_query(struct gl_context *ctx,
             a_counter_in_bits = 40;
 
          uint64_t overflow_period = pow(2, a_counter_in_bits) /
-            (brw->perf_ctx.perf->sys_vars.n_eus *
+            (perf_cfg->sys_vars.n_eus *
              /* drop 1GHz freq to have units in nanoseconds */
              2);
 
          DBG("A counter overflow period: %"PRIu64"ns, %"PRIu64"ms (n_eus=%"PRIu64")\n",
-             overflow_period, overflow_period / 1000000ul, brw->perf_ctx.perf->sys_vars.n_eus);
+             overflow_period, overflow_period / 1000000ul, perf_cfg->sys_vars.n_eus);
 
          int period_exponent = 0;
          uint64_t prev_sample_period, next_sample_period;
@@ -876,8 +887,8 @@ brw_begin_perf_query(struct gl_context *ctx,
                                        brw->hw_ctx))
             return false;
       } else {
-         assert(brw->perf_ctx.current_oa_metrics_set_id == metric_id &&
-                brw->perf_ctx.current_oa_format == query->oa_format);
+         assert(perf_ctx->current_oa_metrics_set_id == metric_id &&
+                perf_ctx->current_oa_format == query->oa_format);
       }
 
       if (!inc_n_oa_users(brw)) {
@@ -886,7 +897,7 @@ brw_begin_perf_query(struct gl_context *ctx,
       }
 
       if (obj->oa.bo) {
-         brw->perf_ctx.perf->vtbl.bo_unreference(obj->oa.bo);
+         perf_cfg->vtbl.bo_unreference(obj->oa.bo);
          obj->oa.bo = NULL;
       }
 
@@ -901,8 +912,8 @@ brw_begin_perf_query(struct gl_context *ctx,
       brw_bo_unmap(obj->oa.bo);
 #endif
 
-      obj->oa.begin_report_id = brw->perf_ctx.next_query_start_report_id;
-      brw->perf_ctx.next_query_start_report_id += 2;
+      obj->oa.begin_report_id = perf_ctx->next_query_start_report_id;
+      perf_ctx->next_query_start_report_id += 2;
 
       /* We flush the batchbuffer here to minimize the chances that MI_RPC
        * delimiting commands end up in different batchbuffers. If that's the
@@ -913,20 +924,20 @@ brw_begin_perf_query(struct gl_context *ctx,
       perf_cfg->vtbl.batchbuffer_flush(brw, __FILE__, __LINE__);
 
       /* Take a starting OA counter snapshot. */
-      brw->perf_ctx.perf->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0,
-                                                         obj->oa.begin_report_id);
+      perf_cfg->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0,
+                                            obj->oa.begin_report_id);
       perf_cfg->vtbl.capture_frequency_stat_register(brw, obj->oa.bo,
                                                      MI_FREQ_START_OFFSET_BYTES);
 
-      ++brw->perf_ctx.n_active_oa_queries;
+      ++perf_ctx->n_active_oa_queries;
 
       /* No already-buffered samples can possibly be associated with this query
        * so create a marker within the list of sample buffers enabling us to
        * easily ignore earlier samples when processing this query after
        * completion.
        */
-      assert(!exec_list_is_empty(&brw->perf_ctx.sample_buffers));
-      obj->oa.samples_head = exec_list_get_tail(&brw->perf_ctx.sample_buffers);
+      assert(!exec_list_is_empty(&perf_ctx->sample_buffers));
+      obj->oa.samples_head = exec_list_get_tail(&perf_ctx->sample_buffers);
 
       struct oa_sample_buf *buf =
          exec_node_data(struct oa_sample_buf, obj->oa.samples_head, link);
@@ -958,7 +969,7 @@ brw_begin_perf_query(struct gl_context *ctx,
       /* Take starting snapshots. */
       gen_perf_snapshot_statistics_registers(brw, perf_cfg, obj, 0);
 
-      ++brw->perf_ctx.n_active_pipeline_stats_queries;
+      ++perf_ctx->n_active_pipeline_stats_queries;
       break;
 
    default:
@@ -983,6 +994,7 @@ brw_end_perf_query(struct gl_context *ctx,
    struct brw_perf_query_object *brw_query = brw_perf_query(o);
    struct gen_perf_query_object *obj = brw_query->query;
    struct gen_perf_config *perf_cfg = brw->perf_ctx.perf;
+   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
 
    DBG("End(%d)\n", o->Id);
 
@@ -1012,7 +1024,7 @@ brw_end_perf_query(struct gl_context *ctx,
                                              obj->oa.begin_report_id + 1);
       }
 
-      --brw->perf_ctx.n_active_oa_queries;
+      --perf_ctx->n_active_oa_queries;
 
       /* NB: even though the query has now ended, it can't be accumulated
        * until the end MI_REPORT_PERF_COUNT snapshot has been written
@@ -1023,7 +1035,7 @@ brw_end_perf_query(struct gl_context *ctx,
    case GEN_PERF_QUERY_TYPE_PIPELINE:
       gen_perf_snapshot_statistics_registers(brw, perf_cfg, obj,
                                              STATS_BO_END_OFFSET_BYTES);
-      --brw->perf_ctx.n_active_pipeline_stats_queries;
+      --perf_ctx->n_active_pipeline_stats_queries;
       break;
 
    default:
@@ -1296,8 +1308,9 @@ static struct gl_perf_query_object *
 brw_new_perf_query_object(struct gl_context *ctx, unsigned query_index)
 {
    struct brw_context *brw = brw_context(ctx);
+   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
    const struct gen_perf_query_info *queryinfo =
-      &brw->perf_ctx.perf->queries[query_index];
+      &perf_ctx->perf->queries[query_index];
    struct gen_perf_query_object *obj =
       calloc(1, sizeof(struct gen_perf_query_object));
 
@@ -1306,7 +1319,7 @@ brw_new_perf_query_object(struct gl_context *ctx, unsigned query_index)
 
    obj->queryinfo = queryinfo;
 
-   brw->perf_ctx.n_query_instances++;
+   perf_ctx->n_query_instances++;
 
    struct brw_perf_query_object *brw_query = calloc(1, sizeof(struct brw_perf_query_object));
    if (unlikely(!brw_query))
@@ -1326,6 +1339,7 @@ brw_delete_perf_query(struct gl_context *ctx,
    struct gen_perf_config *perf_cfg = brw->perf_ctx.perf;
    struct brw_perf_query_object *brw_query = brw_perf_query(o);
    struct gen_perf_query_object *obj = brw_query->query;
+   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
 
    /* We can assume that the frontend waits for a query to complete
     * before ever calling into here, so we don't have to worry about
@@ -1368,8 +1382,8 @@ brw_delete_perf_query(struct gl_context *ctx,
     * longer in use, it's a good time to free our cache of sample
     * buffers and close any current i915-perf stream.
     */
-   if (--brw->perf_ctx.n_query_instances == 0) {
-      gen_perf_free_sample_bufs(&brw->perf_ctx);
+   if (--perf_ctx->n_query_instances == 0) {
+      gen_perf_free_sample_bufs(perf_ctx);
       close_perf(brw, obj->queryinfo);
    }
 
@@ -1547,12 +1561,13 @@ brw_init_perf_query_info(struct gl_context *ctx)
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
    __DRIscreen *screen = brw->screen->driScrnPriv;
 
-   struct gen_perf_config *perf_cfg = brw->perf_ctx.perf;
-   if (perf_cfg)
-      return perf_cfg->n_queries;
+   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
+   if (perf_ctx->perf)
+      return perf_ctx->perf->n_queries;
+
+   perf_ctx->perf = gen_perf_new(brw);
+   struct gen_perf_config *perf_cfg = perf_ctx->perf;
 
-   perf_cfg = gen_perf_new(brw);
-   brw->perf_ctx.perf = perf_cfg;
    perf_cfg->vtbl.bo_alloc = brw_oa_bo_alloc;
    perf_cfg->vtbl.bo_unreference = (bo_unreference_t)brw_bo_unreference;
    perf_cfg->vtbl.emit_mi_report_perf_count =
@@ -1569,16 +1584,15 @@ brw_init_perf_query_info(struct gl_context *ctx)
 
    if ((oa_metrics_kernel_support(screen->fd, devinfo)) &&
        (gen_perf_load_oa_metrics(perf_cfg, screen->fd, devinfo)))
-      gen_perf_query_register_mdapi_oa_query(&brw->screen->devinfo,
-                                             brw->perf_ctx.perf);
+      gen_perf_query_register_mdapi_oa_query(devinfo, perf_cfg);
 
-   brw->perf_ctx.unaccumulated =
+   perf_ctx->unaccumulated =
       ralloc_array(brw, struct gen_perf_query_object *, 2);
-   brw->perf_ctx.unaccumulated_elements = 0;
-   brw->perf_ctx.unaccumulated_array_size = 2;
+   perf_ctx->unaccumulated_elements = 0;
+   perf_ctx->unaccumulated_array_size = 2;
 
-   exec_list_make_empty(&brw->perf_ctx.sample_buffers);
-   exec_list_make_empty(&brw->perf_ctx.free_sample_buffers);
+   exec_list_make_empty(&perf_ctx->sample_buffers);
+   exec_list_make_empty(&perf_ctx->free_sample_buffers);
 
    /* It's convenient to guarantee that this linked list of sample
     * buffers is never empty so we add an empty head so when we
@@ -1586,11 +1600,11 @@ brw_init_perf_query_info(struct gl_context *ctx)
     * in this list.
     */
    struct oa_sample_buf *buf = gen_perf_get_free_sample_buf(&brw->perf_ctx);
-   exec_list_push_head(&brw->perf_ctx.sample_buffers, &buf->link);
+   exec_list_push_head(&perf_ctx->sample_buffers, &buf->link);
 
-   brw->perf_ctx.oa_stream_fd = -1;
+   perf_ctx->oa_stream_fd = -1;
 
-   brw->perf_ctx.next_query_start_report_id = 1000;
+   perf_ctx->next_query_start_report_id = 1000;
 
    return perf_cfg->n_queries;
 }
-- 
2.30.2