From 52f7a0bff7d4065de1ce4b91a767555dee811751 Mon Sep 17 00:00:00 2001 From: Mark Janes Date: Wed, 26 Jun 2019 11:01:48 -0700 Subject: [PATCH] intel/perf: use temporary pointers to simplify access to perf state Most accesses to perf state were made through repeated dereferences of brw_context members. Preferring temporary variables of perf_ctx and perf_cfg has the following advantages: - more concise implementation - easier refactor when moving subsequent methods to perf Reviewed-by: Kenneth Graunke --- .../drivers/dri/i965/brw_performance_query.c | 170 ++++++++++-------- 1 file changed, 92 insertions(+), 78 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 11c2111e2ba..63752e5eaca 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -161,8 +161,9 @@ brw_get_perf_query_info(struct gl_context *ctx, GLuint *n_active) { struct brw_context *brw = brw_context(ctx); + struct gen_perf_context *perf_ctx = &brw->perf_ctx; const struct gen_perf_query_info *query = - &brw->perf_ctx.perf->queries[query_index]; + &perf_ctx->perf->queries[query_index]; *name = query->name; *data_size = query->data_size; @@ -171,11 +172,11 @@ brw_get_perf_query_info(struct gl_context *ctx, switch (query->kind) { case GEN_PERF_QUERY_TYPE_OA: case GEN_PERF_QUERY_TYPE_RAW: - *n_active = brw->perf_ctx.n_active_oa_queries; + *n_active = perf_ctx->n_active_oa_queries; break; case GEN_PERF_QUERY_TYPE_PIPELINE: - *n_active = brw->perf_ctx.n_active_pipeline_stats_queries; + *n_active = perf_ctx->n_active_pipeline_stats_queries; break; default: @@ -254,17 +255,18 @@ static void add_to_unaccumulated_query_list(struct brw_context *brw, struct gen_perf_query_object *obj) { - if (brw->perf_ctx.unaccumulated_elements >= - brw->perf_ctx.unaccumulated_array_size) + struct gen_perf_context *perf_ctx = &brw->perf_ctx; + if (perf_ctx->unaccumulated_elements >= + 
perf_ctx->unaccumulated_array_size) { - brw->perf_ctx.unaccumulated_array_size *= 1.5; - brw->perf_ctx.unaccumulated = - reralloc(brw, brw->perf_ctx.unaccumulated, + perf_ctx->unaccumulated_array_size *= 1.5; + perf_ctx->unaccumulated = + reralloc(brw, perf_ctx->unaccumulated, struct gen_perf_query_object *, - brw->perf_ctx.unaccumulated_array_size); + perf_ctx->unaccumulated_array_size); } - brw->perf_ctx.unaccumulated[brw->perf_ctx.unaccumulated_elements++] = obj; + perf_ctx->unaccumulated[perf_ctx->unaccumulated_elements++] = obj; } /** @@ -277,15 +279,16 @@ static void drop_from_unaccumulated_query_list(struct brw_context *brw, struct gen_perf_query_object *obj) { - for (int i = 0; i < brw->perf_ctx.unaccumulated_elements; i++) { - if (brw->perf_ctx.unaccumulated[i] == obj) { - int last_elt = --brw->perf_ctx.unaccumulated_elements; + struct gen_perf_context *perf_ctx = &brw->perf_ctx; + for (int i = 0; i < perf_ctx->unaccumulated_elements; i++) { + if (perf_ctx->unaccumulated[i] == obj) { + int last_elt = --perf_ctx->unaccumulated_elements; if (i == last_elt) - brw->perf_ctx.unaccumulated[i] = NULL; + perf_ctx->unaccumulated[i] = NULL; else { - brw->perf_ctx.unaccumulated[i] = - brw->perf_ctx.unaccumulated[last_elt]; + perf_ctx->unaccumulated[i] = + perf_ctx->unaccumulated[last_elt]; } break; @@ -311,13 +314,14 @@ drop_from_unaccumulated_query_list(struct brw_context *brw, static bool inc_n_oa_users(struct brw_context *brw) { - if (brw->perf_ctx.n_oa_users == 0 && - drmIoctl(brw->perf_ctx.oa_stream_fd, + struct gen_perf_context *perf_ctx = &brw->perf_ctx; + if (perf_ctx->n_oa_users == 0 && + drmIoctl(perf_ctx->oa_stream_fd, I915_PERF_IOCTL_ENABLE, 0) < 0) { return false; } - ++brw->perf_ctx.n_oa_users; + ++perf_ctx->n_oa_users; return true; } @@ -330,9 +334,10 @@ dec_n_oa_users(struct brw_context *brw) * MI_RPC commands at this point since they could stall the CS * indefinitely once OACONTROL is disabled. 
*/ - --brw->perf_ctx.n_oa_users; - if (brw->perf_ctx.n_oa_users == 0 && - drmIoctl(brw->perf_ctx.oa_stream_fd, I915_PERF_IOCTL_DISABLE, 0) < 0) + struct gen_perf_context *perf_ctx = &brw->perf_ctx; + --perf_ctx->n_oa_users; + if (perf_ctx->n_oa_users == 0 && + drmIoctl(perf_ctx->oa_stream_fd, I915_PERF_IOCTL_DISABLE, 0) < 0) { DBG("WARNING: Error disabling i915 perf stream: %m\n"); } @@ -346,11 +351,12 @@ dec_n_oa_users(struct brw_context *brw) static void discard_all_queries(struct brw_context *brw) { - while (brw->perf_ctx.unaccumulated_elements) { - struct gen_perf_query_object *obj = brw->perf_ctx.unaccumulated[0]; + struct gen_perf_context *perf_ctx = &brw->perf_ctx; + while (perf_ctx->unaccumulated_elements) { + struct gen_perf_query_object *obj = perf_ctx->unaccumulated[0]; obj->oa.results_accumulated = true; - drop_from_unaccumulated_query_list(brw, brw->perf_ctx.unaccumulated[0]); + drop_from_unaccumulated_query_list(brw, perf_ctx->unaccumulated[0]); dec_n_oa_users(brw); } @@ -367,23 +373,24 @@ read_oa_samples_until(struct brw_context *brw, uint32_t start_timestamp, uint32_t end_timestamp) { + struct gen_perf_context *perf_ctx = &brw->perf_ctx; struct exec_node *tail_node = - exec_list_get_tail(&brw->perf_ctx.sample_buffers); + exec_list_get_tail(&perf_ctx->sample_buffers); struct oa_sample_buf *tail_buf = exec_node_data(struct oa_sample_buf, tail_node, link); uint32_t last_timestamp = tail_buf->last_timestamp; while (1) { - struct oa_sample_buf *buf = gen_perf_get_free_sample_buf(&brw->perf_ctx); + struct oa_sample_buf *buf = gen_perf_get_free_sample_buf(perf_ctx); uint32_t offset; int len; - while ((len = read(brw->perf_ctx.oa_stream_fd, buf->buf, + while ((len = read(perf_ctx->oa_stream_fd, buf->buf, sizeof(buf->buf))) < 0 && errno == EINTR) ; if (len <= 0) { - exec_list_push_tail(&brw->perf_ctx.free_sample_buffers, &buf->link); + exec_list_push_tail(&perf_ctx->free_sample_buffers, &buf->link); if (len < 0) { if (errno == EAGAIN) @@ -401,7 +408,7 @@ 
read_oa_samples_until(struct brw_context *brw, } buf->len = len; - exec_list_push_tail(&brw->perf_ctx.sample_buffers, &buf->link); + exec_list_push_tail(&perf_ctx->sample_buffers, &buf->link); /* Go through the reports and update the last timestamp. */ offset = 0; @@ -521,7 +528,7 @@ accumulate_oa_reports(struct brw_context *brw, /* See if we have any periodic reports to accumulate too... */ /* N.B. The oa.samples_head was set when the query began and - * pointed to the tail of the brw->perf_ctx.sample_buffers list at + * pointed to the tail of the perf_ctx->sample_buffers list at * the time the query started. Since the buffer existed before the * first MI_REPORT_PERF_COUNT command was emitted we therefore know * that no data in this particular node's buffer can possibly be @@ -679,10 +686,11 @@ open_i915_perf_oa_stream(struct brw_context *brw, return false; } - brw->perf_ctx.oa_stream_fd = fd; + struct gen_perf_context *perf_ctx = &brw->perf_ctx; + perf_ctx->oa_stream_fd = fd; - brw->perf_ctx.current_oa_metrics_set_id = metrics_set_id; - brw->perf_ctx.current_oa_format = report_format; + perf_ctx->current_oa_metrics_set_id = metrics_set_id; + perf_ctx->current_oa_format = report_format; return true; } @@ -691,9 +699,10 @@ static void close_perf(struct brw_context *brw, const struct gen_perf_query_info *query) { - if (brw->perf_ctx.oa_stream_fd != -1) { - close(brw->perf_ctx.oa_stream_fd); - brw->perf_ctx.oa_stream_fd = -1; + struct gen_perf_context *perf_ctx = &brw->perf_ctx; + if (perf_ctx->oa_stream_fd != -1) { + close(perf_ctx->oa_stream_fd); + perf_ctx->oa_stream_fd = -1; } if (query->kind == GEN_PERF_QUERY_TYPE_RAW) { struct gen_perf_query_info *raw_query = @@ -728,7 +737,8 @@ brw_begin_perf_query(struct gl_context *ctx, struct brw_perf_query_object *brw_query = brw_perf_query(o); struct gen_perf_query_object *obj = brw_query->query; const struct gen_perf_query_info *query = obj->queryinfo; - struct gen_perf_config *perf_cfg = brw->perf_ctx.perf; + struct 
gen_perf_context *perf_ctx = &brw->perf_ctx; + struct gen_perf_config *perf_cfg = perf_ctx->perf; /* We can assume the frontend hides mistaken attempts to Begin a * query object multiple times before its End. Similarly if an @@ -797,21 +807,22 @@ brw_begin_perf_query(struct gl_context *ctx, * require a different counter set or format unless we get an opportunity * to close the stream and open a new one... */ - uint64_t metric_id = gen_perf_query_get_metric_id(brw->perf_ctx.perf, query); + uint64_t metric_id = gen_perf_query_get_metric_id(perf_ctx->perf, query); - if (brw->perf_ctx.oa_stream_fd != -1 && - brw->perf_ctx.current_oa_metrics_set_id != metric_id) { + if (perf_ctx->oa_stream_fd != -1 && + perf_ctx->current_oa_metrics_set_id != metric_id) { - if (brw->perf_ctx.n_oa_users != 0) { + if (perf_ctx->n_oa_users != 0) { DBG("WARNING: Begin(%d) failed already using perf config=%i/%"PRIu64"\n", - o->Id, brw->perf_ctx.current_oa_metrics_set_id, metric_id); + o->Id, perf_ctx->current_oa_metrics_set_id, metric_id); return false; } else close_perf(brw, query); } /* If the OA counters aren't already on, enable them. 
*/ - if (brw->perf_ctx.oa_stream_fd == -1) { + + if (perf_ctx->oa_stream_fd == -1) { __DRIscreen *screen = brw->screen->driScrnPriv; const struct gen_device_info *devinfo = &brw->screen->devinfo; @@ -839,12 +850,12 @@ brw_begin_perf_query(struct gl_context *ctx, a_counter_in_bits = 40; uint64_t overflow_period = pow(2, a_counter_in_bits) / - (brw->perf_ctx.perf->sys_vars.n_eus * + (perf_cfg->sys_vars.n_eus * /* drop 1GHz freq to have units in nanoseconds */ 2); DBG("A counter overflow period: %"PRIu64"ns, %"PRIu64"ms (n_eus=%"PRIu64")\n", - overflow_period, overflow_period / 1000000ul, brw->perf_ctx.perf->sys_vars.n_eus); + overflow_period, overflow_period / 1000000ul, perf_cfg->sys_vars.n_eus); int period_exponent = 0; uint64_t prev_sample_period, next_sample_period; @@ -876,8 +887,8 @@ brw_begin_perf_query(struct gl_context *ctx, brw->hw_ctx)) return false; } else { - assert(brw->perf_ctx.current_oa_metrics_set_id == metric_id && - brw->perf_ctx.current_oa_format == query->oa_format); + assert(perf_ctx->current_oa_metrics_set_id == metric_id && + perf_ctx->current_oa_format == query->oa_format); } if (!inc_n_oa_users(brw)) { @@ -886,7 +897,7 @@ brw_begin_perf_query(struct gl_context *ctx, } if (obj->oa.bo) { - brw->perf_ctx.perf->vtbl.bo_unreference(obj->oa.bo); + perf_cfg->vtbl.bo_unreference(obj->oa.bo); obj->oa.bo = NULL; } @@ -901,8 +912,8 @@ brw_begin_perf_query(struct gl_context *ctx, brw_bo_unmap(obj->oa.bo); #endif - obj->oa.begin_report_id = brw->perf_ctx.next_query_start_report_id; - brw->perf_ctx.next_query_start_report_id += 2; + obj->oa.begin_report_id = perf_ctx->next_query_start_report_id; + perf_ctx->next_query_start_report_id += 2; /* We flush the batchbuffer here to minimize the chances that MI_RPC * delimiting commands end up in different batchbuffers. If that's the @@ -913,20 +924,20 @@ brw_begin_perf_query(struct gl_context *ctx, perf_cfg->vtbl.batchbuffer_flush(brw, __FILE__, __LINE__); /* Take a starting OA counter snapshot. 
*/ - brw->perf_ctx.perf->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0, - obj->oa.begin_report_id); + perf_cfg->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0, + obj->oa.begin_report_id); perf_cfg->vtbl.capture_frequency_stat_register(brw, obj->oa.bo, MI_FREQ_START_OFFSET_BYTES); - ++brw->perf_ctx.n_active_oa_queries; + ++perf_ctx->n_active_oa_queries; /* No already-buffered samples can possibly be associated with this query * so create a marker within the list of sample buffers enabling us to * easily ignore earlier samples when processing this query after * completion. */ - assert(!exec_list_is_empty(&brw->perf_ctx.sample_buffers)); - obj->oa.samples_head = exec_list_get_tail(&brw->perf_ctx.sample_buffers); + assert(!exec_list_is_empty(&perf_ctx->sample_buffers)); + obj->oa.samples_head = exec_list_get_tail(&perf_ctx->sample_buffers); struct oa_sample_buf *buf = exec_node_data(struct oa_sample_buf, obj->oa.samples_head, link); @@ -958,7 +969,7 @@ brw_begin_perf_query(struct gl_context *ctx, /* Take starting snapshots. 
*/ gen_perf_snapshot_statistics_registers(brw, perf_cfg, obj, 0); - ++brw->perf_ctx.n_active_pipeline_stats_queries; + ++perf_ctx->n_active_pipeline_stats_queries; break; default: @@ -983,6 +994,7 @@ brw_end_perf_query(struct gl_context *ctx, struct brw_perf_query_object *brw_query = brw_perf_query(o); struct gen_perf_query_object *obj = brw_query->query; struct gen_perf_config *perf_cfg = brw->perf_ctx.perf; + struct gen_perf_context *perf_ctx = &brw->perf_ctx; DBG("End(%d)\n", o->Id); @@ -1012,7 +1024,7 @@ brw_end_perf_query(struct gl_context *ctx, obj->oa.begin_report_id + 1); } - --brw->perf_ctx.n_active_oa_queries; + --perf_ctx->n_active_oa_queries; /* NB: even though the query has now ended, it can't be accumulated * until the end MI_REPORT_PERF_COUNT snapshot has been written @@ -1023,7 +1035,7 @@ brw_end_perf_query(struct gl_context *ctx, case GEN_PERF_QUERY_TYPE_PIPELINE: gen_perf_snapshot_statistics_registers(brw, perf_cfg, obj, STATS_BO_END_OFFSET_BYTES); - --brw->perf_ctx.n_active_pipeline_stats_queries; + --perf_ctx->n_active_pipeline_stats_queries; break; default: @@ -1296,8 +1308,9 @@ static struct gl_perf_query_object * brw_new_perf_query_object(struct gl_context *ctx, unsigned query_index) { struct brw_context *brw = brw_context(ctx); + struct gen_perf_context *perf_ctx = &brw->perf_ctx; const struct gen_perf_query_info *queryinfo = - &brw->perf_ctx.perf->queries[query_index]; + &perf_ctx->perf->queries[query_index]; struct gen_perf_query_object *obj = calloc(1, sizeof(struct gen_perf_query_object)); @@ -1306,7 +1319,7 @@ brw_new_perf_query_object(struct gl_context *ctx, unsigned query_index) obj->queryinfo = queryinfo; - brw->perf_ctx.n_query_instances++; + perf_ctx->n_query_instances++; struct brw_perf_query_object *brw_query = calloc(1, sizeof(struct brw_perf_query_object)); if (unlikely(!brw_query)) @@ -1326,6 +1339,7 @@ brw_delete_perf_query(struct gl_context *ctx, struct gen_perf_config *perf_cfg = brw->perf_ctx.perf; struct 
brw_perf_query_object *brw_query = brw_perf_query(o); struct gen_perf_query_object *obj = brw_query->query; + struct gen_perf_context *perf_ctx = &brw->perf_ctx; /* We can assume that the frontend waits for a query to complete * before ever calling into here, so we don't have to worry about @@ -1368,8 +1382,8 @@ brw_delete_perf_query(struct gl_context *ctx, * longer in use, it's a good time to free our cache of sample * buffers and close any current i915-perf stream. */ - if (--brw->perf_ctx.n_query_instances == 0) { - gen_perf_free_sample_bufs(&brw->perf_ctx); + if (--perf_ctx->n_query_instances == 0) { + gen_perf_free_sample_bufs(perf_ctx); close_perf(brw, obj->queryinfo); } @@ -1547,12 +1561,13 @@ brw_init_perf_query_info(struct gl_context *ctx) const struct gen_device_info *devinfo = &brw->screen->devinfo; __DRIscreen *screen = brw->screen->driScrnPriv; - struct gen_perf_config *perf_cfg = brw->perf_ctx.perf; - if (perf_cfg) - return perf_cfg->n_queries; + struct gen_perf_context *perf_ctx = &brw->perf_ctx; + if (perf_ctx->perf) + return perf_ctx->perf->n_queries; + + perf_ctx->perf = gen_perf_new(brw); + struct gen_perf_config *perf_cfg = perf_ctx->perf; - perf_cfg = gen_perf_new(brw); - brw->perf_ctx.perf = perf_cfg; perf_cfg->vtbl.bo_alloc = brw_oa_bo_alloc; perf_cfg->vtbl.bo_unreference = (bo_unreference_t)brw_bo_unreference; perf_cfg->vtbl.emit_mi_report_perf_count = @@ -1569,16 +1584,15 @@ brw_init_perf_query_info(struct gl_context *ctx) if ((oa_metrics_kernel_support(screen->fd, devinfo)) && (gen_perf_load_oa_metrics(perf_cfg, screen->fd, devinfo))) - gen_perf_query_register_mdapi_oa_query(&brw->screen->devinfo, - brw->perf_ctx.perf); + gen_perf_query_register_mdapi_oa_query(devinfo, perf_cfg); - brw->perf_ctx.unaccumulated = + perf_ctx->unaccumulated = ralloc_array(brw, struct gen_perf_query_object *, 2); - brw->perf_ctx.unaccumulated_elements = 0; - brw->perf_ctx.unaccumulated_array_size = 2; + perf_ctx->unaccumulated_elements = 0; + 
perf_ctx->unaccumulated_array_size = 2; - exec_list_make_empty(&brw->perf_ctx.sample_buffers); - exec_list_make_empty(&brw->perf_ctx.free_sample_buffers); + exec_list_make_empty(&perf_ctx->sample_buffers); + exec_list_make_empty(&perf_ctx->free_sample_buffers); /* It's convenient to guarantee that this linked list of sample * buffers is never empty so we add an empty head so when we @@ -1586,11 +1600,11 @@ brw_init_perf_query_info(struct gl_context *ctx) * in this list. */ struct oa_sample_buf *buf = gen_perf_get_free_sample_buf(&brw->perf_ctx); - exec_list_push_head(&brw->perf_ctx.sample_buffers, &buf->link); + exec_list_push_head(&perf_ctx->sample_buffers, &buf->link); - brw->perf_ctx.oa_stream_fd = -1; + perf_ctx->oa_stream_fd = -1; - brw->perf_ctx.next_query_start_report_id = 1000; + perf_ctx->next_query_start_report_id = 1000; return perf_cfg->n_queries; } -- 2.30.2