From afdc0121b55e8543118c003307b20d90e14329e7 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 16 Dec 2019 16:11:40 +0200 Subject: [PATCH] i965/iris/perf: factor out frequency register capture Signed-off-by: Lionel Landwerlin Reviewed-by: Mark Janes Tested-by: Marge Bot Part-of: --- src/gallium/drivers/iris/iris_perf.c | 39 +++++++----------- src/intel/perf/gen_perf.c | 34 +++++++++++----- src/intel/perf/gen_perf.h | 4 +- .../drivers/dri/i965/brw_performance_query.c | 40 +++++++++---------- 4 files changed, 56 insertions(+), 61 deletions(-) diff --git a/src/gallium/drivers/iris/iris_perf.c b/src/gallium/drivers/iris/iris_perf.c index 1e5ec8140dc..784890e4728 100644 --- a/src/gallium/drivers/iris/iris_perf.c +++ b/src/gallium/drivers/iris/iris_perf.c @@ -57,27 +57,18 @@ iris_perf_batchbuffer_flush(void *c, const char *file, int line) } static void -iris_perf_capture_frequency_stat_register(void *ctx, - void *bo, - uint32_t bo_offset) +iris_perf_store_register_mem(void *ctx, void *bo, + uint32_t reg, uint32_t reg_size, + uint32_t offset) { struct iris_context *ice = ctx; struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER]; - struct gen_device_info *devinfo = &batch->screen->devinfo; - - if (devinfo->gen == 8 && !devinfo->is_cherryview) - ice->vtbl.store_register_mem32(batch, GEN7_RPSTAT1, bo, bo_offset, false); - else if (devinfo->gen >= 9) - ice->vtbl.store_register_mem32(batch, GEN9_RPSTAT0, bo, bo_offset, false); -} - -static void -iris_perf_store_register_mem64(void *ctx, void *bo, - uint32_t reg, uint32_t offset) -{ - struct iris_context *ice = ctx; - struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER]; - ice->vtbl.store_register_mem64(batch, reg, bo, offset, false); + if (reg_size == 8) { + ice->vtbl.store_register_mem64(batch, reg, bo, offset, false); + } else { + assert(reg_size == 4); + ice->vtbl.store_register_mem32(batch, reg, bo, offset, false); + } } typedef void (*bo_unreference_t)(void *); @@ -85,9 +76,9 @@ typedef void *(*bo_map_t)(void *, void *, unsigned flags); typedef void (*bo_unmap_t)(void *); typedef void (*emit_mi_report_t)(void *, void *, uint32_t, uint32_t); typedef void (*emit_mi_flush_t)(void *); -typedef void (*capture_frequency_stat_register_t)(void *, void *, uint32_t ); -typedef void (*store_register_mem64_t)(void *ctx, void *bo, - uint32_t reg, uint32_t offset); +typedef void (*store_register_mem_t)(void *ctx, void *bo, + uint32_t reg, uint32_t reg_size, + uint32_t offset); typedef bool (*batch_references_t)(void *batch, void *bo); typedef void (*bo_wait_rendering_t)(void *bo); typedef int (*bo_busy_t)(void *bo); @@ -105,10 +96,8 @@ iris_perf_init_vtbl(struct gen_perf_config *perf_cfg) perf_cfg->vtbl.emit_mi_report_perf_count = (emit_mi_report_t)iris_perf_emit_mi_report_perf_count; perf_cfg->vtbl.batchbuffer_flush = iris_perf_batchbuffer_flush; - perf_cfg->vtbl.capture_frequency_stat_register = - (capture_frequency_stat_register_t) iris_perf_capture_frequency_stat_register; - perf_cfg->vtbl.store_register_mem64 = - (store_register_mem64_t) iris_perf_store_register_mem64; + perf_cfg->vtbl.store_register_mem = + (store_register_mem_t) iris_perf_store_register_mem; perf_cfg->vtbl.batch_references = (batch_references_t)iris_batch_references; perf_cfg->vtbl.bo_wait_rendering = (bo_wait_rendering_t)iris_bo_wait_rendering; diff --git a/src/intel/perf/gen_perf.c b/src/intel/perf/gen_perf.c index 9e987d599d7..7ab5e685104 100644 --- a/src/intel/perf/gen_perf.c +++ b/src/intel/perf/gen_perf.c @@ -1512,11 +1512,11 @@ free_sample_bufs(struct gen_perf_context *perf_ctx) * pipeline statistics for the performance query object. */ static void -snapshot_statistics_registers(void *context, - struct gen_perf_config *perf, +snapshot_statistics_registers(struct gen_perf_context *ctx, struct gen_perf_query_object *obj, uint32_t offset_in_bytes) { + struct gen_perf_config *perf = ctx->perf; const struct gen_perf_query_info *query = obj->queryinfo; const int n_counters = query->n_counters; @@ -1525,12 +1525,26 @@ snapshot_statistics_registers(void *context, assert(counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_UINT64); - perf->vtbl.store_register_mem64(context, obj->pipeline_stats.bo, - counter->pipeline_stat.reg, - offset_in_bytes + i * sizeof(uint64_t)); + perf->vtbl.store_register_mem(ctx->ctx, obj->pipeline_stats.bo, + counter->pipeline_stat.reg, 8, + offset_in_bytes + i * sizeof(uint64_t)); } } +static void +snapshot_freq_register(struct gen_perf_context *ctx, + struct gen_perf_query_object *query, + uint32_t bo_offset) +{ + struct gen_perf_config *perf = ctx->perf; + const struct gen_device_info *devinfo = ctx->devinfo; + + if (devinfo->gen == 8 && !devinfo->is_cherryview) + perf->vtbl.store_register_mem(ctx->ctx, query->oa.bo, GEN7_RPSTAT1, 4, bo_offset); + else if (devinfo->gen >= 9) + perf->vtbl.store_register_mem(ctx->ctx, query->oa.bo, GEN9_RPSTAT0, 4, bo_offset); +} + static void gen_perf_close(struct gen_perf_context *perfquery, const struct gen_perf_query_info *query) @@ -1848,8 +1862,7 @@ gen_perf_begin_query(struct gen_perf_context *perf_ctx, /* Take a starting OA counter snapshot. */ perf_cfg->vtbl.emit_mi_report_perf_count(perf_ctx->ctx, query->oa.bo, 0, query->oa.begin_report_id); - perf_cfg->vtbl.capture_frequency_stat_register(perf_ctx->ctx, query->oa.bo, - MI_FREQ_START_OFFSET_BYTES); + snapshot_freq_register(perf_ctx, query, MI_FREQ_START_OFFSET_BYTES); ++perf_ctx->n_active_oa_queries; @@ -1889,7 +1902,7 @@ gen_perf_begin_query(struct gen_perf_context *perf_ctx, STATS_BO_SIZE); /* Take starting snapshots. */ - snapshot_statistics_registers(perf_ctx->ctx , perf_cfg, query, 0); + snapshot_statistics_registers(perf_ctx, query, 0); ++perf_ctx->n_active_pipeline_stats_queries; break; @@ -1927,8 +1940,7 @@ gen_perf_end_query(struct gen_perf_context *perf_ctx, */ if (!query->oa.results_accumulated) { /* Take an ending OA counter snapshot. */ - perf_cfg->vtbl.capture_frequency_stat_register(perf_ctx->ctx, query->oa.bo, - MI_FREQ_END_OFFSET_BYTES); + snapshot_freq_register(perf_ctx, query, MI_FREQ_END_OFFSET_BYTES); perf_cfg->vtbl.emit_mi_report_perf_count(perf_ctx->ctx, query->oa.bo, MI_RPC_BO_END_OFFSET_BYTES, query->oa.begin_report_id + 1); @@ -1943,7 +1955,7 @@ gen_perf_end_query(struct gen_perf_context *perf_ctx, break; case GEN_PERF_QUERY_TYPE_PIPELINE: - snapshot_statistics_registers(perf_ctx->ctx, perf_cfg, query, + snapshot_statistics_registers(perf_ctx, query, STATS_BO_END_OFFSET_BYTES); --perf_ctx->n_active_pipeline_stats_queries; break; diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h index 2cd246a1dca..d8698c90cd9 100644 --- a/src/intel/perf/gen_perf.h +++ b/src/intel/perf/gen_perf.h @@ -226,9 +226,7 @@ struct gen_perf_config { uint32_t report_id); void (*batchbuffer_flush)(void *ctx, const char *file, int line); - void (*capture_frequency_stat_register)(void *ctx, void *bo, - uint32_t bo_offset); - void (*store_register_mem64)(void *ctx, void *bo, uint32_t reg, uint32_t offset); + void (*store_register_mem)(void *ctx, void *bo, uint32_t reg, uint32_t reg_size, uint32_t offset); } vtbl; }; diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index cfd3efe374e..7e0c66f895c 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -223,21 +223,6 @@ enum OaReadStatus { /******************************************************************************/ -static void -capture_frequency_stat_register(struct brw_context *brw, - struct brw_bo *bo, - uint32_t bo_offset) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->gen >= 7 && devinfo->gen <= 8 && - !devinfo->is_baytrail && !devinfo->is_cherryview) { - brw_store_register_mem32(brw, bo, GEN7_RPSTAT1, bo_offset); - } else if (devinfo->gen >= 9) { - brw_store_register_mem32(brw, bo, GEN9_RPSTAT0, bo_offset); - } -} - /** * Driver hook for glBeginPerfQueryINTEL(). */ @@ -466,9 +451,22 @@ brw_oa_emit_stall_at_pixel_scoreboard(void *c) brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_STALL_AT_SCOREBOARD); } -typedef void (*capture_frequency_stat_register_t)(void *, void *, uint32_t ); -typedef void (*store_register_mem64_t)(void *ctx, void *bo, - uint32_t reg, uint32_t offset); +static void +brw_perf_store_register(struct brw_context *brw, struct brw_bo *bo, + uint32_t reg, uint32_t reg_size, + uint32_t offset) +{ + if (reg_size == 8) { + brw_store_register_mem64(brw, bo, reg, offset); + } else { + assert(reg_size == 4); + brw_store_register_mem32(brw, bo, reg, offset); + } +} + +typedef void (*store_register_mem_t)(void *ctx, void *bo, + uint32_t reg, uint32_t reg_size, + uint32_t offset); typedef bool (*batch_references_t)(void *batch, void *bo); typedef void (*bo_wait_rendering_t)(void *bo); typedef int (*bo_busy_t)(void *bo); @@ -499,10 +497,8 @@ brw_init_perf_query_info(struct gl_context *ctx) perf_cfg->vtbl.emit_mi_report_perf_count = (emit_mi_report_t)brw_oa_emit_mi_report_perf_count; perf_cfg->vtbl.batchbuffer_flush = brw_oa_batchbuffer_flush; - perf_cfg->vtbl.capture_frequency_stat_register = - (capture_frequency_stat_register_t) capture_frequency_stat_register; - perf_cfg->vtbl.store_register_mem64 = - (store_register_mem64_t) brw_store_register_mem64; + perf_cfg->vtbl.store_register_mem = + (store_register_mem_t) brw_perf_store_register; perf_cfg->vtbl.batch_references = (batch_references_t)brw_batch_references; perf_cfg->vtbl.bo_wait_rendering = (bo_wait_rendering_t)brw_bo_wait_rendering; perf_cfg->vtbl.bo_busy = (bo_busy_t)brw_bo_busy; -- 2.30.2