From 1761671b0627ce8e1c0eae721e1fca5c2d04690e Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Fri, 11 Jul 2014 21:24:02 -0700 Subject: [PATCH] i965: Replace cfg instances with calls to calculate_cfg(). Avoids regenerating it unnecessarily. Every program in shader-db improved, none by an amount less than a 1/3 reduction. One Dota2 shader decreased from 62 -> 24. cfg calculations: 429492 -> 193197 (-55.02%) Reviewed-by: Topi Pohjolainen --- .../dri/i965/brw_fs_copy_propagation.cpp | 20 ++++++++++--------- .../i965/brw_fs_peephole_predicated_break.cpp | 6 +++--- .../drivers/dri/i965/brw_fs_sel_peephole.cpp | 6 +++--- src/mesa/drivers/dri/i965/brw_vec4.cpp | 6 +++--- src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 6 ++---- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 62e64a6773b..071620279b2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -591,31 +591,33 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, bool fs_visitor::opt_copy_propagate() { + calculate_cfg(); + bool progress = false; void *copy_prop_ctx = ralloc_context(NULL); - cfg_t cfg(&instructions); - exec_list *out_acp[cfg.num_blocks]; - for (int i = 0; i < cfg.num_blocks; i++) + exec_list *out_acp[cfg->num_blocks]; + + for (int i = 0; i < cfg->num_blocks; i++) out_acp[i] = new exec_list [ACP_HASH_SIZE]; /* First, walk through each block doing local copy propagation and getting * the set of copies available at the end of the block. */ - for (int b = 0; b < cfg.num_blocks; b++) { - bblock_t *block = cfg.blocks[b]; + for (int b = 0; b < cfg->num_blocks; b++) { + bblock_t *block = cfg->blocks[b]; progress = opt_copy_propagate_local(copy_prop_ctx, block, out_acp[b]) || progress; } /* Do dataflow analysis for those available copies. */ - fs_copy_prop_dataflow dataflow(copy_prop_ctx, &cfg, out_acp); + fs_copy_prop_dataflow dataflow(copy_prop_ctx, cfg, out_acp); /* Next, re-run local copy propagation, this time with the set of copies * provided by the dataflow analysis available at the start of a block. */ - for (int b = 0; b < cfg.num_blocks; b++) { - bblock_t *block = cfg.blocks[b]; + for (int b = 0; b < cfg->num_blocks; b++) { + bblock_t *block = cfg->blocks[b]; exec_list in_acp[ACP_HASH_SIZE]; for (int i = 0; i < dataflow.num_acp; i++) { @@ -628,7 +630,7 @@ fs_visitor::opt_copy_propagate() progress = opt_copy_propagate_local(copy_prop_ctx, block, in_acp) || progress; } - for (int i = 0; i < cfg.num_blocks; i++) + for (int i = 0; i < cfg->num_blocks; i++) delete [] out_acp[i]; ralloc_free(copy_prop_ctx); diff --git a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp index bb0a2ac09a1..3ba0b262451 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp @@ -45,10 +45,10 @@ fs_visitor::opt_peephole_predicated_break() { bool progress = false; - cfg_t cfg(&instructions); + calculate_cfg(); - for (int b = 0; b < cfg.num_blocks; b++) { - bblock_t *block = cfg.blocks[b]; + for (int b = 0; b < cfg->num_blocks; b++) { + bblock_t *block = cfg->blocks[b]; /* BREAK and CONTINUE instructions, by definition, can only be found at * the ends of basic blocks. diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp index db0be1911cc..cf47cb5fd81 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp @@ -127,10 +127,10 @@ fs_visitor::opt_peephole_sel() { bool progress = false; - cfg_t cfg(&instructions); + calculate_cfg(); - for (int b = 0; b < cfg.num_blocks; b++) { - bblock_t *block = cfg.blocks[b]; + for (int b = 0; b < cfg->num_blocks; b++) { + bblock_t *block = cfg->blocks[b]; /* IF instructions, by definition, can only be found at the ends of * basic blocks. diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 9ea0b147644..49a4e9b6274 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -748,13 +748,13 @@ vec4_visitor::opt_set_dependency_control() vec4_instruction *last_mrf_write[BRW_MAX_GRF]; uint8_t mrf_channels_written[BRW_MAX_GRF]; - cfg_t cfg(&instructions); + calculate_cfg(); assert(prog_data->total_grf || !"Must be called after register allocation"); - for (int i = 0; i < cfg.num_blocks; i++) { - bblock_t *bblock = cfg.blocks[i]; + for (int i = 0; i < cfg->num_blocks; i++) { + bblock_t *bblock = cfg->blocks[i]; vec4_instruction *inst; memset(last_grf_write, 0, sizeof(last_grf_write)); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp index 83c7eb83eb2..86360d25d2a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp @@ -254,10 +254,8 @@ vec4_visitor::opt_cse() calculate_live_intervals(); - cfg_t cfg(&instructions); - - for (int b = 0; b < cfg.num_blocks; b++) { - bblock_t *block = cfg.blocks[b]; + for (int b = 0; b < cfg->num_blocks; b++) { + bblock_t *block = cfg->blocks[b]; progress = opt_cse_local(block) || progress; } -- 2.30.2