From 05882b0d3b69ac14e9bc93460c77f9dc203c2ff9 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Thu, 1 Nov 2012 22:04:50 -0700
Subject: [PATCH] i965/fs: Compact the virtual GRF arrays.

During code generation, we create tons of temporary variables, many of
which get immediately killed and are never used. Later optimization and
analysis passes, such as compute_live_intervals, loop over all the
virtual GRFs. By compacting them, we can save a lot of overhead.

Reduces compilation time in L4D2's largest fragment shader from 10.2
seconds to 5.2 seconds (50%). Drops compute_live_variables() from 10-12%
of another game's startup time to 8%.

Reviewed-by: Eric Anholt
Signed-off-by: Kenneth Graunke
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 60 ++++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs.h | 1 +
 2 files changed, 61 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 56cb447fafe..777879e1241 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1102,6 +1102,64 @@ fs_visitor::split_virtual_grfs()
    this->live_intervals_valid = false;
 }
 
+/**
+ * Remove unreferenced virtual GRFs and compact the virtual_grf_* arrays.
+ *
+ * Code generation creates many temporaries that are never used again, yet
+ * later passes such as calculate_live_intervals() loop over every virtual
+ * GRF.  Registers still named as an instruction's destination or source are
+ * renumbered 0..N-1, their array entries are moved down to match,
+ * virtual_grf_count becomes N, and every instruction is patched to match.
+ */
+void
+fs_visitor::compact_virtual_grfs()
+{
+   /* remap_table[reg]: -1 = unreferenced; 0 = referenced (renumbered below). */
+   int remap_table[this->virtual_grf_count]; /* NOTE(review): runtime-sized local array (VLA) */
+   memset(remap_table, -1, sizeof(remap_table)); /* every byte 0xff, so every int reads as -1 */
+
+   foreach_list(node, &this->instructions) {
+      const fs_inst *inst = (const fs_inst *) node;
+
+      if (inst->dst.file == GRF)
+         remap_table[inst->dst.reg] = 0;
+
+      for (int i = 0; i < 3; i++) { /* 3: presumably the size of fs_inst::src -- confirm */
+         if (inst->src[i].file == GRF)
+            remap_table[inst->src[i].reg] = 0;
+      }
+   }
+
+   /* Slide referenced entries down in place; safe because new_index <= i. */
+   int new_index = 0;
+   for (int i = 0; i < this->virtual_grf_count; i++) {
+      if (remap_table[i] != -1) {
+         remap_table[i] = new_index; /* from here on the table maps old -> new number */
+         virtual_grf_sizes[new_index] = virtual_grf_sizes[i];
+         if (live_intervals_valid) { /* use/def are only moved while flagged valid */
+            virtual_grf_use[new_index] = virtual_grf_use[i];
+            virtual_grf_def[new_index] = virtual_grf_def[i];
+         }
+         ++new_index;
+      }
+   }
+
+   this->virtual_grf_count = new_index; /* number of GRFs still referenced */
+
+   /* Rewrite every GRF operand from its old number to its compacted one. */
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *) node;
+
+      if (inst->dst.file == GRF)
+         inst->dst.reg = remap_table[inst->dst.reg];
+
+      for (int i = 0; i < 3; i++) {
+         if (inst->src[i].file == GRF)
+            inst->src[i].reg = remap_table[inst->src[i].reg];
+      }
+   }
+}
+
 bool
 fs_visitor::remove_dead_constants()
 {
@@ -1860,6 +1918,8 @@ fs_visitor::run()
       do {
          progress = false;
 
+         compact_virtual_grfs();
+
          progress = remove_duplicate_mrf_writes() || progress;
 
          progress = opt_algebraic() || progress;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 0b2681d6cdd..13662bb8836 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -240,6 +240,7 @@ public:
    int choose_spill_reg(struct ra_graph *g);
    void spill_reg(int spill_reg);
    void split_virtual_grfs();
+   void compact_virtual_grfs();
    void setup_pull_constants();
    void calculate_live_intervals();
    bool opt_algebraic();
-- 
2.30.2