From f179f419d1d0a03fad36c2b0a58e8b853bae6118 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 19 Feb 2013 14:36:06 -0800 Subject: [PATCH] i965/fs: Improve live variables calculation performance. We can execute way fewer instructions by doing our boolean manipulation on an "int" of bits at a time, while also reducing our working set size. Reduces compile time of L4D2's slowest shader from 4s to 1.1s (-72.4% +/- 0.2%, n=10) v2: Remove redundant masking (noted by Ken) Reviewed-by: Kenneth Graunke --- .../dri/i965/brw_fs_live_variables.cpp | 48 ++++++++++--------- .../drivers/dri/i965/brw_fs_live_variables.h | 10 ++-- 2 files changed, 32 insertions(+), 26 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp index 4c7991dc54d..63af1480d56 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp @@ -40,7 +40,7 @@ using namespace brw; */ /** - * Sets up the use[] and def[] arrays. + * Sets up the use[] and def[] bitsets. * * The basic-block-level live variable analysis needs to know which * variables get used before they're completely defined, and which @@ -67,8 +67,8 @@ fs_live_variables::setup_def_use() if (inst->src[i].file == GRF) { int reg = inst->src[i].reg; - if (!bd[b].def[reg]) - bd[b].use[reg] = true; + if (!BITSET_TEST(bd[b].def, reg)) + BITSET_SET(bd[b].use, reg); } } @@ -82,8 +82,8 @@ fs_live_variables::setup_def_use() !inst->force_uncompressed && !inst->force_sechalf) { int reg = inst->dst.reg; - if (!bd[b].use[reg]) - bd[b].def[reg] = true; + if (!BITSET_TEST(bd[b].use, reg)) + BITSET_SET(bd[b].def, reg); } ip++; @@ -107,12 +107,12 @@ fs_live_variables::compute_live_variables() for (int b = 0; b < cfg->num_blocks; b++) { /* Update livein */ - for (int i = 0; i < num_vars; i++) { - if (bd[b].use[i] || (bd[b].liveout[i] && !bd[b].def[i])) { - if (!bd[b].livein[i]) { - bd[b].livein[i] = true; - cont = true; - } + for (int i = 0; i < bitset_words; i++) { + BITSET_WORD new_livein = (bd[b].use[i] | + (bd[b].liveout[i] & ~bd[b].def[i])); + if (new_livein & ~bd[b].livein[i]) { + bd[b].livein[i] |= new_livein; + cont = true; } } @@ -121,11 +121,13 @@ fs_live_variables::compute_live_variables() bblock_link *link = (bblock_link *)block_node; bblock_t *block = link->block; - for (int i = 0; i < num_vars; i++) { - if (bd[block->block_num].livein[i] && !bd[b].liveout[i]) { - bd[b].liveout[i] = true; - cont = true; - } + for (int i = 0; i < bitset_words; i++) { + BITSET_WORD new_liveout = (bd[block->block_num].livein[i] & + ~bd[b].liveout[i]); + if (new_liveout) { + bd[b].liveout[i] |= new_liveout; + cont = true; + } } } } @@ -140,11 +142,13 @@ fs_live_variables::fs_live_variables(fs_visitor *v, cfg_t *cfg) num_vars = v->virtual_grf_count; bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks); + bitset_words = (ALIGN(v->virtual_grf_count, BITSET_WORDBITS) / + BITSET_WORDBITS); for (int i = 0; i < cfg->num_blocks; i++) { - bd[i].def = rzalloc_array(mem_ctx, bool, num_vars); - bd[i].use = rzalloc_array(mem_ctx, bool, num_vars); - bd[i].livein = rzalloc_array(mem_ctx, bool, num_vars); - bd[i].liveout = rzalloc_array(mem_ctx, bool, num_vars); + bd[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words); + bd[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words); + bd[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words); + bd[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words); } setup_def_use(); @@ -239,12 +243,12 @@ fs_visitor::calculate_live_intervals() for (int b = 0; b < cfg.num_blocks; b++) { for (int i = 0; i < num_vars; i++) { - if (livevars.bd[b].livein[i]) { + if (BITSET_TEST(livevars.bd[b].livein, i)) { def[i] = MIN2(def[i], cfg.blocks[b]->start_ip); use[i] = MAX2(use[i], cfg.blocks[b]->start_ip); } - if (livevars.bd[b].liveout[i]) { + if (BITSET_TEST(livevars.bd[b].liveout, i)) { def[i] = MIN2(def[i], cfg.blocks[b]->end_ip); use[i] = MAX2(use[i], cfg.blocks[b]->end_ip); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h index 5f7e67e5138..1cde5f46798 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h @@ -26,6 +26,7 @@ */ #include "brw_fs.h" +#include "main/bitset.h" namespace brw { @@ -36,18 +37,18 @@ struct block_data { * Note that for our purposes, "defined" means unconditionally, completely * defined. */ - bool *def; + BITSET_WORD *def; /** * Which variables are used before being defined in the block. */ - bool *use; + BITSET_WORD *use; /** Which defs reach the entry point of the block. */ - bool *livein; + BITSET_WORD *livein; /** Which defs reach the exit point of the block. */ - bool *liveout; + BITSET_WORD *liveout; }; class fs_live_variables { @@ -73,6 +74,7 @@ public: void *mem_ctx; int num_vars; + int bitset_words; /** Per-basic-block information on live variables */ struct block_data *bd; -- 2.30.2