From fce0214e94ef2f95693d4c51e823ea7eca8b6dab Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 15 May 2019 12:06:38 -0500 Subject: [PATCH] intel/fs/live_variables: Do compute_start_end in BITSET_WORD chunks For a block with a contiguous chunk of 32 vars that don't need updating, this lets us skip 32 vars at a time. Also, by using bitscan, we only iterate for each set bit rather than testing them all one at a time. Looking at perf (with -O0 which is unfortunately necessary to get reasonable back-traces), this seems to cut about 50-60% of the time spent in compute_start_end() which is itself about 4-6% of the run-time. In the real world, with a release driver build, this cuts 1.34% off a full shader-db run. (I ran shader-db 5 times in each configuration). Reviewed-by: Matt Turner --- src/intel/compiler/brw_fs_live_variables.cpp | 24 ++++++++++++-------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/intel/compiler/brw_fs_live_variables.cpp b/src/intel/compiler/brw_fs_live_variables.cpp index 059f076fa51..aa9bb72300c 100644 --- a/src/intel/compiler/brw_fs_live_variables.cpp +++ b/src/intel/compiler/brw_fs_live_variables.cpp @@ -235,15 +235,21 @@ fs_live_variables::compute_start_end() foreach_block (block, cfg) { struct block_data *bd = &block_data[block->num]; - for (int i = 0; i < num_vars; i++) { - if (BITSET_TEST(bd->livein, i) && BITSET_TEST(bd->defin, i)) { - start[i] = MIN2(start[i], block->start_ip); - end[i] = MAX2(end[i], block->start_ip); - } - - if (BITSET_TEST(bd->liveout, i) && BITSET_TEST(bd->defout, i)) { - start[i] = MIN2(start[i], block->end_ip); - end[i] = MAX2(end[i], block->end_ip); + for (int w = 0; w < bitset_words; w++) { + BITSET_WORD livedefin = bd->livein[w] & bd->defin[w]; + BITSET_WORD livedefout = bd->liveout[w] & bd->defout[w]; + BITSET_WORD livedefinout = livedefin | livedefout; + while (livedefinout) { + unsigned b = u_bit_scan(&livedefinout); + unsigned i = w * BITSET_WORDBITS + b; + if (livedefin & (1u << 
b)) { + start[i] = MIN2(start[i], block->start_ip); + end[i] = MAX2(end[i], block->start_ip); + } + if (livedefout & (1u << b)) { + start[i] = MIN2(start[i], block->end_ip); + end[i] = MAX2(end[i], block->end_ip); + } } } } -- 2.30.2