From ccefce46cb8e9030b3a9134964ab2f660f66391c Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Sat, 16 Nov 2019 13:23:31 -0800 Subject: [PATCH] nir/range-analysis: Add pragmas to help loop unrolling I was pretty liberal with these assertions when I wrote this code because I had assumed that GCC would unroll the loops, inline the lookups of static const arrays with now-constant indices, and then eliminate all the actual assertions. It seems none of this happens even at -O3. Adding the pragmas helps encourage loop unrolling at some optimization levels. I tested by running shader-db with NIR_VALIDATE=false on a Core i7 Haswell desktop system. -Og: No difference proven at 95.0% confidence. N=5 -O1: -48.304 +/- 1.221 (-16.3343% +/- 0.412888%) N=5 -O2: -49.94 +/- 1.23521 (-17.9634% +/- 0.444303%) N=5 v2: Add a _Pragma to an inner loop that was accidentally dropped during a rebase. Reviewed-by: Eric Anholt --- src/compiler/nir/nir_range_analysis.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c index b40c87081c3..0eaa97aafc2 100644 --- a/src/compiler/nir/nir_range_analysis.c +++ b/src/compiler/nir/nir_range_analysis.c @@ -221,7 +221,9 @@ analyze_constant(const struct nir_alu_instr *instr, unsigned src, #ifndef NDEBUG #define ASSERT_TABLE_IS_COMMUTATIVE(t) \ do { \ + _Pragma("GCC unroll 7") \ for (unsigned r = 0; r < ARRAY_SIZE(t); r++) { \ + _Pragma("GCC unroll 7") \ for (unsigned c = 0; c < ARRAY_SIZE(t[0]); c++) \ assert(t[r][c] == t[c][r]); \ } \ @@ -229,6 +231,7 @@ analyze_constant(const struct nir_alu_instr *instr, unsigned src, #define ASSERT_TABLE_IS_DIAGONAL(t) \ do { \ + _Pragma("GCC unroll 7") \ for (unsigned r = 0; r < ARRAY_SIZE(t); r++) \ assert(t[r][r] == r); \ } while (false) @@ -258,10 +261,12 @@ union_ranges(enum ssa_ranges a, enum ssa_ranges b) */ #define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t) \ do { \ + _Pragma("GCC unroll 7") \ for (unsigned i = 
0; i < last_range; i++) { \ enum ssa_ranges col_range = t[i][unknown + 1]; \ enum ssa_ranges row_range = t[unknown + 1][i]; \ \ + _Pragma("GCC unroll 5") \ for (unsigned j = unknown + 2; j < last_range; j++) { \ col_range = union_ranges(col_range, t[i][j]); \ row_range = union_ranges(row_range, t[j][i]); \ @@ -286,6 +291,7 @@ union_ranges(enum ssa_ranges a, enum ssa_ranges b) #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t) \ do { \ + _Pragma("GCC unroll 7") \ for (unsigned i = 0; i < last_range; i++) { \ assert(union_ranges(t[i][lt_zero], t[i][eq_zero]) == t[i][le_zero]); \ assert(union_ranges(t[i][gt_zero], t[i][eq_zero]) == t[i][ge_zero]); \ @@ -316,6 +322,7 @@ union_ranges(enum ssa_ranges a, enum ssa_ranges b) #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t) \ do { \ + _Pragma("GCC unroll 7") \ for (unsigned i = 0; i < last_range; i++) { \ assert(union_ranges(t[i][lt_zero], t[i][ge_zero]) == \ t[i][unknown]); \ @@ -356,6 +363,9 @@ static struct ssa_result_range analyze_expression(const nir_alu_instr *instr, unsigned src, struct hash_table *ht, nir_alu_type use_type) { + /* Ensure that the _Pragma("GCC unroll 7") above are correct. */ + STATIC_ASSERT(last_range + 1 == 7); + if (!instr->src[src].src.is_ssa) return (struct ssa_result_range){unknown, false}; -- 2.30.2