nir/range-analysis: Add pragmas to help loop unrolling
author Ian Romanick <ian.d.romanick@intel.com>
Sat, 16 Nov 2019 21:23:31 +0000 (13:23 -0800)
committer Ian Romanick <ian.d.romanick@intel.com>
Fri, 22 Nov 2019 16:16:06 +0000 (08:16 -0800)
I was pretty liberal with these assertions when I wrote this code
because I had assumed that GCC would unroll the loops, inline the lookups
of static const arrays with now constant indices, and then eliminate
all the actual assertions.  It seems none of this happens even at -O3.

Adding the pragmas helps encourage loop unrolling at some optimization
levels.  I tested by running shader-db with NIR_VALIDATE=false on a Core
i7 Haswell desktop system.

-Og: No difference proven at 95.0% confidence. N=5
-O1: -48.304 +/- 1.221 (-16.3343% +/- 0.412888%) N=5
-O2: -49.94 +/- 1.23521 (-17.9634% +/- 0.444303%) N=5

v2: Add a _Pragma to an inner loop that was accidentally dropped during
a rebase.

Reviewed-by: Eric Anholt <eric@anholt.net>
src/compiler/nir/nir_range_analysis.c

index b40c87081c363f294fff6f47ef420eb5f4421c66..0eaa97aafc27ad4766e68ef79dab6a676a448376 100644 (file)
@@ -221,7 +221,9 @@ analyze_constant(const struct nir_alu_instr *instr, unsigned src,
 #ifndef NDEBUG
 #define ASSERT_TABLE_IS_COMMUTATIVE(t)                        \
    do {                                                       \
+      _Pragma("GCC unroll 7")                                 \
       for (unsigned r = 0; r < ARRAY_SIZE(t); r++) {          \
+         _Pragma("GCC unroll 7")                              \
          for (unsigned c = 0; c < ARRAY_SIZE(t[0]); c++)      \
             assert(t[r][c] == t[c][r]);                       \
       }                                                       \
@@ -229,6 +231,7 @@ analyze_constant(const struct nir_alu_instr *instr, unsigned src,
 
 #define ASSERT_TABLE_IS_DIAGONAL(t)                           \
    do {                                                       \
+      _Pragma("GCC unroll 7")                                 \
       for (unsigned r = 0; r < ARRAY_SIZE(t); r++)            \
          assert(t[r][r] == r);                                \
    } while (false)
@@ -258,10 +261,12 @@ union_ranges(enum ssa_ranges a, enum ssa_ranges b)
  */
 #define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t)              \
    do {                                                                 \
+      _Pragma("GCC unroll 7")                                           \
       for (unsigned i = 0; i < last_range; i++) {                       \
          enum ssa_ranges col_range = t[i][unknown + 1];                 \
          enum ssa_ranges row_range = t[unknown + 1][i];                 \
                                                                         \
+         _Pragma("GCC unroll 5")                                        \
          for (unsigned j = unknown + 2; j < last_range; j++) {          \
             col_range = union_ranges(col_range, t[i][j]);               \
             row_range = union_ranges(row_range, t[j][i]);               \
@@ -286,6 +291,7 @@ union_ranges(enum ssa_ranges a, enum ssa_ranges b)
 
 #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t) \
    do {                                                                 \
+      _Pragma("GCC unroll 7")                                           \
       for (unsigned i = 0; i < last_range; i++) {                       \
          assert(union_ranges(t[i][lt_zero], t[i][eq_zero]) == t[i][le_zero]); \
          assert(union_ranges(t[i][gt_zero], t[i][eq_zero]) == t[i][ge_zero]); \
@@ -316,6 +322,7 @@ union_ranges(enum ssa_ranges a, enum ssa_ranges b)
 
 #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t)            \
    do {                                                                 \
+      _Pragma("GCC unroll 7")                                           \
       for (unsigned i = 0; i < last_range; i++) {                       \
          assert(union_ranges(t[i][lt_zero], t[i][ge_zero]) ==           \
                 t[i][unknown]);                                         \
@@ -356,6 +363,9 @@ static struct ssa_result_range
 analyze_expression(const nir_alu_instr *instr, unsigned src,
                    struct hash_table *ht, nir_alu_type use_type)
 {
+   /* Ensure that the _Pragma("GCC unroll 7") above are correct. */
+   STATIC_ASSERT(last_range + 1 == 7);
+
    if (!instr->src[src].src.is_ssa)
       return (struct ssa_result_range){unknown, false};