intel/fs: roll the loop with the <0,1,0> additions in emit_scan()
author Paulo Zanoni <paulo.r.zanoni@intel.com>
Sat, 24 Aug 2019 00:15:27 +0000 (17:15 -0700)
committer Jason Ekstrand <jason@jlekstrand.net>
Thu, 19 Sep 2019 02:47:17 +0000 (02:47 +0000)
IMHO the code is easier to understand this way, being explicit that
we're doing exactly the same thing every time.

No functional changes.

v2: Adjust the loop breaking condition (Jason).

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
src/intel/compiler/brw_fs_builder.h

index 1000d956d6f5b2e8ee0de34a58c6234c32bdca6e..54f5bd16892dbbb105b6d5dfd5482f4ff18cac62 100644 (file)
@@ -503,48 +503,30 @@ namespace brw {
             }
          }
 
-         if (cluster_size > 4) {
-            const fs_builder ubld = exec_all().group(4, 0);
-            src_reg left = component(tmp, 3);
-            dst_reg right = horiz_offset(tmp, 4);
+         for (unsigned i = 4;
+              i < MIN2(cluster_size, dispatch_width());
+              i *= 2) {
+            const fs_builder ubld = exec_all().group(i, 0);
+            src_reg left = component(tmp, i - 1);
+            dst_reg right = horiz_offset(tmp, i);
             set_condmod(mod, ubld.emit(opcode, right, left, right));
 
-            if (dispatch_width() > 8) {
-               left = component(tmp, 8 + 3);
-               right = horiz_offset(tmp, 8 + 4);
+            if (dispatch_width() > i * 2) {
+               left = component(tmp, i * 3 - 1);
+               right = horiz_offset(tmp, i * 3);
                set_condmod(mod, ubld.emit(opcode, right, left, right));
             }
 
-            if (dispatch_width() > 16) {
-               left = component(tmp, 16 + 3);
-               right = horiz_offset(tmp, 16 + 4);
+            if (dispatch_width() > i * 4) {
+               left = component(tmp, i * 5 - 1);
+               right = horiz_offset(tmp, i * 5);
                set_condmod(mod, ubld.emit(opcode, right, left, right));
 
-               left = component(tmp, 24 + 3);
-               right = horiz_offset(tmp, 24 + 4);
+               left = component(tmp, i * 7 - 1);
+               right = horiz_offset(tmp, i * 7);
                set_condmod(mod, ubld.emit(opcode, right, left, right));
             }
          }
-
-         if (cluster_size > 8 && dispatch_width() > 8) {
-            const fs_builder ubld = exec_all().group(8, 0);
-            src_reg left = component(tmp, 7);
-            dst_reg right = horiz_offset(tmp, 8);
-            set_condmod(mod, ubld.emit(opcode, right, left, right));
-
-            if (dispatch_width() > 16) {
-               left = component(tmp, 16 + 7);
-               right = horiz_offset(tmp, 16 + 8);
-               set_condmod(mod, ubld.emit(opcode, right, left, right));
-            }
-         }
-
-         if (cluster_size > 16 && dispatch_width() > 16) {
-            const fs_builder ubld = exec_all().group(16, 0);
-            src_reg left = component(tmp, 15);
-            dst_reg right = horiz_offset(tmp, 16);
-            set_condmod(mod, ubld.emit(opcode, right, left, right));
-         }
       }
 
       /**