intel/fs: make scan/reduce work with SIMD32 when it fits 2 registers
author: Paulo Zanoni <paulo.r.zanoni@intel.com>
Fri, 9 Aug 2019 22:40:33 +0000 (15:40 -0700)
committer: Jason Ekstrand <jason@jlekstrand.net>
Thu, 19 Sep 2019 02:47:17 +0000 (02:47 +0000)
When dealing with uint16_t and uint8_t on SIMD32, all the operations
fit in just 2 registers, so we don't hit the recursion at the
beginning of emit_scan(). Because of that, we need to actually
compute scan/reduce for channels 31:16 as well.

v2: Add the instructions that were still missing (Jason).

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
src/intel/compiler/brw_fs_builder.h

index 0c1b6f5d6c7f8b77a2c46627c6b43151d1487510..1000d956d6f5b2e8ee0de34a58c6234c32bdca6e 100644 (file)
@@ -514,6 +514,16 @@ namespace brw {
                right = horiz_offset(tmp, 8 + 4);
                set_condmod(mod, ubld.emit(opcode, right, left, right));
             }
+
+            if (dispatch_width() > 16) {
+               left = component(tmp, 16 + 3);
+               right = horiz_offset(tmp, 16 + 4);
+               set_condmod(mod, ubld.emit(opcode, right, left, right));
+
+               left = component(tmp, 24 + 3);
+               right = horiz_offset(tmp, 24 + 4);
+               set_condmod(mod, ubld.emit(opcode, right, left, right));
+            }
          }
 
          if (cluster_size > 8 && dispatch_width() > 8) {
@@ -521,6 +531,19 @@ namespace brw {
             src_reg left = component(tmp, 7);
             dst_reg right = horiz_offset(tmp, 8);
             set_condmod(mod, ubld.emit(opcode, right, left, right));
+
+            if (dispatch_width() > 16) {
+               left = component(tmp, 16 + 7);
+               right = horiz_offset(tmp, 16 + 8);
+               set_condmod(mod, ubld.emit(opcode, right, left, right));
+            }
+         }
+
+         if (cluster_size > 16 && dispatch_width() > 16) {
+            const fs_builder ubld = exec_all().group(16, 0);
+            src_reg left = component(tmp, 15);
+            dst_reg right = horiz_offset(tmp, 16);
+            set_condmod(mod, ubld.emit(opcode, right, left, right));
          }
       }