intel/fs: shuffle_from_32bit_read at load_per_vertex_input at TCS/TES
authorJose Maria Casanova Crespo <jmcasanova@igalia.com>
Sat, 9 Jun 2018 09:46:07 +0000 (11:46 +0200)
committerJose Maria Casanova Crespo <jmcasanova@igalia.com>
Sat, 16 Jun 2018 20:39:08 +0000 (22:39 +0200)
Previously, the shuffle function had a source/destination overlap that
needs to be avoided to use shuffle_from_32bit_read. As we can use for
the shuffle destination the destination of removed MOVs.

This change also avoids the internal MOVs done by the previous shuffle
to deal with possible overlaps.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
src/intel/compiler/brw_fs_nir.cpp

index 7fdc9313d15b7f491dac70b572b3173c1b9b6ed6..352d50e74db6e4c080338b9d0387d411cde4b337 100644 (file)
@@ -2665,13 +2665,10 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
           * or SSBOs.
           */
          if (type_sz(dst.type) == 8) {
-            shuffle_32bit_load_result_to_64bit_data(
-               bld, dst, retype(dst, BRW_REGISTER_TYPE_F), num_components);
-
-            for (unsigned c = 0; c < num_components; c++) {
-               bld.MOV(offset(orig_dst, bld, iter * 2 + c),
-                       offset(dst, bld, c));
-            }
+            shuffle_from_32bit_read(bld,
+                                    offset(orig_dst, bld, iter * 2),
+                                    retype(dst, BRW_REGISTER_TYPE_D),
+                                    0, num_components);
          }
 
          /* Copy the temporary to the destination to deal with writemasking.
@@ -3014,13 +3011,10 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
              * or SSBOs.
              */
             if (type_sz(dest.type) == 8) {
-               shuffle_32bit_load_result_to_64bit_data(
-                  bld, dest, retype(dest, BRW_REGISTER_TYPE_F), num_components);
-
-               for (unsigned c = 0; c < num_components; c++) {
-                  bld.MOV(offset(orig_dest, bld, iter * 2 + c),
-                          offset(dest, bld, c));
-               }
+               shuffle_from_32bit_read(bld,
+                                       offset(orig_dest, bld, iter * 2),
+                                       retype(dest, BRW_REGISTER_TYPE_D),
+                                       0, num_components);
             }
 
             /* If we are loading double data and we need a second read message