i965/fs: Helpers for un/shuffle 16-bit pairs in 32-bit components

author Jose Maria Casanova Crespo <jmcasanova@igalia.com>

Mon, 20 Nov 2017 22:10:51 +0000 (23:10 +0100)

committer Jose Maria Casanova Crespo <jmcasanova@igalia.com>

Wed, 6 Dec 2017 07:57:18 +0000 (08:57 +0100)
author Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Mon, 20 Nov 2017 22:10:51 +0000 (23:10 +0100)
committer Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Wed, 6 Dec 2017 07:57:18 +0000 (08:57 +0100)
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h

index 19b897e7a9f2745ffef90d02167ca9153abc5db8..30557324d5a6b980962eb8f0b52e6cb6b83f7a87 100644 (file)
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -497,6 +497,17 @@ void shuffle_32bit_load_result_to_64bit_data(const brw::fs_builder &bld,
  fs_reg shuffle_64bit_data_for_32bit_write(const brw::fs_builder &bld,
                                            const fs_reg &src,
                                            uint32_t components);
+
+void shuffle_32bit_load_result_to_16bit_data(const brw::fs_builder &bld,
+                                             const fs_reg &dst,
+                                             const fs_reg &src,
+                                             uint32_t components);
+
+void shuffle_16bit_data_for_32bit_write(const brw::fs_builder &bld,
+                                        const fs_reg &dst,
+                                        const fs_reg &src,
+                                        uint32_t components);
+
  fs_reg setup_imm_df(const brw::fs_builder &bld,
                      double v);
  
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp

index ae85834ffe648e2ca1c8cf16d651d6562a6660d5..aa4a8c4b8b3f96c17d349fd640f47a6719a795bd 100644 (file)
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4844,6 +4844,38 @@ shuffle_32bit_load_result_to_64bit_data(const fs_builder &bld,
     }
  }
  
+void
+shuffle_32bit_load_result_to_16bit_data(const fs_builder &bld,
+                                        const fs_reg &dst,
+                                        const fs_reg &src,
+                                        uint32_t components)
+{
+   assert(type_sz(src.type) == 4);
+   assert(type_sz(dst.type) == 2);
+
+   /* Each 32-bit component of src packs two 16-bit values. They are unpacked
+    * through a temporary rather than written straight to dst, because dst
+    * may be the same register as src; in that case the first MOV in the
+    * loop below would overwrite the data read in the second MOV.
+    */
+   fs_reg tmp = retype(bld.vgrf(src.type), dst.type);
+
+   for (unsigned i = 0; i < components; i++) {
+      const fs_reg component_i =
+         subscript(offset(src, bld, i / 2), dst.type, i % 2);
+
+      bld.MOV(offset(tmp, bld, i % 2), component_i);
+
+      if (i % 2) {
+         bld.MOV(offset(dst, bld, i -1), offset(tmp, bld, 0));
+         bld.MOV(offset(dst, bld, i), offset(tmp, bld, 1));
+      }
+   }
+   if (components % 2) {
+      bld.MOV(offset(dst, bld, components - 1), tmp);
+   }
+}
+
  /**
   * This helper does the inverse operation of
   * SHUFFLE_32BIT_LOAD_RESULT_TO_64BIT_DATA.
@@ -4876,6 +4908,34 @@ shuffle_64bit_data_for_32bit_write(const fs_builder &bld,
     return dst;
  }
  
+void
+shuffle_16bit_data_for_32bit_write(const fs_builder &bld,
+                                   const fs_reg &dst,
+                                   const fs_reg &src,
+                                   uint32_t components)
+{
+   assert(type_sz(src.type) == 2);
+   assert(type_sz(dst.type) == 4);
+
+   /* Pairs of 16-bit src components are packed into one 32-bit temporary
+    * which is then copied to dst. We can't write directly to dst because it
+    * can be the same register as src, and in that case the first MOV in the
+    * loop below would overwrite the data read in the second MOV.
+    */
+   fs_reg tmp = bld.vgrf(dst.type);
+
+   for (unsigned i = 0; i < components; i++) {
+      const fs_reg component_i = offset(src, bld, i);
+      bld.MOV(subscript(tmp, src.type, i % 2), component_i);
+      if (i % 2) {
+         bld.MOV(offset(dst, bld, i / 2), tmp);
+      }
+   }
+   if (components % 2) {
+      bld.MOV(offset(dst, bld, components / 2), tmp);
+   }
+}
+
  fs_reg
  setup_imm_df(const fs_builder &bld, double v)
  {
author	Jose Maria Casanova Crespo <jmcasanova@igalia.com>
	Mon, 20 Nov 2017 22:10:51 +0000 (23:10 +0100)
committer	Jose Maria Casanova Crespo <jmcasanova@igalia.com>
	Wed, 6 Dec 2017 07:57:18 +0000 (08:57 +0100)
src/intel/compiler/brw_fs.h		patch \| blob \| history
src/intel/compiler/brw_fs_nir.cpp		patch \| blob \| history