From f16bd8a9feec026d2a11a203d860189be954896d Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Mon, 12 Nov 2018 18:38:24 -0600
Subject: [PATCH] nir/builder: Add nir_pack/unpack/bitcast helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

The new helpers can generate any pack/unpack operation including those
for which we do not have specific opcodes and they express a bitcast in
terms of these pack/unpack operations.  In particular, the new helpers
properly handle 8-bit types.

Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
---
 src/compiler/nir/nir_builder.h | 126 +++++++++++++++++++++++++++++++++
 src/compiler/spirv/vtn_alu.c   | 101 +++++++-------------------
 2 files changed, 151 insertions(+), 76 deletions(-)

diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index cf096d41bf2..e37aba23dc2 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -565,6 +565,132 @@ nir_imul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
    return nir_imul(build, x, nir_imm_intN_t(build, y, x->bit_size));
 }
 
+static inline nir_ssa_def *
+nir_pack_bits(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
+{
+   assert(src->num_components * src->bit_size == dest_bit_size);
+
+   switch (dest_bit_size) {
+   case 64:
+      switch (src->bit_size) {
+      case 32: return nir_pack_64_2x32(b, src);
+      case 16: return nir_pack_64_4x16(b, src);
+      default: break;
+      }
+      break;
+
+   case 32:
+      if (src->bit_size == 16)
+         return nir_pack_32_2x16(b, src);
+      break;
+
+   default:
+      break;
+   }
+
+   /* If we got here, we have no dedicated pack opcode. */
+   nir_ssa_def *dest = nir_imm_intN_t(b, 0, dest_bit_size);
+   for (unsigned i = 0; i < src->num_components; i++) {
+      nir_ssa_def *val;
+      switch (dest_bit_size) {
+      case 64: val = nir_u2u64(b, nir_channel(b, src, i));  break;
+      case 32: val = nir_u2u32(b, nir_channel(b, src, i));  break;
+      case 16: val = nir_u2u16(b, nir_channel(b, src, i));  break;
+      default: unreachable("Invalid bit size");
+      }
+      val = nir_ishl(b, val, nir_imm_int(b, i * src->bit_size));
+      dest = nir_ior(b, dest, val);
+   }
+   return dest;
+}
+
+static inline nir_ssa_def *
+nir_unpack_bits(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
+{
+   assert(src->num_components == 1);
+   assert(src->bit_size > dest_bit_size);
+   const unsigned dest_num_components = src->bit_size / dest_bit_size;
+   assert(dest_num_components <= NIR_MAX_VEC_COMPONENTS);
+
+   switch (src->bit_size) {
+   case 64:
+      switch (dest_bit_size) {
+      case 32: return nir_unpack_64_2x32(b, src);
+      case 16: return nir_unpack_64_4x16(b, src);
+      default: break;
+      }
+      break;
+
+   case 32:
+      if (dest_bit_size == 16)
+         return nir_unpack_32_2x16(b, src);
+      break;
+
+   default:
+      break;
+   }
+
+   /* If we got here, we have no dedicated unpack opcode. */
+   nir_ssa_def *dest_comps[NIR_MAX_VEC_COMPONENTS];
+   for (unsigned i = 0; i < dest_num_components; i++) {
+      nir_ssa_def *val = nir_ushr(b, src, nir_imm_int(b, i * dest_bit_size));
+      switch (dest_bit_size) {
+      case 32: dest_comps[i] = nir_u2u32(b, val);  break;
+      case 16: dest_comps[i] = nir_u2u16(b, val);  break;
+      case 8:  dest_comps[i] = nir_u2u8(b, val);   break;
+      default: unreachable("Invalid bit size");
+      }
+   }
+   return nir_vec(b, dest_comps, dest_num_components);
+}
+
+static inline nir_ssa_def *
+nir_bitcast_vector(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
+{
+   assert((src->bit_size * src->num_components) % dest_bit_size == 0);
+   const unsigned dest_num_components =
+      (src->bit_size * src->num_components) / dest_bit_size;
+   assert(dest_num_components <= NIR_MAX_VEC_COMPONENTS);
+
+   if (src->bit_size > dest_bit_size) {
+      assert(src->bit_size % dest_bit_size == 0);
+      if (src->num_components == 1) {
+         return nir_unpack_bits(b, src, dest_bit_size);
+      } else {
+         const unsigned divisor = src->bit_size / dest_bit_size;
+         assert(src->num_components * divisor == dest_num_components);
+         nir_ssa_def *dest[NIR_MAX_VEC_COMPONENTS];
+         for (unsigned i = 0; i < src->num_components; i++) {
+            nir_ssa_def *unpacked =
+               nir_unpack_bits(b, nir_channel(b, src, i), dest_bit_size);
+            assert(unpacked->num_components == divisor);
+            for (unsigned j = 0; j < divisor; j++)
+               dest[i * divisor + j] = nir_channel(b, unpacked, j);
+         }
+         return nir_vec(b, dest, dest_num_components);
+      }
+   } else if (src->bit_size < dest_bit_size) {
+      assert(dest_bit_size % src->bit_size == 0);
+      if (dest_num_components == 1) {
+         return nir_pack_bits(b, src, dest_bit_size);
+      } else {
+         const unsigned divisor = dest_bit_size / src->bit_size;
+         assert(src->num_components == dest_num_components * divisor);
+         nir_ssa_def *dest[NIR_MAX_VEC_COMPONENTS];
+         for (unsigned i = 0; i < dest_num_components; i++) {
+            nir_component_mask_t src_mask =
+               ((1 << divisor) - 1) << (i * divisor);
+            dest[i] = nir_pack_bits(b, nir_channels(b, src, src_mask),
+                                       dest_bit_size);
+         }
+         return nir_vec(b, dest, dest_num_components);
+      }
+   } else {
+      assert(src->bit_size == dest_bit_size);
+      return src;
+   }
+}
+
 /**
  * Turns a nir_src into a nir_ssa_def * so it can be passed to
  * nir_build_alu()-based builder calls.
diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c
index a23f8c29b5c..dc6fedc9129 100644
--- a/src/compiler/spirv/vtn_alu.c
+++ b/src/compiler/spirv/vtn_alu.c
@@ -211,81 +211,6 @@ vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode,
    }
 }
 
-static void
-vtn_handle_bitcast(struct vtn_builder *b, struct vtn_ssa_value *dest,
-                   struct nir_ssa_def *src)
-{
-   if (glsl_get_vector_elements(dest->type) == src->num_components) {
-      /* From the definition of OpBitcast in the SPIR-V 1.2 spec:
-       *
-       * "If Result Type has the same number of components as Operand, they
-       * must also have the same component width, and results are computed per
-       * component."
-       */
-      dest->def = nir_imov(&b->nb, src);
-      return;
-   }
-
-   /* From the definition of OpBitcast in the SPIR-V 1.2 spec:
-    *
-    * "If Result Type has a different number of components than Operand, the
-    * total number of bits in Result Type must equal the total number of bits
-    * in Operand. Let L be the type, either Result Type or Operand’s type, that
-    * has the larger number of components. Let S be the other type, with the
-    * smaller number of components. The number of components in L must be an
-    * integer multiple of the number of components in S. The first component
-    * (that is, the only or lowest-numbered component) of S maps to the first
-    * components of L, and so on, up to the last component of S mapping to the
-    * last components of L. Within this mapping, any single component of S
-    * (mapping to multiple components of L) maps its lower-ordered bits to the
-    * lower-numbered components of L."
-    */
-   unsigned src_bit_size = src->bit_size;
-   unsigned dest_bit_size = glsl_get_bit_size(dest->type);
-   unsigned src_components = src->num_components;
-   unsigned dest_components = glsl_get_vector_elements(dest->type);
-   vtn_assert(src_bit_size * src_components == dest_bit_size * dest_components);
-
-   nir_ssa_def *dest_chan[NIR_MAX_VEC_COMPONENTS];
-   if (src_bit_size > dest_bit_size) {
-      vtn_assert(src_bit_size % dest_bit_size == 0);
-      unsigned divisor = src_bit_size / dest_bit_size;
-      for (unsigned comp = 0; comp < src_components; comp++) {
-         nir_ssa_def *split;
-         if (src_bit_size == 64) {
-            assert(dest_bit_size == 32 || dest_bit_size == 16);
-            split = dest_bit_size == 32 ?
-               nir_unpack_64_2x32(&b->nb, nir_channel(&b->nb, src, comp)) :
-               nir_unpack_64_4x16(&b->nb, nir_channel(&b->nb, src, comp));
-         } else {
-            vtn_assert(src_bit_size == 32);
-            vtn_assert(dest_bit_size == 16);
-            split = nir_unpack_32_2x16(&b->nb, nir_channel(&b->nb, src, comp));
-         }
-         for (unsigned i = 0; i < divisor; i++)
-            dest_chan[divisor * comp + i] = nir_channel(&b->nb, split, i);
-      }
-   } else {
-      vtn_assert(dest_bit_size % src_bit_size == 0);
-      unsigned divisor = dest_bit_size / src_bit_size;
-      for (unsigned comp = 0; comp < dest_components; comp++) {
-         unsigned channels = ((1 << divisor) - 1) << (comp * divisor);
-         nir_ssa_def *src_chan = nir_channels(&b->nb, src, channels);
-         if (dest_bit_size == 64) {
-            assert(src_bit_size == 32 || src_bit_size == 16);
-            dest_chan[comp] = src_bit_size == 32 ?
-               nir_pack_64_2x32(&b->nb, src_chan) :
-               nir_pack_64_4x16(&b->nb, src_chan);
-         } else {
-            vtn_assert(dest_bit_size == 32);
-            vtn_assert(src_bit_size == 16);
-            dest_chan[comp] = nir_pack_32_2x16(&b->nb, src_chan);
-         }
-      }
-   }
-   dest->def = nir_vec(&b->nb, dest_chan, dest_components);
-}
-
 nir_op
 vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b,
                                 SpvOp opcode, bool *swap,
@@ -633,7 +558,31 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
    }
 
    case SpvOpBitcast:
-      vtn_handle_bitcast(b, val->ssa, src[0]);
+      /* From the definition of OpBitcast in the SPIR-V 1.2 spec:
+       *
+       *    "If Result Type has the same number of components as Operand, they
+       *    must also have the same component width, and results are computed
+       *    per component.
+       *
+       *    If Result Type has a different number of components than Operand,
+       *    the total number of bits in Result Type must equal the total
+       *    number of bits in Operand. Let L be the type, either Result Type
+       *    or Operand’s type, that has the larger number of components. Let S
+       *    be the other type, with the smaller number of components. The
+       *    number of components in L must be an integer multiple of the
+       *    number of components in S.  The first component (that is, the only
+       *    or lowest-numbered component) of S maps to the first components of
+       *    L, and so on, up to the last component of S mapping to the last
+       *    components of L. Within this mapping, any single component of S
+       *    (mapping to multiple components of L) maps its lower-ordered bits
+       *    to the lower-numbered components of L."
+       */
+      vtn_fail_if(src[0]->num_components * src[0]->bit_size !=
+                  glsl_get_vector_elements(type) * glsl_get_bit_size(type),
+                  "Source and destination of OpBitcast must have the same "
+                  "total number of bits");
+      val->ssa->def = nir_bitcast_vector(&b->nb, src[0],
+                                         glsl_get_bit_size(type));
       break;
 
    case SpvOpFConvert: {
-- 
2.30.2