spirv: Add support for lowering workgroup access to offsets
author     Jason Ekstrand <jason.ekstrand@intel.com>
           Thu, 19 Oct 2017 00:59:47 +0000 (17:59 -0700)
committer  Jason Ekstrand <jason.ekstrand@intel.com>
           Wed, 6 Dec 2017 06:01:54 +0000 (22:01 -0800)
Before, we always left workgroup variables as shared nir_variables and
let the driver call nir_lower_io.  This adds an option to do the
lowering directly in spirv_to_nir.  To do this, we implicitly assign the
variables a std430 layout and then treat them like a UBO or SSBO and
immediately lower all the way to an offset.

As a side-effect, the spirv_to_nir pass now handles variable pointers
for workgroup variables.
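
As a rough sketch of how a driver might opt in (hypothetical consumer
code, not part of this patch):

   const struct spirv_to_nir_options spirv_options = {
      .lower_workgroup_access_to_offsets = true,
   };
   nir_function *entry_point =
      spirv_to_nir(words, word_count, spec, num_spec, stage,
                   entry_point_name, &spirv_options, nir_options);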

Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
src/compiler/spirv/nir_spirv.h
src/compiler/spirv/spirv_to_nir.c
src/compiler/spirv/vtn_private.h
src/compiler/spirv/vtn_variables.c

diff --git a/src/compiler/spirv/nir_spirv.h b/src/compiler/spirv/nir_spirv.h
index f129a205764a683ae897ed0669f9fd0e4ace5221..eb7146cb89de088a9a86386023d48b1806e7d39b 100644
@@ -49,6 +49,14 @@ enum nir_spirv_debug_level {
 };
 
 struct spirv_to_nir_options {
+   /* Whether or not to lower all workgroup variable access to offsets
+    * up-front.  This means you will get _shared intrinsics instead of _var
+    * for workgroup data access.
+    *
+    * This is currently required for full variable pointers support.
+    */
+   bool lower_workgroup_access_to_offsets;
+
    struct {
       bool float64;
       bool image_ms_array;
diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c
index d4daa53f0e829cbb8a83f217e26e93525b31db7a..5c2f53dc1302ad51f67db32b7a54528af47e70fb 100644
@@ -809,6 +809,64 @@ translate_image_format(struct vtn_builder *b, SpvImageFormat format)
    }
 }
 
+static struct vtn_type *
+vtn_type_layout_std430(struct vtn_builder *b, struct vtn_type *type,
+                       uint32_t *size_out, uint32_t *align_out)
+{
+   switch (type->base_type) {
+   case vtn_base_type_scalar: {
+      uint32_t comp_size = glsl_get_bit_size(type->type) / 8;
+      *size_out = comp_size;
+      *align_out = comp_size;
+      return type;
+   }
+
+   case vtn_base_type_vector: {
+      uint32_t comp_size = glsl_get_bit_size(type->type) / 8;
+      assert(type->length > 0 && type->length <= 4);
+      unsigned align_comps = type->length == 3 ? 4 : type->length;
+      *size_out = comp_size * type->length;
+      *align_out = comp_size * align_comps;
+      return type;
+   }
+
+   case vtn_base_type_matrix:
+   case vtn_base_type_array: {
+      /* We're going to add an array stride */
+      type = vtn_type_copy(b, type);
+      uint32_t elem_size, elem_align;
+      type->array_element = vtn_type_layout_std430(b, type->array_element,
+                                                   &elem_size, &elem_align);
+      type->stride = vtn_align_u32(elem_size, elem_align);
+      *size_out = type->stride * type->length;
+      *align_out = elem_align;
+      return type;
+   }
+
+   case vtn_base_type_struct: {
+      /* We're going to add member offsets */
+      type = vtn_type_copy(b, type);
+      uint32_t offset = 0;
+      uint32_t align = 0;
+      for (unsigned i = 0; i < type->length; i++) {
+         uint32_t mem_size, mem_align;
+         type->members[i] = vtn_type_layout_std430(b, type->members[i],
+                                                   &mem_size, &mem_align);
+         offset = vtn_align_u32(offset, mem_align);
+         type->offsets[i] = offset;
+         offset += mem_size;
+         align = MAX2(align, mem_align);
+      }
+      *size_out = offset;
+      *align_out = align;
+      return type;
+   }
+
+   default:
+      unreachable("Invalid SPIR-V type for std430");
+   }
+}
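+
+/* As a worked example of the rules in vtn_type_layout_std430 above: for
+ * struct { float f; vec3 v; float a[2]; }, f lands at offset 0, v is
+ * aligned up to 16 and occupies bytes 16..27, and the array gets a stride
+ * of 4 with elements at offsets 28 and 32, giving a total size of 36 and
+ * an alignment of 16.
+ */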
+
 static void
 vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
                 const uint32_t *w, unsigned count)
@@ -958,6 +1016,19 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
           */
          val->type->type = glsl_vector_type(GLSL_TYPE_UINT, 2);
       }
+
+      if (storage_class == SpvStorageClassWorkgroup &&
+          b->options->lower_workgroup_access_to_offsets) {
+         uint32_t size, align;
+         val->type->deref = vtn_type_layout_std430(b, val->type->deref,
+                                                   &size, &align);
+         val->type->length = size;
+         val->type->align = align;
+         /* These can actually be stored to nir_variables and used as SSA
+          * values so they need a real glsl_type.
+          */
+         val->type->type = glsl_uint_type();
+      }
       break;
    }
 
@@ -2181,6 +2252,32 @@ get_ssbo_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
    }
 }
 
+static nir_intrinsic_op
+get_shared_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
+{
+   switch (opcode) {
+   case SpvOpAtomicLoad:      return nir_intrinsic_load_shared;
+   case SpvOpAtomicStore:     return nir_intrinsic_store_shared;
+#define OP(S, N) case SpvOp##S: return nir_intrinsic_shared_##N;
+   OP(AtomicExchange,         atomic_exchange)
+   OP(AtomicCompareExchange,  atomic_comp_swap)
+   OP(AtomicIIncrement,       atomic_add)
+   OP(AtomicIDecrement,       atomic_add)
+   OP(AtomicIAdd,             atomic_add)
+   OP(AtomicISub,             atomic_add)
+   OP(AtomicSMin,             atomic_imin)
+   OP(AtomicUMin,             atomic_umin)
+   OP(AtomicSMax,             atomic_imax)
+   OP(AtomicUMax,             atomic_umax)
+   OP(AtomicAnd,              atomic_and)
+   OP(AtomicOr,               atomic_or)
+   OP(AtomicXor,              atomic_xor)
+#undef OP
+   default:
+      vtn_fail("Invalid shared atomic");
+   }
+}
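+
+/* Note that AtomicIIncrement, AtomicIDecrement, and AtomicISub all map to
+ * atomic_add above; the matching data source (an immediate 1, an immediate
+ * -1, or a negated operand) is supplied when the atomic's sources are
+ * filled in.
+ */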
+
 static nir_intrinsic_op
 get_var_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
 {
@@ -2246,7 +2343,8 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
    SpvMemorySemanticsMask semantics = w[5];
    */
 
-   if (ptr->mode == vtn_variable_mode_workgroup) {
+   if (ptr->mode == vtn_variable_mode_workgroup &&
+       !b->options->lower_workgroup_access_to_offsets) {
       nir_deref_var *deref = vtn_pointer_to_deref(b, ptr);
       const struct glsl_type *deref_type = nir_deref_tail(&deref->deref)->type;
       nir_intrinsic_op op = get_var_nir_atomic_op(b, opcode);
@@ -2286,27 +2384,36 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
 
       }
    } else {
-      vtn_assert(ptr->mode == vtn_variable_mode_ssbo);
       nir_ssa_def *offset, *index;
       offset = vtn_pointer_to_offset(b, ptr, &index, NULL);
 
-      nir_intrinsic_op op = get_ssbo_nir_atomic_op(b, opcode);
+      nir_intrinsic_op op;
+      if (ptr->mode == vtn_variable_mode_ssbo) {
+         op = get_ssbo_nir_atomic_op(b, opcode);
+      } else {
+         vtn_assert(ptr->mode == vtn_variable_mode_workgroup &&
+                    b->options->lower_workgroup_access_to_offsets);
+         op = get_shared_nir_atomic_op(b, opcode);
+      }
 
       atomic = nir_intrinsic_instr_create(b->nb.shader, op);
 
+      /* SSBO atomics take an extra block-index source that shared atomics
+       * don't have, so build the source list with a running index.
+       */
+      int src = 0;
       switch (opcode) {
       case SpvOpAtomicLoad:
          atomic->num_components = glsl_get_vector_elements(ptr->type->type);
-         atomic->src[0] = nir_src_for_ssa(index);
-         atomic->src[1] = nir_src_for_ssa(offset);
+         if (ptr->mode == vtn_variable_mode_ssbo)
+            atomic->src[src++] = nir_src_for_ssa(index);
+         atomic->src[src++] = nir_src_for_ssa(offset);
          break;
 
       case SpvOpAtomicStore:
          atomic->num_components = glsl_get_vector_elements(ptr->type->type);
          nir_intrinsic_set_write_mask(atomic, (1 << atomic->num_components) - 1);
-         atomic->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
-         atomic->src[1] = nir_src_for_ssa(index);
-         atomic->src[2] = nir_src_for_ssa(offset);
+         atomic->src[src++] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
+         if (ptr->mode == vtn_variable_mode_ssbo)
+            atomic->src[src++] = nir_src_for_ssa(index);
+         atomic->src[src++] = nir_src_for_ssa(offset);
          break;
 
       case SpvOpAtomicExchange:
@@ -2323,9 +2430,10 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
       case SpvOpAtomicAnd:
       case SpvOpAtomicOr:
       case SpvOpAtomicXor:
-         atomic->src[0] = nir_src_for_ssa(index);
-         atomic->src[1] = nir_src_for_ssa(offset);
-         fill_common_atomic_sources(b, opcode, w, &atomic->src[2]);
+         if (ptr->mode == vtn_variable_mode_ssbo)
+            atomic->src[src++] = nir_src_for_ssa(index);
+         atomic->src[src++] = nir_src_for_ssa(offset);
+         fill_common_atomic_sources(b, opcode, w, &atomic->src[src]);
          break;
 
       default:
diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h
index 0f92a74ec84cc27b342cad61234670a377bb08db..5f140b4618f88cf05073341338ade778ffcdf93b 100644
@@ -276,7 +276,10 @@ struct vtn_type {
    /* The value that declares this type.  Used for finding decorations */
    struct vtn_value *val;
 
-   /* Specifies the length of complex types. */
+   /* Specifies the length of complex types.
+    *
+    * For Workgroup pointers, this is the size of the referenced type.
+    */
    unsigned length;
 
    /* for arrays, matrices and pointers, the array stride */
@@ -327,6 +330,9 @@ struct vtn_type {
 
          /* Storage class for pointers */
          SpvStorageClass storage_class;
+
+         /* Required alignment of the pointed-to type, for Workgroup
+          * pointers
+          */
+         uint32_t align;
       };
 
       /* Members for image types */
@@ -441,6 +447,8 @@ struct vtn_variable {
    nir_variable *var;
    nir_variable **members;
 
+   /* Location of this variable in shared (workgroup) memory, assigned
+    * lazily on first use; -1 until then.
+    */
+   int shared_location;
+
    /**
     * In some early released versions of GLSLang, it implemented all function
     * calls by making copies of all parameters into temporary variables and
@@ -686,6 +694,13 @@ void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
 bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
                                     const uint32_t *words, unsigned count);
 
+/* Rounds v up to the next multiple of a, which must be a nonzero power of
+ * two; the assert a == (a & -a) checks exactly that.  For example,
+ * vtn_align_u32(13, 8) == 16.
+ */
+static inline uint32_t
+vtn_align_u32(uint32_t v, uint32_t a)
+{
+   assert(a != 0 && a == (a & -a));
+   return (v + a - 1) & ~(a - 1);
+}
+
 static inline uint64_t
 vtn_u64_literal(const uint32_t *w)
 {
diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c
index 3369ae05a955f2cdb33e0a789aca5b9586eb0103..06eab2dc15c244c22c5caa0affbef1c76328e714 100644
@@ -62,7 +62,9 @@ vtn_pointer_uses_ssa_offset(struct vtn_builder *b,
                             struct vtn_pointer *ptr)
 {
    return ptr->mode == vtn_variable_mode_ubo ||
-          ptr->mode == vtn_variable_mode_ssbo;
+          ptr->mode == vtn_variable_mode_ssbo ||
+          (ptr->mode == vtn_variable_mode_workgroup &&
+           b->options->lower_workgroup_access_to_offsets);
 }
 
 static bool
@@ -71,7 +73,9 @@ vtn_pointer_is_external_block(struct vtn_builder *b,
 {
    return ptr->mode == vtn_variable_mode_ssbo ||
           ptr->mode == vtn_variable_mode_ubo ||
-          ptr->mode == vtn_variable_mode_push_constant;
+          ptr->mode == vtn_variable_mode_push_constant ||
+          (ptr->mode == vtn_variable_mode_workgroup &&
+           b->options->lower_workgroup_access_to_offsets);
 }
 
 /* Dereference the given base pointer by the access chain */
@@ -167,7 +171,8 @@ vtn_ssa_offset_pointer_dereference(struct vtn_builder *b,
       /* We need ptr_type for the stride */
       vtn_assert(base->ptr_type);
       /* This must be a pointer to an actual element somewhere */
-      vtn_assert(block_index && offset);
+      vtn_assert(offset);
+      vtn_assert(block_index || base->mode == vtn_variable_mode_workgroup);
       /* We need at least one element in the chain */
       vtn_assert(deref_chain->length >= 1);
 
@@ -183,6 +188,7 @@ vtn_ssa_offset_pointer_dereference(struct vtn_builder *b,
       vtn_assert(!block_index);
 
       vtn_assert(base->var);
+      vtn_assert(base->ptr_type);
       switch (base->mode) {
       case vtn_variable_mode_ubo:
       case vtn_variable_mode_ssbo:
@@ -201,6 +207,22 @@ vtn_ssa_offset_pointer_dereference(struct vtn_builder *b,
          offset = nir_imm_int(&b->nb, 0);
          break;
 
+      case vtn_variable_mode_workgroup:
+         /* Assign the location on first use so that we don't end up
+          * bloating shared local memory (SLM) address space with variables
+          * that are never statically used.
+          */
+         if (base->var->shared_location < 0) {
+            assert(base->ptr_type->length > 0 && base->ptr_type->align > 0);
+            b->shader->num_shared = vtn_align_u32(b->shader->num_shared,
+                                                  base->ptr_type->align);
+            base->var->shared_location = b->shader->num_shared;
+            b->shader->num_shared += base->ptr_type->length;
+         }
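+         /* For example, two workgroup variables of (size 64, align 16) and
+          * (size 4, align 4), first dereferenced in that order, would get
+          * shared_locations 0 and 64, leaving num_shared at 68.
+          */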
+
+         block_index = NULL;
+         offset = nir_imm_int(&b->nb, base->var->shared_location);
+         break;
+
       default:
          vtn_fail("Invalid offset pointer mode");
       }
@@ -837,6 +859,9 @@ vtn_block_load(struct vtn_builder *b, struct vtn_pointer *src)
       vtn_access_chain_get_offset_size(b, src->chain, src->var->type,
                                        &access_offset, &access_size);
       break;
+   case vtn_variable_mode_workgroup:
+      op = nir_intrinsic_load_shared;
+      break;
    default:
       vtn_fail("Invalid block variable mode");
    }
@@ -861,6 +886,9 @@ vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src,
    case vtn_variable_mode_ssbo:
       op = nir_intrinsic_store_ssbo;
       break;
+   case vtn_variable_mode_workgroup:
+      op = nir_intrinsic_store_shared;
+      break;
    default:
       vtn_fail("Invalid block variable mode");
    }
@@ -946,7 +974,8 @@ vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src,
                    struct vtn_pointer *dest)
 {
    if (vtn_pointer_is_external_block(b, dest)) {
-      vtn_assert(dest->mode == vtn_variable_mode_ssbo);
+      vtn_assert(dest->mode == vtn_variable_mode_ssbo ||
+                 dest->mode == vtn_variable_mode_workgroup);
       vtn_block_store(b, src, dest);
    } else {
       _vtn_variable_load_store(b, false, dest, &src);
@@ -1526,7 +1555,7 @@ vtn_pointer_to_ssa(struct vtn_builder *b, struct vtn_pointer *ptr)
                  ptr->mode == vtn_variable_mode_ssbo);
       return nir_vec2(&b->nb, ptr->block_index, ptr->offset);
    } else {
-      vtn_fail("Invalid pointer");
+      vtn_assert(ptr->mode == vtn_variable_mode_workgroup);
       return ptr->offset;
    }
 }
@@ -1555,7 +1584,7 @@ vtn_pointer_from_ssa(struct vtn_builder *b, nir_ssa_def *ssa,
       ptr->offset = nir_channel(&b->nb, ssa, 1);
    } else {
       vtn_assert(ssa->num_components == 1);
-      unreachable("Invalid pointer");
+      vtn_assert(ptr->mode == vtn_variable_mode_workgroup);
       ptr->block_index = NULL;
       ptr->offset = ssa;
    }
@@ -1630,7 +1659,6 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val,
    case vtn_variable_mode_global:
    case vtn_variable_mode_image:
    case vtn_variable_mode_sampler:
-   case vtn_variable_mode_workgroup:
       /* For these, we create the variable normally */
       var->var = rzalloc(b->shader, nir_variable);
       var->var->name = ralloc_strdup(var->var, val->name);
@@ -1648,6 +1676,18 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val,
       }
       break;
 
+   case vtn_variable_mode_workgroup:
+      if (b->options->lower_workgroup_access_to_offsets) {
+         /* The shared location is assigned lazily on first use */
+         var->shared_location = -1;
+      } else {
+         /* Create the variable normally */
+         var->var = rzalloc(b->shader, nir_variable);
+         var->var->name = ralloc_strdup(var->var, val->name);
+         var->var->type = var->type->type;
+         var->var->data.mode = nir_var_shared;
+      }
+      break;
+
    case vtn_variable_mode_input:
    case vtn_variable_mode_output: {
       /* In order to know whether or not we're a per-vertex inout, we need