spirv: reduce array size in vtn_handle_constant

[mesa.git] / src / compiler / spirv / spirv_to_nir.c
diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c

index 2136c9df705d88706b0ebee75ae775b1e6a0c970..6fae48074f7c1e913b835cc63a58ea32e932cc5b 100644 (file)
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -32,6 +32,8 @@
  #include "nir/nir_deref.h"
  #include "spirv_info.h"
  
+#include "util/u_math.h"
+
  #include <stdio.h>
  
  void
@@ -819,6 +821,13 @@ struct_member_decoration_cb(struct vtn_builder *b,
        break;
  
     case SpvDecorationCPacked:
+      if (b->shader->info.stage != MESA_SHADER_KERNEL)
+         vtn_warn("Decoration only allowed for CL-style kernels: %s",
+                  spirv_decoration_to_string(dec->decoration));
+      else
+         ctx->type->packed = true;
+      break;
+
     case SpvDecorationSaturatedConversion:
     case SpvDecorationFuncParamAttr:
     case SpvDecorationFPRoundingMode:
@@ -898,6 +907,21 @@ struct_member_matrix_stride_cb(struct vtn_builder *b,
     ctx->fields[member].type = ctx->type->members[member]->type;
  }
  
+static void
+struct_block_decoration_cb(struct vtn_builder *b,
+                           struct vtn_value *val, int member,
+                           const struct vtn_decoration *dec, void *ctx)
+{
+   if (member != -1)
+      return;
+
+   struct vtn_type *type = val->type;
+   if (dec->decoration == SpvDecorationBlock)
+      type->block = true;
+   else if (dec->decoration == SpvDecorationBufferBlock)
+      type->buffer_block = true;
+}
+
  static void
  type_decoration_cb(struct vtn_builder *b,
                     struct vtn_value *val, int member,
@@ -919,11 +943,11 @@ type_decoration_cb(struct vtn_builder *b,
        break;
     case SpvDecorationBlock:
        vtn_assert(type->base_type == vtn_base_type_struct);
-      type->block = true;
+      vtn_assert(type->block);
        break;
     case SpvDecorationBufferBlock:
        vtn_assert(type->base_type == vtn_base_type_struct);
-      type->buffer_block = true;
+      vtn_assert(type->buffer_block);
        break;
     case SpvDecorationGLSLShared:
     case SpvDecorationGLSLPacked:
@@ -979,6 +1003,13 @@ type_decoration_cb(struct vtn_builder *b,
        break;
  
     case SpvDecorationCPacked:
+      if (b->shader->info.stage != MESA_SHADER_KERNEL)
+         vtn_warn("Decoration only allowed for CL-style kernels: %s",
+                  spirv_decoration_to_string(dec->decoration));
+      else
+         type->packed = true;
+      break;
+
     case SpvDecorationSaturatedConversion:
     case SpvDecorationFuncParamAttr:
     case SpvDecorationFPRoundingMode:
@@ -1228,7 +1259,8 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
  
        val->type->base_type = vtn_base_type_array;
        val->type->array_element = array_element;
-      val->type->stride = 0;
+      if (b->shader->info.stage == MESA_SHADER_KERNEL)
+         val->type->stride = glsl_get_cl_size(array_element->type);
  
        vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
        val->type->type = glsl_array_type(array_element->type, val->type->length,
@@ -1242,6 +1274,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
        val->type->length = num_fields;
        val->type->members = ralloc_array(b, struct vtn_type *, num_fields);
        val->type->offsets = ralloc_array(b, unsigned, num_fields);
+      val->type->packed = false;
  
        NIR_VLA(struct glsl_struct_field, fields, count);
        for (unsigned i = 0; i < num_fields; i++) {
@@ -1255,6 +1288,15 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
           };
        }
  
+      if (b->shader->info.stage == MESA_SHADER_KERNEL) {
+         unsigned offset = 0;
+         for (unsigned i = 0; i < num_fields; i++) {
+            offset = align(offset, glsl_get_cl_alignment(fields[i].type));
+            fields[i].offset = offset;
+            offset += glsl_get_cl_size(fields[i].type);
+         }
+      }
+
        struct member_decoration_ctx ctx = {
           .num_fields = num_fields,
           .fields = fields,
@@ -1264,9 +1306,21 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
        vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx);
        vtn_foreach_decoration(b, val, struct_member_matrix_stride_cb, &ctx);
  
-      const char *name = val->name ? val->name : "struct";
+      vtn_foreach_decoration(b, val, struct_block_decoration_cb, NULL);
+
+      const char *name = val->name;
  
-      val->type->type = glsl_struct_type(fields, num_fields, name);
+      if (val->type->block || val->type->buffer_block) {
+         /* Packing will be ignored since types coming from SPIR-V are
+          * explicitly laid out.
+          */
+         val->type->type = glsl_interface_type(fields, num_fields,
+                                               /* packing */ 0, false,
+                                               name ? name : "block");
+      } else {
+         val->type->type = glsl_struct_type(fields, num_fields,
+                                            name ? name : "struct", false);
+      }
        break;
     }
  
@@ -1321,6 +1375,13 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
           case SpvStorageClassWorkgroup:
              val->type->type = b->options->shared_ptr_type;
              break;
+         case SpvStorageClassCrossWorkgroup:
+            val->type->type = b->options->global_ptr_type;
+            break;
+         case SpvStorageClassFunction:
+            if (b->physical_ptrs)
+               val->type->type = b->options->temp_ptr_type;
+            break;
           default:
              /* In this case, no variable pointers are allowed so all deref
               * chains are complete back to the variable and it doesn't matter
@@ -1345,6 +1406,19 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
  
           vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
  
+         if (b->physical_ptrs) {
+            switch (storage_class) {
+            case SpvStorageClassFunction:
+            case SpvStorageClassWorkgroup:
+            case SpvStorageClassCrossWorkgroup:
+               val->type->stride = align(glsl_get_cl_size(val->type->deref->type),
+                                         glsl_get_cl_alignment(val->type->deref->type));
+               break;
+            default:
+               break;
+            }
+         }
+
           if (storage_class == SpvStorageClassWorkgroup &&
               b->options->lower_workgroup_access_to_offsets) {
              uint32_t size, align;
@@ -1380,7 +1454,9 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
           vtn_fail("Invalid SPIR-V image dimensionality");
        }
  
-      bool is_shadow = w[4];
+      /* w[4]: as per Vulkan spec "Validation Rules within a Module",
+       *       The “Depth” operand of OpTypeImage is ignored.
+       */
        bool is_array = w[5];
        bool multisampled = w[6];
        unsigned sampled = w[7];
@@ -1406,10 +1482,9 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
           glsl_get_base_type(sampled_type->type);
        if (sampled == 1) {
           val->type->sampled = true;
-         val->type->type = glsl_sampler_type(dim, is_shadow, is_array,
+         val->type->type = glsl_sampler_type(dim, false, is_array,
                                               sampled_base_type);
        } else if (sampled == 2) {
-         vtn_assert(!is_shadow);
           val->type->sampled = false;
           val->type->type = glsl_image_type(dim, is_array, sampled_base_type);
        } else {
@@ -1930,7 +2005,7 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
           nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap,
                                                       nir_alu_type_get_type_size(src_alu_type),
                                                       nir_alu_type_get_type_size(dst_alu_type));
-         nir_const_value src[4];
+         nir_const_value src[3];
  
           for (unsigned i = 0; i < count - 4; i++) {
              struct vtn_value *src_val =
@@ -2021,6 +2096,7 @@ vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
              child_type = glsl_get_array_element(type);
              break;
           case GLSL_TYPE_STRUCT:
+         case GLSL_TYPE_INTERFACE:
              child_type = glsl_get_struct_field(type, i);
              break;
           default:
@@ -2270,7 +2346,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
        (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
  
     /* Now we need to handle some number of optional arguments */
-   const struct vtn_ssa_value *gather_offsets = NULL;
+   struct vtn_value *gather_offsets = NULL;
     if (idx < count) {
        uint32_t operands = w[idx++];
  
@@ -2298,9 +2374,8 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
           (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset);
  
        if (operands & SpvImageOperandsConstOffsetsMask) {
-         nir_tex_src none = {0};
-         gather_offsets = vtn_ssa_value(b, w[idx++]);
-         (*p++) = none;
+         vtn_assert(texop == nir_texop_tg4);
+         gather_offsets = vtn_value(b, w[idx++], vtn_value_type_constant);
        }
  
        if (operands & SpvImageOperandsSampleMask) {
@@ -2332,6 +2407,12 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
        is_shadow && glsl_get_components(ret_type->type) == 1;
     instr->component = gather_component;
  
+   if (sampled.image && (sampled.image->access & ACCESS_NON_UNIFORM))
+      instr->texture_non_uniform = true;
+
+   if (sampled.sampler && (sampled.sampler->access & ACCESS_NON_UNIFORM))
+      instr->sampler_non_uniform = true;
+
     switch (glsl_get_sampler_result_type(image_type)) {
     case GLSL_TYPE_FLOAT:   instr->dest_type = nir_type_float;     break;
     case GLSL_TYPE_INT:     instr->dest_type = nir_type_int;       break;
@@ -2347,63 +2428,40 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
     vtn_assert(glsl_get_vector_elements(ret_type->type) ==
                nir_tex_instr_dest_size(instr));
  
-   nir_ssa_def *def;
-   nir_instr *instruction;
     if (gather_offsets) {
-      vtn_assert(glsl_get_base_type(gather_offsets->type) == GLSL_TYPE_ARRAY);
-      vtn_assert(glsl_get_length(gather_offsets->type) == 4);
-      nir_tex_instr *instrs[4] = {instr, NULL, NULL, NULL};
-
-      /* Copy the current instruction 4x */
-      for (uint32_t i = 1; i < 4; i++) {
-         instrs[i] = nir_tex_instr_create(b->shader, instr->num_srcs);
-         instrs[i]->op = instr->op;
-         instrs[i]->coord_components = instr->coord_components;
-         instrs[i]->sampler_dim = instr->sampler_dim;
-         instrs[i]->is_array = instr->is_array;
-         instrs[i]->is_shadow = instr->is_shadow;
-         instrs[i]->is_new_style_shadow = instr->is_new_style_shadow;
-         instrs[i]->component = instr->component;
-         instrs[i]->dest_type = instr->dest_type;
-
-         memcpy(instrs[i]->src, srcs, instr->num_srcs * sizeof(*instr->src));
-
-         nir_ssa_dest_init(&instrs[i]->instr, &instrs[i]->dest,
-                           nir_tex_instr_dest_size(instr), 32, NULL);
-      }
-
-      /* Fill in the last argument with the offset from the passed in offsets
-       * and insert the instruction into the stream.
-       */
+      vtn_fail_if(gather_offsets->type->base_type != vtn_base_type_array ||
+                  gather_offsets->type->length != 4,
+                  "ConstOffsets must be an array of size four of vectors "
+                  "of two integer components");
+
+      struct vtn_type *vec_type = gather_offsets->type->array_element;
+      vtn_fail_if(vec_type->base_type != vtn_base_type_vector ||
+                  vec_type->length != 2 ||
+                  !glsl_type_is_integer(vec_type->type),
+                  "ConstOffsets must be an array of size four of vectors "
+                  "of two integer components");
+
+      unsigned bit_size = glsl_get_bit_size(vec_type->type);
        for (uint32_t i = 0; i < 4; i++) {
-         nir_tex_src src;
-         src.src = nir_src_for_ssa(gather_offsets->elems[i]->def);
-         src.src_type = nir_tex_src_offset;
-         instrs[i]->src[instrs[i]->num_srcs - 1] = src;
-         nir_builder_instr_insert(&b->nb, &instrs[i]->instr);
-      }
-
-      /* Combine the results of the 4 instructions by taking their .w
-       * components
-       */
-      nir_alu_instr *vec4 = nir_alu_instr_create(b->shader, nir_op_vec4);
-      nir_ssa_dest_init(&vec4->instr, &vec4->dest.dest, 4, 32, NULL);
-      vec4->dest.write_mask = 0xf;
-      for (uint32_t i = 0; i < 4; i++) {
-         vec4->src[i].src = nir_src_for_ssa(&instrs[i]->dest.ssa);
-         vec4->src[i].swizzle[0] = 3;
+         const nir_const_value *cvec =
+            &gather_offsets->constant->elements[i]->values[0];
+         for (uint32_t j = 0; j < 2; j++) {
+            switch (bit_size) {
+            case 8:  instr->tg4_offsets[i][j] = cvec->i8[j];    break;
+            case 16: instr->tg4_offsets[i][j] = cvec->i16[j];   break;
+            case 32: instr->tg4_offsets[i][j] = cvec->i32[j];   break;
+            case 64: instr->tg4_offsets[i][j] = cvec->i64[j];   break;
+            default:
+               vtn_fail("Unsupported bit size");
+            }
+         }
        }
-      def = &vec4->dest.dest.ssa;
-      instruction = &vec4->instr;
-   } else {
-      def = &instr->dest.ssa;
-      instruction = &instr->instr;
     }
  
     val->ssa = vtn_create_ssa_value(b, ret_type->type);
-   val->ssa->def = def;
+   val->ssa->def = &instr->dest.ssa;
  
-   nir_builder_instr_insert(&b->nb, instruction);
+   nir_builder_instr_insert(&b->nb, &instr->instr);
  }
  
  static void
@@ -3046,12 +3104,7 @@ nir_ssa_def *
  vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src,
                             nir_ssa_def *index)
  {
-   nir_ssa_def *dest = vtn_vector_extract(b, src, 0);
-   for (unsigned i = 1; i < src->num_components; i++)
-      dest = nir_bcsel(&b->nb, nir_ieq_imm(&b->nb, index, i),
-                       vtn_vector_extract(b, src, i), dest);
-
-   return dest;
+   return nir_vector_extract(&b->nb, src, nir_i2i(&b->nb, index, 32));
  }
  
  nir_ssa_def *
@@ -3723,6 +3776,11 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
           spv_check_supported(physical_storage_buffer_address, cap);
           break;
  
+      case SpvCapabilityComputeDerivativeGroupQuadsNV:
+      case SpvCapabilityComputeDerivativeGroupLinearNV:
+         spv_check_supported(derivative_group, cap);
+         break;
+
        default:
           vtn_fail("Unhandled capability");
        }
@@ -3740,12 +3798,18 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
                       "AddressingModelPhysical32 only supported for kernels");
           b->shader->info.cs.ptr_size = 32;
           b->physical_ptrs = true;
+         b->options->shared_ptr_type = glsl_uint_type();
+         b->options->global_ptr_type = glsl_uint_type();
+         b->options->temp_ptr_type = glsl_uint_type();
           break;
        case SpvAddressingModelPhysical64:
           vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
                       "AddressingModelPhysical64 only supported for kernels");
           b->shader->info.cs.ptr_size = 64;
           b->physical_ptrs = true;
+         b->options->shared_ptr_type = glsl_uint64_t_type();
+         b->options->global_ptr_type = glsl_uint64_t_type();
+         b->options->temp_ptr_type = glsl_uint64_t_type();
           break;
        case SpvAddressingModelLogical:
           vtn_fail_if(b->shader->info.stage >= MESA_SHADER_STAGES,
@@ -3960,6 +4024,16 @@ vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point,
        vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
        break;
  
+   case SpvExecutionModeDerivativeGroupQuadsNV:
+      vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
+      b->shader->info.cs.derivative_group = DERIVATIVE_GROUP_QUADS;
+      break;
+
+   case SpvExecutionModeDerivativeGroupLinearNV:
+      vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
+      b->shader->info.cs.derivative_group = DERIVATIVE_GROUP_LINEAR;
+      break;
+
     default:
        vtn_fail("Unhandled execution mode");
     }
@@ -4075,6 +4149,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
     case SpvOpAccessChain:
     case SpvOpPtrAccessChain:
     case SpvOpInBoundsAccessChain:
+   case SpvOpInBoundsPtrAccessChain:
     case SpvOpArrayLength:
     case SpvOpConvertPtrToU:
     case SpvOpConvertUToPtr:
@@ -4391,6 +4466,10 @@ vtn_create_builder(const uint32_t *words, size_t word_count,
  {
     /* Initialize the vtn_builder object */
     struct vtn_builder *b = rzalloc(NULL, struct vtn_builder);
+   struct spirv_to_nir_options *dup_options =
+      ralloc(b, struct spirv_to_nir_options);
+   *dup_options = *options;
+
     b->spirv = words;
     b->spirv_word_count = word_count;
     b->file = NULL;
@@ -4399,7 +4478,7 @@ vtn_create_builder(const uint32_t *words, size_t word_count,
     exec_list_make_empty(&b->functions);
     b->entry_point_stage = stage;
     b->entry_point_name = entry_point_name;
-   b->options = options;
+   b->options = dup_options;
  
     /*
      * Handle the SPIR-V header (first 5 dwords).
@@ -4442,6 +4521,68 @@ vtn_create_builder(const uint32_t *words, size_t word_count,
     return NULL;
  }
  
+static nir_function *
+vtn_emit_kernel_entry_point_wrapper(struct vtn_builder *b,
+                                    nir_function *entry_point)
+{
+   vtn_assert(entry_point == b->entry_point->func->impl->function);
+   vtn_fail_if(!entry_point->name, "entry points are required to have a name");
+   const char *func_name =
+      ralloc_asprintf(b->shader, "__wrapped_%s", entry_point->name);
+
+   /* we shouldn't have any inputs yet */
+   vtn_assert(!entry_point->shader->num_inputs);
+   vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
+
+   nir_function *main_entry_point = nir_function_create(b->shader, func_name);
+   main_entry_point->impl = nir_function_impl_create(main_entry_point);
+   nir_builder_init(&b->nb, main_entry_point->impl);
+   b->nb.cursor = nir_after_cf_list(&main_entry_point->impl->body);
+   b->func_param_idx = 0;
+
+   nir_call_instr *call = nir_call_instr_create(b->nb.shader, entry_point);
+
+   for (unsigned i = 0; i < entry_point->num_params; ++i) {
+      struct vtn_type *param_type = b->entry_point->func->type->params[i];
+
+      /* consider all pointers to function memory to be parameters passed
+       * by value
+       */
+      bool is_by_val = param_type->base_type == vtn_base_type_pointer &&
+         param_type->storage_class == SpvStorageClassFunction;
+
+      /* input variable */
+      nir_variable *in_var = rzalloc(b->nb.shader, nir_variable);
+      in_var->data.mode = nir_var_shader_in;
+      in_var->data.read_only = true;
+      in_var->data.location = i;
+
+      if (is_by_val)
+         in_var->type = param_type->deref->type;
+      else
+         in_var->type = param_type->type;
+
+      nir_shader_add_variable(b->nb.shader, in_var);
+      b->nb.shader->num_inputs++;
+
+      /* we have to copy the entire variable into function memory */
+      if (is_by_val) {
+         nir_variable *copy_var =
+            nir_local_variable_create(main_entry_point->impl, in_var->type,
+                                      "copy_in");
+         nir_copy_var(&b->nb, copy_var, in_var);
+         call->params[i] =
+            nir_src_for_ssa(&nir_build_deref_var(&b->nb, copy_var)->dest.ssa);
+      } else {
+         call->params[i] = nir_src_for_ssa(nir_load_var(&b->nb, in_var));
+      }
+   }
+
+   nir_builder_instr_insert(&b->nb, &call->instr);
+
+   return main_entry_point;
+}
+
  nir_function *
  spirv_to_nir(const uint32_t *words, size_t word_count,
               struct nir_spirv_specialization *spec, unsigned num_spec,
@@ -4531,6 +4672,10 @@ spirv_to_nir(const uint32_t *words, size_t word_count,
     nir_function *entry_point = b->entry_point->func->impl->function;
     vtn_assert(entry_point);
  
+   /* post process entry_points with input params */
+   if (entry_point->num_params && b->shader->info.stage == MESA_SHADER_KERNEL)
+      entry_point = vtn_emit_kernel_entry_point_wrapper(b, entry_point);
+
     entry_point->is_entrypoint = true;
  
     /* When multiple shader stages exist in the same SPIR-V module, we