anv,i965: Lower away image derefs in the driver
authorJason Ekstrand <jason.ekstrand@intel.com>
Thu, 16 Aug 2018 21:23:10 +0000 (16:23 -0500)
committerJason Ekstrand <jason.ekstrand@intel.com>
Wed, 29 Aug 2018 19:04:03 +0000 (14:04 -0500)
Previously, the back-end compiler turned image access into magic uniform
reads and there was a complex contract between back-end compiler and
driver about setting up and filling out those params.  As of this
commit, both drivers now lower image_deref_load_param_intel intrinsics
to load_uniform intrinsics controlled by the driver and lower the other
image_deref_* intrinsics to image_* intrinsics which take an actual
binding table index.  There are still "magic" uniforms but they are now
added and controlled entirely by the driver and that contract no longer
spans components.

This also has the side-effect of making most image use compile-time
binding table indices.  Previously, all image access pulled the binding
table index from a uniform.  Part of the reason for this was that the
magic uniforms made it difficult to decouple binding table indices from
the uniforms and, since they are indexed completely differently
(especially in Vulkan), it was hard to pull them apart.  Now that the
driver is handling both, it's trivial to decouple the two and provide
actual binding table indices.

Shader-db results on Kaby Lake:

    total instructions in shared programs: 15166872 -> 15164293 (-0.02%)
    instructions in affected programs: 115834 -> 113255 (-2.23%)
    helped: 191
    HURT: 0

    total cycles in shared programs: 571311495 -> 571196465 (-0.02%)
    cycles in affected programs: 4757115 -> 4642085 (-2.42%)
    helped: 73
    HURT: 67

    total spills in shared programs: 10951 -> 10926 (-0.23%)
    spills in affected programs: 742 -> 717 (-3.37%)
    helped: 7
    HURT: 0

    total fills in shared programs: 22226 -> 22201 (-0.11%)
    fills in affected programs: 1146 -> 1121 (-2.18%)
    helped: 7
    HURT: 0

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/compiler/nir/nir_intrinsics.py
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs.h
src/intel/compiler/brw_fs_nir.cpp
src/intel/compiler/brw_nir.h
src/intel/compiler/brw_nir_lower_image_load_store.c
src/intel/vulkan/anv_nir_apply_pipeline_layout.c
src/intel/vulkan/anv_pipeline.c
src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
src/mesa/drivers/dri/i965/brw_program.c

index d7184dadbbca94c4f19e671c5e720cafa21919f8..b06b38fc2ce3c4bcb4363d3a1d3ef8e8cbe1ef6b 100644 (file)
@@ -331,9 +331,9 @@ image("samples", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
 # variable. The const index specifies which of the six parameters to load.
 intrinsic("image_deref_load_param_intel", src_comp=[1], dest_comp=0,
           indices=[BASE], flags=[CAN_ELIMINATE, CAN_REORDER])
-intrinsic("image_deref_load_raw_intel", src_comp=[1, 1], dest_comp=0,
-          flags=[CAN_ELIMINATE])
-intrinsic("image_deref_store_raw_intel", src_comp=[1, 1, 0])
+image("load_raw_intel", src_comp=[1], dest_comp=0,
+      flags=[CAN_ELIMINATE])
+image("store_raw_intel", src_comp=[1, 0])
 
 # Vulkan descriptor set intrinsics
 #
index 58736503f9aa1323e199eb0e814471f6c845ba8d..02a7a33c4d7440eb58e72476feca10d79bacf218 100644 (file)
@@ -494,16 +494,14 @@ type_size_scalar(const struct glsl_type *type)
       }
       return size;
    case GLSL_TYPE_SAMPLER:
-      /* Samplers take up no register space, since they're baked in at
-       * link time.
-       */
-      return 0;
    case GLSL_TYPE_ATOMIC_UINT:
+   case GLSL_TYPE_IMAGE:
+      /* Samplers, atomics, and images take up no register space, since
+       * they're baked in at link time.
+       */
       return 0;
    case GLSL_TYPE_SUBROUTINE:
       return 1;
-   case GLSL_TYPE_IMAGE:
-      return BRW_IMAGE_PARAM_SIZE;
    case GLSL_TYPE_VOID:
    case GLSL_TYPE_ERROR:
    case GLSL_TYPE_INTERFACE:
index 52220db2dc00d13eef9f1668846e6d82a54f5871..aba19d5ab2ca062f63916d96dd39a1740124ad51 100644 (file)
@@ -216,6 +216,8 @@ public:
                               nir_intrinsic_instr *instr);
    void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
                               nir_intrinsic_instr *instr);
+   fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld,
+                                        nir_intrinsic_instr *instr);
    void nir_emit_intrinsic(const brw::fs_builder &bld,
                            nir_intrinsic_instr *instr);
    void nir_emit_tes_intrinsic(const brw::fs_builder &bld,
@@ -235,7 +237,6 @@ public:
    fs_reg get_nir_src(const nir_src &src);
    fs_reg get_nir_src_imm(const nir_src &src);
    fs_reg get_nir_dest(const nir_dest &dest);
-   fs_reg get_nir_image_deref(nir_deref_instr *deref);
    fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
    void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
                      unsigned wr_mask);
index b2be91f91174f58be338c59062ea8bc04dcb7165..aaba0e2a693ddfd3318bf7e72478b45baebfd490 100644 (file)
@@ -1694,70 +1694,6 @@ fs_visitor::get_nir_dest(const nir_dest &dest)
    }
 }
 
-fs_reg
-fs_visitor::get_nir_image_deref(nir_deref_instr *deref)
-{
-   fs_reg arr_offset = brw_imm_ud(0);
-   unsigned array_size = BRW_IMAGE_PARAM_SIZE * 4;
-   nir_deref_instr *head = deref;
-   while (head->deref_type != nir_deref_type_var) {
-      assert(head->deref_type == nir_deref_type_array);
-
-      /* This level's element size is the previous level's array size */
-      const unsigned elem_size = array_size;
-
-      fs_reg index = retype(get_nir_src_imm(head->arr.index),
-                            BRW_REGISTER_TYPE_UD);
-      if (arr_offset.file == BRW_IMMEDIATE_VALUE &&
-          index.file == BRW_IMMEDIATE_VALUE) {
-         arr_offset.ud += index.ud * elem_size;
-      } else if (index.file == BRW_IMMEDIATE_VALUE) {
-         bld.ADD(arr_offset, arr_offset, brw_imm_ud(index.ud * elem_size));
-      } else {
-         fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
-         bld.MUL(tmp, index, brw_imm_ud(elem_size));
-         bld.ADD(tmp, tmp, arr_offset);
-         arr_offset = tmp;
-      }
-
-      head = nir_deref_instr_parent(head);
-      assert(glsl_type_is_array(head->type));
-      array_size = elem_size * glsl_get_length(head->type);
-   }
-
-   assert(head->deref_type == nir_deref_type_var);
-   const unsigned max_arr_offset = array_size - (BRW_IMAGE_PARAM_SIZE * 4);
-   fs_reg image(UNIFORM, head->var->data.driver_location / 4,
-                BRW_REGISTER_TYPE_UD);
-
-   if (arr_offset.file == BRW_IMMEDIATE_VALUE) {
-      /* The offset is in bytes but we want it in dwords */
-      return offset(image, bld, MIN2(arr_offset.ud, max_arr_offset) / 4);
-   } else {
-      /* Accessing an invalid surface index with the dataport can result
-       * in a hang.  According to the spec "if the index used to
-       * select an individual element is negative or greater than or
-       * equal to the size of the array, the results of the operation
-       * are undefined but may not lead to termination" -- which is one
-       * of the possible outcomes of the hang.  Clamp the index to
-       * prevent access outside of the array bounds.
-       */
-      bld.emit_minmax(arr_offset, arr_offset, brw_imm_ud(max_arr_offset),
-                      BRW_CONDITIONAL_L);
-
-      /* Emit a pile of MOVs to load the uniform into a temporary.  The
-       * dead-code elimination pass will get rid of what we don't use.
-       */
-      fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, BRW_IMAGE_PARAM_SIZE);
-      for (unsigned j = 0; j < BRW_IMAGE_PARAM_SIZE; j++) {
-         bld.emit(SHADER_OPCODE_MOV_INDIRECT,
-                  offset(tmp, bld, j), offset(image, bld, j),
-                  arr_offset, brw_imm_ud(max_arr_offset + 4));
-      }
-      return tmp;
-   }
-}
-
 void
 fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
                          unsigned wr_mask)
@@ -3847,6 +3783,43 @@ brw_cond_mod_for_nir_reduction_op(nir_op op)
    }
 }
 
+fs_reg
+fs_visitor::get_nir_image_intrinsic_image(const brw::fs_builder &bld,
+                                          nir_intrinsic_instr *instr)
+{
+   fs_reg image = retype(get_nir_src_imm(instr->src[0]), BRW_REGISTER_TYPE_UD);
+
+   if (stage_prog_data->binding_table.image_start > 0) {
+      if (image.file == BRW_IMMEDIATE_VALUE) {
+         image.d += stage_prog_data->binding_table.image_start;
+      } else {
+         bld.ADD(image, image,
+                 brw_imm_d(stage_prog_data->binding_table.image_start));
+      }
+   }
+
+   return bld.emit_uniformize(image);
+}
+
+static unsigned
+image_intrinsic_coord_components(nir_intrinsic_instr *instr)
+{
+   switch (nir_intrinsic_image_dim(instr)) {
+   case GLSL_SAMPLER_DIM_1D:
+      return 1 + nir_intrinsic_image_array(instr);
+   case GLSL_SAMPLER_DIM_2D:
+   case GLSL_SAMPLER_DIM_RECT:
+      return 2 + nir_intrinsic_image_array(instr);
+   case GLSL_SAMPLER_DIM_3D:
+   case GLSL_SAMPLER_DIM_CUBE:
+      return 3;
+   case GLSL_SAMPLER_DIM_BUF:
+      return 1;
+   default:
+      unreachable("Invalid image dimension");
+   }
+}
+
 void
 fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
 {
@@ -3855,40 +3828,37 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       dest = get_nir_dest(instr->dest);
 
    switch (instr->intrinsic) {
-   case nir_intrinsic_image_deref_load:
-   case nir_intrinsic_image_deref_store:
-   case nir_intrinsic_image_deref_atomic_add:
-   case nir_intrinsic_image_deref_atomic_min:
-   case nir_intrinsic_image_deref_atomic_max:
-   case nir_intrinsic_image_deref_atomic_and:
-   case nir_intrinsic_image_deref_atomic_or:
-   case nir_intrinsic_image_deref_atomic_xor:
-   case nir_intrinsic_image_deref_atomic_exchange:
-   case nir_intrinsic_image_deref_atomic_comp_swap: {
+   case nir_intrinsic_image_load:
+   case nir_intrinsic_image_store:
+   case nir_intrinsic_image_atomic_add:
+   case nir_intrinsic_image_atomic_min:
+   case nir_intrinsic_image_atomic_max:
+   case nir_intrinsic_image_atomic_and:
+   case nir_intrinsic_image_atomic_or:
+   case nir_intrinsic_image_atomic_xor:
+   case nir_intrinsic_image_atomic_exchange:
+   case nir_intrinsic_image_atomic_comp_swap: {
       if (stage == MESA_SHADER_FRAGMENT &&
-          instr->intrinsic != nir_intrinsic_image_deref_load)
+          instr->intrinsic != nir_intrinsic_image_load)
          brw_wm_prog_data(prog_data)->has_side_effects = true;
 
-      /* Get the referenced image variable and type. */
-      nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
-      const glsl_type *type = deref->type;
-
       /* Get some metadata from the image intrinsic. */
       const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
-      const unsigned dims = type->coordinate_components();
+      const unsigned dims = image_intrinsic_coord_components(instr);
+      const GLenum format = nir_intrinsic_format(instr);
       const unsigned dest_components = nir_intrinsic_dest_components(instr);
 
       /* Get the arguments of the image intrinsic. */
-      const fs_reg image = get_nir_image_deref(deref);
+      const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
       const fs_reg coords = retype(get_nir_src(instr->src[1]),
                                    BRW_REGISTER_TYPE_UD);
       fs_reg tmp;
 
       /* Emit an image load, store or atomic op. */
-      if (instr->intrinsic == nir_intrinsic_image_deref_load) {
+      if (instr->intrinsic == nir_intrinsic_image_load) {
          tmp = emit_typed_read(bld, image, coords, dims,
                                instr->num_components);
-      } else if (instr->intrinsic == nir_intrinsic_image_deref_store) {
+      } else if (instr->intrinsic == nir_intrinsic_image_store) {
          const fs_reg src0 = get_nir_src(instr->src[3]);
          emit_typed_write(bld, image, coords, src0, dims,
                           instr->num_components);
@@ -3897,7 +3867,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
          unsigned num_srcs = info->num_srcs;
 
          switch (instr->intrinsic) {
-         case nir_intrinsic_image_deref_atomic_add:
+         case nir_intrinsic_image_atomic_add:
             assert(num_srcs == 4);
 
             op = get_op_for_atomic_add(instr, 3);
@@ -3905,27 +3875,27 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
             if (op != BRW_AOP_ADD)
                num_srcs = 3;
             break;
-         case nir_intrinsic_image_deref_atomic_min:
-            op = (get_image_base_type(type) == BRW_REGISTER_TYPE_D ?
-                 BRW_AOP_IMIN : BRW_AOP_UMIN);
+         case nir_intrinsic_image_atomic_min:
+            assert(format == GL_R32UI || format == GL_R32I);
+            op = (format == GL_R32I) ? BRW_AOP_IMIN : BRW_AOP_UMIN;
             break;
-         case nir_intrinsic_image_deref_atomic_max:
-            op = (get_image_base_type(type) == BRW_REGISTER_TYPE_D ?
-                 BRW_AOP_IMAX : BRW_AOP_UMAX);
+         case nir_intrinsic_image_atomic_max:
+            assert(format == GL_R32UI || format == GL_R32I);
+            op = (format == GL_R32I) ? BRW_AOP_IMAX : BRW_AOP_UMAX;
             break;
-         case nir_intrinsic_image_deref_atomic_and:
+         case nir_intrinsic_image_atomic_and:
             op = BRW_AOP_AND;
             break;
-         case nir_intrinsic_image_deref_atomic_or:
+         case nir_intrinsic_image_atomic_or:
             op = BRW_AOP_OR;
             break;
-         case nir_intrinsic_image_deref_atomic_xor:
+         case nir_intrinsic_image_atomic_xor:
             op = BRW_AOP_XOR;
             break;
-         case nir_intrinsic_image_deref_atomic_exchange:
+         case nir_intrinsic_image_atomic_exchange:
             op = BRW_AOP_MOV;
             break;
-         case nir_intrinsic_image_deref_atomic_comp_swap:
+         case nir_intrinsic_image_atomic_comp_swap:
             op = BRW_AOP_CMPWR;
             break;
          default:
@@ -3948,19 +3918,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
-   case nir_intrinsic_image_deref_load_param_intel: {
-      nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
-      const fs_reg image = get_nir_image_deref(deref);
-      const fs_reg param = offset(image, bld, nir_intrinsic_base(instr) * 4);
-      for (unsigned c = 0; c < instr->dest.ssa.num_components; ++c) {
-         bld.MOV(offset(retype(dest, param.type), bld, c),
-                 offset(param, bld, c));
-      }
-      break;
-   }
-
-   case nir_intrinsic_image_deref_load_raw_intel: {
-      const fs_reg image = get_nir_image_deref(nir_src_as_deref(instr->src[0]));
+   case nir_intrinsic_image_load_raw_intel: {
+      const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
       const fs_reg addr = retype(get_nir_src(instr->src[1]),
                                  BRW_REGISTER_TYPE_UD);
 
@@ -3974,8 +3933,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
-   case nir_intrinsic_image_deref_store_raw_intel: {
-      const fs_reg image = get_nir_image_deref(nir_src_as_deref(instr->src[0]));
+   case nir_intrinsic_image_store_raw_intel: {
+      const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
       const fs_reg addr = retype(get_nir_src(instr->src[1]),
                                  BRW_REGISTER_TYPE_UD);
       const fs_reg data = retype(get_nir_src(instr->src[2]),
@@ -4010,7 +3969,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
-   case nir_intrinsic_image_deref_samples:
+   case nir_intrinsic_image_samples:
       /* The driver does not support multi-sampled images. */
       bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(1));
       break;
index 72a6ee8884abecae7dc770f79de89f4f3c2d6e3b..500732655396d327c44c50f4eb308de98208beae 100644 (file)
@@ -116,6 +116,8 @@ void brw_nir_lower_fs_outputs(nir_shader *nir);
 
 bool brw_nir_lower_image_load_store(nir_shader *nir,
                                     const struct gen_device_info *devinfo);
+void brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin,
+                                     nir_ssa_def *index);
 
 nir_shader *brw_postprocess_nir(nir_shader *nir,
                                 const struct brw_compiler *compiler,
@@ -147,6 +149,9 @@ void brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
                                 struct gl_program *prog,
                                 struct brw_stage_prog_data *stage_prog_data);
 
+void brw_nir_lower_glsl_images(nir_shader *shader,
+                               const struct gl_program *prog);
+
 void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
                                 nir_shader *nir,
                                 const struct brw_vs_prog_key *vs_key,
index 819fb440f2ca4c5e059992edab96dfcecc00d076..5eba9ddabd3e8bfeef99519cdf71c9a1eaae7a60 100644 (file)
@@ -811,3 +811,44 @@ brw_nir_lower_image_load_store(nir_shader *shader,
 
    return progress;
 }
+
+void
+brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin,
+                                nir_ssa_def *index)
+{
+   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   switch (intrin->intrinsic) {
+#define CASE(op) \
+   case nir_intrinsic_image_deref_##op: \
+      intrin->intrinsic = nir_intrinsic_image_##op; \
+      break;
+   CASE(load)
+   CASE(store)
+   CASE(atomic_add)
+   CASE(atomic_min)
+   CASE(atomic_max)
+   CASE(atomic_and)
+   CASE(atomic_or)
+   CASE(atomic_xor)
+   CASE(atomic_exchange)
+   CASE(atomic_comp_swap)
+   CASE(atomic_fadd)
+   CASE(size)
+   CASE(samples)
+   CASE(load_raw_intel)
+   CASE(store_raw_intel)
+#undef CASE
+   default:
+      unreachable("Unhanded image intrinsic");
+   }
+
+   nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type));
+   nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type));
+   nir_intrinsic_set_access(intrin, var->data.image.access);
+   nir_intrinsic_set_format(intrin, var->data.image.format);
+
+   nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
+                         nir_src_for_ssa(index));
+}
index 84a664826e839b81aa52572a379bf38f4c463234..583b5a17cc6d86c949f3f9dfca01668fabd47951 100644 (file)
@@ -24,6 +24,7 @@
 #include "anv_nir.h"
 #include "program/prog_parameter.h"
 #include "nir/nir_builder.h"
+#include "compiler/brw_nir.h"
 
 struct apply_pipeline_layout_state {
    nir_shader *shader;
@@ -32,6 +33,8 @@ struct apply_pipeline_layout_state {
    struct anv_pipeline_layout *layout;
    bool add_bounds_checks;
 
+   unsigned first_image_uniform;
+
    bool uses_constants;
    uint8_t constants_offset;
    struct {
@@ -99,6 +102,9 @@ get_used_bindings_block(nir_block *block,
          case nir_intrinsic_image_deref_atomic_comp_swap:
          case nir_intrinsic_image_deref_size:
          case nir_intrinsic_image_deref_samples:
+         case nir_intrinsic_image_deref_load_param_intel:
+         case nir_intrinsic_image_deref_load_raw_intel:
+         case nir_intrinsic_image_deref_store_raw_intel:
             add_deref_src_binding(state, intrin->src[0]);
             break;
 
@@ -178,6 +184,63 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
    nir_instr_remove(&intrin->instr);
 }
 
+static void
+lower_image_intrinsic(nir_intrinsic_instr *intrin,
+                      struct apply_pipeline_layout_state *state)
+{
+   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   unsigned set = var->data.descriptor_set;
+   unsigned binding = var->data.binding;
+   unsigned array_size =
+      state->layout->set[set].layout->binding[binding].array_size;
+
+   nir_builder *b = &state->builder;
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_ssa_def *index = NULL;
+   if (deref->deref_type != nir_deref_type_var) {
+      assert(deref->deref_type == nir_deref_type_array);
+      index = nir_ssa_for_src(b, deref->arr.index, 1);
+      if (state->add_bounds_checks)
+         index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
+   } else {
+      index = nir_imm_int(b, 0);
+   }
+
+   if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
+      b->cursor = nir_instr_remove(&intrin->instr);
+
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
+
+      nir_intrinsic_set_base(load, state->first_image_uniform +
+                                   state->set[set].image_offsets[binding] *
+                                   BRW_IMAGE_PARAM_SIZE * 4);
+      nir_intrinsic_set_range(load, array_size * BRW_IMAGE_PARAM_SIZE * 4);
+
+      const unsigned param = nir_intrinsic_base(intrin);
+      nir_ssa_def *offset =
+         nir_imul(b, index, nir_imm_int(b, BRW_IMAGE_PARAM_SIZE * 4));
+      offset = nir_iadd(b, offset, nir_imm_int(b, param * 16));
+      load->src[0] = nir_src_for_ssa(offset);
+
+      load->num_components = intrin->dest.ssa.num_components;
+      nir_ssa_dest_init(&load->instr, &load->dest,
+                        intrin->dest.ssa.num_components,
+                        intrin->dest.ssa.bit_size, NULL);
+      nir_builder_instr_insert(b, &load->instr);
+
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                               nir_src_for_ssa(&load->dest.ssa));
+   } else {
+      unsigned binding_offset = state->set[set].surface_offsets[binding];
+      index = nir_iadd(b, index, nir_imm_int(b, binding_offset));
+      brw_nir_rewrite_image_intrinsic(intrin, index);
+   }
+}
+
 static void
 lower_load_constant(nir_intrinsic_instr *intrin,
                     struct apply_pipeline_layout_state *state)
@@ -318,6 +381,23 @@ apply_pipeline_layout_block(nir_block *block,
          case nir_intrinsic_vulkan_resource_reindex:
             lower_res_reindex_intrinsic(intrin, state);
             break;
+         case nir_intrinsic_image_deref_load:
+         case nir_intrinsic_image_deref_store:
+         case nir_intrinsic_image_deref_atomic_add:
+         case nir_intrinsic_image_deref_atomic_min:
+         case nir_intrinsic_image_deref_atomic_max:
+         case nir_intrinsic_image_deref_atomic_and:
+         case nir_intrinsic_image_deref_atomic_or:
+         case nir_intrinsic_image_deref_atomic_xor:
+         case nir_intrinsic_image_deref_atomic_exchange:
+         case nir_intrinsic_image_deref_atomic_comp_swap:
+         case nir_intrinsic_image_deref_size:
+         case nir_intrinsic_image_deref_samples:
+         case nir_intrinsic_image_deref_load_param_intel:
+         case nir_intrinsic_image_deref_load_raw_intel:
+         case nir_intrinsic_image_deref_store_raw_intel:
+            lower_image_intrinsic(intrin, state);
+            break;
          case nir_intrinsic_load_constant:
             lower_load_constant(intrin, state);
             break;
@@ -436,6 +516,39 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
       }
    }
 
+   unsigned image_uniform;
+   if (map->image_count > 0) {
+      assert(map->image_count <= MAX_IMAGES);
+      assert(shader->num_uniforms == prog_data->nr_params * 4);
+      state.first_image_uniform = shader->num_uniforms;
+      uint32_t *param = brw_stage_prog_data_add_params(prog_data,
+                                                       map->image_count *
+                                                       BRW_IMAGE_PARAM_SIZE);
+      struct anv_push_constants *null_data = NULL;
+      const struct brw_image_param *image_param = null_data->images;
+      for (uint32_t i = 0; i < map->image_count; i++) {
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
+                                  (uintptr_t)&image_param->surface_idx, 1);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
+                                  (uintptr_t)image_param->offset, 2);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
+                                  (uintptr_t)image_param->size, 3);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
+                                  (uintptr_t)image_param->stride, 4);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
+                                  (uintptr_t)image_param->tiling, 3);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
+                                  (uintptr_t)image_param->swizzling, 2);
+
+         param += BRW_IMAGE_PARAM_SIZE;
+         image_param ++;
+      }
+      assert(param == prog_data->param + prog_data->nr_params);
+
+      shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
+      assert(shader->num_uniforms == prog_data->nr_params * 4);
+   }
+
    nir_foreach_variable(var, &shader->uniforms) {
       const struct glsl_type *glsl_type = glsl_without_array(var->type);
 
@@ -479,51 +592,5 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
                                             nir_metadata_dominance);
    }
 
-   if (map->image_count > 0) {
-      assert(map->image_count <= MAX_IMAGES);
-      nir_foreach_variable(var, &shader->uniforms) {
-         if (glsl_type_is_image(var->type) ||
-             (glsl_type_is_array(var->type) &&
-              glsl_type_is_image(glsl_get_array_element(var->type)))) {
-            /* Images are represented as uniform push constants and the actual
-             * information required for reading/writing to/from the image is
-             * storred in the uniform.
-             */
-            unsigned set = var->data.descriptor_set;
-            unsigned binding = var->data.binding;
-            unsigned image_index = state.set[set].image_offsets[binding];
-
-            var->data.driver_location = shader->num_uniforms +
-                                        image_index * BRW_IMAGE_PARAM_SIZE * 4;
-         }
-      }
-
-      uint32_t *param = brw_stage_prog_data_add_params(prog_data,
-                                                       map->image_count *
-                                                       BRW_IMAGE_PARAM_SIZE);
-      struct anv_push_constants *null_data = NULL;
-      const struct brw_image_param *image_param = null_data->images;
-      for (uint32_t i = 0; i < map->image_count; i++) {
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
-                                  (uintptr_t)&image_param->surface_idx, 1);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
-                                  (uintptr_t)image_param->offset, 2);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
-                                  (uintptr_t)image_param->size, 3);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
-                                  (uintptr_t)image_param->stride, 4);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
-                                  (uintptr_t)image_param->tiling, 3);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
-                                  (uintptr_t)image_param->swizzling, 2);
-
-         param += BRW_IMAGE_PARAM_SIZE;
-         image_param ++;
-      }
-      assert(param == prog_data->param + prog_data->nr_params);
-
-      shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
-   }
-
    ralloc_free(mem_ctx);
 }
index 19d59b7fbac7fb1749e09c1e36795f27d7983ace..a3eb68769a2f494bac079480b7dfb68a77f80a95 100644 (file)
@@ -523,6 +523,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
    if (nir->info.num_ssbos > 0 || nir->info.num_images > 0)
       pipeline->needs_data_cache = true;
 
+   NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
+
    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
    if (layout) {
       anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data,
@@ -532,8 +534,6 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
    if (nir->info.stage != MESA_SHADER_COMPUTE)
       brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
 
-   NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
-
    assert(nir->num_uniforms == prog_data->nr_params * 4);
 
    stage->nir = nir;
index 54f9f9b1a6b804f8f71217e5664c782339e2e57f..8a560d9bac11392648f9460e13eb7fa0c094e0b9 100644 (file)
@@ -23,6 +23,7 @@
 
 #include "compiler/brw_nir.h"
 #include "compiler/glsl/ir_uniform.h"
+#include "compiler/nir/nir_builder.h"
 #include "brw_program.h"
 
 static void
@@ -267,3 +268,132 @@ brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
          stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO;
    }
 }
+
+static nir_ssa_def *
+get_aoa_deref_offset(nir_builder *b,
+                     nir_deref_instr *deref,
+                     unsigned elem_size)
+{
+   unsigned array_size = elem_size;
+   nir_ssa_def *offset = nir_imm_int(b, 0);
+
+   while (deref->deref_type != nir_deref_type_var) {
+      assert(deref->deref_type == nir_deref_type_array);
+
+      /* This level's element size is the previous level's array size */
+      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
+      assert(deref->arr.index.ssa);
+      offset = nir_iadd(b, offset,
+                           nir_imul(b, index, nir_imm_int(b, array_size)));
+
+      deref = nir_deref_instr_parent(deref);
+      assert(glsl_type_is_array(deref->type));
+      array_size *= glsl_get_length(deref->type);
+   }
+
+   /* Accessing an invalid surface index with the dataport can result in a
+    * hang.  According to the spec "if the index used to select an individual
+    * element is negative or greater than or equal to the size of the array,
+    * the results of the operation are undefined but may not lead to
+    * termination" -- which is one of the possible outcomes of the hang.
+    * Clamp the index to prevent access outside of the array bounds.
+    */
+   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
+}
+
+void
+brw_nir_lower_glsl_images(nir_shader *shader,
+                          const struct gl_program *prog)
+{
+   /* We put image uniforms at the end */
+   nir_foreach_variable(var, &shader->uniforms) {
+      if (!var->type->contains_image())
+         continue;
+
+      /* GL Only allows arrays of arrays of images */
+      assert(var->type->without_array()->is_image());
+      const unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size());
+
+      var->data.driver_location = shader->num_uniforms;
+      shader->num_uniforms += num_images * BRW_IMAGE_PARAM_SIZE * 4;
+   }
+
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_image_deref_load:
+         case nir_intrinsic_image_deref_store:
+         case nir_intrinsic_image_deref_atomic_add:
+         case nir_intrinsic_image_deref_atomic_min:
+         case nir_intrinsic_image_deref_atomic_max:
+         case nir_intrinsic_image_deref_atomic_and:
+         case nir_intrinsic_image_deref_atomic_or:
+         case nir_intrinsic_image_deref_atomic_xor:
+         case nir_intrinsic_image_deref_atomic_exchange:
+         case nir_intrinsic_image_deref_atomic_comp_swap:
+         case nir_intrinsic_image_deref_size:
+         case nir_intrinsic_image_deref_samples:
+         case nir_intrinsic_image_deref_load_raw_intel:
+         case nir_intrinsic_image_deref_store_raw_intel: {
+            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+            nir_variable *var = nir_deref_instr_get_variable(deref);
+            const unsigned num_images =
+               MAX2(1, var->type->arrays_of_arrays_size());
+
+            struct gl_uniform_storage *storage =
+               &prog->sh.data->UniformStorage[var->data.location];
+            const unsigned image_var_idx =
+               storage->opaque[shader->info.stage].index;
+
+            b.cursor = nir_before_instr(&intrin->instr);
+            nir_ssa_def *index = nir_iadd(&b, nir_imm_int(&b, image_var_idx),
+                                          get_aoa_deref_offset(&b, deref, 1));
+            brw_nir_rewrite_image_intrinsic(intrin, index);
+            break;
+         }
+
+         case nir_intrinsic_image_deref_load_param_intel: {
+            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+            nir_variable *var = nir_deref_instr_get_variable(deref);
+            const unsigned num_images =
+               MAX2(1, var->type->arrays_of_arrays_size());
+
+            b.cursor = nir_instr_remove(&intrin->instr);
+
+            const unsigned param = nir_intrinsic_base(intrin);
+            nir_ssa_def *offset =
+               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4);
+            offset = nir_iadd(&b, offset, nir_imm_int(&b, param * 16));
+
+            nir_intrinsic_instr *load =
+               nir_intrinsic_instr_create(b.shader,
+                                          nir_intrinsic_load_uniform);
+            nir_intrinsic_set_base(load, var->data.driver_location);
+            nir_intrinsic_set_range(load, num_images * BRW_IMAGE_PARAM_SIZE * 4);
+            load->src[0] = nir_src_for_ssa(offset);
+            load->num_components = intrin->dest.ssa.num_components;
+            nir_ssa_dest_init(&load->instr, &load->dest,
+                              intrin->dest.ssa.num_components,
+                              intrin->dest.ssa.bit_size, NULL);
+            nir_builder_instr_insert(&b, &load->instr);
+
+            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                     nir_src_for_ssa(&load->dest.ssa));
+            break;
+         }
+
+         default:
+            break;
+         }
+      }
+   }
+}
index f5ebd3c3b059de25dba9b978c7bf46f479580e50..041395ec4c0558cbfab063c1801cc6fb13f66de2 100644 (file)
@@ -140,6 +140,7 @@ brw_create_nir(struct brw_context *brw,
    }
 
    NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
+   NIR_PASS_V(nir, brw_nir_lower_glsl_images, prog);
 
    return nir;
 }