anv: Put image params in the descriptor set buffer on gen8 and earlier
authorJason Ekstrand <jason.ekstrand@intel.com>
Thu, 22 Nov 2018 00:26:27 +0000 (18:26 -0600)
committerJason Ekstrand <jason@jlekstrand.net>
Fri, 19 Apr 2019 19:56:42 +0000 (19:56 +0000)
This is really where they belong, not in push constants.  The one downside
here is that we can't push them anymore for compute shaders.  However,
that's a general problem and we should figure out how to push descriptor
sets for compute shaders.  This lets us bump MAX_IMAGES to 64 on BDW and
earlier platforms because we no longer have to worry about push constant
overhead limits.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
src/intel/vulkan/anv_descriptor_set.c
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_nir_apply_pipeline_layout.c
src/intel/vulkan/anv_pipeline_cache.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/genX_cmd_buffer.c

index a648105970d559563b643c4d534c9c708523b6c0..fa1d042f7921ab57a8d39b1aee242bf4a223ad67 100644 (file)
@@ -90,7 +90,12 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device,
 static unsigned
 anv_descriptor_data_size(enum anv_descriptor_data data)
 {
-   return 0;
+   unsigned size = 0;
+
+   if (data & ANV_DESCRIPTOR_IMAGE_PARAM)
+      size += BRW_IMAGE_PARAM_SIZE * 4;
+
+   return size;
 }
 
 /** Returns the size in bytes of each descriptor with the given layout */
@@ -902,6 +907,24 @@ VkResult anv_FreeDescriptorSets(
    return VK_SUCCESS;
 }
 
+static void
+anv_descriptor_set_write_image_param(uint32_t *param_desc_map,
+                                     const struct brw_image_param *param)
+{
+#define WRITE_PARAM_FIELD(field, FIELD) \
+   for (unsigned i = 0; i < ARRAY_SIZE(param->field); i++) \
+      param_desc_map[BRW_IMAGE_PARAM_##FIELD##_OFFSET + i] = param->field[i]
+
+   WRITE_PARAM_FIELD(offset, OFFSET);
+   WRITE_PARAM_FIELD(size, SIZE);
+   WRITE_PARAM_FIELD(stride, STRIDE);
+   WRITE_PARAM_FIELD(tiling, TILING);
+   WRITE_PARAM_FIELD(swizzling, SWIZZLING);
+   WRITE_PARAM_FIELD(size, SIZE);
+
+#undef WRITE_PARAM_FIELD
+}
+
 void
 anv_descriptor_set_write_image_view(struct anv_device *device,
                                     struct anv_descriptor_set *set,
@@ -952,6 +975,18 @@ anv_descriptor_set_write_image_view(struct anv_device *device,
       .image_view = image_view,
       .sampler = sampler,
    };
+
+   void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset +
+                    element * anv_descriptor_size(bind_layout);
+
+   if (bind_layout->data & ANV_DESCRIPTOR_IMAGE_PARAM) {
+      /* Storage images can only ever have one plane */
+      assert(image_view->n_planes == 1);
+      const struct brw_image_param *image_param =
+         &image_view->planes[0].storage_image_param;
+
+      anv_descriptor_set_write_image_param(desc_map, image_param);
+   }
 }
 
 void
@@ -973,6 +1008,14 @@ anv_descriptor_set_write_buffer_view(struct anv_device *device,
       .type = type,
       .buffer_view = buffer_view,
    };
+
+   void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset +
+                    element * anv_descriptor_size(bind_layout);
+
+   if (bind_layout->data & ANV_DESCRIPTOR_IMAGE_PARAM) {
+      anv_descriptor_set_write_image_param(desc_map,
+                                           &buffer_view->storage_image_param);
+   }
 }
 
 void
index 2af09d4f1227bcd6f2887b52b02f60c47bf033ba..bc51c2c1aab0ddd2161f9052a4e3e504778203c0 100644 (file)
@@ -1096,8 +1096,6 @@ void anv_GetPhysicalDeviceProperties(
    const uint32_t max_samplers = (devinfo->gen >= 8 || devinfo->is_haswell) ?
                                  128 : 16;
 
-   const uint32_t max_images = devinfo->gen < 9 ? MAX_GEN8_IMAGES : MAX_IMAGES;
-
    VkSampleCountFlags sample_counts =
       isl_device_get_sample_counts(&pdevice->isl_dev);
 
@@ -1121,7 +1119,7 @@ void anv_GetPhysicalDeviceProperties(
       .maxPerStageDescriptorUniformBuffers      = 64,
       .maxPerStageDescriptorStorageBuffers      = 64,
       .maxPerStageDescriptorSampledImages       = max_samplers,
-      .maxPerStageDescriptorStorageImages       = max_images,
+      .maxPerStageDescriptorStorageImages       = MAX_IMAGES,
       .maxPerStageDescriptorInputAttachments    = 64,
       .maxPerStageResources                     = 250,
       .maxDescriptorSetSamplers                 = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
@@ -1130,7 +1128,7 @@ void anv_GetPhysicalDeviceProperties(
       .maxDescriptorSetStorageBuffers           = 6 * 64,           /* number of stages * maxPerStageDescriptorStorageBuffers */
       .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
       .maxDescriptorSetSampledImages            = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSampledImages */
-      .maxDescriptorSetStorageImages            = 6 * max_images,   /* number of stages * maxPerStageDescriptorStorageImages */
+      .maxDescriptorSetStorageImages            = 6 * MAX_IMAGES,   /* number of stages * maxPerStageDescriptorStorageImages */
       .maxDescriptorSetInputAttachments         = 256,
       .maxVertexInputAttributes                 = MAX_VBS,
       .maxVertexInputBindings                   = MAX_VBS,
index 29d43ca27958fe3cd822678fca13ecafb0a949f4..3448e75384292b3c9a273f070be53b1978422a7b 100644 (file)
@@ -35,8 +35,6 @@ struct apply_pipeline_layout_state {
    struct anv_pipeline_layout *layout;
    bool add_bounds_checks;
 
-   unsigned first_image_uniform;
-
    bool uses_constants;
    uint8_t constants_offset;
    struct {
@@ -46,7 +44,6 @@ struct apply_pipeline_layout_state {
       BITSET_WORD *used;
       uint8_t *surface_offsets;
       uint8_t *sampler_offsets;
-      uint8_t *image_offsets;
    } set[MAX_SETS];
 };
 
@@ -239,11 +236,11 @@ lower_get_buffer_size(nir_intrinsic_instr *intrin,
                          nir_src_for_ssa(nir_channel(b, index, 0)));
 }
 
-static void
-lower_image_intrinsic(nir_intrinsic_instr *intrin,
+static nir_ssa_def *
+build_descriptor_load(nir_deref_instr *deref, unsigned offset,
+                      unsigned num_components, unsigned bit_size,
                       struct apply_pipeline_layout_state *state)
 {
-   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
    nir_variable *var = nir_deref_instr_get_variable(deref);
 
    unsigned set = var->data.descriptor_set;
@@ -251,47 +248,80 @@ lower_image_intrinsic(nir_intrinsic_instr *intrin,
    unsigned array_size =
       state->layout->set[set].layout->binding[binding].array_size;
 
+   const struct anv_descriptor_set_binding_layout *bind_layout =
+      &state->layout->set[set].layout->binding[binding];
+
    nir_builder *b = &state->builder;
-   b->cursor = nir_before_instr(&intrin->instr);
 
-   nir_ssa_def *index = NULL;
+   nir_ssa_def *desc_buffer_index =
+      nir_imm_int(b, state->set[set].desc_offset);
+
+   nir_ssa_def *desc_offset =
+      nir_imm_int(b, bind_layout->descriptor_offset + offset);
    if (deref->deref_type != nir_deref_type_var) {
       assert(deref->deref_type == nir_deref_type_array);
-      index = nir_ssa_for_src(b, deref->arr.index, 1);
+
+      const unsigned descriptor_size = anv_descriptor_size(bind_layout);
+      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
       if (state->add_bounds_checks)
-         index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
-   } else {
-      index = nir_imm_int(b, 0);
+         arr_index = nir_umin(b, arr_index, nir_imm_int(b, array_size - 1));
+
+      desc_offset = nir_iadd(b, desc_offset,
+                             nir_imul_imm(b, arr_index, descriptor_size));
    }
 
+   nir_intrinsic_instr *desc_load =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
+   desc_load->src[0] = nir_src_for_ssa(desc_buffer_index);
+   desc_load->src[1] = nir_src_for_ssa(desc_offset);
+   desc_load->num_components = num_components;
+   nir_ssa_dest_init(&desc_load->instr, &desc_load->dest,
+                     num_components, bit_size, NULL);
+   nir_builder_instr_insert(b, &desc_load->instr);
+
+   return &desc_load->dest.ssa;
+}
+
+static void
+lower_image_intrinsic(nir_intrinsic_instr *intrin,
+                      struct apply_pipeline_layout_state *state)
+{
+   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+
+   nir_builder *b = &state->builder;
+   b->cursor = nir_before_instr(&intrin->instr);
+
    if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
       b->cursor = nir_instr_remove(&intrin->instr);
 
-      nir_intrinsic_instr *load =
-         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
+      const unsigned param = nir_intrinsic_base(intrin);
 
-      nir_intrinsic_set_base(load, state->first_image_uniform +
-                                   state->set[set].image_offsets[binding] *
-                                   BRW_IMAGE_PARAM_SIZE * 4);
-      nir_intrinsic_set_range(load, array_size * BRW_IMAGE_PARAM_SIZE * 4);
+      nir_ssa_def *desc =
+         build_descriptor_load(deref, param * 16,
+                               intrin->dest.ssa.num_components,
+                               intrin->dest.ssa.bit_size, state);
 
-      const unsigned param = nir_intrinsic_base(intrin);
-      nir_ssa_def *offset =
-         nir_imul(b, index, nir_imm_int(b, BRW_IMAGE_PARAM_SIZE * 4));
-      offset = nir_iadd(b, offset, nir_imm_int(b, param * 16));
-      load->src[0] = nir_src_for_ssa(offset);
-
-      load->num_components = intrin->dest.ssa.num_components;
-      nir_ssa_dest_init(&load->instr, &load->dest,
-                        intrin->dest.ssa.num_components,
-                        intrin->dest.ssa.bit_size, NULL);
-      nir_builder_instr_insert(b, &load->instr);
-
-      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                               nir_src_for_ssa(&load->dest.ssa));
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc));
    } else {
+      nir_variable *var = nir_deref_instr_get_variable(deref);
+
+      unsigned set = var->data.descriptor_set;
+      unsigned binding = var->data.binding;
       unsigned binding_offset = state->set[set].surface_offsets[binding];
-      index = nir_iadd(b, index, nir_imm_int(b, binding_offset));
+      unsigned array_size =
+         state->layout->set[set].layout->binding[binding].array_size;
+
+      nir_ssa_def *index = NULL;
+      if (deref->deref_type != nir_deref_type_var) {
+         assert(deref->deref_type == nir_deref_type_array);
+         index = nir_ssa_for_src(b, deref->arr.index, 1);
+         if (state->add_bounds_checks)
+            index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
+      } else {
+         index = nir_imm_int(b, 0);
+      }
+
+      index = nir_iadd_imm(b, index, binding_offset);
       nir_rewrite_image_intrinsic(intrin, index, false);
    }
 }
@@ -475,16 +505,6 @@ apply_pipeline_layout_block(nir_block *block,
    }
 }
 
-static void
-setup_vec4_uniform_value(uint32_t *params, uint32_t offset, unsigned n)
-{
-   for (unsigned i = 0; i < n; ++i)
-      params[i] = ANV_PARAM_PUSH(offset + i * sizeof(uint32_t));
-
-   for (unsigned i = n; i < 4; ++i)
-      params[i] = BRW_PARAM_BUILTIN_ZERO;
-}
-
 void
 anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
                               bool robust_buffer_access,
@@ -508,7 +528,6 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
       state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words);
       state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
       state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
-      state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count);
    }
 
    nir_foreach_function(function, shader) {
@@ -583,45 +602,9 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
                }
             }
          }
-
-         if (binding->data & ANV_DESCRIPTOR_IMAGE_PARAM) {
-            state.set[set].image_offsets[b] = map->image_param_count;
-            map->image_param_count += binding->array_size;
-         }
       }
    }
 
-   if (map->image_param_count > 0) {
-      assert(map->image_param_count <= MAX_GEN8_IMAGES);
-      assert(shader->num_uniforms == prog_data->nr_params * 4);
-      state.first_image_uniform = shader->num_uniforms;
-      uint32_t *param = brw_stage_prog_data_add_params(prog_data,
-                                                       map->image_param_count *
-                                                       BRW_IMAGE_PARAM_SIZE);
-      struct anv_push_constants *null_data = NULL;
-      const struct brw_image_param *image_param = null_data->images;
-      for (uint32_t i = 0; i < map->image_param_count; i++) {
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
-                                  (uintptr_t)image_param->offset, 2);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
-                                  (uintptr_t)image_param->size, 3);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
-                                  (uintptr_t)image_param->stride, 4);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
-                                  (uintptr_t)image_param->tiling, 3);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
-                                  (uintptr_t)image_param->swizzling, 2);
-
-         param += BRW_IMAGE_PARAM_SIZE;
-         image_param ++;
-      }
-      assert(param == prog_data->param + prog_data->nr_params);
-
-      shader->num_uniforms += map->image_param_count *
-                              BRW_IMAGE_PARAM_SIZE * 4;
-      assert(shader->num_uniforms == prog_data->nr_params * 4);
-   }
-
    nir_foreach_variable(var, &shader->uniforms) {
       const struct glsl_type *glsl_type = glsl_without_array(var->type);
 
index bc7dd100e2baa8fccaf2872842790e6dafc9678e..7b9b1e83678078ec932db6290481f2958552066b 100644 (file)
@@ -154,7 +154,6 @@ anv_shader_bin_write_to_blob(const struct anv_shader_bin *shader,
 
    blob_write_uint32(blob, shader->bind_map.surface_count);
    blob_write_uint32(blob, shader->bind_map.sampler_count);
-   blob_write_uint32(blob, shader->bind_map.image_param_count);
    blob_write_bytes(blob, shader->bind_map.surface_to_descriptor,
                     shader->bind_map.surface_count *
                     sizeof(*shader->bind_map.surface_to_descriptor));
@@ -194,7 +193,6 @@ anv_shader_bin_create_from_blob(struct anv_device *device,
    struct anv_pipeline_bind_map bind_map;
    bind_map.surface_count = blob_read_uint32(blob);
    bind_map.sampler_count = blob_read_uint32(blob);
-   bind_map.image_param_count = blob_read_uint32(blob);
    bind_map.surface_to_descriptor = (void *)
       blob_read_bytes(blob, bind_map.surface_count *
                             sizeof(*bind_map.surface_to_descriptor));
index bfeade7ce4878c90b0ad178d211a0a5123ef1b0a..28854411b114510f9731a9ad587bc13d85092da6 100644 (file)
@@ -158,7 +158,6 @@ struct gen_l3_config;
 #define MAX_PUSH_CONSTANTS_SIZE 128
 #define MAX_DYNAMIC_BUFFERS 16
 #define MAX_IMAGES 64
-#define MAX_GEN8_IMAGES 8
 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
 #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
@@ -2077,9 +2076,6 @@ struct anv_push_constants {
 
    /* Used for vkCmdDispatchBase */
    uint32_t base_work_group_id[3];
-
-   /* Image data for image_load_store on pre-SKL */
-   struct brw_image_param images[MAX_GEN8_IMAGES];
 };
 
 struct anv_dynamic_state {
@@ -2587,7 +2583,6 @@ mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)
 struct anv_pipeline_bind_map {
    uint32_t surface_count;
    uint32_t sampler_count;
-   uint32_t image_param_count;
 
    struct anv_pipeline_binding *                surface_to_descriptor;
    struct anv_pipeline_binding *                sampler_to_descriptor;
index a6d061e7ca02c8f00d1903ecadabf873aec9cfe7..3189585cbd3d15cc3d39b8747d29d01f1af40320 100644 (file)
@@ -2010,7 +2010,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                    gl_shader_stage stage,
                    struct anv_state *bt_state)
 {
-   const struct gen_device_info *devinfo = &cmd_buffer->device->info;
    struct anv_subpass *subpass = cmd_buffer->state.subpass;
    struct anv_cmd_pipeline_state *pipe_state;
    struct anv_pipeline *pipeline;
@@ -2051,17 +2050,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
    const bool need_client_mem_relocs =
       !cmd_buffer->device->instance->physicalDevice.use_softpin;
 
-   /* We only use push constant space for images before gen9 */
-   if (map->image_param_count > 0) {
-      VkResult result =
-         anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images);
-      if (result != VK_SUCCESS)
-         return result;
-
-      cmd_buffer->state.push_constants_dirty |= 1 << stage;
-   }
-
-   uint32_t image = 0;
    for (uint32_t s = 0; s < map->surface_count; s++) {
       struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s];
 
@@ -2201,18 +2189,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
          assert(surface_state.alloc_size);
          if (need_client_mem_relocs)
             add_surface_state_relocs(cmd_buffer, sstate);
-         if (devinfo->gen < 9) {
-            /* We only need the image params on gen8 and earlier.  No image
-             * workarounds that require tiling information are required on
-             * SKL and above.
-             */
-            assert(image < MAX_GEN8_IMAGES);
-            struct brw_image_param *image_param =
-               &cmd_buffer->state.push_constants[stage]->images[image++];
-
-            *image_param =
-               desc->image_view->planes[binding->plane].storage_image_param;
-         }
          break;
       }
 
@@ -2262,13 +2238,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
             add_surface_reloc(cmd_buffer, surface_state,
                               desc->buffer_view->address);
          }
-         if (devinfo->gen < 9) {
-            assert(image < MAX_GEN8_IMAGES);
-            struct brw_image_param *image_param =
-               &cmd_buffer->state.push_constants[stage]->images[image++];
-
-            *image_param = desc->buffer_view->storage_image_param;
-         }
          break;
 
       default:
@@ -2278,7 +2247,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
 
       bt_map[s] = surface_state.offset + state_offset;
    }
-   assert(image == map->image_param_count);
 
 #if GEN_GEN >= 11
    /* The PIPE_CONTROL command description says: