iris: Fill out brw_image_params for storage images on Broadwell
author Kenneth Graunke <kenneth@whitecape.org>
Fri, 30 Nov 2018 10:27:07 +0000 (02:27 -0800)
committer Kenneth Graunke <kenneth@whitecape.org>
Thu, 21 Feb 2019 18:26:11 +0000 (10:26 -0800)
src/gallium/drivers/iris/iris_context.h
src/gallium/drivers/iris/iris_program.c
src/gallium/drivers/iris/iris_state.c

index ad7cfdb7b909e7920afa93a6eba173ae874bc836..0b0255bcc1876615aa7881989520285042ad08bc 100644 (file)
@@ -48,6 +48,18 @@ struct blorp_params;
 #define IRIS_MAX_VIEWPORTS 16
 #define IRIS_MAX_CLIP_PLANES 8
 
+enum iris_param_domain {
+   BRW_PARAM_DOMAIN_BUILTIN = 0, /* plain builtin id (presumably BRW_PARAM_BUILTIN_*) */
+   BRW_PARAM_DOMAIN_IMAGE,       /* value is (image index << 8) | dword offset */
+};
+/* A param packs its domain into bits 31:24 and a domain-specific value into bits 23:0. */
+#define BRW_PARAM(domain, val)   (BRW_PARAM_DOMAIN_##domain << 24 | (val))
+#define BRW_PARAM_DOMAIN(param)  ((uint32_t)(param) >> 24)
+#define BRW_PARAM_VALUE(param)   ((uint32_t)(param) & 0x00ffffff)
+#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset))
+#define BRW_PARAM_IMAGE_IDX(value)   (BRW_PARAM_VALUE(value) >> 8)
+#define BRW_PARAM_IMAGE_OFFSET(value) (BRW_PARAM_VALUE(value) & 0xff) /* all 8 bits below idx */
+
 /**
  * Dirty flags.  When state changes, we flag some combination of these
  * to indicate that particular GPU commands need to be re-emitted.
@@ -294,6 +306,9 @@ struct iris_shader_state {
       struct pipe_resource *res;
       struct iris_state_ref surface_state;
       unsigned access;
+
+      /** Gen8-only uniform data for image lowering */
+      struct brw_image_param param;
    } image[PIPE_MAX_SHADER_IMAGES];
 
    struct iris_state_ref sampler_table;
index b8929be9b72ba3b3b0d52ad3a06622b9dcd866f4..47a4b635f99a3961e73571fdc748e8f432879c09 100644 (file)
@@ -152,7 +152,9 @@ iris_lower_storage_image_derefs(nir_shader *nir)
          case nir_intrinsic_image_deref_atomic_exchange:
          case nir_intrinsic_image_deref_atomic_comp_swap:
          case nir_intrinsic_image_deref_size:
-         case nir_intrinsic_image_deref_samples: {
+         case nir_intrinsic_image_deref_samples:
+         case nir_intrinsic_image_deref_load_raw_intel:
+         case nir_intrinsic_image_deref_store_raw_intel: {
             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
             nir_variable *var = nir_deref_instr_get_variable(deref);
 
@@ -569,6 +571,19 @@ assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
    return next_binding_table_offset;
 }
 
+/* Fill one vec4 of sysvals: n dwords of image idx's param from byte offset, then zeros. */
+static void
+setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
+                        unsigned offset, unsigned n)
+{
+   assert(offset % sizeof(uint32_t) == 0);
+   unsigned comp = 0, first_dword = offset / sizeof(uint32_t);
+   for (; comp < n; ++comp)   /* components backed by brw_image_param data */
+      sysvals[comp] = BRW_PARAM_IMAGE(idx, first_dword + comp);
+   for (; comp < 4; ++comp)   /* pad the rest of the vec4 */
+      sysvals[comp] = BRW_PARAM_BUILTIN_ZERO;
+}
+
 /**
  * Associate NIR uniform variables with the prog_data->param[] mechanism
  * used by the backend.  Also, decide which UBOs we'd like to push in an
@@ -582,12 +597,7 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
                     enum brw_param_builtin **out_system_values,
                     unsigned *out_num_system_values)
 {
-   /* We don't use params[], but fs_visitor::nir_setup_uniforms() asserts
-    * about it for compute shaders, so go ahead and make some fake ones
-    * which the backend will dead code eliminate.
-    */
-   prog_data->nr_params = nir->num_uniforms;
-   prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);
+   const struct gen_device_info *devinfo = compiler->devinfo;
 
    /* The intel compiler assumes that num_uniforms is in bytes. For
     * scalar that means 4 bytes per uniform slot.
@@ -596,14 +606,17 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
     */
    nir->num_uniforms *= 4;
 
-   const unsigned IRIS_MAX_SYSTEM_VALUES = 32;
+   const unsigned IRIS_MAX_SYSTEM_VALUES =
+      PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
    enum brw_param_builtin *system_values =
       rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
    unsigned num_system_values = 0;
 
    unsigned patch_vert_idx = -1;
    unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
+   unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
    memset(ucp_idx, -1, sizeof(ucp_idx));
+   memset(img_idx, -1, sizeof(img_idx));
 
    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
 
@@ -650,6 +663,49 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
             b.cursor = nir_before_instr(instr);
             offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t));
             break;
+         case nir_intrinsic_image_deref_load_param_intel: {
+            assert(devinfo->gen < 9);
+            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+            nir_variable *var = nir_deref_instr_get_variable(deref);
+
+            if (img_idx[var->data.binding] == -1) {
+               /* GL only allows arrays of arrays of images. */
+               assert(glsl_type_is_image(glsl_without_array(var->type)));
+               unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));
+
+               for (int i = 0; i < num_images; i++) {
+                  const unsigned img = var->data.binding + i;
+
+                  img_idx[img] = num_system_values;
+                  num_system_values += BRW_IMAGE_PARAM_SIZE;
+
+                  uint32_t *img_sv = &system_values[img_idx[img]];
+
+                  setup_vec4_image_sysval(
+                     img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
+                     offsetof(struct brw_image_param, offset), 2);
+                  setup_vec4_image_sysval(
+                     img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
+                     offsetof(struct brw_image_param, size), 3);
+                  setup_vec4_image_sysval(
+                     img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
+                     offsetof(struct brw_image_param, stride), 4);
+                  setup_vec4_image_sysval(
+                     img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
+                     offsetof(struct brw_image_param, tiling), 3);
+                  setup_vec4_image_sysval(
+                     img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
+                     offsetof(struct brw_image_param, swizzling), 2);
+               }
+            }
+
+            b.cursor = nir_before_instr(instr);
+            offset = nir_iadd(&b,
+               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
+               nir_imm_int(&b, img_idx[var->data.binding] * 4 +
+                               nir_intrinsic_base(intrin) * 16));
+            break;
+         }
          default:
             continue;
          }
@@ -717,6 +773,13 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
    if (nir->info.stage != MESA_SHADER_COMPUTE)
       brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
 
+   /* We don't use params[], but fs_visitor::nir_setup_uniforms() asserts
+    * about it for compute shaders, so go ahead and make some fake ones
+    * which the backend will dead code eliminate.
+    */
+   prog_data->nr_params = nir->num_uniforms / 4;
+   prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);
+
    *out_system_values = system_values;
    *out_num_system_values = num_system_values;
 }
index 1bdf44693971ff8ddb99926b79d3c71c22bcf89d..5d1aaf97c41e96a50408659b82ae0e19d5cf7e35 100644 (file)
@@ -1741,6 +1741,36 @@ iris_create_surface(struct pipe_context *ctx,
    return psurf;
 }
 
+#if GEN_GEN < 9
+/* Reset *param to baseline values: everything zero except the swizzling shifts. */
+static void
+fill_default_image_param(struct brw_image_param *param)
+{
+   /* Swizzling shifts of 0xff effectively disable swizzling -- see
+    * emit_address_calculation() in brw_fs_surface_builder.cpp.
+    */
+   memset(param, 0, sizeof *param);
+   for (unsigned c = 0; c < 2; c++)
+      param->swizzling[c] = 0xff;
+}
+
+/* Buffer image param: defaults, plus size[0] = size / block size and stride[0] = block size. */
+static void
+fill_buffer_image_param(struct brw_image_param *param,
+                        enum pipe_format pfmt,
+                        unsigned size)
+{
+   const unsigned bytes_per_elem = util_format_get_blocksize(pfmt);
+   fill_default_image_param(param);
+   param->stride[0] = bytes_per_elem;
+   param->size[0] = size / bytes_per_elem;
+}
+#else /* GEN_GEN >= 9: stub the image-param helpers so their call sites expand to nothing */
+#define isl_surf_fill_image_param(x, ...)
+#define fill_default_image_param(x, ...)
+#define fill_buffer_image_param(x, ...)
+#endif /* GEN_GEN < 9 */
+
 /**
  * The pipe->set_shader_images() driver hook.
  */
@@ -1798,19 +1828,31 @@ iris_set_shader_images(struct pipe_context *ctx,
             };
 
             fill_surface_state(&screen->isl_dev, map, res, &view);
+            isl_surf_fill_image_param(&screen->isl_dev,
+                                      &shs->image[start_slot + i].param,
+                                      &res->surf, &view);
          } else {
             fill_buffer_surface_state(&screen->isl_dev, res->bo, map,
                                       isl_format, img->u.buf.offset,
                                       img->u.buf.size);
+            fill_buffer_image_param(&shs->image[start_slot + i].param,
+                                    img->format, img->u.buf.size);
          }
       } else {
          pipe_resource_reference(&shs->image[start_slot + i].res, NULL);
          pipe_resource_reference(&shs->image[start_slot + i].surface_state.res,
                                  NULL);
+         fill_default_image_param(&shs->image[start_slot + i].param);
       }
    }
 
    ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS << stage;
+
+   /* Broadwell also needs brw_image_params re-uploaded */
+   if (GEN_GEN < 9) {
+      ice->state.dirty |= IRIS_DIRTY_CONSTANTS_VS << stage;
+      shs->cbuf0_needs_upload = true;
+   }
 }
 
 
@@ -2289,7 +2331,16 @@ upload_uniforms(struct iris_context *ice,
       uint32_t sysval = shader->system_values[i];
       uint32_t value = 0;
 
-      if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(sysval)) {
+      if (BRW_PARAM_DOMAIN(sysval) == BRW_PARAM_DOMAIN_IMAGE) {
+         unsigned img = BRW_PARAM_IMAGE_IDX(sysval);
+         unsigned offset = BRW_PARAM_IMAGE_OFFSET(sysval);
+         struct brw_image_param *param = &shs->image[img].param;
+
+         assert(offset < sizeof(struct brw_image_param));
+         value = ((uint32_t *) param)[offset];
+      } else if (sysval == BRW_PARAM_BUILTIN_ZERO) {
+         value = 0;
+      } else if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(sysval)) {
          int plane = BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(sysval);
          int comp  = BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(sysval);
          value = fui(ice->state.clip_planes.ucp[plane][comp]);