iris: better ubo handling
author Kenneth Graunke <kenneth@whitecape.org>
Wed, 6 Jun 2018 09:16:52 +0000 (02:16 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Thu, 21 Feb 2019 18:26:06 +0000 (10:26 -0800)
src/gallium/drivers/iris/iris_context.h
src/gallium/drivers/iris/iris_program.c
src/gallium/drivers/iris/iris_screen.c
src/gallium/drivers/iris/iris_state.c

index 50942198f5faf1a1fab9b20a827b7942b97ed4d5..f23ba7c73e8cfd3df4b203c56b47bd820e1a741f 100644 (file)
@@ -87,29 +87,6 @@ struct blorp_params;
 #define IRIS_DIRTY_CONSTANTS_FS             (1ull << 39)
 #define IRIS_DIRTY_DEPTH_BUFFER             (1ull << 40)
 
-enum brw_param_domain {
-   BRW_PARAM_DOMAIN_BUILTIN = 0,
-   BRW_PARAM_DOMAIN_PARAMETER,
-   BRW_PARAM_DOMAIN_UNIFORM,
-   BRW_PARAM_DOMAIN_IMAGE,
-};
-
-#define BRW_PARAM(domain, val)   (BRW_PARAM_DOMAIN_##domain << 24 | (val))
-#define BRW_PARAM_DOMAIN(param)  ((uint32_t)(param) >> 24)
-#define BRW_PARAM_VALUE(param)   ((uint32_t)(param) & 0x00ffffff)
-
-#define BRW_PARAM_PARAMETER(idx, comp) \
-   BRW_PARAM(PARAMETER, ((idx) << 2) | (comp))
-#define BRW_PARAM_PARAMETER_IDX(param)    (BRW_PARAM_VALUE(param) >> 2)
-#define BRW_PARAM_PARAMETER_COMP(param)   (BRW_PARAM_VALUE(param) & 0x3)
-
-#define BRW_PARAM_UNIFORM(idx)            BRW_PARAM(UNIFORM, (idx))
-#define BRW_PARAM_UNIFORM_IDX(param)      BRW_PARAM_VALUE(param)
-
-#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset))
-#define BRW_PARAM_IMAGE_IDX(value)        (BRW_PARAM_VALUE(value) >> 8)
-#define BRW_PARAM_IMAGE_OFFSET(value)     (BRW_PARAM_VALUE(value) & 0xf)
-
 struct iris_depth_stencil_alpha_state;
 
 enum iris_program_cache_id {
@@ -195,9 +172,8 @@ struct iris_compiled_shader {
 
 struct iris_shader_state {
    struct pipe_constant_buffer constbuf[PIPE_MAX_CONSTANT_BUFFERS];
-   struct pipe_resource *push_resource;
+   struct pipe_resource *const_resources[PIPE_MAX_CONSTANT_BUFFERS];
    unsigned const_offset;
-   unsigned const_size;
 };
 
 struct iris_vtable {
index d1c4b88ca97167c566f3de9a590e36e99fc70ef5..59fcbed63026c3b0ffb369da2f5849a4fb8eea64 100644 (file)
@@ -64,19 +64,6 @@ iris_create_shader_state(struct pipe_context *ctx,
 
    nir = brw_preprocess_nir(screen->compiler, nir);
 
-#if 0
-   /* Reassign uniform locations using type_size_scalar_bytes instead of
-    * the slot based calculation that st_nir uses.
-    */
-   nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
-                            type_size_scalar_bytes);
-   nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
-#endif
-   nir_foreach_variable(var, &nir->uniforms) {
-      var->data.driver_location *= 4;
-   }
-   nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
-
    ish->program_id = get_new_program_id(screen);
    ish->base.type = PIPE_SHADER_IR_NIR;
    ish->base.ir.nir = nir;
@@ -154,10 +141,12 @@ iris_bind_fs_state(struct pipe_context *ctx, void *hwcso)
  */
 static uint32_t
 assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
-                                    const struct shader_info *info,
+                                    const struct nir_shader *nir,
                                     struct brw_stage_prog_data *prog_data,
                                     uint32_t next_binding_table_offset)
 {
+   const struct shader_info *info = &nir->info;
+
    if (info->num_textures) {
       prog_data->binding_table.texture_start = next_binding_table_offset;
       prog_data->binding_table.gather_texture_start = next_binding_table_offset;
@@ -167,10 +156,12 @@ assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
       prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
    }
 
-   if (info->num_ubos) {
+   int num_ubos = info->num_ubos + (nir->num_uniforms > 0 ? 1 : 0);
+
+   if (num_ubos) {
       //assert(info->num_ubos <= BRW_MAX_UBO);
       prog_data->binding_table.ubo_start = next_binding_table_offset;
-      next_binding_table_offset += info->num_ubos;
+      next_binding_table_offset += num_ubos;
    } else {
       prog_data->binding_table.ubo_start = 0xd0d0d0d0;
    }
@@ -213,28 +204,41 @@ assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
 }
 
 static void
-iris_setup_uniforms(void *mem_ctx,
+iris_setup_uniforms(const struct brw_compiler *compiler,
+                    void *mem_ctx,
                     nir_shader *nir,
                     struct brw_stage_prog_data *prog_data)
 {
-   prog_data->nr_params = nir->num_uniforms * 4;
+   prog_data->nr_params = nir->num_uniforms;
    prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);
 
-   nir->num_uniforms *= 16;
-
    nir_foreach_variable(var, &nir->uniforms) {
-      /* UBO's, atomics and samplers don't take up space */
-      //if (var->interface_type != NULL || var->type->contains_atomic())
-         //continue;
-
       const unsigned components = glsl_get_components(var->type);
 
-      for (unsigned i = 0; i < 4; i++) {
+      for (unsigned i = 0; i < components; i++) {
          prog_data->param[var->data.driver_location] =
-            i < components ? BRW_PARAM_PARAMETER(var->data.driver_location, i)
-                           : BRW_PARAM_BUILTIN_ZERO;
+            var->data.driver_location;
       }
    }
+
+   // XXX: vs clip planes?
+   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
+}
+
+static void
+iris_setup_push_uniform_range(const struct brw_compiler *compiler,
+                              struct brw_stage_prog_data *prog_data)
+{
+   if (prog_data->nr_params) {
+      for (int i = 3; i > 0; i--)
+         prog_data->ubo_ranges[i] = prog_data->ubo_ranges[i - 1];
+
+      prog_data->ubo_ranges[0] = (struct brw_ubo_range) {
+         .block = 0,
+         .start = 0,
+         .length = DIV_ROUND_UP(prog_data->nr_params, 8),
+      };
+   }
 }
 
 static bool
@@ -256,9 +260,9 @@ iris_compile_vs(struct iris_context *ice,
    nir_shader *nir = ish->base.ir.nir;
 
    // XXX: alt mode
-   assign_common_binding_table_offsets(devinfo, &nir->info, prog_data, 0);
+   assign_common_binding_table_offsets(devinfo, nir, prog_data, 0);
 
-   iris_setup_uniforms(mem_ctx, nir, prog_data);
+   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data);
 
    brw_compute_vue_map(devinfo,
                        &vue_prog_data->vue_map, nir->info.outputs_written,
@@ -274,6 +278,8 @@ iris_compile_vs(struct iris_context *ice,
       return false;
    }
 
+   iris_setup_push_uniform_range(compiler, prog_data);
+
    iris_upload_and_bind_shader(ice, IRIS_CACHE_VS, key, program, prog_data);
 
    ralloc_free(mem_ctx);
@@ -317,7 +323,7 @@ iris_compile_tes(struct iris_context *ice,
 
    nir_shader *nir = ish->base.ir.nir;
 
-   assign_common_binding_table_offsets(devinfo, &nir->info, prog_data, 0);
+   assign_common_binding_table_offsets(devinfo, nir, prog_data, 0);
 
    struct brw_vue_map input_vue_map;
    brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
@@ -333,6 +339,8 @@ iris_compile_tes(struct iris_context *ice,
       return false;
    }
 
+   iris_setup_push_uniform_range(compiler, prog_data);
+
    iris_upload_and_bind_shader(ice, IRIS_CACHE_TES, key, program, prog_data);
 
    ralloc_free(mem_ctx);
@@ -383,10 +391,10 @@ iris_compile_fs(struct iris_context *ice,
    nir_shader *nir = ish->base.ir.nir;
 
    // XXX: alt mode
-   assign_common_binding_table_offsets(devinfo, &nir->info, prog_data,
+   assign_common_binding_table_offsets(devinfo, nir, prog_data,
                                        MAX2(key->nr_color_regions, 1));
 
-   iris_setup_uniforms(mem_ctx, nir, prog_data);
+   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data);
 
    char *error_str = NULL;
    const unsigned *program =
@@ -400,6 +408,8 @@ iris_compile_fs(struct iris_context *ice,
 
    //brw_alloc_stage_scratch(brw, &brw->wm.base, prog_data.base.total_scratch);
 
+   iris_setup_push_uniform_range(compiler, prog_data);
+
    iris_upload_and_bind_shader(ice, IRIS_CACHE_FS, key, program, prog_data);
 
    ralloc_free(mem_ctx);
index 342defd8e9336fc7e6c1686971067b5a3ec43db8..2b1a89fa3b402f49dea1bed8bd105ae320ca2449 100644 (file)
@@ -139,6 +139,7 @@ iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
    case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
    case PIPE_CAP_CULL_DISTANCE:
+   case PIPE_CAP_PACKED_UNIFORMS:
       return true;
 
    case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
@@ -154,7 +155,6 @@ iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_VERTEXID_NOBASE:
    case PIPE_CAP_FENCE_SIGNAL:
    case PIPE_CAP_CONSTBUF0_FLAGS:
-   case PIPE_CAP_PACKED_UNIFORMS:
    case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES:
    case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES:
    case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES:
index bcbab9e2b07da82ad37af81707df5f06ad201247..f8772315d1c0d6f5142ea7240ea682b65aa8e8e4 100644 (file)
@@ -1340,12 +1340,23 @@ iris_set_framebuffer_state(struct pipe_context *ctx,
 static void
 iris_set_constant_buffer(struct pipe_context *ctx,
                          enum pipe_shader_type p_stage, unsigned index,
-                         const struct pipe_constant_buffer *cb)
+                         const struct pipe_constant_buffer *input)
 {
    struct iris_context *ice = (struct iris_context *) ctx;
    gl_shader_stage stage = stage_from_pipe(p_stage);
+   struct iris_shader_state *shs = &ice->shaders.state[stage];
 
-   util_copy_constant_buffer(&ice->shaders.state[stage].constbuf[index], cb);
+   if (input && (input->buffer || input->user_buffer)) {
+      if (input->user_buffer) {
+         u_upload_data(ctx->const_uploader, 0, input->buffer_size, 32,
+                       input->user_buffer, &shs->const_offset,
+                       &shs->const_resources[index]);
+      } else {
+         pipe_resource_reference(&shs->const_resources[index], input->buffer);
+      }
+   } else {
+      pipe_resource_reference(&shs->const_resources[index], NULL);
+   }
 }
 
 static void
@@ -2170,36 +2181,47 @@ iris_upload_render_state(struct iris_context *ice,
       if (!(dirty & (IRIS_DIRTY_CONSTANTS_VS << stage)))
          continue;
 
-      struct pipe_constant_buffer *cbuf0 =
-         &ice->shaders.state[stage].constbuf[0];
-
-      if (!ice->shaders.prog[stage] || cbuf0->buffer || !cbuf0->buffer_size)
-         continue;
-
       struct iris_shader_state *shs = &ice->shaders.state[stage];
       struct iris_compiled_shader *shader = ice->shaders.prog[stage];
-      struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
-      // XXX: DIV_ROUND_UP(prog_data->nr_params, 8)?
-      //shs->const_size = DIV_ROUND_UP(cbuf0->buffer_size, 32);
-      shs->const_size = DIV_ROUND_UP(prog_data->nr_params, 8);
-      u_upload_data(ice->ctx.const_uploader, 0, 32 * shs->const_size, 32,
-                    cbuf0->user_buffer, &shs->const_offset,
-                    &shs->push_resource);
-   }
 
-   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
-      // XXX: wrong dirty tracking...
-      if (!(dirty & (IRIS_DIRTY_CONSTANTS_VS << stage)))
+      if (!shader)
          continue;
 
-      struct iris_shader_state *shs = &ice->shaders.state[stage];
-      struct iris_resource *res = (void *) shs->push_resource;
+      struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
 
       iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_VS), pkt) {
          pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
-         if (res) {
-            pkt.ConstantBody.ReadLength[3] = shs->const_size;
-            pkt.ConstantBody.Buffer[3] = ro_bo(res->bo, shs->const_offset);
+         if (prog_data) {
+            /* The Skylake PRM contains the following restriction:
+             *
+             *    "The driver must ensure The following case does not occur
+             *     without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
+             *     buffer 3 read length equal to zero committed followed by a
+             *     3DSTATE_CONSTANT_* with buffer 0 read length not equal to
+             *     zero committed."
+             *
+             * To avoid this, we program the buffers in the highest slots.
+             * This way, slot 0 is only used if slot 3 is also used.
+             */
+            int n = 3;
+
+            for (int i = 3; i >= 0; i--) {
+               const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
+
+               if (range->length == 0)
+                  continue;
+
+               // XXX: is range->block a constbuf index?  it would be nice
+               struct iris_resource *res =
+                  (void *) shs->const_resources[range->block];
+
+               assert(shs->const_offset % 32 == 0);
+
+               pkt.ConstantBody.ReadLength[n] = range->length;
+               pkt.ConstantBody.Buffer[n] =
+                  ro_bo(res->bo, range->start * 32 + shs->const_offset);
+               n--;
+            }
          }
       }
    }