freedreno/a6xx: Use LDC for UBO loads.
authorEric Anholt <eric@anholt.net>
Mon, 20 Apr 2020 23:39:54 +0000 (16:39 -0700)
committerMarge Bot <eric+marge@anholt.net>
Thu, 14 May 2020 00:10:43 +0000 (00:10 +0000)
Using LDC saves addressing math, but may cause multiple loads to be emitted
and then combined with bcsel, since NIR currently doesn't give us good
address alignment information.  I don't know of any workloads using
non-const-uploaded UBOs, so I don't have perf numbers for this change.

This makes us match the GLES blob's behavior, as well as turnip's (other than
ours being bindful rather than bindless).
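
For reference, LDC addresses a UBO in vec4 units, with the starting component
baked into the instruction, instead of taking a byte address like the old
pointer-based loads.  A minimal sketch of the offset split the lowering pass
has to perform (illustrative only; the helper name is hypothetical and this is
not the actual NIR lowering code):

	/* Split a dword-aligned UBO byte offset into the vec4 index and start
	 * component that LDC consumes.  Hypothetical helper for illustration.
	 */
	static inline void
	ubo_byte_offset_to_ldc(unsigned byte_offset,
			unsigned *vec4_index, unsigned *start_component)
	{
		*vec4_index = byte_offset >> 4;              /* 16 bytes per vec4 */
		*start_component = (byte_offset & 0xf) >> 2; /* dwords into the vec4 */
	}

When the base offset isn't constant and NIR can't prove its alignment, the
start component isn't known at compile time, which is what leads to the extra
loads and bcsels mentioned above.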

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4858>

src/freedreno/ir3/ir3_nir.c
src/freedreno/ir3/ir3_nir.h
src/freedreno/ir3/ir3_nir_lower_io_offsets.c
src/freedreno/ir3/ir3_shader.h
src/gallium/drivers/freedreno/a6xx/fd6_const.c
src/gallium/drivers/freedreno/a6xx/fd6_program.c

index 07acb27c6b31cb0aba2acc0dc774d5c8cc4c0bc0..b3f784a557e44a450186b179d2584d95e0a3b150 100644 (file)
@@ -317,7 +317,7 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
        const bool ubo_progress = !key && OPT(s, ir3_nir_analyze_ubo_ranges, shader);
        const bool idiv_progress = OPT(s, nir_lower_idiv, nir_lower_idiv_fast);
        /* UBO offset lowering has to come after we've decided what will be left as load_ubo */
-       OPT_V(s, ir3_nir_lower_io_offsets);
+       OPT_V(s, ir3_nir_lower_io_offsets, shader->compiler->gpu_id);
 
        if (ubo_progress || idiv_progress)
                ir3_optimize_loop(s);
@@ -449,7 +449,13 @@ ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir)
                        MAX2(const_state->num_driver_params, IR3_DP_VTXCNT_MAX + 1);
        }
 
-       const_state->num_ubos = nir->info.num_ubos;
+       /* On a6xx, we use UBO descriptors and LDC instead of UBO pointers in the
+        * constbuf.
+        */
+       if (compiler->gpu_id >= 600)
+               shader->num_ubos = nir->info.num_ubos;
+       else
+               const_state->num_ubos = nir->info.num_ubos;
 
        /* num_driver_params is scalar, align to vec4: */
        const_state->num_driver_params = align(const_state->num_driver_params, 4);
index 80cd0870374531639961ebdba4bbbc0c5cc04c70..bd29da7c6c6601f2362128f82a22be697add8231 100644 (file)
@@ -36,7 +36,7 @@
 bool ir3_nir_apply_trig_workarounds(nir_shader *shader);
 bool ir3_nir_lower_imul(nir_shader *shader);
 bool ir3_nir_lower_tg4_to_tex(nir_shader *shader);
-bool ir3_nir_lower_io_offsets(nir_shader *shader);
+bool ir3_nir_lower_io_offsets(nir_shader *shader, int gpu_id);
 bool ir3_nir_lower_load_barycentric_at_sample(nir_shader *shader);
 bool ir3_nir_lower_load_barycentric_at_offset(nir_shader *shader);
 bool ir3_nir_move_varying_inputs(nir_shader *shader);
index 429cde5bfd754239bad7ceff4867a4a599317032..2d035eedd2303545d215af73f79b2a170abbd5f5 100644 (file)
@@ -255,15 +255,12 @@ lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
 }
 
 static bool
-lower_offset_for_ubo(nir_intrinsic_instr *intrinsic, nir_builder *b)
+lower_offset_for_ubo(nir_intrinsic_instr *intrinsic, nir_builder *b, int gpu_id)
 {
-       /* We only need to lower offset if using LDC. Currently, we only use LDC
-        * in the bindless mode. Also, LDC is introduced on A6xx, but currently we
-        * only use bindless in turnip which is A6xx only.
-        *
-        * TODO: We should be using LDC always on A6xx+.
+       /* We only need to lower offset if using LDC, which takes an offset in
+        * vec4 units and has the start component baked into the instruction.
         */
-       if (!ir3_bindless_resource(intrinsic->src[0]))
+       if (gpu_id < 600)
                return false;
 
        /* TODO handle other bitsizes, including non-dword-aligned loads */
@@ -333,7 +330,7 @@ lower_offset_for_ubo(nir_intrinsic_instr *intrinsic, nir_builder *b)
 }
 
 static bool
-lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx)
+lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx, int gpu_id)
 {
        bool progress = false;
 
@@ -345,7 +342,7 @@ lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx)
 
                /* UBO */
                if (intr->intrinsic == nir_intrinsic_load_ubo) {
-                       progress |= lower_offset_for_ubo(intr, b);
+                       progress |= lower_offset_for_ubo(intr, b, gpu_id);
                        continue;
                }
 
@@ -364,7 +361,7 @@ lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx)
 }
 
 static bool
-lower_io_offsets_func(nir_function_impl *impl)
+lower_io_offsets_func(nir_function_impl *impl, int gpu_id)
 {
        void *mem_ctx = ralloc_parent(impl);
        nir_builder b;
@@ -372,7 +369,7 @@ lower_io_offsets_func(nir_function_impl *impl)
 
        bool progress = false;
        nir_foreach_block_safe (block, impl) {
-               progress |= lower_io_offsets_block(block, &b, mem_ctx);
+               progress |= lower_io_offsets_block(block, &b, mem_ctx, gpu_id);
        }
 
        if (progress) {
@@ -384,13 +381,13 @@ lower_io_offsets_func(nir_function_impl *impl)
 }
 
 bool
-ir3_nir_lower_io_offsets(nir_shader *shader)
+ir3_nir_lower_io_offsets(nir_shader *shader, int gpu_id)
 {
        bool progress = false;
 
        nir_foreach_function (function, shader) {
                if (function->impl)
-                       progress |= lower_io_offsets_func(function->impl);
+                       progress |= lower_io_offsets_func(function->impl, gpu_id);
        }
 
        return progress;
index e401498612fad3df5007f222b06198c286625761..d623cf4fdaf5ad142210675f63e2796f1affa065 100644 (file)
@@ -607,6 +607,12 @@ struct ir3_shader {
        struct ir3_compiler *compiler;
 
        struct ir3_ubo_analysis_state ubo_state;
+
+       /* Number of UBOs loaded by LDC, as opposed to LDG through pointers in
+        * ubo_state.
+        */
+       unsigned num_ubos;
+
        struct ir3_const_state const_state;
 
        struct nir_shader *nir;
index 86ea1d1e41cb1f21f5a249ec07c88ddb75addd0a..1d24d8aafe25e1837fcb9b8a82fd7a09ef090d8a 100644 (file)
@@ -104,39 +104,6 @@ fd6_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type,
        }
 }
 
-static void
-fd6_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type,
-               uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets)
-{
-       uint32_t anum = align(num, 2);
-       uint32_t i;
-
-       debug_assert((regid % 4) == 0);
-
-       OUT_PKT7(ring, fd6_stage2opcode(type), 3 + (2 * anum));
-       OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) |
-                       CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS)|
-                       CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
-                       CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type)) |
-                       CP_LOAD_STATE6_0_NUM_UNIT(anum/2));
-       OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
-
-       for (i = 0; i < num; i++) {
-               if (prscs[i]) {
-                       OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
-               } else {
-                       OUT_RING(ring, 0xbad00000 | (i << 16));
-                       OUT_RING(ring, 0xbad00000 | (i << 16));
-               }
-       }
-
-       for (; i < anum; i++) {
-               OUT_RING(ring, 0xffffffff);
-               OUT_RING(ring, 0xffffffff);
-       }
-}
-
 static bool
 is_stateobj(struct fd_ringbuffer *ring)
 {
@@ -160,9 +127,7 @@ emit_const_bo(struct fd_ringbuffer *ring,
                const struct ir3_shader_variant *v, uint32_t dst_offset,
                uint32_t num, struct pipe_resource **prscs, uint32_t *offsets)
 {
-       /* TODO inline this */
-       assert(dst_offset + num < v->constlen * 4);
-       fd6_emit_const_bo(ring, v->type, dst_offset, num, prscs, offsets);
+       unreachable("shouldn't be called on a6xx");
 }
 
 static void
@@ -262,6 +227,42 @@ emit_tess_consts(struct fd6_emit *emit)
        fd6_emit_take_group(emit, constobj, FD6_GROUP_PRIMITIVE_PARAMS, ENABLE_ALL);
 }
 
+static void
+fd6_emit_ubos(const struct ir3_shader_variant *v,
+               struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
+{
+       if (!v->shader->num_ubos)
+               return;
+
+       int num_ubos = v->shader->num_ubos;
+
+       OUT_PKT7(ring, fd6_stage2opcode(v->type), 3 + (2 * num_ubos));
+       OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
+                       CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO)|
+                       CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+                       CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(v->type)) |
+                       CP_LOAD_STATE6_0_NUM_UNIT(num_ubos));
+       OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
+       OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+
+       for (int i = 0; i < num_ubos; i++) {
+               /* Note: gallium constbuf 0 was always lowered to hardware constbuf,
+                * and UBO load indices decremented by one.
+                */
+               struct pipe_constant_buffer *cb = &constbuf->cb[i + 1];
+               if (cb->buffer) {
+                       int size_vec4s = DIV_ROUND_UP(cb->buffer_size, 16);
+                       OUT_RELOC(ring, fd_resource(cb->buffer)->bo,
+                                       cb->buffer_offset,
+                                       (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32,
+                                       0);
+               } else {
+                       OUT_RING(ring, 0xbad00000 | (i << 16));
+                       OUT_RING(ring, 0xbad00000 | (i << 16));
+               }
+       }
+}
+
 static void
 emit_user_consts(struct fd6_emit *emit)
 {
@@ -288,7 +289,7 @@ emit_user_consts(struct fd6_emit *emit)
                if (!variants[i])
                        continue;
                ir3_emit_user_consts(ctx->screen, variants[i], constobj, &ctx->constbuf[types[i]]);
-               ir3_emit_ubos(ctx->screen, variants[i], constobj, &ctx->constbuf[types[i]]);
+               fd6_emit_ubos(variants[i], constobj, &ctx->constbuf[types[i]]);
        }
 
        fd6_emit_take_group(emit, constobj, FD6_GROUP_CONST, ENABLE_ALL);
@@ -335,6 +336,7 @@ fd6_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
                struct fd_context *ctx, const struct pipe_grid_info *info)
 {
        ir3_emit_cs_consts(v, ring, ctx, info);
+       fd6_emit_ubos(v, ring, &ctx->constbuf[PIPE_SHADER_COMPUTE]);
 }
 
 void
index 135adfa59c8ad20b9bc1915164984d9d67d5f43a..58613d105c45233996bba60a3d6c3a4295f32cff 100644 (file)
@@ -1022,7 +1022,7 @@ fd6_shader_state_create(struct pipe_context *pctx, const struct pipe_shader_stat
 
        /* also account for UBO addresses: */
        packets += 1;
-       size += 2 * align(shader->const_state.num_ubos, 2);
+       size += 2 * shader->num_ubos;
 
        unsigned sizedwords = (4 * packets) + size;
        shader->ubo_state.cmdstream_size = sizedwords * 4;