freedreno/ir3: add SSBO get_buffer_size() support
author Rob Clark <robdclark@gmail.com>
Mon, 30 Oct 2017 17:23:37 +0000 (13:23 -0400)
committer Rob Clark <robdclark@gmail.com>
Sun, 12 Nov 2017 17:28:59 +0000 (12:28 -0500)
Somehow I overlooked this when adding initial SSBO support.

Signed-off-by: Rob Clark <robdclark@gmail.com>
src/gallium/drivers/freedreno/a5xx/fd5_emit.c
src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
src/gallium/drivers/freedreno/ir3/ir3_nir.c
src/gallium/drivers/freedreno/ir3/ir3_nir.h
src/gallium/drivers/freedreno/ir3/ir3_shader.c
src/gallium/drivers/freedreno/ir3/ir3_shader.h

index 21931e9dfbee23d62d6c98c2bd4521d92ce882c9..3d8e43ad3e4d6e76c29d98229a3754c977f088f9 100644 (file)
@@ -379,14 +379,8 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
                        CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
        OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
        for (unsigned i = 0; i < count; i++) {
-               struct pipe_shader_buffer *buf = &so->sb[i];
-               if (buf->buffer) {
-                       struct fd_resource *rsc = fd_resource(buf->buffer);
-                       OUT_RELOCW(ring, rsc->bo, 0, 0, 0);
-               } else {
-                       OUT_RING(ring, 0x00000000);
-                       OUT_RING(ring, 0x00000000);
-               }
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x00000000);
                OUT_RING(ring, 0x00000000);
                OUT_RING(ring, 0x00000000);
        }
@@ -401,10 +395,13 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
        OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
        for (unsigned i = 0; i < count; i++) {
                struct pipe_shader_buffer *buf = &so->sb[i];
+               unsigned sz = buf->buffer_size;
 
-               // TODO maybe offset encoded somewhere here??
-               OUT_RING(ring, (buf->buffer_size << 16));
-               OUT_RING(ring, 0x00000000);
+               /* width is in dwords, overflows into height: */
+               sz /= 4;
+
+               OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz));
+               OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16));
        }
 
        OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
index 6b352de3e3d03b316922eddfb8e178d8965d58d7..ac38ec613117d31399a2ee641b78ff50d53ee479 100644 (file)
@@ -217,6 +217,8 @@ compile_init(struct ir3_compiler *compiler,
                nir_print_shader(ctx->s, stdout);
        }
 
+       ir3_nir_scan_driver_consts(ctx->s, &so->const_layout);
+
        so->num_uniforms = ctx->s->num_uniforms;
        so->num_ubos = ctx->s->info.num_ubos;
 
@@ -225,6 +227,7 @@ compile_init(struct ir3_compiler *compiler,
         *
         *    user consts
         *    UBO addresses
+        *    SSBO sizes
         *    if (vertex shader) {
         *        driver params (IR3_DP_*)
         *        if (stream_output.num_outputs > 0)
@@ -245,6 +248,12 @@ compile_init(struct ir3_compiler *compiler,
                constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4;
        }
 
+       if (so->const_layout.ssbo_size.count > 0) {
+               unsigned cnt = so->const_layout.ssbo_size.count;
+               so->constbase.ssbo_sizes = constoff;
+               constoff += align(cnt, 4) / 4;
+       }
+
        unsigned num_driver_params = 0;
        if (so->type == SHADER_VERTEX) {
                num_driver_params = IR3_DP_VS_COUNT;
@@ -1302,6 +1311,21 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
        array_insert(b, b->keeps, stgb);
 }
 
+/* Emit the result of get_buffer_size for one SSBO.
+ *
+ * src[] = { block_index }
+ *
+ * The size is not computed in the shader: it is read from the driver const
+ * slot that ir3_nir_scan_driver_consts() assigned to this block, i.e.
+ * constbase.ssbo_sizes plus const_layout.ssbo_size.off[block_index].
+ *
+ * NOTE(review): assumes the block index is a compile-time constant, since
+ * the result of nir_src_as_const_value() is dereferenced unchecked -- TODO
+ * confirm that indirect SSBO indexing cannot reach this point.
+ */
+static void
+emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
+               struct ir3_instruction **dst)
+{
+       const struct ir3_driver_const_layout *layout = &ctx->so->const_layout;
+       unsigned blk_idx = nir_src_as_const_value(intr->src[0])->u32[0];
+
+       /* Validate before indexing off[]: the block must be in range and
+        * must have been given a slot by the scan pass.  The unsigned
+        * literal keeps the shift well defined for block 31.
+        */
+       debug_assert((blk_idx < PIPE_MAX_SHADER_BUFFERS) &&
+               (layout->ssbo_size.mask & (1u << blk_idx)));
+
+       /* SSBO size stored as a const starting at ssbo_sizes: */
+       unsigned idx = regid(ctx->so->constbase.ssbo_sizes, 0) +
+               layout->ssbo_size.off[blk_idx];
+
+       dst[0] = create_uniform(ctx, idx);
+}
+
 static struct ir3_instruction *
 emit_intrinsic_atomic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 {
@@ -1483,6 +1507,9 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
        case nir_intrinsic_store_ssbo:
                emit_intrinsic_store_ssbo(ctx, intr);
                break;
+       case nir_intrinsic_get_buffer_size:
+               emit_intrinsic_ssbo_size(ctx, intr, dst);
+               break;
        case nir_intrinsic_ssbo_atomic_add:
        case nir_intrinsic_ssbo_atomic_imin:
        case nir_intrinsic_ssbo_atomic_umin:
index ef7facff3328f64e5bef4c936d989e7779ab9c4d..19d05b462e5e5bde2528a9ec7c5b375618d311b5 100644 (file)
@@ -209,3 +209,38 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 
        return s;
 }
+
+/* Walk every instruction of every function implementation in the shader and
+ * record which driver-internal consts it needs.
+ *
+ * Currently this only handles get_buffer_size: the first time a given SSBO
+ * block index is seen, its bit is set in layout->ssbo_size.mask and it is
+ * handed the next free const slot (layout->ssbo_size.off[idx]), bumping
+ * layout->ssbo_size.count.  Repeated queries of the same block share a slot.
+ *
+ * NOTE(review): the layout is only accumulated into (|= / +=), never reset
+ * here -- presumably the caller passes in a zeroed struct; TODO confirm.
+ * NOTE(review): assumes the block index source is a constant, since the
+ * result of nir_src_as_const_value() is dereferenced unchecked.
+ */
+void
+ir3_nir_scan_driver_consts(nir_shader *shader,
+               struct ir3_driver_const_layout *layout)
+{
+       nir_foreach_function(function, shader) {
+               if (!function->impl)
+                       continue;
+
+               nir_foreach_block(block, function->impl) {
+                       nir_foreach_instr(instr, block) {
+                               if (instr->type != nir_instr_type_intrinsic)
+                                       continue;
+
+                               nir_intrinsic_instr *intr =
+                                       nir_instr_as_intrinsic(instr);
+                               unsigned idx;
+
+                               switch (intr->intrinsic) {
+                               case nir_intrinsic_get_buffer_size:
+                                       idx = nir_src_as_const_value(intr->src[0])->u32[0];
+                                       /* never write past the end of off[] */
+                                       if (idx >= PIPE_MAX_SHADER_BUFFERS)
+                                               break;
+                                       /* already has a slot (1u: no signed shift) */
+                                       if (layout->ssbo_size.mask & (1u << idx))
+                                               break;
+                                       layout->ssbo_size.mask |= (1u << idx);
+                                       layout->ssbo_size.off[idx] =
+                                               layout->ssbo_size.count;
+                                       layout->ssbo_size.count += 1; /* one const per */
+                                       break;
+                               default:
+                                       break;
+                               }
+                       }
+               }
+       }
+}
index 2e2e093b09896db66e6e09f36555e028614ca597..e0e3108e328b59e17a7ef2d3b14104c63cae4659 100644 (file)
@@ -34,6 +34,8 @@
 
 #include "ir3_shader.h"
 
+void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layout *layout);
+
 bool ir3_nir_lower_if_else(nir_shader *shader);
 bool ir3_nir_apply_trig_workarounds(nir_shader *shader);
 
index 557cd5613a4246c3179fa57188f03d1da7994fd4..26f291de89430e8d199fc53b89bdabba1dd4c9c0 100644 (file)
@@ -606,6 +606,27 @@ emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
        }
 }
 
+/* Upload the buffer sizes that the shader reads back for get_buffer_size.
+ *
+ * For every SSBO flagged in v->const_layout.ssbo_size.mask, the bound
+ * buffer's buffer_size is written to the slot recorded in
+ * const_layout.ssbo_size.off[], and the resulting table is emitted as
+ * consts starting at v->constbase.ssbo_sizes.
+ *
+ * Nothing is emitted when the variant allocated no SSBO size consts, or
+ * when the section lies at or beyond v->constlen.
+ */
+static void
+emit_ssbo_sizes(struct fd_context *ctx, const struct ir3_shader_variant *v,
+               struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb)
+{
+       const uint32_t count = v->const_layout.ssbo_size.count;
+       const uint32_t offset = v->constbase.ssbo_sizes;
+
+       /* constbase.ssbo_sizes is only assigned when count > 0, so checking
+        * constlen alone is not enough: with no sizes to upload we would
+        * declare a zero-length array and emit a WFI plus an empty upload.
+        */
+       if (count == 0 || v->constlen <= offset)
+               return;
+
+       /* consts are emitted in groups of four, so round the table up */
+       uint32_t sizes[align(count, 4)];
+       unsigned mask = v->const_layout.ssbo_size.mask;
+
+       while (mask) {
+               unsigned index = u_bit_scan(&mask);
+               unsigned off = v->const_layout.ssbo_size.off[index];
+               sizes[off] = sb->sb[index].buffer_size;
+       }
+
+       /* slots [0, count) were filled above; zero the alignment padding
+        * rather than uploading uninitialized stack contents
+        */
+       for (unsigned i = count; i < ARRAY_SIZE(sizes); i++)
+               sizes[i] = 0;
+
+       fd_wfi(ctx->batch, ring);
+       /* offset * 4: constbase is advanced in units of four consts (see
+        * the align(cnt, 4) / 4 in compile_init)
+        */
+       ctx->emit_const(ring, v->type, offset * 4,
+               0, ARRAY_SIZE(sizes), sizes, NULL);
+}
+
 static void
 emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v,
                struct fd_ringbuffer *ring)
@@ -726,6 +747,11 @@ emit_common_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
                if (shader_dirty)
                        emit_immediates(ctx, v, ring);
        }
+
+       if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_SSBO)) {
+               struct fd_shaderbuf_stateobj *sb = &ctx->shaderbuf[t];
+               emit_ssbo_sizes(ctx, v, ring, sb);
+       }
 }
 
 void
index 9984809ea24f74277c28d326149bfaa19c53c8bf..dd68e69d16cae4afa08b1f4e354feac7720ec7b7 100644 (file)
@@ -56,6 +56,26 @@ enum ir3_driver_param {
        IR3_DP_VS_COUNT   = 36   /* must be aligned to vec4 */
 };
 
+/**
+ * Layout of driver-internal consts that a shader may or may not need.
+ * Rather than allocating worst-case const space for these, we scan the
+ * shader (see ir3_nir_scan_driver_consts()) and allocate consts only as
+ * needed:
+ *
+ *   + SSBO sizes: only needed if shader has a get_buffer_size intrinsic
+ *     for a given SSBO
+ */
+struct ir3_driver_const_layout {
+       struct {
+               uint32_t mask;  /* bitmask of SSBOs that have get_buffer_size */
+               uint32_t count; /* number of consts allocated */
+               /* one const allocated per SSBO which has get_buffer_size;
+                * ssbo_size.off[ssbo_id] is that SSBO's offset from the
+                * start of the SSBO size consts (constbase.ssbo_sizes).
+                * Only entries whose bit is set in mask are meaningful:
+                */
+               uint32_t off[PIPE_MAX_SHADER_BUFFERS];
+       } ssbo_size;
+};
+
 /* Configuration key used to identify a shader variant.. different
  * shader variants can be used to implement features not supported
  * in hw (two sided color), binning-pass vertex shader, etc.
@@ -173,6 +193,7 @@ struct ir3_shader_variant {
 
        struct ir3_shader_key key;
 
+       struct ir3_driver_const_layout const_layout;
        struct ir3_info info;
        struct ir3 *ir;
 
@@ -191,6 +212,7 @@ struct ir3_shader_variant {
         * constants, etc.
         */
        unsigned num_uniforms;
+
        unsigned num_ubos;
 
        /* About Linkage:
@@ -271,6 +293,8 @@ struct ir3_shader_variant {
        struct {
                /* user const start at zero */
                unsigned ubo;
+               /* NOTE that a3xx might need a section for SSBO addresses too */
+               unsigned ssbo_sizes;
                unsigned driver_param;
                unsigned tfbo;
                unsigned immediate;