Teach ir3 to pass SSBO sizes to the shader as driver consts, use them to implement the get_buffer_size intrinsic, and fix the size encoding in the a5xx SSBO descriptors (width in dwords, overflowing into height). Somehow I overlooked this when adding initial SSBO support.
Signed-off-by: Rob Clark <robdclark@gmail.com>
CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
for (unsigned i = 0; i < count; i++) {
- struct pipe_shader_buffer *buf = &so->sb[i];
- if (buf->buffer) {
- struct fd_resource *rsc = fd_resource(buf->buffer);
- OUT_RELOCW(ring, rsc->bo, 0, 0, 0);
- } else {
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- }
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
}
OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
for (unsigned i = 0; i < count; i++) {
struct pipe_shader_buffer *buf = &so->sb[i];
+ unsigned sz = buf->buffer_size;
- // TODO maybe offset encoded somewhere here??
- OUT_RING(ring, (buf->buffer_size << 16));
- OUT_RING(ring, 0x00000000);
+ /* width is in dwords, overflows into height: */
+ sz /= 4;
+
+ OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz));
+ OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16));
}
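To make the new size encoding concrete, here is a small standalone sketch of the bytes-to-dwords split (illustrative only: ssbo_size_to_wh() is a made-up helper, and treating WIDTH as a 16-bit field is an assumption based on the "overflows into height" comment, not the real A5XX_SSBO_1_* field definitions):

#include <assert.h>
#include <stdint.h>

/* Stand-in for the WIDTH/HEIGHT packing above: convert the byte size to
 * dwords, put the low 16 bits in WIDTH and the overflow in HEIGHT. */
static void
ssbo_size_to_wh(uint32_t buffer_size, uint32_t *width, uint32_t *height)
{
    uint32_t sz = buffer_size / 4;   /* bytes -> dwords */
    *width  = sz & 0xffff;
    *height = sz >> 16;
}

int
main(void)
{
    uint32_t w, h;
    ssbo_size_to_wh(1024 * 1024, &w, &h);   /* 1 MiB buffer = 0x40000 dwords */
    assert(w == 0x0000 && h == 0x4);
    return 0;
}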
OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
nir_print_shader(ctx->s, stdout);
}
+ ir3_nir_scan_driver_consts(ctx->s, &so->const_layout);
+
so->num_uniforms = ctx->s->num_uniforms;
so->num_ubos = ctx->s->info.num_ubos;
*
* user consts
* UBO addresses
+ * SSBO sizes
* if (vertex shader) {
* driver params (IR3_DP_*)
* if (stream_output.num_outputs > 0)
constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4;
}
+ if (so->const_layout.ssbo_size.count > 0) {
+ unsigned cnt = so->const_layout.ssbo_size.count;
+ so->constbase.ssbo_sizes = constoff;
+ constoff += align(cnt, 4) / 4;
+ }
+
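Since each tracked SSBO size occupies a single scalar const and const space is handed out in vec4 granules, the align(cnt, 4) / 4 above just rounds the dword count up to whole vec4 slots. A quick illustration (ALIGN here is a local round-up-to-multiple helper, not pulled from the mesa headers):

#include <assert.h>

#define ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))

int
main(void)
{
    assert(ALIGN(1, 4) / 4 == 1);   /* 1 size  -> 1 vec4 slot  */
    assert(ALIGN(3, 4) / 4 == 1);   /* 3 sizes -> 1 vec4 slot  */
    assert(ALIGN(5, 4) / 4 == 2);   /* 5 sizes -> 2 vec4 slots */
    return 0;
}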
unsigned num_driver_params = 0;
if (so->type == SHADER_VERTEX) {
num_driver_params = IR3_DP_VS_COUNT;
array_insert(b, b->keeps, stgb);
}
+/* src[] = { block_index } */
+static void
+emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
+ struct ir3_instruction **dst)
+{
+ /* SSBO size stored as a const starting at ssbo_sizes: */
+ unsigned blk_idx = nir_src_as_const_value(intr->src[0])->u32[0];
+ unsigned idx = regid(ctx->so->constbase.ssbo_sizes, 0) +
+ ctx->so->const_layout.ssbo_size.off[blk_idx];
+
+ debug_assert(ctx->so->const_layout.ssbo_size.mask & (1 << blk_idx));
+
+ dst[0] = create_uniform(ctx, idx);
+}
+
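As a worked example of the idx computation above (assuming ir3's regid(num, comp) packs a register as num * 4 + comp, so constbase values count vec4 slots while create_uniform() takes a scalar const index; the numbers below are hypothetical):

#include <assert.h>

int
main(void)
{
    unsigned constbase_ssbo_sizes = 5;   /* example vec4 slot of the ssbo_sizes section */
    unsigned off_blk = 1;                /* off[blk_idx] for the second tracked SSBO */
    unsigned idx = constbase_ssbo_sizes * 4 + off_blk;   /* regid(base, 0) + off */

    assert(idx == 21);      /* scalar const index... */
    assert(idx / 4 == 5);   /* ...i.e. vec4 const c5 */
    assert(idx % 4 == 1);   /* ...component .y       */
    return 0;
}

So in this example the shader reads that SSBO's size back as c5.y.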
static struct ir3_instruction *
emit_intrinsic_atomic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
case nir_intrinsic_store_ssbo:
emit_intrinsic_store_ssbo(ctx, intr);
break;
+ case nir_intrinsic_get_buffer_size:
+ emit_intrinsic_ssbo_size(ctx, intr, dst);
+ break;
case nir_intrinsic_ssbo_atomic_add:
case nir_intrinsic_ssbo_atomic_imin:
case nir_intrinsic_ssbo_atomic_umin:
return s;
}
+
+void
+ir3_nir_scan_driver_consts(nir_shader *shader,
+ struct ir3_driver_const_layout *layout)
+{
+ nir_foreach_function(function, shader) {
+ if (!function->impl)
+ continue;
+
+ nir_foreach_block(block, function->impl) {
+ nir_foreach_instr(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intr =
+ nir_instr_as_intrinsic(instr);
+ unsigned idx;
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_get_buffer_size:
+ idx = nir_src_as_const_value(intr->src[0])->u32[0];
+ if (layout->ssbo_size.mask & (1 << idx))
+ break;
+ layout->ssbo_size.mask |= (1 << idx);
+ layout->ssbo_size.off[idx] =
+ layout->ssbo_size.count;
+ layout->ssbo_size.count += 1; /* one const per SSBO */
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+}
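For example, a shader that calls get_buffer_size on SSBOs 0 and 2 ends up with mask = 0x5, count = 2, off[0] = 0 and off[2] = 1, which is the mapping the new emit_ssbo_sizes() helper (further down in the patch) uses to pack the sizes into consecutive const dwords. A trimmed-down, standalone version of the bookkeeping (example_ssbo_size is a stand-in struct, not the real ir3_driver_const_layout):

#include <assert.h>
#include <stdint.h>

struct example_ssbo_size {
    uint32_t mask, count;
    uint32_t off[8];
};

int
main(void)
{
    struct example_ssbo_size s = { 0 };
    /* pretend the scan saw get_buffer_size on SSBOs 0, 2, and 0 again */
    unsigned seen[] = { 0, 2, 0 };

    for (unsigned i = 0; i < 3; i++) {
        unsigned idx = seen[i];
        if (s.mask & (1 << idx))
            continue;               /* duplicate, already tracked */
        s.mask |= (1 << idx);
        s.off[idx] = s.count;
        s.count += 1;               /* one const per SSBO */
    }

    assert(s.mask == 0x5 && s.count == 2);
    assert(s.off[0] == 0 && s.off[2] == 1);
    return 0;
}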
#include "ir3_shader.h"
+void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layout *layout);
+
bool ir3_nir_lower_if_else(nir_shader *shader);
bool ir3_nir_apply_trig_workarounds(nir_shader *shader);
}
}
+static void
+emit_ssbo_sizes(struct fd_context *ctx, const struct ir3_shader_variant *v,
+ struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb)
+{
+ uint32_t offset = v->constbase.ssbo_sizes;
+ if (v->constlen > offset) {
+ uint32_t sizes[align(v->const_layout.ssbo_size.count, 4)];
+ unsigned mask = v->const_layout.ssbo_size.mask;
+
+ while (mask) {
+ unsigned index = u_bit_scan(&mask);
+ unsigned off = v->const_layout.ssbo_size.off[index];
+ sizes[off] = sb->sb[index].buffer_size;
+ }
+
+ fd_wfi(ctx->batch, ring);
+ ctx->emit_const(ring, v->type, offset * 4,
+ 0, ARRAY_SIZE(sizes), sizes, NULL);
+ }
+}
+
static void
emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring)
if (shader_dirty)
emit_immediates(ctx, v, ring);
}
+
+ if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_SSBO)) {
+ struct fd_shaderbuf_stateobj *sb = &ctx->shaderbuf[t];
+ emit_ssbo_sizes(ctx, v, ring, sb);
+ }
}
void
IR3_DP_VS_COUNT = 36 /* must be aligned to vec4 */
};
+/**
+ * For consts needed to pass internal values to the shader, which may or
+ * may not be required, rather than allocating worst-case const space we
+ * scan the shader and allocate consts as-needed:
+ *
+ * + SSBO sizes: only needed if the shader has a get_buffer_size
+ * intrinsic for a given SSBO
+ */
+struct ir3_driver_const_layout {
+ struct {
+ uint32_t mask; /* bitmask of SSBOs that have get_buffer_size */
+ uint32_t count; /* number of consts allocated */
+ /* one const allocated per SSBO which has get_buffer_size;
+ * ssbo_size.off[ssbo_id] is the offset from the start of the
+ * ssbo_sizes consts:
+ */
+ uint32_t off[PIPE_MAX_SHADER_BUFFERS];
+ } ssbo_size;
+};
+
/* Configuration key used to identify a shader variant.. different
* shader variants can be used to implement features not supported
* in hw (two sided color), binning-pass vertex shader, etc.
struct ir3_shader_key key;
+ struct ir3_driver_const_layout const_layout;
struct ir3_info info;
struct ir3 *ir;
* constants, etc.
*/
unsigned num_uniforms;
+
unsigned num_ubos;
/* About Linkage:
struct {
/* user const start at zero */
unsigned ubo;
+ /* NOTE that a3xx might need a section for SSBO addresses too */
+ unsigned ssbo_sizes;
unsigned driver_param;
unsigned tfbo;
unsigned immediate;