From 50dd773a2d15570944d3955bb851b46ac345cdf4 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 20 Feb 2019 10:31:15 -0500 Subject: [PATCH] freedreno/ir3/a6xx: use ldib for ssbo reads ... instead of isam. It seems like when using isam, plus atomics, we can have the problem of old data being in the texture cache. Plus this way we don't have to load a component at a time. Note that blob still seems to use isam in some cases. I suppose it might be preferable in the case of loading a single component, when atomics are not in the picture (or that the ssbo does not need to otherwise be coherent). Signed-off-by: Rob Clark --- src/freedreno/ir3/ir3_a6xx.c | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c index 29b7ea8416c..1a10b63edb2 100644 --- a/src/freedreno/ir3/ir3_a6xx.c +++ b/src/freedreno/ir3/ir3_a6xx.c @@ -56,40 +56,26 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr, { struct ir3_block *b = ctx->block; struct ir3_instruction *offset; - struct ir3_instruction *sam; + struct ir3_instruction *ldib; nir_const_value *buffer_index; /* can this be non-const buffer_index? how do we handle that? */ buffer_index = nir_src_as_const_value(intr->src[0]); compile_assert(ctx, buffer_index); - int tex_idx = ir3_ssbo_to_tex(&ctx->so->image_mapping, buffer_index->u32[0]); + int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, buffer_index->u32[0]); offset = ssbo_offset(b, ir3_get_src(ctx, &intr->src[1])[0]); - /* Because texture state for SSBO read is setup as a single component - * format (ie. R32_UINT, etc), we can't read more than the .x component - * in one shot. Maybe there is some way we could mangle the state to - * read more than one component at a shot, which would result is some- - * what less register usage (given how we have to stick in the dummy - * .y coord) and less alu instructions to calc offsets. But this is - * also what blob does, so meh? - */ - for (unsigned i; i < intr->num_components; i++) { - struct ir3_instruction *coords[2]; - - coords[0] = (i == 0) ? offset : - ir3_ADD_U(b, offset, 0, create_immed(b, i), 0); - coords[1] = create_immed(b, 0); - - sam = ir3_SAM(b, OPC_ISAM, TYPE_U32, 0b1, 0, - tex_idx, tex_idx, ir3_create_collect(ctx, coords, 2), NULL); + ldib = ir3_LDIB(b, create_immed(b, ibo_idx), 0, offset, 0); + ldib->regs[0]->wrmask = MASK(intr->num_components); + ldib->cat6.iim_val = intr->num_components; + ldib->cat6.d = 1; + ldib->cat6.type = TYPE_U32; + ldib->barrier_class = IR3_BARRIER_BUFFER_R; + ldib->barrier_conflict = IR3_BARRIER_BUFFER_W; - sam->barrier_class = IR3_BARRIER_BUFFER_R; - sam->barrier_conflict = IR3_BARRIER_BUFFER_W; - - dst[i] = sam; - } + ir3_split_dest(b, dst, ldib, 0, intr->num_components); } /* src[] = { value, block_index, offset }. const_index[] = { write_mask } */ -- 2.30.2