X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Ffreedreno%2Fir3%2Fir3_a6xx.c;h=b10e1b9502763b6bd1f9e4b58d1c51d9361eb9c0;hb=b1465c382b5ab545e524f7f63cadeb49762eaa96;hp=6f0ba54dc36a186e3c87a5e9485134250681db0a;hpb=edc35c1f54a0f72b6c0f01b2156c10c904459b4f;p=mesa.git diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c index 6f0ba54dc36..b10e1b95027 100644 --- a/src/freedreno/ir3/ir3_a6xx.c +++ b/src/freedreno/ir3/ir3_a6xx.c @@ -37,43 +37,6 @@ * encoding compared to a4xx/a5xx. */ -static void -handle_bindless_cat6(struct ir3_instruction *instr, nir_src rsrc) -{ - nir_intrinsic_instr *intrin = ir3_bindless_resource(rsrc); - if (!intrin) - return; - - instr->flags |= IR3_INSTR_B; - instr->cat6.base = nir_intrinsic_desc_set(intrin); -} - -static struct ir3_instruction * -ssbo_idx(struct ir3_context *ctx, nir_src src) -{ - if (ir3_bindless_resource(src)) { - ctx->so->bindless_ibo = true; - return ir3_get_src(ctx, &src)[0]; - } else { - /* can this be non-const buffer_index? how do we handle that? */ - int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, nir_src_as_uint(src)); - return create_immed(ctx->block, ibo_idx); - } -} - -static struct ir3_instruction * -image_idx(struct ir3_context *ctx, nir_src src) -{ - if (ir3_bindless_resource(src)) { - ctx->so->bindless_ibo = true; - return ir3_get_src(ctx, &src)[0]; - } else { - /* can this be non-const buffer_index? how do we handle that? */ - int ibo_idx = ir3_image_to_ibo(ctx->so->shader, nir_src_as_uint(src)); - return create_immed(ctx->block, ibo_idx); - } -} - /* src[] = { buffer_index, offset }. No const_index */ static void emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr, @@ -85,14 +48,14 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr, offset = ir3_get_src(ctx, &intr->src[2])[0]; - ldib = ir3_LDIB(b, ssbo_idx(ctx, intr->src[0]), 0, offset, 0); + ldib = ir3_LDIB(b, ir3_ssbo_to_ibo(ctx, intr->src[0]), 0, offset, 0); ldib->regs[0]->wrmask = MASK(intr->num_components); ldib->cat6.iim_val = intr->num_components; ldib->cat6.d = 1; ldib->cat6.type = intr->dest.ssa.bit_size == 16 ? TYPE_U16 : TYPE_U32; ldib->barrier_class = IR3_BARRIER_BUFFER_R; ldib->barrier_conflict = IR3_BARRIER_BUFFER_W; - handle_bindless_cat6(ldib, intr->src[0]); + ir3_handle_bindless_cat6(ldib, intr->src[0]); ir3_split_dest(b, dst, ldib, 0, intr->num_components); } @@ -103,25 +66,23 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) { struct ir3_block *b = ctx->block; struct ir3_instruction *stib, *val, *offset; - /* TODO handle wrmask properly, see _store_shared().. but I think - * it is more a PITA than that, since blob ends up loading the - * masked components and writing them back out. - */ - unsigned wrmask = intr->const_index[0]; + unsigned wrmask = nir_intrinsic_write_mask(intr); unsigned ncomp = ffs(~wrmask) - 1; + assert(wrmask == BITFIELD_MASK(intr->num_components)); + /* src0 is offset, src1 is value: */ val = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), ncomp); offset = ir3_get_src(ctx, &intr->src[3])[0]; - stib = ir3_STIB(b, ssbo_idx(ctx, intr->src[1]), 0, offset, 0, val, 0); + stib = ir3_STIB(b, ir3_ssbo_to_ibo(ctx, intr->src[1]), 0, offset, 0, val, 0); stib->cat6.iim_val = ncomp; stib->cat6.d = 1; stib->cat6.type = intr->src[0].ssa->bit_size == 16 ? TYPE_U16 : TYPE_U32; stib->barrier_class = IR3_BARRIER_BUFFER_W; stib->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; - handle_bindless_cat6(stib, intr->src[1]); + ir3_handle_bindless_cat6(stib, intr->src[1]); array_insert(b, b->keeps, stib); } @@ -150,7 +111,7 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction *atomic, *ibo, *src0, *src1, *data, *dummy; type_t type = TYPE_U32; - ibo = ssbo_idx(ctx, intr->src[0]); + ibo = ir3_ssbo_to_ibo(ctx, intr->src[0]); data = ir3_get_src(ctx, &intr->src[2])[0]; @@ -225,7 +186,7 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) atomic->cat6.type = type; atomic->barrier_class = IR3_BARRIER_BUFFER_W; atomic->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; - handle_bindless_cat6(atomic, intr->src[0]); + ir3_handle_bindless_cat6(atomic, intr->src[0]); /* even if nothing consume the result, we can't DCE the instruction: */ array_insert(b, b->keeps, atomic); @@ -243,7 +204,7 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr, struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); unsigned ncoords = ir3_get_image_coords(intr, NULL); - ldib = ir3_LDIB(b, image_idx(ctx, intr->src[0]), 0, + ldib = ir3_LDIB(b, ir3_image_to_ibo(ctx, intr->src[0]), 0, ir3_create_collect(ctx, coords, ncoords), 0); ldib->regs[0]->wrmask = MASK(intr->num_components); ldib->cat6.iim_val = intr->num_components; @@ -252,7 +213,7 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr, ldib->cat6.typed = true; ldib->barrier_class = IR3_BARRIER_IMAGE_R; ldib->barrier_conflict = IR3_BARRIER_IMAGE_W; - handle_bindless_cat6(ldib, intr->src[0]); + ir3_handle_bindless_cat6(ldib, intr->src[0]); ir3_split_dest(b, dst, ldib, 0, intr->num_components); } @@ -271,7 +232,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) /* src0 is offset, src1 is value: */ - stib = ir3_STIB(b, image_idx(ctx, intr->src[0]), 0, + stib = ir3_STIB(b, ir3_image_to_ibo(ctx, intr->src[0]), 0, ir3_create_collect(ctx, coords, ncoords), 0, ir3_create_collect(ctx, value, ncomp), 0); stib->cat6.iim_val = ncomp; @@ -280,7 +241,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) stib->cat6.typed = true; stib->barrier_class = IR3_BARRIER_IMAGE_W; stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; - handle_bindless_cat6(stib, intr->src[0]); + ir3_handle_bindless_cat6(stib, intr->src[0]); array_insert(b, b->keeps, stib); } @@ -295,7 +256,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction *value = ir3_get_src(ctx, &intr->src[3])[0]; unsigned ncoords = ir3_get_image_coords(intr, NULL); - ibo = image_idx(ctx, intr->src[0]); + ibo = ir3_image_to_ibo(ctx, intr->src[0]); /* So this gets a bit creative: * @@ -371,7 +332,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) atomic->cat6.typed = true; atomic->barrier_class = IR3_BARRIER_IMAGE_W; atomic->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; - handle_bindless_cat6(atomic, intr->src[0]); + ir3_handle_bindless_cat6(atomic, intr->src[0]); /* even if nothing consume the result, we can't DCE the instruction: */ array_insert(b, b->keeps, atomic); @@ -379,6 +340,25 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) return atomic; } +static void +emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *ibo = ir3_image_to_ibo(ctx, intr->src[0]); + struct ir3_instruction *resinfo = ir3_RESINFO(b, ibo, 0); + resinfo->cat6.iim_val = 1; + resinfo->cat6.d = intr->num_components; + resinfo->cat6.type = TYPE_U32; + resinfo->cat6.typed = false; + /* resinfo has no writemask and always writes out 3 components: */ + compile_assert(ctx, intr->num_components <= 3); + resinfo->regs[0]->wrmask = MASK(3); + ir3_handle_bindless_cat6(resinfo, intr->src[0]); + + ir3_split_dest(b, dst, resinfo, 0, intr->num_components); +} + const struct ir3_context_funcs ir3_a6xx_funcs = { .emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo, .emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo, @@ -386,6 +366,7 @@ const struct ir3_context_funcs ir3_a6xx_funcs = { .emit_intrinsic_load_image = emit_intrinsic_load_image, .emit_intrinsic_store_image = emit_intrinsic_store_image, .emit_intrinsic_atomic_image = emit_intrinsic_atomic_image, + .emit_intrinsic_image_size = emit_intrinsic_image_size, }; /* @@ -425,8 +406,7 @@ get_atomic_dest_mov(struct ir3_instruction *atomic) /* it will have already been appended to the end of the block, which * isn't where we want it, so fix-up the location: */ - list_delinit(&mov->node); - list_add(&mov->node, &atomic->node); + ir3_instr_move_after(mov, atomic); return atomic->data = mov; } @@ -447,8 +427,6 @@ ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so) foreach_block (block, &ir->block_list) { foreach_instr_safe (instr, &block->instr_list) { - struct ir3_register *reg; - foreach_src (reg, instr) { struct ir3_instruction *src = reg->instr; @@ -464,7 +442,6 @@ ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so) } /* we also need to fixup shader outputs: */ - struct ir3_instruction *out; foreach_output_n (out, n, ir) { if (is_atomic(out->opc) && (out->flags & IR3_INSTR_G)) { ir->outputs[n] = get_atomic_dest_mov(out);