From 8e221f58a66fe3bbb2f3354a1c7b888f5248bcd6 Mon Sep 17 00:00:00 2001
From: Italo Nicola
Date: Mon, 31 Aug 2020 11:17:48 +0000
Subject: [PATCH] panfrost: add atomic ops infrastructure

Signed-off-by: Italo Nicola
Reviewed-by: Alyssa Rosenzweig
Part-of:
---
 src/panfrost/midgard/midgard_compile.c | 63 ++++++++++++++++++++++++++
 src/panfrost/midgard/midgard_emit.c    | 13 +++++-
 2 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 06ed3f62cf4..5c22ba7c45b 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -667,6 +667,15 @@ nir_is_non_scalar_swizzle(nir_alu_src *src, unsigned nr_components)
         return false;
 }
 
+#define ATOMIC_CASE_IMPL(ctx, instr, nir, op, is_shared) \
+        case nir_intrinsic_##nir: \
+                emit_atomic(ctx, instr, is_shared, midgard_op_##op); \
+                break;
+
+#define ATOMIC_CASE(ctx, instr, nir, op) \
+        ATOMIC_CASE_IMPL(ctx, instr, shared_atomic_##nir, atomic_##op, true); \
+        ATOMIC_CASE_IMPL(ctx, instr, global_atomic_##nir, atomic_##op, false);
+
 #define ALU_CASE(nir, _op) \
         case nir_op_##nir: \
                 op = midgard_alu_op_##_op; \
@@ -1370,6 +1379,60 @@ emit_global(
         emit_mir_instruction(ctx, ins);
 }
 
+/* Emit one atomic load/store op. If is_shared is false the access must be to
+ * global memory, since SSBOs are lowered to globals through a NIR pass. */
+static void
+emit_atomic(
+        compiler_context *ctx,
+        nir_intrinsic_instr *instr,
+        bool is_shared,
+        midgard_load_store_op op)
+{
+        unsigned bitsize = nir_src_bit_size(instr->src[1]);
+        nir_alu_type type =
+                (op == midgard_op_atomic_imin || op == midgard_op_atomic_imax) ?
+                nir_type_int : nir_type_uint;
+
+        unsigned dest = nir_dest_index(&instr->dest);
+        unsigned val = nir_src_index(ctx, &instr->src[1]);
+        emit_explicit_constant(ctx, val, val);
+
+        midgard_instruction ins = {
+                .type = TAG_LOAD_STORE_4,
+                .mask = 0xF,
+                .dest = dest,
+                .src = { ~0, ~0, ~0, val },
+                .src_types = { 0, 0, 0, type | bitsize },
+                .op = op
+        };
+
+        nir_src *src_offset = nir_get_io_offset_src(instr);
+
+        /* cmpxchg takes an extra value in arg_2, so arg_2 cannot hold the offset: the address goes in src[1] instead */
+        if (op == midgard_op_atomic_cmpxchg) {
+                unsigned addr = nir_src_index(ctx, src_offset);
+
+                ins.src[1] = addr;
+                ins.src_types[1] = nir_type_uint | nir_src_bit_size(*src_offset);
+
+                unsigned xchg_val = nir_src_index(ctx, &instr->src[2]);
+                emit_explicit_constant(ctx, xchg_val, xchg_val);
+
+                ins.src[2] = val;
+                ins.src_types[2] = type | bitsize;
+                ins.src[3] = xchg_val;
+
+                if (is_shared)
+                        ins.load_store.arg_1 |= 0x6E; /* NOTE(review): undocumented magic value for the shared case - confirm */
+        } else {
+                mir_set_offset(ctx, &ins, src_offset, is_shared);
+        }
+
+        mir_set_intr_mask(&instr->instr, &ins, true);
+
+        emit_mir_instruction(ctx, ins);
+}
+
 static void
 emit_varying_read(
         compiler_context *ctx,
diff --git a/src/panfrost/midgard/midgard_emit.c b/src/panfrost/midgard/midgard_emit.c
index a0fbe20c774..213b7024e81 100644
--- a/src/panfrost/midgard/midgard_emit.c
+++ b/src/panfrost/midgard/midgard_emit.c
@@ -499,6 +499,15 @@ load_store_from_instr(midgard_instruction *ins)
                 ldst.reg = SSA_REG_FROM_FIXED(ins->dest);
         }
 
+        /* The swizzle field of an atomic opcode has a special meaning:
+         *   - Bits 0-1 select which component of the implicit register is used
+         *   - The next two bits say if the implicit register is r26 or r27 (only bit 2 is set below) */
+        if (OP_IS_ATOMIC(ins->op)) {
+                ldst.swizzle = 0;
+                ldst.swizzle |= ins->swizzle[3][0] & 3;
+                ldst.swizzle |= (SSA_REG_FROM_FIXED(ins->src[3]) & 1 ? 1 : 0) << 2;
+        }
+
         if (ins->src[1] != ~0) {
                 unsigned src = SSA_REG_FROM_FIXED(ins->src[1]);
                 unsigned sz = nir_alu_type_get_type_size(ins->src_types[1]);
@@ -855,7 +864,9 @@ emit_binary_bundle(compiler_context *ctx,
                 for (unsigned i = 0; i < bundle->instruction_count; ++i) {
                         mir_pack_ldst_mask(bundle->instructions[i]);
 
-                        mir_pack_swizzle_ldst(bundle->instructions[i]);
+                        /* Atomic ops give the swizzle field a different meaning, so skip the generic swizzle packing for them */
+                        if (!OP_IS_ATOMIC(bundle->instructions[i]->op))
+                                mir_pack_swizzle_ldst(bundle->instructions[i]);
 
                         /* Apply a constant offset */
                         unsigned offset = bundle->instructions[i]->constants.u32[0];
-- 
2.30.2