From: Gert Wollny
Date: Sat, 16 May 2020 13:54:11 +0000 (+0200)
Subject: r600/sfn: Add support for shared atomics
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=43c23ba9bff17a1438a5091d26035b6284d50d35;p=mesa.git

r600/sfn: Add support for shared atomics

Signed-off-by: Gert Wollny
Part-of:
---

diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_base.h b/src/gallium/drivers/r600/sfn/sfn_instruction_base.h
index 4986fa9728c..9869697f7dc 100644
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_base.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_base.h
@@ -77,6 +77,7 @@ public:
       cond_if,
       cond_else,
       cond_endif,
+      lds_atomic,
       lds_read,
       lds_write,
       loop_begin,
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp
index 570f8f2a93e..eecc7184869 100644
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp
@@ -47,6 +47,55 @@ bool LDSReadInstruction::is_equal_to(const Instruction& lhs) const
          m_dest_value == other.m_dest_value;
 }
 
+/* Two-operand form (used for compare-and-swap). src1 is taken by value so
+ * that the single-operand constructor can delegate with an empty PValue(). */
+LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op):
+   Instruction(lds_atomic),
+   m_address(address),
+   m_dest_value(dest),
+   m_src0_value(src0),
+   m_src1_value(src1),
+   m_opcode(op)
+{
+   /* Every operand slot goes to the base class' remapping hooks; the src1
+    * slot is passed on even when it holds an empty PValue. */
+   add_remappable_src_value(&m_src0_value);
+   add_remappable_src_value(&m_src1_value);
+   add_remappable_src_value(&m_address);
+   add_remappable_dst_value(&m_dest_value);
+}
+
+/* Single-operand form; src1 stays empty. */
+LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op):
+   LDSAtomicInstruction(dest, src0, PValue(), address, op)
+{
+}
+
+/* Prints "LDS <opcode> <dest> [<address>] <src0>", plus ", <src1>" if set. */
+void LDSAtomicInstruction::do_print(std::ostream& os) const
+{
+   os << "LDS " << m_opcode << " " << *m_dest_value << " ";
+   os << "[" << *m_address << "] " << *m_src0_value;
+   if (m_src1_value)
+      os << ", " << *m_src1_value;
+}
+
+/* Equal when opcode, destination, src0 and address match and src1 is either
+ * empty on both sides or set and equal on both sides. */
+bool LDSAtomicInstruction::is_equal_to(const Instruction& lhs) const
+{
+   /* NOTE(review): assumes lhs really is an LDSAtomicInstruction; confirm
+    * that the base class only calls this for matching instruction types. */
+   auto& other = static_cast<const LDSAtomicInstruction&>(lhs);
+
+   return m_opcode == other.m_opcode &&
+          *m_dest_value == *other.m_dest_value &&
+          *m_src0_value == *other.m_src0_value &&
+          *m_address == *other.m_address &&
+          ((m_src1_value && other.m_src1_value && (*m_src1_value == *other.m_src1_value)) ||
+           (!m_src1_value && !other.m_src1_value));
+}
+
 LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0):
    LDSWriteInstruction::LDSWriteInstruction(address, idx_offset, value0, PValue())
 
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h b/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h
index 925b60ecc35..7aaa7b1b407 100644
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h
@@ -21,6 +21,32 @@ private:
    std::vector m_dest_value;
 };
 
+/* IR instruction for an atomic operation on LDS (shared) memory. It carries
+ * an address, a destination, one mandatory data operand (src0) and an
+ * optional second one (src1). */
+class LDSAtomicInstruction : public Instruction {
+public:
+   LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op);
+   LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op);
+
+   const Value& address() const { return *m_address;}
+   const Value& dest() const { return *m_dest_value;}
+   const Value& src0() const { return *m_src0_value;}
+   /* Returned as PValue rather than Value& because it may be empty. */
+   const PValue& src1() const { return m_src1_value;}
+   unsigned op() const {return m_opcode;}
+
+private:
+   void do_print(std::ostream& os) const override;
+   bool is_equal_to(const Instruction& lhs) const override;
+
+   PValue m_address;
+   PValue m_dest_value;
+   PValue m_src0_value;
+   PValue m_src1_value; /* empty for single-operand atomics */
+   unsigned m_opcode;
+};
+
 class LDSWriteInstruction : public Instruction {
 public:
    LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0);
diff --git a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp b/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp
index 415e17a903a..6af1dd3fde6 100644
--- a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp
@@ -67,6 +67,7 @@ private:
    bool emit_rat(const
RatInstruction& instr);
    bool emit_ldswrite(const LDSWriteInstruction& instr);
    bool emit_ldsread(const LDSReadInstruction& instr);
+   bool emit_ldsatomic(const LDSAtomicInstruction& instr);
    bool emit_tf_write(const GDSStoreTessFactor& instr);
 
    bool emit_load_addr(PValue addr);
@@ -200,6 +201,8 @@ bool AssemblyFromShaderLegacyImpl::emit(const Instruction::Pointer i)
       return emit_ldswrite(static_cast<const LDSWriteInstruction&>(*i));
    case Instruction::lds_read:
       return emit_ldsread(static_cast<const LDSReadInstruction&>(*i));
+   case Instruction::lds_atomic:
+      return emit_ldsatomic(static_cast<const LDSAtomicInstruction&>(*i));
    case Instruction::tf_write:
       return emit_tf_write(static_cast<const GDSStoreTessFactor&>(*i));
    default:
@@ -1069,6 +1072,41 @@ bool AssemblyFromShaderLegacyImpl::emit_ldsread(const LDSReadInstruction& instr)
    return true;
 }
 
+/* Emits an LDS atomic as two ALU instructions: the LDS index op described by
+ * instr, then a MOV from EG_V_SQ_ALU_SRC_LDS_OQ_A_POP into the destination.
+ * Returns false as soon as r600_bytecode_add_alu() reports an error. */
+bool AssemblyFromShaderLegacyImpl::emit_ldsatomic(const LDSAtomicInstruction& instr)
+{
+   /* Request a new CF entry once cf_last->ndw exceeds 236
+    * (presumably to keep both ops in one clause - TODO confirm). */
+   if (m_bc->cf_last->ndw > 240 - 4)
+      m_bc->force_add_cf = 1;
+
+   r600_bytecode_alu alu_fetch;
+   memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
+   alu_fetch.is_lds_idx_op = true;
+   alu_fetch.op = instr.op();
+
+   /* All operands go through copy_src(), as the address already did, so the
+    * data operands keep whatever copy_src() sets besides sel and chan. */
+   copy_src(alu_fetch.src[0], instr.address());
+   copy_src(alu_fetch.src[1], instr.src0());
+   if (instr.src1())
+      copy_src(alu_fetch.src[2], *instr.src1());
+   alu_fetch.last = 1;
+   if (r600_bytecode_add_alu(m_bc, &alu_fetch))
+      return false;
+
+   r600_bytecode_alu alu_read;
+   memset(&alu_read, 0, sizeof(r600_bytecode_alu));
+   copy_dst(alu_read.dst, instr.dest());
+   alu_read.op = ALU_OP1_MOV;
+   alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
+   alu_read.last = 1;
+   alu_read.dst.write = 1;
+   return r600_bytecode_add_alu(m_bc, &alu_read) == 0;
+}
+
 bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction& instr)
 {
    struct r600_bytecode_gds gds;
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
index
7a41b2af2b9..6123cf255b1 100644
--- a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
@@ -131,6 +131,8 @@ bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
          sh_info().has_txq_cube_array_z_comp = true;
    }
 
+
+
    default:
       ;
    }
@@ -513,6 +515,51 @@ bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr)
    return true;
 }
 
+/* Maps a nir_intrinsic_shared_atomic_* opcode to the corresponding
+ * LDS_OP*_RET opcode. Every intrinsic not listed below ends up in
+ * unreachable(). */
+static unsigned
+lds_op_from_intrinsic(nir_intrinsic_op op)
+{
+   switch (op) {
+   case nir_intrinsic_shared_atomic_add:       return LDS_OP2_LDS_ADD_RET;
+   case nir_intrinsic_shared_atomic_and:       return LDS_OP2_LDS_AND_RET;
+   case nir_intrinsic_shared_atomic_or:        return LDS_OP2_LDS_OR_RET;
+   case nir_intrinsic_shared_atomic_imax:      return LDS_OP2_LDS_MAX_INT_RET;
+   case nir_intrinsic_shared_atomic_umax:      return LDS_OP2_LDS_MAX_UINT_RET;
+   case nir_intrinsic_shared_atomic_imin:      return LDS_OP2_LDS_MIN_INT_RET;
+   case nir_intrinsic_shared_atomic_umin:      return LDS_OP2_LDS_MIN_UINT_RET;
+   case nir_intrinsic_shared_atomic_xor:       return LDS_OP2_LDS_XOR_RET;
+   case nir_intrinsic_shared_atomic_exchange:  return LDS_OP2_LDS_XCHG_RET;
+   case nir_intrinsic_shared_atomic_comp_swap: return LDS_OP3_LDS_CMP_XCHG_RET;
+   default:
+      unreachable("Unsupported shared atomic opcode");
+   }
+}
+
+/* Builds an LDSAtomicInstruction from a shared atomic intrinsic: src[0]
+ * supplies the address, src[1] the data operand and dest the destination;
+ * comp_swap additionally passes src[2] as second operand. Returns true. */
+bool ShaderFromNirProcessor::emit_atomic_local_shared(nir_intrinsic_instr* instr)
+{
+   auto addr = from_nir(instr->src[0], 0);
+   auto dst = from_nir(instr->dest, 0);
+   auto data = from_nir(instr->src[1], 0);
+   const auto lds_op = lds_op_from_intrinsic(instr->intrinsic);
+
+   if (unlikely(instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)) {
+      /* Only this intrinsic reads a third source. */
+      auto data2 = from_nir(instr->src[2], 0);
+      emit_instruction(new LDSAtomicInstruction(dst, data, data2, addr, lds_op));
+      return true;
+   }
+
+   /* Every other shared atomic takes a single data operand. */
+   emit_instruction(new LDSAtomicInstruction(dst, data, addr, lds_op));
+   return true;
+}
+
+
 bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
 {
    unsigned write_mask =
nir_intrinsic_write_mask(instr); @@ -595,6 +642,17 @@ bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* ins case nir_intrinsic_memory_barrier_shared: case nir_intrinsic_memory_barrier: return emit_barrier(instr); + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: + return emit_atomic_local_shared(instr); case nir_intrinsic_copy_deref: case nir_intrinsic_load_constant: case nir_intrinsic_load_input: diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.h b/src/gallium/drivers/r600/sfn/sfn_shader_base.h index 11c403d15bb..0dcf36b0876 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_base.h +++ b/src/gallium/drivers/r600/sfn/sfn_shader_base.h @@ -105,6 +105,7 @@ protected: bool emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset); bool emit_load_local_shared(nir_intrinsic_instr* instr); bool emit_store_local_shared(nir_intrinsic_instr* instr); + bool emit_atomic_local_shared(nir_intrinsic_instr* instr); bool emit_barrier(nir_intrinsic_instr* instr);