r600/sfn: Add support for shared atomics
author Gert Wollny <gert.wollny@collabora.com>
Sat, 16 May 2020 13:54:11 +0000 (15:54 +0200)
committer Marge Bot <eric+marge@anholt.net>
Mon, 22 Jun 2020 12:01:17 +0000 (12:01 +0000)
Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5575>

src/gallium/drivers/r600/sfn/sfn_instruction_base.h
src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp
src/gallium/drivers/r600/sfn/sfn_instruction_lds.h
src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp
src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
src/gallium/drivers/r600/sfn/sfn_shader_base.h

index 4986fa9728c2e20cfda373a9b03ded72de00c9fa..9869697f7dcab66b11236dd7df6cf8dbee600891 100644 (file)
@@ -77,6 +77,7 @@ public:
       cond_if,
       cond_else,
       cond_endif,
+      lds_atomic,
       lds_read,
       lds_write,
       loop_begin,
index 570f8f2a93eb4b5bbc0c43cc72e953c3d0000f9f..eecc7184869ba93d8a0238a501e3d121d0f2a92e 100644 (file)
@@ -47,6 +47,47 @@ bool LDSReadInstruction::is_equal_to(const Instruction& lhs) const
          m_dest_value == other.m_dest_value;
 }
 
+/* Construct an LDS atomic instruction that carries two data operands.
+ *
+ * dest, src0, src1 and address are copied into the matching members and op
+ * is stored as the opcode. src1 may be an empty PValue (the four-argument
+ * constructor passes PValue()); do_print and is_equal_to test it before
+ * dereferencing it.
+ */
+LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1,
+                                           PValue& address, unsigned op)
+   : Instruction(lds_atomic),
+     m_address(address),
+     m_dest_value(dest),
+     m_src0_value(src0),
+     m_src1_value(src1),
+     m_opcode(op)
+{
+   /* Register the stored operands as remappable values: both sources first,
+    * then the address, and finally the destination. */
+   add_remappable_src_value(&m_src0_value);
+   add_remappable_src_value(&m_src1_value);
+   add_remappable_src_value(&m_address);
+   add_remappable_dst_value(&m_dest_value);
+}
+
+/* Form without a second data operand: delegates to the five-argument
+ * constructor with an empty PValue in place of src1. */
+LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op):
+   LDSAtomicInstruction(dest, src0, PValue(), address, op)
+{
+}
+
+
+/* Write the instruction to os as
+ *    LDS <opcode> <dest> [<address>] <src0>
+ * followed by ", <src1>" when a second source is set. */
+void LDSAtomicInstruction::do_print(std::ostream& os) const
+{
+   os << "LDS " << m_opcode << " " << *m_dest_value << " "
+      << "[" << *m_address << "] " << *m_src0_value;
+
+   /* The second source is optional; print nothing more when it is empty. */
+   if (!m_src1_value)
+      return;
+
+   os << ", " << *m_src1_value;
+}
+
+/* Compare with another instruction: opcode, destination, first source and
+ * address must match, and the optional second source must either be set and
+ * equal on both sides or be empty on both sides.
+ *
+ * lhs is cast to LDSAtomicInstruction without a check - presumably the
+ * caller has already established the instruction type; verify against caller.
+ */
+bool LDSAtomicInstruction::is_equal_to(const Instruction& lhs) const
+{
+   const auto& rhs = static_cast<const LDSAtomicInstruction&>(lhs);
+
+   if (!(m_opcode == rhs.m_opcode))
+      return false;
+   if (!(*m_dest_value == *rhs.m_dest_value))
+      return false;
+   if (!(*m_src0_value == *rhs.m_src0_value))
+      return false;
+   if (!(*m_address == *rhs.m_address))
+      return false;
+
+   /* Both sides carry a second source: it decides the result. */
+   if (m_src1_value && rhs.m_src1_value)
+      return *m_src1_value == *rhs.m_src1_value;
+
+   /* Otherwise the instructions are only equal if neither side has one. */
+   return !m_src1_value && !rhs.m_src1_value;
+}
+
 LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0):
    LDSWriteInstruction::LDSWriteInstruction(address, idx_offset, value0, PValue())
 
index 925b60ecc3586fc95e06ef40a36b490f9175494c..7aaa7b1b4076586de110e211b38e65bbbe83d4a9 100644 (file)
@@ -21,6 +21,28 @@ private:
    std::vector<PValue> m_dest_value;
 };
 
+/* IR instruction for an atomic operation on LDS.
+ *
+ * Holds an address, a destination, one mandatory source (src0), one
+ * optional source (src1, which may be an empty PValue) and the opcode.
+ */
+class LDSAtomicInstruction : public Instruction {
+public:
+   /* Form with a second source operand. */
+   LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op);
+   /* Form without a second source operand. */
+   LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op);
+
+   /* address(), dest() and src0() dereference the stored PValue. */
+   const Value& address() const
+   {
+      return *m_address;
+   }
+   const Value& dest() const
+   {
+      return *m_dest_value;
+   }
+   const Value& src0() const
+   {
+      return *m_src0_value;
+   }
+
+   /* src1() hands out the PValue itself so that it can be tested before
+    * it is dereferenced. */
+   const PValue& src1() const
+   {
+      return m_src1_value;
+   }
+
+   unsigned op() const
+   {
+      return m_opcode;
+   }
+
+private:
+   void do_print(std::ostream& os) const override;
+   bool is_equal_to(const Instruction& lhs) const override;
+
+   /* Declaration order is kept as is; it fixes the member initialisation order. */
+   PValue m_address;
+   PValue m_dest_value;
+   PValue m_src0_value;
+   PValue m_src1_value;
+   unsigned m_opcode;
+};
+
 class LDSWriteInstruction : public Instruction {
 public:
    LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0);
index 415e17a903ab9c705bab24cc347cb497efe37e66..6af1dd3fde60f7e9de0c8a0cc5af49cfadd06f7c 100644 (file)
@@ -67,6 +67,7 @@ private:
    bool emit_rat(const RatInstruction& instr);
    bool emit_ldswrite(const LDSWriteInstruction& instr);
    bool emit_ldsread(const LDSReadInstruction& instr);
+   bool emit_ldsatomic(const LDSAtomicInstruction& instr);
    bool emit_tf_write(const GDSStoreTessFactor& instr);
 
    bool emit_load_addr(PValue addr);
@@ -200,6 +201,8 @@ bool AssemblyFromShaderLegacyImpl::emit(const Instruction::Pointer i)
       return emit_ldswrite(static_cast<const LDSWriteInstruction&>(*i));
    case Instruction::lds_read:
       return emit_ldsread(static_cast<const LDSReadInstruction&>(*i));
+   case Instruction::lds_atomic:
+      return emit_ldsatomic(static_cast<const LDSAtomicInstruction&>(*i));
    case Instruction::tf_write:
       return emit_tf_write(static_cast<const GDSStoreTessFactor&>(*i));
    default:
@@ -1069,6 +1072,44 @@ bool AssemblyFromShaderLegacyImpl::emit_ldsread(const LDSReadInstruction& instr)
    return true;
 }
 
+/* Emit the bytecode for an LDS atomic instruction.
+ *
+ * Two ALU instructions are added to m_bc:
+ *  1. the LDS index op with the opcode from instr.op() and the operands
+ *     address, src0 and, if set, src1;
+ *  2. a MOV that reads EG_V_SQ_ALU_SRC_LDS_OQ_A_POP into instr.dest().
+ *
+ * Returns false when r600_bytecode_add_alu reports an error for either
+ * instruction, true otherwise.
+ */
+bool AssemblyFromShaderLegacyImpl::emit_ldsatomic(const LDSAtomicInstruction& instr)
+{
+   /* Request a new CF via force_add_cf once cf_last->ndw exceeds the limit.
+    * NOTE(review): 240 - 4 presumably reserves room for what is emitted
+    * below - confirm the exact budget. */
+   if (m_bc->cf_last->ndw > 240 - 4)
+      m_bc->force_add_cf = 1;
+
+   r600_bytecode_alu alu_fetch;
+   memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
+   alu_fetch.is_lds_idx_op = true;
+   alu_fetch.op = instr.op();
+
+   /* Translate all operands with copy_src, the same way as the address,
+    * instead of copying only sel and chan for the data operands by hand.
+    * That way whatever else copy_src records for a value is carried for
+    * src0 and src1 as well.
+    * NOTE(review): assumes copy_src fills at least sel and chan, as the
+    * hand-written copies did - confirm against its definition. */
+   copy_src(alu_fetch.src[0], instr.address());
+   copy_src(alu_fetch.src[1], instr.src0());
+   if (instr.src1())
+      copy_src(alu_fetch.src[2], *instr.src1());
+
+   alu_fetch.last = 1;
+   if (r600_bytecode_add_alu(m_bc, &alu_fetch))
+      return false;
+
+   /* Second instruction: move the value popped from the LDS output queue
+    * source into the destination. */
+   r600_bytecode_alu alu_read;
+   memset(&alu_read, 0, sizeof(r600_bytecode_alu));
+   copy_dst(alu_read.dst, instr.dest());
+   alu_read.op = ALU_OP1_MOV;
+   alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
+   alu_read.last = 1;
+   alu_read.dst.write = 1;
+
+   return r600_bytecode_add_alu(m_bc, &alu_read) == 0;
+}
+
 bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction& instr)
 {
    struct r600_bytecode_gds gds;
index 7a41b2af2b93c2a2ba4dda65e3484fc7d6c7b43c..6123cf255b14ba52937aa9d412b869d69729d645 100644 (file)
@@ -131,6 +131,8 @@ bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
             sh_info().has_txq_cube_array_z_comp = true;
       }
 
+
+
       default:
          ;
       }
@@ -513,6 +515,51 @@ bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr)
    return true;
 }
 
+/* Map a NIR shared_atomic_* intrinsic to the corresponding LDS opcode with
+ * the _RET suffix. Any other intrinsic ends up in unreachable(). */
+static unsigned
+lds_op_from_intrinsic(nir_intrinsic_op op)
+{
+   switch (op) {
+   case nir_intrinsic_shared_atomic_add:       return LDS_OP2_LDS_ADD_RET;
+   case nir_intrinsic_shared_atomic_and:       return LDS_OP2_LDS_AND_RET;
+   case nir_intrinsic_shared_atomic_or:        return LDS_OP2_LDS_OR_RET;
+   case nir_intrinsic_shared_atomic_imax:      return LDS_OP2_LDS_MAX_INT_RET;
+   case nir_intrinsic_shared_atomic_umax:      return LDS_OP2_LDS_MAX_UINT_RET;
+   case nir_intrinsic_shared_atomic_imin:      return LDS_OP2_LDS_MIN_INT_RET;
+   case nir_intrinsic_shared_atomic_umin:      return LDS_OP2_LDS_MIN_UINT_RET;
+   case nir_intrinsic_shared_atomic_xor:       return LDS_OP2_LDS_XOR_RET;
+   case nir_intrinsic_shared_atomic_exchange:  return LDS_OP2_LDS_XCHG_RET;
+   /* The only three-operand opcode in this table. */
+   case nir_intrinsic_shared_atomic_comp_swap: return LDS_OP3_LDS_CMP_XCHG_RET;
+   default:
+      unreachable("Unsupported shared atomic opcode");
+   }
+}
+
+/* Translate a NIR shared_atomic_* intrinsic into an LDSAtomicInstruction.
+ *
+ * src[0] is used as the address, src[1] as the first data operand and, for
+ * comp_swap only, src[2] as the second data operand. Always returns true.
+ */
+bool ShaderFromNirProcessor::emit_atomic_local_shared(nir_intrinsic_instr* instr)
+{
+   /* The from_nir calls are kept in their original order. */
+   auto addr = from_nir(instr->src[0], 0);
+   auto dst = from_nir(instr->dest, 0);
+   auto operand0 = from_nir(instr->src[1], 0);
+   auto lds_op = lds_op_from_intrinsic(instr->intrinsic);
+
+   const bool is_comp_swap =
+      instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap;
+
+   if (unlikely(is_comp_swap)) {
+      /* Compare-and-swap is the only variant with a third NIR source. */
+      auto operand1 = from_nir(instr->src[2], 0);
+      emit_instruction(new LDSAtomicInstruction(dst, operand0, operand1, addr, lds_op));
+      return true;
+   }
+
+   emit_instruction(new LDSAtomicInstruction(dst, operand0, addr, lds_op));
+   return true;
+}
+
+
 bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
 {
    unsigned write_mask = nir_intrinsic_write_mask(instr);
@@ -595,6 +642,17 @@ bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* ins
    case nir_intrinsic_memory_barrier_shared:
    case nir_intrinsic_memory_barrier:
       return emit_barrier(instr);
+   case nir_intrinsic_shared_atomic_add:
+   case nir_intrinsic_shared_atomic_and:
+   case nir_intrinsic_shared_atomic_or:
+   case nir_intrinsic_shared_atomic_imax:
+   case nir_intrinsic_shared_atomic_umax:
+   case nir_intrinsic_shared_atomic_imin:
+   case nir_intrinsic_shared_atomic_umin:
+   case nir_intrinsic_shared_atomic_xor:
+   case nir_intrinsic_shared_atomic_exchange:
+   case nir_intrinsic_shared_atomic_comp_swap:
+      return emit_atomic_local_shared(instr);
    case nir_intrinsic_copy_deref:
    case nir_intrinsic_load_constant:
    case nir_intrinsic_load_input:
index 11c403d15bb9e734f099c42d38d78c9db6cc76a6..0dcf36b087629f2408cca879ba6d5fb3e7d2c28e 100644 (file)
@@ -105,6 +105,7 @@ protected:
    bool emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset);
    bool emit_load_local_shared(nir_intrinsic_instr* instr);
    bool emit_store_local_shared(nir_intrinsic_instr* instr);
+   bool emit_atomic_local_shared(nir_intrinsic_instr* instr);
 
    bool emit_barrier(nir_intrinsic_instr* instr);