cond_if,
cond_else,
cond_endif,
+ lds_atomic,
lds_read,
lds_write,
loop_begin,
m_dest_value == other.m_dest_value;
}
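+/* LDS atomic read-modify-write instruction: the overload taking src1 is used
+ * for compare-exchange; the delegating constructor below leaves src1 empty. */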
+LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op):
+ Instruction(lds_atomic),
+ m_address(address),
+ m_dest_value(dest),
+ m_src0_value(src0),
+ m_src1_value(src1),
+ m_opcode(op)
+{
+ add_remappable_src_value(&m_src0_value);
+ add_remappable_src_value(&m_src1_value);
+ add_remappable_src_value(&m_address);
+ add_remappable_dst_value(&m_dest_value);
+}
+
+LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op):
+ LDSAtomicInstruction(dest, src0, PValue(), address, op)
+{
+
+}
+
+void LDSAtomicInstruction::do_print(std::ostream& os) const
+{
+ os << "LDS " << m_opcode << " " << *m_dest_value << " ";
+ os << "[" << *m_address << "] " << *m_src0_value;
+ if (m_src1_value)
+ os << ", " << *m_src1_value;
+}
+
+bool LDSAtomicInstruction::is_equal_to(const Instruction& lhs) const
+{
+ auto& other = static_cast<const LDSAtomicInstruction&>(lhs);
+
+ return m_opcode == other.m_opcode &&
+ *m_dest_value == *other.m_dest_value &&
+ *m_src0_value == *other.m_src0_value &&
+ *m_address == *other.m_address &&
+ ((m_src1_value && other.m_src1_value && (*m_src1_value == *other.m_src1_value)) ||
+ (!m_src1_value && !other.m_src1_value));
+}
+
LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0):
   LDSWriteInstruction(address, idx_offset, value0, PValue())
std::vector<PValue> m_dest_value;
};
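+
+/* Atomic read-modify-write on local (shared) memory; the value returned by
+ * the LDS unit is written to dest. src1 is only set for compare-exchange. */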
+class LDSAtomicInstruction : public Instruction {
+public:
+ LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op);
+ LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op);
+
+ const Value& address() const { return *m_address;}
+ const Value& dest() const { return *m_dest_value;}
+ const Value& src0() const { return *m_src0_value;}
+ const PValue& src1() const { return m_src1_value;}
+ unsigned op() const {return m_opcode;}
+
+private:
+ void do_print(std::ostream& os) const override;
+ bool is_equal_to(const Instruction& lhs) const override;
+
+ PValue m_address;
+ PValue m_dest_value;
+ PValue m_src0_value;
+ PValue m_src1_value;
+ unsigned m_opcode;
+};
+
class LDSWriteInstruction : public Instruction {
public:
LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0);
bool emit_rat(const RatInstruction& instr);
bool emit_ldswrite(const LDSWriteInstruction& instr);
bool emit_ldsread(const LDSReadInstruction& instr);
+ bool emit_ldsatomic(const LDSAtomicInstruction& instr);
bool emit_tf_write(const GDSStoreTessFactor& instr);
bool emit_load_addr(PValue addr);
return emit_ldswrite(static_cast<const LDSWriteInstruction&>(*i));
case Instruction::lds_read:
return emit_ldsread(static_cast<const LDSReadInstruction&>(*i));
+ case Instruction::lds_atomic:
+ return emit_ldsatomic(static_cast<const LDSAtomicInstruction&>(*i));
case Instruction::tf_write:
return emit_tf_write(static_cast<const GDSStoreTessFactor&>(*i));
default:
return true;
}
+bool AssemblyFromShaderLegacyImpl::emit_ldsatomic(const LDSAtomicInstruction& instr)
+{
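+ /* Make sure there is still room in the current ALU clause; otherwise
+ force a new CF instruction before emitting the two ALU ops below. */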
+ if (m_bc->cf_last->ndw > 240 - 4)
+ m_bc->force_add_cf = 1;
+
+ r600_bytecode_alu alu_fetch;
+ r600_bytecode_alu alu_read;
+
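+ /* First ALU op: the indexed LDS atomic itself. The address goes to src[0],
+ the data operand(s) to src[1]/src[2], and the result is returned through
+ LDS output queue A. */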
+ memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
+ alu_fetch.is_lds_idx_op = true;
+ alu_fetch.op = instr.op();
+
+ copy_src(alu_fetch.src[0], instr.address());
+ auto& src0 = instr.src0();
+ alu_fetch.src[1].sel = src0.sel();
+ alu_fetch.src[1].chan = src0.chan();
+ if (instr.src1()) {
+ auto& src1 = *instr.src1();
+ alu_fetch.src[2].sel = src1.sel();
+ alu_fetch.src[2].chan = src1.chan();
+ }
+ alu_fetch.last = 1;
+ int r = r600_bytecode_add_alu(m_bc, &alu_fetch);
+ if (r)
+ return false;
+
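+ /* Second ALU op: pop the value returned by the LDS op from output queue A
+ into the destination register. */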
+ memset(&alu_read, 0, sizeof(r600_bytecode_alu));
+ copy_dst(alu_read.dst, instr.dest());
+ alu_read.op = ALU_OP1_MOV;
+ alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
+ alu_read.last = 1;
+ alu_read.dst.write = 1;
+ r = r600_bytecode_add_alu(m_bc, &alu_read);
+ if (r)
+ return false;
+ return true;
+}
+
bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction& instr)
{
struct r600_bytecode_gds gds;
sh_info().has_txq_cube_array_z_comp = true;
}
+
+
default:
;
}
return true;
}
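+
+/* Map the NIR shared-memory atomic intrinsics to the corresponding r600 LDS
+ * opcodes; only the _RET variants are used because the intrinsics return the
+ * previous value. */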
+static unsigned
+lds_op_from_intrinsic(nir_intrinsic_op op) {
+ switch (op) {
+ case nir_intrinsic_shared_atomic_add:
+ return LDS_OP2_LDS_ADD_RET;
+ case nir_intrinsic_shared_atomic_and:
+ return LDS_OP2_LDS_AND_RET;
+ case nir_intrinsic_shared_atomic_or:
+ return LDS_OP2_LDS_OR_RET;
+ case nir_intrinsic_shared_atomic_imax:
+ return LDS_OP2_LDS_MAX_INT_RET;
+ case nir_intrinsic_shared_atomic_umax:
+ return LDS_OP2_LDS_MAX_UINT_RET;
+ case nir_intrinsic_shared_atomic_imin:
+ return LDS_OP2_LDS_MIN_INT_RET;
+ case nir_intrinsic_shared_atomic_umin:
+ return LDS_OP2_LDS_MIN_UINT_RET;
+ case nir_intrinsic_shared_atomic_xor:
+ return LDS_OP2_LDS_XOR_RET;
+ case nir_intrinsic_shared_atomic_exchange:
+ return LDS_OP2_LDS_XCHG_RET;
+ case nir_intrinsic_shared_atomic_comp_swap:
+ return LDS_OP3_LDS_CMP_XCHG_RET;
+ default:
+ unreachable("Unsupported shared atomic opcode");
+ }
+}
+
+bool ShaderFromNirProcessor::emit_atomic_local_shared(nir_intrinsic_instr* instr)
+{
+ auto address = from_nir(instr->src[0], 0);
+ auto dest_value = from_nir(instr->dest, 0);
+ auto value = from_nir(instr->src[1], 0);
+ auto op = lds_op_from_intrinsic(instr->intrinsic);
+
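+ /* Compare-exchange is the only LDS atomic that carries a second data
+ operand (the value to write on a match) in src[2]. */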
+ if (unlikely(instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)) {
+ auto value2 = from_nir(instr->src[2], 0);
+ emit_instruction(new LDSAtomicInstruction(dest_value, value, value2, address, op));
+ } else {
+ emit_instruction(new LDSAtomicInstruction(dest_value, value, address, op));
+ }
+ return true;
+}
+
+
bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
{
unsigned write_mask = nir_intrinsic_write_mask(instr);
case nir_intrinsic_memory_barrier_shared:
case nir_intrinsic_memory_barrier:
return emit_barrier(instr);
+ case nir_intrinsic_shared_atomic_add:
+ case nir_intrinsic_shared_atomic_and:
+ case nir_intrinsic_shared_atomic_or:
+ case nir_intrinsic_shared_atomic_imax:
+ case nir_intrinsic_shared_atomic_umax:
+ case nir_intrinsic_shared_atomic_imin:
+ case nir_intrinsic_shared_atomic_umin:
+ case nir_intrinsic_shared_atomic_xor:
+ case nir_intrinsic_shared_atomic_exchange:
+ case nir_intrinsic_shared_atomic_comp_swap:
+ return emit_atomic_local_shared(instr);
case nir_intrinsic_copy_deref:
case nir_intrinsic_load_constant:
case nir_intrinsic_load_input:
bool emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset);
bool emit_load_local_shared(nir_intrinsic_instr* instr);
bool emit_store_local_shared(nir_intrinsic_instr* instr);
+ bool emit_atomic_local_shared(nir_intrinsic_instr* instr);
bool emit_barrier(nir_intrinsic_instr* instr);