bool emit_rat(const RatInstruction& instr);
bool emit_ldswrite(const LDSWriteInstruction& instr);
bool emit_ldsread(const LDSReadInstruction& instr);
+ bool emit_ldsatomic(const LDSAtomicInstruction& instr);
bool emit_tf_write(const GDSStoreTessFactor& instr);
bool emit_load_addr(PValue addr);
return emit_ldswrite(static_cast<const LDSWriteInstruction&>(*i));
case Instruction::lds_read:
return emit_ldsread(static_cast<const LDSReadInstruction&>(*i));
+ case Instruction::lds_atomic:
+ return emit_ldsatomic(static_cast<const LDSAtomicInstruction&>(*i));
case Instruction::tf_write:
return emit_tf_write(static_cast<const GDSStoreTessFactor&>(*i));
default:
return false;
}
+ unsigned old_nliterals_in_group = m_nliterals_in_group;
for (unsigned i = 0; i < ai.n_sources(); ++i) {
auto& s = ai.src(i);
if (s.type() == Value::literal)
sfn_log << SfnLog::assembly << " Have " << m_nliterals_in_group << " inject a last op (nop)\n";
alu.op = ALU_OP0_NOP;
alu.last = 1;
+ alu.dst.chan = 3;
int retval = r600_bytecode_add_alu(m_bc, &alu);
if (retval)
return false;
memset(&alu, 0, sizeof(alu));
- m_nliterals_in_group = 0;
+ m_nliterals_in_group -= old_nliterals_in_group;
}
alu.op = opcode_map.at(ai.opcode());
output.gpr = instr.gpr().sel();
output.type = instr.type();
- output.elem_size = instr.ncomp();
- output.comp_mask = 0xF;
+ output.elem_size = 3;
+ output.comp_mask = 0xf;
output.burst_count = 1;
output.op = instr.op();
if (instr.type() == mem_write_ind || instr.type() == mem_write_ind_ack) {
{
auto addr = tex_instr.sampler_offset();
if (addr && (!m_bc->index_loaded[1] || m_loop_nesting
- || m_bc->index_reg[1] != addr->sel())) {
+ || m_bc->index_reg[1] != addr->sel()
+ || m_bc->index_reg_chan[1] != addr->chan())) {
struct r600_bytecode_alu alu;
memset(&alu, 0, sizeof(alu));
alu.op = opcode_map.at(op1_mova_int);
return false;
m_bc->index_reg[1] = addr->sel();
+ m_bc->index_reg_chan[1] = addr->chan();
m_bc->index_loaded[1] = true;
}
buffer_offset = boffs.value();
} else {
index_mode = bim_zero;
- if ((!m_bc->index_loaded[0] || m_loop_nesting || m_bc->index_reg[0] != addr->sel())) {
+ if ((!m_bc->index_loaded[0] || m_loop_nesting ||
+ m_bc->index_reg[0] != addr->sel() ||
+ m_bc->index_reg_chan[0] != addr->chan())) {
struct r600_bytecode_alu alu;
memset(&alu, 0, sizeof(alu));
alu.op = opcode_map.at(op1_mova_int);
return false;
m_bc->index_reg[0] = addr->sel();
+ m_bc->index_reg_chan[0] = addr->chan();
m_bc->index_loaded[0] = true;
}
}
auto addr = instr.uav_id();
if (addr->type() != Value::literal) {
if (!m_bc->index_loaded[1] || m_loop_nesting ||
- m_bc->index_reg[1] != addr->sel()) {
+ m_bc->index_reg[1] != addr->sel()
+ || m_bc->index_reg_chan[1] != addr->chan()) {
struct r600_bytecode_alu alu;
- memset(&alu, 0, sizeof(alu));
- alu.op = opcode_map.at(op2_lshr_int);
- alu.dst.sel = addr->sel();
- alu.dst.chan = addr->chan();
- alu.src[0].sel = addr->sel();
- alu.src[0].chan = addr->chan();
- alu.src[1].sel = ALU_SRC_LITERAL;
- alu.src[1].value = 2;
- alu.last = 1;
- alu.dst.write = 1;
- int r = r600_bytecode_add_alu(m_bc, &alu);
- if (r)
- return false;
-
memset(&alu, 0, sizeof(alu));
alu.op = opcode_map.at(op1_mova_int);
alu.dst.chan = 0;
alu.src[0].sel = addr->sel();
alu.src[0].chan = addr->chan();
alu.last = 1;
- r = r600_bytecode_add_alu(m_bc, &alu);
+ int r = r600_bytecode_add_alu(m_bc, &alu);
if (r)
return false;
return false;
m_bc->index_reg[1] = addr->sel();
+ m_bc->index_reg_chan[1] = addr->chan();
m_bc->index_loaded[1] = true;
}
} else {
const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
- uav_idx = addr_reg.value() >> 2;
+ uav_idx = addr_reg.value();
}
memset(&gds, 0, sizeof(struct r600_bytecode_gds));
gds.op = ds_opcode_map.at(instr.op());
- gds.dst_gpr = instr.dest_sel();
- gds.uav_id = (uav_idx >= 0 ? uav_idx : 0) + instr.uav_base();
- gds.uav_index_mode = uav_idx >= 0 ? bim_none : bim_one;
- gds.src_gpr = instr.src_sel();
+ gds.dst_gpr = instr.dest_sel();
+ gds.uav_id = (uav_idx >= 0 ? uav_idx : 0) + instr.uav_base();
+ gds.uav_index_mode = uav_idx >= 0 ? bim_none : bim_one;
+ gds.src_gpr = instr.src_sel();
- if (instr.op() == DS_OP_CMP_XCHG_RET) {
- gds.src_sel_z = 1;
- } else {
- gds.src_sel_z = 7;
- }
+ gds.src_sel_x = instr.src_swizzle(0);
+ gds.src_sel_y = instr.src_swizzle(1);
+ gds.src_sel_z = instr.src_swizzle(2);
- gds.src_sel_x = instr.src_swizzle(0);
- gds.src_sel_y = instr.src_swizzle(1);
-
- gds.dst_sel_x = 0;
- gds.dst_sel_y = 7;
- gds.dst_sel_z = 7;
- gds.dst_sel_w = 7;
- gds.src_gpr2 = 0;
- gds.alloc_consume = 1; // Not Cayman
+ gds.dst_sel_x = instr.dest_swizzle(0);
+ gds.dst_sel_y = 7;
+ gds.dst_sel_z = 7;
+ gds.dst_sel_w = 7;
+ gds.src_gpr2 = 0;
+ gds.alloc_consume = 1; // Not Cayman
int r = r600_bytecode_add_gds(m_bc, &gds);
- if (r)
- return false;
- m_bc->cf_last->vpm = 1;
+ if (r)
+ return false;
+ m_bc->cf_last->vpm = 1;
+ m_bc->cf_last->barrier = 1;
return true;
}
return true;
}
+bool AssemblyFromShaderLegacyImpl::emit_ldsatomic(const LDSAtomicInstruction& instr)
+{
+ if (m_bc->cf_last->ndw > 240 - 4)
+ m_bc->force_add_cf = 1;
+
+ r600_bytecode_alu alu_fetch;
+ r600_bytecode_alu alu_read;
+
+ memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
+ alu_fetch.is_lds_idx_op = true;
+ alu_fetch.op = instr.op();
+
+ copy_src(alu_fetch.src[0], instr.address());
+ auto& src0 = instr.src0();
+ alu_fetch.src[1].sel = src0.sel();
+ alu_fetch.src[1].chan = src0.chan();
+ if (instr.src1()) {
+ auto& src1 = *instr.src1();
+ alu_fetch.src[2].sel = src1.sel();
+ alu_fetch.src[2].chan = src1.chan();
+ }
+ alu_fetch.last = 1;
+ int r = r600_bytecode_add_alu(m_bc, &alu_fetch);
+ if (r)
+ return false;
+
+ memset(&alu_read, 0, sizeof(r600_bytecode_alu));
+ copy_dst(alu_read.dst, instr.dest());
+ alu_read.op = ALU_OP1_MOV;
+ alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
+ alu_read.last = 1;
+ alu_read.dst.write = 1;
+ r = r600_bytecode_add_alu(m_bc, &alu_read);
+ if (r)
+ return false;
+ return true;
+}
+
bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction& instr)
{
struct r600_bytecode_gds gds;
- int rat_idx = -1;
+ int rat_idx = instr.rat_id();
EBufferIndexMode rat_index_mode = bim_none;
auto addr = instr.rat_id_offset();
if (addr) {
if (addr->type() != Value::literal) {
rat_index_mode = bim_one;
- if (!m_bc->index_loaded[1] || m_loop_nesting || m_bc->index_reg[1] != addr->sel()) {
+ if (!m_bc->index_loaded[1] || m_loop_nesting ||
+ m_bc->index_reg[1] != addr->sel()
+ || m_bc->index_reg_chan[1] != addr->chan()) {
struct r600_bytecode_alu alu;
memset(&alu, 0, sizeof(alu));
return false;
m_bc->index_reg[1] = addr->sel();
+ m_bc->index_reg_chan[1] = addr->chan();
m_bc->index_loaded[1] = true;
}
} else {
const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
- rat_idx = addr_reg.value();
+ rat_idx += addr_reg.value();
}
}
memset(&gds, 0, sizeof(struct r600_bytecode_gds));
cf->output.index_gpr = instr.index_gpr();
cf->output.comp_mask = instr.comp_mask();
cf->output.burst_count = instr.burst_count();
- cf->output.swizzle_x = instr.data_swz(0);
- cf->output.swizzle_y = instr.data_swz(1);
- cf->output.swizzle_z = instr.data_swz(2);
- cf->output.swizzle_w = instr.data_swz(3);
+ assert(instr.data_swz(0) == PIPE_SWIZZLE_X);
+ if (cf->rat.inst != RatInstruction::STORE_TYPED) {
+ assert(instr.data_swz(1) == PIPE_SWIZZLE_Y ||
+ instr.data_swz(1) == PIPE_SWIZZLE_MAX) ;
+ assert(instr.data_swz(2) == PIPE_SWIZZLE_Z ||
+ instr.data_swz(2) == PIPE_SWIZZLE_MAX) ;
+ }
+
cf->vpm = 1;
cf->barrier = 1;
cf->mark = instr.need_ack();
dst.sel = d.sel();
dst.chan = d.chan();
- if (m_bc->index_reg[1] == dst.sel)
+ if (m_bc->index_reg[1] == dst.sel &&
+ m_bc->index_reg_chan[1] == dst.chan)
m_bc->index_loaded[1] = false;
- if (m_bc->index_reg[0] == dst.sel)
+ if (m_bc->index_reg[0] == dst.sel &&
+ m_bc->index_reg_chan[0] == dst.chan)
m_bc->index_loaded[0] = false;
return true;
{op1_mov, ALU_OP1_MOV},
{op0_nop, ALU_OP0_NOP},
{op2_mul_64, ALU_OP2_MUL_64},
- {op1_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
- {op1v_flt64_to_flt32, ALU_OP1_FLT32_TO_FLT64},
+ {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
+ {op1v_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
{op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT},
{op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT},
{op2_pred_sete, ALU_OP2_PRED_SETE},