Currently, we don't do this.
+## RDNA / GFX10 hazards
+
+### SMEM store followed by a load with the same address
+
+We found that an `s_buffer_load` will produce incorrect results if it is preceded
+by an `s_buffer_store` to the same address. Inserting an `s_nop` between them
+does not mitigate the issue, so an `s_waitcnt lgkmcnt(0)` must be inserted before
+the load instead. LLVM does not list this among its other GFX10 bugs, but LLVM
+doesn't use SMEM stores, so it's not surprising that it went unnoticed there.
uint8_t vs_cnt = 0;
bool pending_flat_lgkm = false;
bool pending_flat_vm = false;
+ bool pending_s_buffer_store = false; /* GFX10 workaround */
wait_imm barrier_imm[barrier_count];
vs_cnt = std::max(vs_cnt, other->vs_cnt);
pending_flat_lgkm |= other->pending_flat_lgkm;
pending_flat_vm |= other->pending_flat_vm;
+ pending_s_buffer_store |= other->pending_s_buffer_store;
for (std::pair<PhysReg,wait_entry> entry : other->gpr_map)
{
*/
if (ctx.lgkm_cnt && instr->opcode == aco_opcode::s_dcache_wb)
imm.lgkm = 0;
+
+ /* GFX10: An SMEM store followed by an SMEM load at the same address is a hazard:
+ * the load doesn't return the correct values unless we wait for the store first.
+ * This is NOT mitigated by an s_nop.
+ *
+ * TODO: Refine this when we have proper alias analysis. Until then the addresses
+ * are not compared, so every matching load waits while a store is pending.
+ */
+ SMEM_instruction *smem = static_cast<SMEM_instruction *>(instr);
+ if (ctx.pending_s_buffer_store &&
+ !smem->definitions.empty() &&
+ !smem->can_reorder && smem->barrier == barrier_buffer) {
+ imm.lgkm = 0;
+ }
}
if (instr->format == Format::PSEUDO_BARRIER) {
if (imm.vm == 0)
ctx.pending_flat_vm = false;
- if (imm.lgkm == 0)
+ if (imm.lgkm == 0) {
ctx.pending_flat_lgkm = false;
+ ctx.pending_s_buffer_store = false;
+ }
return imm;
}
break;
}
case Format::SMEM: {
+ SMEM_instruction *smem = static_cast<SMEM_instruction*>(instr);
update_counters(ctx, event_smem, static_cast<SMEM_instruction*>(instr)->barrier);
if (!instr->definitions.empty())
insert_wait_entry(ctx, instr->definitions[0], event_smem);
+ else if (ctx.chip_class >= GFX10 &&
+ !smem->can_reorder &&
+ smem->barrier == barrier_buffer)
+ ctx.pending_s_buffer_store = true;
+
break;
}
case Format::DS: {