imm.vs = std::min<uint8_t>(imm.vs, static_cast<SOPK_instruction*>(instr)->imm);
return imm;
} else if (instr->opcode == aco_opcode::s_waitcnt) {
- return wait_imm(ctx.chip_class, static_cast<SOPK_instruction*>(instr)->imm);
+ return wait_imm(ctx.chip_class, static_cast<SOPP_instruction*>(instr)->imm);
}
return wait_imm();
}
switch (instr->opcode) {
case aco_opcode::p_memory_barrier_all:
for (unsigned i = 0; i < barrier_count; i++) {
- if ((1 << i) == barrier_shared && workgroup_size <= 64)
+ if ((1 << i) == barrier_shared && workgroup_size <= ctx.program->wave_size)
continue;
imm.combine(ctx.barrier_imm[i]);
}
imm.combine(ctx.barrier_imm[ffs(barrier_image) - 1]);
break;
case aco_opcode::p_memory_barrier_shared:
- if (workgroup_size > 64)
+ if (workgroup_size > ctx.program->wave_size)
imm.combine(ctx.barrier_imm[ffs(barrier_shared) - 1]);
break;
default:
if (ctx.lgkm_cnt <= ctx.max_lgkm_cnt)
ctx.lgkm_cnt++;
- if (ctx.lgkm_cnt <= ctx.max_vm_cnt)
- ctx.vm_cnt++;
+ if (ctx.vm_cnt <= ctx.max_vm_cnt)
+ ctx.vm_cnt++;
update_barrier_imm(ctx, counter_vm | counter_lgkm, barrier);
}
}
+ if (!queued_imm.empty())
+ emit_waitcnt(ctx, new_instructions, queued_imm);
+
block.instructions.swap(new_instructions);
}