aco: disable add combining for ds_swizzle_b32
[mesa.git] / src / amd / compiler / aco_insert_waitcnt.cpp
index 5d65ae253ffc373eff2704f5bb42b6821f168032..a8343d1889430dbac6a3f99e079f628d829fd1c0 100644 (file)
@@ -359,7 +359,7 @@ wait_imm parse_wait_instr(wait_ctx& ctx, Instruction *instr)
       imm.vs = std::min<uint8_t>(imm.vs, static_cast<SOPK_instruction*>(instr)->imm);
       return imm;
    } else if (instr->opcode == aco_opcode::s_waitcnt) {
-      return wait_imm(ctx.chip_class, static_cast<SOPK_instruction*>(instr)->imm);
+      return wait_imm(ctx.chip_class, static_cast<SOPP_instruction*>(instr)->imm);
    }
    return wait_imm();
 }
@@ -399,7 +399,7 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx)
       switch (instr->opcode) {
       case aco_opcode::p_memory_barrier_all:
          for (unsigned i = 0; i < barrier_count; i++) {
-            if ((1 << i) == barrier_shared && workgroup_size <= 64)
+            if ((1 << i) == barrier_shared && workgroup_size <= ctx.program->wave_size)
                continue;
             imm.combine(ctx.barrier_imm[i]);
          }
@@ -414,7 +414,7 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx)
          imm.combine(ctx.barrier_imm[ffs(barrier_image) - 1]);
          break;
       case aco_opcode::p_memory_barrier_shared:
-         if (workgroup_size > 64)
+         if (workgroup_size > ctx.program->wave_size)
             imm.combine(ctx.barrier_imm[ffs(barrier_shared) - 1]);
          break;
       default:
@@ -552,8 +552,8 @@ void update_counters_for_flat_load(wait_ctx& ctx, barrier_interaction barrier=ba
 
    if (ctx.lgkm_cnt <= ctx.max_lgkm_cnt)
       ctx.lgkm_cnt++;
-   if (ctx.lgkm_cnt <= ctx.max_vm_cnt)
-   ctx.vm_cnt++;
+   if (ctx.vm_cnt <= ctx.max_vm_cnt)
+      ctx.vm_cnt++;
 
    update_barrier_imm(ctx, counter_vm | counter_lgkm, barrier);
 
@@ -729,6 +729,9 @@ void handle_block(Program *program, Block& block, wait_ctx& ctx)
       }
    }
 
+   if (!queued_imm.empty())
+      emit_waitcnt(ctx, new_instructions, queued_imm);
+
    block.instructions.swap(new_instructions);
 }