aco: optimize some masked swizzles to DPP

[mesa.git] / src / amd / compiler / aco_spill.cpp
diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp

index bfa3c266a76673fe51858e50ad193480cf55ce61..62ab69a045be7cf8f9106d1b36151250a6a9a77f 100644 (file)
--- a/src/amd/compiler/aco_spill.cpp
+++ b/src/amd/compiler/aco_spill.cpp
@@ -28,6 +28,7 @@
  #include "sid.h"
  
  #include <map>
+#include <set>
  #include <stack>
  
  /*
@@ -65,7 +66,7 @@ struct spill_ctx {
     spill_ctx(const RegisterDemand target_pressure, Program* program,
               std::vector<std::vector<RegisterDemand>> register_demand)
        : target_pressure(target_pressure), program(program),
-        register_demand(register_demand), renames(program->blocks.size()),
+        register_demand(std::move(register_demand)), renames(program->blocks.size()),
          spills_entry(program->blocks.size()), spills_exit(program->blocks.size()),
          processed(program->blocks.size(), false), wave_size(program->wave_size) {}
  
@@ -213,7 +214,7 @@ void next_uses_per_block(spill_ctx& ctx, unsigned block_idx, std::set<uint32_t>&
  
  }
  
-void compute_global_next_uses(spill_ctx& ctx, std::vector<std::set<Temp>>& live_out)
+void compute_global_next_uses(spill_ctx& ctx)
  {
     ctx.next_use_distances_start.resize(ctx.program->blocks.size());
     ctx.next_use_distances_end.resize(ctx.program->blocks.size());
@@ -660,15 +661,10 @@ RegisterDemand init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_id
  RegisterDemand get_demand_before(spill_ctx& ctx, unsigned block_idx, unsigned idx)
  {
     if (idx == 0) {
-      RegisterDemand demand_before = ctx.register_demand[block_idx][idx];
+      RegisterDemand demand = ctx.register_demand[block_idx][idx];
        aco_ptr<Instruction>& instr = ctx.program->blocks[block_idx].instructions[idx];
-      for (const Definition& def : instr->definitions)
-         demand_before -= def.getTemp();
-      for (const Operand& op : instr->operands) {
-         if (op.isFirstKill())
-            demand_before += op.getTemp();
-      }
-      return demand_before;
+      aco_ptr<Instruction> instr_before(nullptr);
+      return get_demand_before(demand, instr, instr_before);
     } else {
        return ctx.register_demand[block_idx][idx - 1];
     }
@@ -1575,9 +1571,9 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
                       split->definitions[i] = bld.def(v1);
                    bld.insert(split);
                    for (unsigned i = 0; i < temp.size(); i++)
-                     bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false);
+                     bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false);
                 } else {
-                  bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, temp, offset, false);
+                  bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, temp, offset, false);
                 }
              } else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
                 ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
@@ -1641,11 +1637,11 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
                    for (unsigned i = 0; i < def.size(); i++) {
                       Temp tmp = bld.tmp(v1);
                       vec->operands[i] = Operand(tmp);
-                     bld.mubuf(opcode, Definition(tmp), Operand(), scratch_rsrc, scratch_offset, offset + i * 4, false);
+                     bld.mubuf(opcode, Definition(tmp), scratch_rsrc, Operand(v1), scratch_offset, offset + i * 4, false);
                    }
                    bld.insert(vec);
                 } else {
-                  bld.mubuf(opcode, def, Operand(), scratch_rsrc, scratch_offset, offset, false);
+                  bld.mubuf(opcode, def, scratch_rsrc, Operand(v1), scratch_offset, offset, false);
                 }
              } else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
                 uint32_t spill_slot = sgpr_slot[spill_id];
@@ -1769,7 +1765,7 @@ void spill(Program* program, live& live_vars, const struct radv_nir_compiler_opt
  
     /* initialize ctx */
     spill_ctx ctx(register_target, program, live_vars.register_demand);
-   compute_global_next_uses(ctx, live_vars.live_out);
+   compute_global_next_uses(ctx);
     get_rematerialize_info(ctx);
  
     /* create spills and reloads */