aco: fix off-by-one error when initializing sgpr_live_in
[mesa.git] / src / amd / compiler / aco_register_allocation.cpp
index 621bc1f7636cb972d9045b68f4b45a23a345e7a5..8370effdbc3c48e7699ef47b217463d7318686f3 100644 (file)
@@ -759,11 +759,18 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
 
       /* count variables to be moved and check war_hint */
       bool war_hint = false;
-      for (unsigned j = reg_lo; j <= reg_hi; j++) {
-         if (reg_file[j] != 0)
+      bool linear_vgpr = false;
+      for (unsigned j = reg_lo; j <= reg_hi && !linear_vgpr; j++) {
+         if (reg_file[j] != 0) {
             k++;
+            /* we cannot split live ranges of linear vgprs */
+            if (ctx.assignments[reg_file[j]].second & (1 << 6))
+               linear_vgpr = true;
+         }
          war_hint |= ctx.war_hint[j];
       }
+      if (linear_vgpr || (war_hint && !best_war_hint))
+         continue;
 
       /* count operands in wrong positions */
       for (unsigned j = 0, offset = 0; j < instr->operands.size(); offset += instr->operands[j].size(), j++) {
@@ -775,7 +782,7 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
             k += instr->operands[j].size();
       }
       bool aligned = rc == RegClass::v4 && reg_lo % 4 == 0;
-      if (k > num_moves || (!aligned && k == num_moves) || (war_hint && !best_war_hint))
+      if (k > num_moves || (!aligned && k == num_moves))
          continue;
 
       best_pos = reg_lo;
@@ -881,7 +888,15 @@ void handle_pseudo(ra_ctx& ctx,
          break;
       }
    }
-   if (!writes_sgpr)
+   /* if all operands are constant, no need to care either */
+   bool reads_sgpr = false;
+   for (Operand& op : instr->operands) {
+      if (op.isTemp() && op.getTemp().type() == RegType::sgpr) {
+         reads_sgpr = true;
+         break;
+      }
+   }
+   if (!(writes_sgpr && reads_sgpr))
       return;
 
    Pseudo_instruction *pi = (Pseudo_instruction *)instr;
@@ -953,7 +968,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
 
    handle_live_in = [&](Temp val, Block *block) -> Temp {
       std::vector<unsigned>& preds = val.is_linear() ? block->linear_preds : block->logical_preds;
-      if (preds.size() == 0 && block->index != 0) {
+      if (preds.size() == 0 || val.regClass() == val.regClass().as_linear()) {
          renames[block->index][val.id()] = val;
          return val;
       }
@@ -1097,18 +1112,10 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
       std::vector<aco_ptr<Instruction>>::reverse_iterator rit;
       for (rit = block.instructions.rbegin(); rit != block.instructions.rend(); ++rit) {
          aco_ptr<Instruction>& instr = *rit;
-         if (!is_phi(instr)) {
-            for (const Operand& op : instr->operands) {
-               if (op.isTemp())
-                  live.emplace(op.getTemp());
-            }
-            if (instr->opcode == aco_opcode::p_create_vector) {
-               for (const Operand& op : instr->operands) {
-                  if (op.isTemp() && op.getTemp().type() == instr->definitions[0].getTemp().type())
-                     vectors[op.tempId()] = instr.get();
-               }
-            }
-         } else if (!instr->definitions[0].isKill() && !instr->definitions[0].isFixed()) {
+         if (is_phi(instr)) {
+            live.erase(instr->definitions[0].getTemp());
+            if (instr->definitions[0].isKill() || instr->definitions[0].isFixed())
+               continue;
             /* collect information about affinity-related temporaries */
             std::vector<Temp> affinity_related;
             /* affinity_related[0] is the last seen affinity-related temp */
@@ -1121,15 +1128,41 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
                }
             }
             phi_ressources.emplace_back(std::move(affinity_related));
+            continue;
          }
 
-         /* erase from live */
-         for (const Definition& def : instr->definitions) {
-            if (def.isTemp()) {
-               live.erase(def.getTemp());
-               std::map<unsigned, unsigned>::iterator it = temp_to_phi_ressources.find(def.tempId());
-               if (it != temp_to_phi_ressources.end() && def.regClass() == phi_ressources[it->second][0].regClass())
-                  phi_ressources[it->second][0] = def.getTemp();
+         /* add vector affinities */
+         if (instr->opcode == aco_opcode::p_create_vector) {
+            for (const Operand& op : instr->operands) {
+               if (op.isTemp() && op.getTemp().type() == instr->definitions[0].getTemp().type())
+                  vectors[op.tempId()] = instr.get();
+            }
+         }
+
+         /* add operands to live variables */
+         for (const Operand& op : instr->operands) {
+            if (op.isTemp())
+               live.emplace(op.getTemp());
+         }
+
+         /* erase definitions from live */
+         for (unsigned i = 0; i < instr->definitions.size(); i++) {
+            const Definition& def = instr->definitions[i];
+            if (!def.isTemp())
+               continue;
+            live.erase(def.getTemp());
+            /* mark last-seen phi operand */
+            std::map<unsigned, unsigned>::iterator it = temp_to_phi_ressources.find(def.tempId());
+            if (it != temp_to_phi_ressources.end() && def.regClass() == phi_ressources[it->second][0].regClass()) {
+               phi_ressources[it->second][0] = def.getTemp();
+               /* try to coalesce phi affinities with parallelcopies */
+               if (!def.isFixed() && instr->opcode == aco_opcode::p_parallelcopy) {
+                  Operand op = instr->operands[i];
+                  if (op.isTemp() && op.isFirstKill() && def.regClass() == op.regClass()) {
+                     phi_ressources[it->second].emplace_back(op.getTemp());
+                     temp_to_phi_ressources[op.tempId()] = it->second;
+                  }
+               }
             }
          }
       }
@@ -1272,6 +1305,29 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
 
             /* process parallelcopy */
             for (std::pair<Operand, Definition> pc : parallelcopy) {
+               /* see if it's a copy from a different phi */
+               //TODO: prefer moving some previous phis over live-ins
+               //TODO: somehow prevent phis fixed before the RA from being updated (shouldn't be a problem in practice since they can only be fixed to exec)
+               Instruction *prev_phi = NULL;
+               std::vector<aco_ptr<Instruction>>::iterator phi_it;
+               for (phi_it = instructions.begin(); phi_it != instructions.end(); ++phi_it) {
+                  if ((*phi_it)->definitions[0].tempId() == pc.first.tempId())
+                     prev_phi = phi_it->get();
+               }
+               phi_it = it;
+               while (!prev_phi && is_phi(*++phi_it)) {
+                  if ((*phi_it)->definitions[0].tempId() == pc.first.tempId())
+                     prev_phi = phi_it->get();
+               }
+               if (prev_phi) {
+                  /* if so, just update that phi's register */
+                  prev_phi->definitions[0].setFixed(pc.second.physReg());
+                  ctx.assignments[prev_phi->definitions[0].tempId()] = {pc.second.physReg(), pc.second.regClass()};
+                  for (unsigned reg = pc.second.physReg(); reg < pc.second.physReg() + pc.second.size(); reg++)
+                     register_file[reg] = prev_phi->definitions[0].tempId();
+                  continue;
+               }
+
                /* rename */
                std::map<unsigned, Temp>::iterator orig_it = ctx.orig_names.find(pc.first.tempId());
                Temp orig = pc.first.getTemp();
@@ -1282,20 +1338,6 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
                renames[block.index][orig.id()] = pc.second.getTemp();
                renames[block.index][pc.second.tempId()] = pc.second.getTemp();
 
-               /* see if it's a copy from a previous phi */
-               //TODO: prefer moving some previous phis over live-ins
-               //TODO: somehow prevent phis fixed before the RA from being updated (shouldn't be a problem in practice since they can only be fixed to exec)
-               Instruction *prev_phi = NULL;
-               for (auto it2 = instructions.begin(); it2 != instructions.end(); ++it2) {
-                  if ((*it2)->definitions[0].tempId() == pc.first.tempId())
-                     prev_phi = it2->get();
-               }
-               if (prev_phi) {
-                  /* if so, just update that phi */
-                  prev_phi->definitions[0] = pc.second;
-                  continue;
-               }
-
                /* otherwise, this is a live-in and we need to create a new phi
                 * to move it in this block's predecessors */
                aco_opcode opcode = pc.first.getTemp().is_linear() ? aco_opcode::p_linear_phi : aco_opcode::p_phi;
@@ -1323,7 +1365,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
       }
 
       /* fill in sgpr_live_in */
-      for (unsigned i = 0; i < ctx.max_used_sgpr; i++)
+      for (unsigned i = 0; i <= ctx.max_used_sgpr; i++)
          sgpr_live_in[block.index][i] = register_file[i];
       sgpr_live_in[block.index][127] = register_file[scc.reg];
 
@@ -1405,7 +1447,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
                      for (unsigned j = 0; j < i; j++) {
                         Operand& op = instr->operands[j];
                         if (op.isTemp() && op.tempId() == blocking_id) {
-                           op = Operand(pc_def.getTemp());
+                           op.setTemp(pc_def.getTemp());
                            op.setFixed(reg);
                         }
                      }
@@ -1473,9 +1515,8 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
              instr->operands[1].getTemp().type() == RegType::vgpr) { /* TODO: swap src0 and src1 in this case */
             VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr.get());
             bool can_use_mac = !(vop3->abs[0] || vop3->abs[1] || vop3->abs[2] ||
-                                 vop3->opsel[0] || vop3->opsel[1] || vop3->opsel[2] ||
                                  vop3->neg[0] || vop3->neg[1] || vop3->neg[2] ||
-                                 vop3->clamp || vop3->omod);
+                                 vop3->clamp || vop3->omod || vop3->opsel);
             if (can_use_mac) {
                instr->format = Format::VOP2;
                instr->opcode = aco_opcode::v_mac_f32;
@@ -1485,7 +1526,8 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
          /* handle definitions which must have the same register as an operand */
          if (instr->opcode == aco_opcode::v_interp_p2_f32 ||
              instr->opcode == aco_opcode::v_mac_f32 ||
-             instr->opcode == aco_opcode::v_writelane_b32) {
+             instr->opcode == aco_opcode::v_writelane_b32 ||
+             instr->opcode == aco_opcode::v_writelane_b32_e64) {
             instr->definitions[0].setFixed(instr->operands[2].physReg());
          } else if (instr->opcode == aco_opcode::s_addk_i32 ||
                     instr->opcode == aco_opcode::s_mulk_i32) {
@@ -1695,6 +1737,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
 
                pc->operands[i] = parallelcopy[i].first;
                pc->definitions[i] = parallelcopy[i].second;
+               assert(pc->operands[i].size() == pc->definitions[i].size());
 
                /* it might happen that the operand is already renamed. we have to restore the original name. */
                std::map<unsigned, Temp>::iterator it = ctx.orig_names.find(pc->operands[i].tempId());
@@ -1767,7 +1810,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
          if (instr_needs_vop3) {
 
             /* if the first operand is a literal, we have to move it to a reg */
-            if (instr->operands.size() && instr->operands[0].isLiteral()) {
+            if (instr->operands.size() && instr->operands[0].isLiteral() && program->chip_class < GFX10) {
                bool can_sgpr = true;
                /* check, if we have to move to vgpr */
                for (const Operand& op : instr->operands) {