aco: implement 64-bit VGPR constant copies in handle_operands()
author: Rhys Perry <pendingchaos02@gmail.com>
Fri, 20 Mar 2020 16:07:08 +0000 (16:07 +0000)
committer: Marge Bot <eric+marge@anholt.net>
Tue, 24 Mar 2020 11:28:55 +0000 (11:28 +0000)
64-bit VGPR constant copies can happen because of 64-bit constant copy
propagation. Since this optimization is beneficial, and avoiding such
copies would be more annoying to deal with in the optimizer, I've
implemented 64-bit VGPR constant copies in handle_operands().

This also sets copy_operation::size correctly for 64-bit constant copies.

Cc: 20.0 <mesa-stable@lists.freedesktop.org>
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4260>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4260>

src/amd/compiler/aco_ir.h
src/amd/compiler/aco_lower_to_hw_instr.cpp

index 1eae6c5d0ccb8f2f137ae13a2905de67894f50e6..0be646d8b0f834c4153c4e8dbc0da7c220c6adfc 100644 (file)
@@ -472,6 +472,36 @@ public:
       return isConstant() && constantValue() == cmp;
    }
 
+   constexpr uint64_t constantValue64(bool signext=false) const noexcept /* Returns the constant's value as a 64-bit integer / bit pattern. */
+   { /* signext is only consulted by the final fallback return; the is64BitConst_ decoding below ignores it. */
+      if (is64BitConst_) { /* decode the value from the register encoding reg_.reg */
+         if (reg_.reg <= 192) /* NOTE(review): assumes reg_.reg >= 128 here, otherwise the subtraction goes below zero - confirm */
+            return reg_.reg - 128; /* 128 -> 0, ..., 192 -> 64 */
+         else if (reg_.reg <= 208) /* 193..208 */
+            return 0xFFFFFFFFFFFFFFFF - (reg_.reg - 193); /* 193 -> 0xFFFFFFFFFFFFFFFF (-1 in two's complement), ..., 208 -> -16 */
+
+         switch (reg_.reg) { /* the returned literals are IEEE-754 double-precision bit patterns */
+         case 240:
+            return 0x3FE0000000000000; /* 0.5 */
+         case 241:
+            return 0xBFE0000000000000; /* -0.5 */
+         case 242:
+            return 0x3FF0000000000000; /* 1.0 */
+         case 243:
+            return 0xBFF0000000000000; /* -1.0 */
+         case 244:
+            return 0x4000000000000000; /* 2.0 */
+         case 245:
+            return 0xC000000000000000; /* -2.0 */
+         case 246:
+            return 0x4010000000000000; /* 4.0 */
+         case 247:
+            return 0xC010000000000000; /* -4.0 */
+         } /* no default case: any other reg_.reg value continues to the fallback return below */
+      } /* Fallback: data_.i OR-ed with 0xffffffff00000000 when signext is set and bit 31 of data_.i is set, else with 0 (presumably data_.i is 32 bits wide - confirm). */
+      return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i;
+   }
+
    /* Indicates that the killed operand's live range intersects with the
     * instruction's definitions. Unlike isKill() and isFirstKill(), this is
     * not set by liveness analysis. */
index 240f6c3a3733268b05ebf2b7744a1a76b4c5da46..f42484c502fa6e7a190e4f03e47f3e7bf5ff48e7 100644 (file)
@@ -764,6 +764,11 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
             preserve_scc = true;
          } else if (it->second.size == 2 && it->second.def.getTemp().type() == RegType::sgpr) {
             bld.sop1(aco_opcode::s_mov_b64, it->second.def, Operand(it->second.op.physReg(), s2));
+         } else if (it->second.size == 2 && it->second.op.isConstant()) {
+            uint64_t val = it->second.op.constantValue64();
+            bld.vop1(aco_opcode::v_mov_b32, it->second.def, Operand((uint32_t)val));
+            bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{it->second.def.physReg() + 1}, v1),
+                     Operand((uint32_t)(val >> 32)));
          } else {
             bld.copy(it->second.def, it->second.op);
          }
@@ -905,7 +910,7 @@ void lower_to_hw_instr(Program* program)
                   if (op.isConstant()) {
                      const PhysReg reg = PhysReg{instr->definitions[0].physReg() + reg_idx};
                      const Definition def = Definition(reg, rc_def);
-                     copy_operations[reg] = {op, def, 0, 1};
+                     copy_operations[reg] = {op, def, 0, op.size()};
                      reg_idx++;
                      continue;
                   }
@@ -932,7 +937,7 @@ void lower_to_hw_instr(Program* program)
                   for (unsigned j = 0; j < k; j++) {
                      Operand op = Operand(PhysReg{instr->operands[0].physReg() + (i*k+j)}, rc_op);
                      Definition def = Definition(PhysReg{instr->definitions[i].physReg() + j}, rc_def);
-                     copy_operations[def.physReg()] = {op, def, 0, 1};
+                     copy_operations[def.physReg()] = {op, def, 0, op.size()};
                   }
                }
                handle_operands(copy_operations, &ctx, program->chip_class, pi);
@@ -947,7 +952,7 @@ void lower_to_hw_instr(Program* program)
                   Operand operand = instr->operands[i];
                   if (operand.isConstant() || operand.size() == 1) {
                      assert(instr->definitions[i].size() == operand.size());
-                     copy_operations[instr->definitions[i].physReg()] = {operand, instr->definitions[i], 0, 1};
+                     copy_operations[instr->definitions[i].physReg()] = {operand, instr->definitions[i], 0, operand.size()};
                   } else {
                      RegClass def_rc = RegClass(instr->definitions[i].regClass().type(), 1);
                      RegClass op_rc = RegClass(operand.getTemp().type(), 1);
@@ -1019,7 +1024,7 @@ void lower_to_hw_instr(Program* program)
                   Operand operand = instr->operands[0];
                   if (operand.isConstant() || operand.size() == 1) {
                      assert(instr->definitions[0].size() == 1);
-                     copy_operations[instr->definitions[0].physReg()] = {operand, instr->definitions[0], 0, 1};
+                     copy_operations[instr->definitions[0].physReg()] = {operand, instr->definitions[0], 0, operand.size()};
                   } else {
                      for (unsigned i = 0; i < operand.size(); i++)
                      {