aco: Fix integer overflows when emitting parallel copies during RA
authorTony Wasserka <tony.wasserka@gmx.de>
Wed, 2 Sep 2020 16:28:36 +0000 (18:28 +0200)
committerMarge Bot <eric+marge@anholt.net>
Thu, 3 Sep 2020 20:20:24 +0000 (20:20 +0000)
32-bit shifts were accidentally used before this change despite the intended
output being 64 bits.

This was observed when compiling Dolphin's ubershaders.

Cc: mesa-stable
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6568>

src/amd/compiler/aco_register_allocation.cpp

index ca7a0fa4815745a3db6906f60bf0fcbcd013183f..d5746e5a6369a7520874fcc54a55d817b63f3985 100644 (file)
@@ -2235,11 +2235,11 @@ void register_allocation(Program *program, std::vector<TempSet>& live_out_per_bl
                   if (!sgpr_operands_alias_defs) {
                      unsigned reg = parallelcopy[i].first.physReg().reg();
                      unsigned size = parallelcopy[i].first.getTemp().size();
                   if (!sgpr_operands_alias_defs) {
                      unsigned reg = parallelcopy[i].first.physReg().reg();
                      unsigned size = parallelcopy[i].first.getTemp().size();
-                     sgpr_operands[reg / 64u] |= ((1u << size) - 1) << (reg % 64u);
+                     sgpr_operands[reg / 64u] |= u_bit_consecutive64(reg % 64u, size);
 
                      reg = parallelcopy[i].second.physReg().reg();
                      size = parallelcopy[i].second.getTemp().size();
 
                      reg = parallelcopy[i].second.physReg().reg();
                      size = parallelcopy[i].second.getTemp().size();
-                     if (sgpr_operands[reg / 64u] & ((1u << size) - 1) << (reg % 64u))
+                     if (sgpr_operands[reg / 64u] & u_bit_consecutive64(reg % 64u, size))
                         sgpr_operands_alias_defs = true;
                   }
                }
                         sgpr_operands_alias_defs = true;
                   }
                }