aco: fix edge check with sub-dword temporaries
author: Rhys Perry <pendingchaos02@gmail.com>
Thu, 18 Jun 2020 12:30:50 +0000 (13:30 +0100)
committer: Marge Bot <eric+marge@anholt.net>
Thu, 18 Jun 2020 17:29:33 +0000 (17:29 +0000)
Fixes RA failure for a parallel-rdp shader on pitcairn.

fossil-db (Navi):
Totals from 2 (0.00% of 128733) affected shaders:
CodeSize: 203656 -> 205724 (+1.02%)
Instrs: 32267 -> 32529 (+0.81%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5544>

src/amd/compiler/aco_ir.h
src/amd/compiler/aco_register_allocation.cpp

index 3db6b4b6d4385922858ceffc3c9ea1495ef1cbca..a165a1681adf8fd440af7719f9756a81ac23ae45 100644 (file)
@@ -307,7 +307,7 @@ struct PhysReg {
    constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; }
    constexpr bool operator!=(PhysReg other) const { return reg_b != other.reg_b; }
    constexpr bool operator <(PhysReg other) const { return reg_b < other.reg_b; }
-   constexpr PhysReg advance(unsigned bytes) const { PhysReg res = *this; res.reg_b += bytes; return res; }
+   constexpr PhysReg advance(int bytes) const { PhysReg res = *this; res.reg_b += bytes; return res; }
 
    uint16_t reg_b = 0;
 };
index 985aae4cafdaf91e8e2e9e679692ff79906584d7..096fe80f6ea87977e319dc697f0e822efbf18600 100644 (file)
@@ -214,6 +214,10 @@ public:
       clear(def.physReg(), def.regClass());
    }
 
+   unsigned get_id(PhysReg reg) {
+      return regs[reg] == 0xF0000000 ? subdword_regs[reg][reg.byte()] : regs[reg];
+   }
+
 private:
    void fill(PhysReg start, unsigned size, uint32_t val) {
       for (unsigned i = 0; i < size; i++)
@@ -942,9 +946,9 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx,
    unsigned reg_hi = lb + size - 1;
    for (reg_lo = lb, reg_hi = lb + size - 1; reg_hi < ub; reg_lo += stride, reg_hi += stride) {
       /* first check the edges: this is what we have to fix to allow for num_moves > size */
-      if (reg_lo > lb && reg_file[reg_lo] != 0 && reg_file[reg_lo] == reg_file[reg_lo - 1])
+      if (reg_lo > lb && reg_file[reg_lo] != 0 && reg_file.get_id(PhysReg(reg_lo)) == reg_file.get_id(PhysReg(reg_lo).advance(-1)))
          continue;
-      if (reg_hi < ub - 1 && reg_file[reg_hi] != 0 && reg_file[reg_hi] == reg_file[reg_hi + 1])
+      if (reg_hi < ub - 1 && reg_file[reg_hi] != 0 && reg_file.get_id(PhysReg(reg_hi).advance(3)) == reg_file.get_id(PhysReg(reg_hi).advance(4)))
          continue;
 
       /* second, check that we have at most k=num_moves elements in the window
@@ -1300,9 +1304,9 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
       // TODO: this can be improved */
       if (reg_lo < lb || reg_hi >= ub || reg_lo % stride != 0)
          continue;
-      if (reg_lo > lb && reg_file[reg_lo] != 0 && reg_file[reg_lo] == reg_file[reg_lo - 1])
+      if (reg_lo > lb && reg_file[reg_lo] != 0 && reg_file.get_id(PhysReg(reg_lo)) == reg_file.get_id(PhysReg(reg_lo).advance(-1)))
          continue;
-      if (reg_hi < ub - 1 && reg_file[reg_hi] != 0 && reg_file[reg_hi] == reg_file[reg_hi + 1])
+      if (reg_hi < ub - 1 && reg_file[reg_hi] != 0 && reg_file.get_id(PhysReg(reg_hi).advance(3)) == reg_file.get_id(PhysReg(reg_hi).advance(4)))
          continue;
 
       /* count variables to be moved and check war_hint */