aco: prefer 4-byte aligned definitions

author Rhys Perry <pendingchaos02@gmail.com>

Tue, 12 May 2020 14:08:05 +0000 (15:08 +0100)

committer Marge Bot <eric+marge@anholt.net>

Wed, 10 Jun 2020 15:05:11 +0000 (15:05 +0000)
author Rhys Perry <pendingchaos02@gmail.com>
Tue, 12 May 2020 14:08:05 +0000 (15:08 +0100)
committer Marge Bot <eric+marge@anholt.net>
Wed, 10 Jun 2020 15:05:11 +0000 (15:05 +0000)
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp

index 6a1e2b78c56072525d24f4b14abb6efd1b4c16d0..a198c9312c4a61656222f09c99a51974c93e5ada 100644 (file)
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -567,38 +567,9 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
     uint32_t lb = info.lb;
     uint32_t ub = info.ub;
     uint32_t size = info.size;
-   uint32_t stride = info.stride;
+   uint32_t stride = info.rc.is_subdword() ? DIV_ROUND_UP(info.stride, 4) : info.stride;
     RegClass rc = info.rc;
  
-   if (rc.is_subdword()) {
-      for (std::pair<uint32_t, std::array<uint32_t, 4>> entry : reg_file.subdword_regs) {
-         assert(reg_file[entry.first] == 0xF0000000);
-         if (lb > entry.first || entry.first >= ub)
-            continue;
-
-         for (unsigned i = 0; i < 4; i+= stride) {
-            if (entry.second[i] != 0)
-               continue;
-
-            bool reg_found = true;
-            for (unsigned j = 1; reg_found && i + j < 4 && j < rc.bytes(); j++)
-               reg_found &= entry.second[i + j] == 0;
-
-            /* check neighboring reg if needed */
-            reg_found &= ((int)i <= 4 - (int)rc.bytes() || reg_file[entry.first + 1] == 0);
-            if (reg_found) {
-               PhysReg res{entry.first};
-               res.reg_b += i;
-               adjust_max_used_regs(ctx, rc, entry.first);
-               return {res, true};
-            }
-         }
-      }
-
-      stride = 1; /* stride in full registers */
-      rc = info.rc = RegClass(RegType::vgpr, size);
-   }
-
     if (stride == 1) {
  
        for (unsigned stride = 8; stride > 1; stride /= 2) {
@@ -689,6 +660,35 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
        reg_lo += stride;
     }
  
+   /* do this late because using the upper bytes of a register can require
+    * larger instruction encodings or copies
+    * TODO: don't do this in situations where it doesn't benefit */
+   if (rc.is_subdword()) {
+      for (std::pair<uint32_t, std::array<uint32_t, 4>> entry : reg_file.subdword_regs) {
+         assert(reg_file[entry.first] == 0xF0000000);
+         if (lb > entry.first || entry.first >= ub)
+            continue;
+
+         for (unsigned i = 0; i < 4; i+= info.stride) {
+            if (entry.second[i] != 0)
+               continue;
+
+            bool reg_found = true;
+            for (unsigned j = 1; reg_found && i + j < 4 && j < rc.bytes(); j++)
+               reg_found &= entry.second[i + j] == 0;
+
+            /* check neighboring reg if needed */
+            reg_found &= ((int)i <= 4 - (int)rc.bytes() || reg_file[entry.first + 1] == 0);
+            if (reg_found) {
+               PhysReg res{entry.first};
+               res.reg_b += i;
+               adjust_max_used_regs(ctx, rc, entry.first);
+               return {res, true};
+            }
+         }
+      }
+   }
+
     return {{}, false};
  }
author	Rhys Perry <pendingchaos02@gmail.com>
	Tue, 12 May 2020 14:08:05 +0000 (15:08 +0100)
committer	Marge Bot <eric+marge@anholt.net>
	Wed, 10 Jun 2020 15:05:11 +0000 (15:05 +0000)