aco: align subdword registers during RA when necessary
author Daniel Schürmann <daniel@schuermann.dev>
Wed, 25 Mar 2020 11:06:41 +0000 (12:06 +0100)
committer Daniel Schürmann <daniel@schuermann.dev>
Fri, 3 Apr 2020 22:13:15 +0000 (23:13 +0100)
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-By: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4002>

src/amd/compiler/aco_register_allocation.cpp

index 43273531d3c5b8573afcd25907689d1901132338..a7a44103d27cc537596ef6ac269cbd5affde2f7a 100644 (file)
@@ -285,6 +285,11 @@ void update_renames(ra_ctx& ctx, RegisterFile& reg_file,
    }
 }
 
+bool instr_can_access_subdword(aco_ptr<Instruction>& instr)
+{
+   return instr->isSDWA() || instr->format == Format::PSEUDO;
+}
+
 std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
                                         RegisterFile& reg_file,
                                         uint32_t lb, uint32_t ub,
@@ -784,6 +789,16 @@ PhysReg get_reg(ra_ctx& ctx,
          stride = 4;
    }
 
+   if (rc.is_subdword()) {
+      /* stride in bytes */
+      if(!instr_can_access_subdword(instr))
+         stride = 4;
+      else if (rc.bytes() % 4 == 0)
+         stride = 4;
+      else if (rc.bytes() % 2 == 0)
+         stride = 2;
+   }
+
    std::pair<PhysReg, bool> res = {{}, false};
    /* try to find space without live-range splits */
    if (rc.type() == RegType::vgpr && (size == 4 || size == 8))
@@ -977,6 +992,9 @@ bool get_reg_specified(ra_ctx& ctx,
       ub = ctx.program->max_reg_demand.sgpr;
    }
 
+   if (rc.is_subdword() && reg.byte() && !instr_can_access_subdword(instr))
+      return false;
+
    uint32_t reg_lo = reg.reg();
    uint32_t reg_hi = reg + (size - 1);
 
@@ -1051,6 +1069,9 @@ void handle_pseudo(ra_ctx& ctx,
 
 bool operand_can_use_reg(aco_ptr<Instruction>& instr, unsigned idx, PhysReg reg)
 {
+   if (!instr_can_access_subdword(instr) && reg.byte())
+      return false;
+
    switch (instr->format) {
    case Format::SMEM:
       return reg != scc &&
@@ -1790,6 +1811,9 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
                   PhysReg reg = res.first;
                   if (res.second) {
                      reg.reg_b += byte_offset;
+                     /* make sure to only use byte offset if the instruction supports it */
+                     if (vec->definitions[0].regClass().is_subdword() && reg.byte() && !instr_can_access_subdword(instr))
+                        reg = get_reg(ctx, register_file, definition.regClass(), parallelcopy, instr);
                   } else {
                      reg = get_reg(ctx, register_file, definition.regClass(), parallelcopy, instr);
                   }