aco: don't create vector affinities for operands which are not killed or are duplicates
[mesa.git] / src / amd / compiler / aco_register_allocation.cpp
index 0857084a486e8817a9faf1deea5b004e2b229ef0..a3e01b11c68359ea6b78a3b2f40edeaa968a68c8 100644 (file)
@@ -1410,7 +1410,7 @@ void register_allocation(Program *program, std::vector<TempSet>& live_out_per_bl
          /* add vector affinities */
          if (instr->opcode == aco_opcode::p_create_vector) {
             for (const Operand& op : instr->operands) {
-               if (op.isTemp() && op.getTemp().type() == instr->definitions[0].getTemp().type())
+               if (op.isTemp() && op.isFirstKill() && op.getTemp().type() == instr->definitions[0].getTemp().type())
                   ctx.vectors[op.tempId()] = instr.get();
             }
          }
@@ -1839,8 +1839,17 @@ void register_allocation(Program *program, std::vector<TempSet>& live_out_per_bl
                definition.setFixed(reg);
             }
 
-            if (!definition.isFixed())
-               definition.setFixed(get_reg(ctx, register_file, definition.getTemp(), parallelcopy, instr));
+            if (!definition.isFixed()) {
+               Temp tmp = definition.getTemp();
+               /* subdword instructions before RDNA write full registers */
+               if (tmp.regClass().is_subdword() &&
+                   !instr_can_access_subdword(instr) &&
+                   ctx.program->chip_class <= GFX9) {
+                  assert(tmp.bytes() <= 4);
+                  tmp = Temp(definition.tempId(), v1);
+               }
+               definition.setFixed(get_reg(ctx, register_file, tmp, parallelcopy, instr));
+            }
 
             assert(definition.isFixed() && ((definition.getTemp().type() == RegType::vgpr && definition.physReg() >= 256) ||
                                             (definition.getTemp().type() != RegType::vgpr && definition.physReg() < 256)));