aco: fix max_waves_per_simd on Polaris, VegaM and GFX10.3

author Rhys Perry <pendingchaos02@gmail.com>
Thu, 18 Jun 2020 13:30:51 +0000 (14:30 +0100)
committer Rhys Perry <pendingchaos02@gmail.com>
Tue, 4 Aug 2020 19:39:33 +0000 (20:39 +0100)
diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp

index 0378dbaf335e87b6ded33892457fe1b0c6e2d7d2..08f2d68c80a8b8d1fe58471d492e2c856b773c62 100644 (file)
--- a/src/amd/compiler/aco_live_var_analysis.cpp
+++ b/src/amd/compiler/aco_live_var_analysis.cpp
@@ -337,6 +337,8 @@ void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand)
  {
     /* TODO: max_waves_per_simd, simd_per_cu and the number of physical vgprs for Navi */
     unsigned max_waves_per_simd = 10;
+   if ((program->family >= CHIP_POLARIS10 && program->family <= CHIP_VEGAM) || program->chip_class >= GFX10_3)
+      max_waves_per_simd = 8;
     unsigned simd_per_cu = 4;
  
     bool wgp = program->chip_class >= GFX10; /* assume WGP is used on Navi */
diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp

index 102f0bf3ee6ce7ffe2dc4d7f49c43bfa75e098bd..40941e4c539c6bae159bdc4c7e47b52df6756134 100644 (file)
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -882,6 +882,11 @@ void schedule_block(sched_ctx& ctx, Program *program, Block* block, live& live_v
  
  void schedule_program(Program *program, live& live_vars)
  {
+   /* don't use program->max_reg_demand because that is affected by max_waves_per_simd */
+   RegisterDemand demand;
+   for (Block& block : program->blocks)
+      demand.update(block.register_demand);
+
     sched_ctx ctx;
     ctx.mv.depends_on.resize(program->peekAllocationId());
     ctx.mv.RAR_dependencies.resize(program->peekAllocationId());
@@ -891,15 +896,14 @@ void schedule_program(Program *program, live& live_vars)
      * seem to hurt anything else. */
     if (program->num_waves <= 5)
        ctx.num_waves = program->num_waves;
-   else if (program->max_reg_demand.vgpr >= 32)
+   else if (demand.vgpr >= 29)
        ctx.num_waves = 5;
-   else if (program->max_reg_demand.vgpr >= 28)
+   else if (demand.vgpr >= 25)
        ctx.num_waves = 6;
-   else if (program->max_reg_demand.vgpr >= 24)
-      ctx.num_waves = 7;
     else
-      ctx.num_waves = 8;
+      ctx.num_waves = 7;
     ctx.num_waves = std::max<uint16_t>(ctx.num_waves, program->min_waves);
+   ctx.num_waves = std::min<uint16_t>(ctx.num_waves, program->max_waves);
  
     assert(ctx.num_waves > 0 && ctx.num_waves <= program->num_waves);
     ctx.mv.max_registers = { int16_t(get_addr_vgpr_from_waves(program, ctx.num_waves) - 2),
author	Rhys Perry <pendingchaos02@gmail.com>
	Thu, 18 Jun 2020 13:30:51 +0000 (14:30 +0100)
committer	Rhys Perry <pendingchaos02@gmail.com>
	Tue, 4 Aug 2020 19:39:33 +0000 (20:39 +0100)
src/amd/compiler/aco_live_var_analysis.cpp		patch | blob | history
src/amd/compiler/aco_scheduler.cpp		patch | blob | history