ac: import ac_get_compute_resource_limits() from RadeonSI

author Samuel Pitoiset <samuel.pitoiset@gmail.com>

Fri, 12 Jul 2019 10:17:11 +0000 (12:17 +0200)

committer Samuel Pitoiset <samuel.pitoiset@gmail.com>

Fri, 12 Jul 2019 15:47:11 +0000 (17:47 +0200)
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 12 Jul 2019 10:17:11 +0000 (12:17 +0200)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 12 Jul 2019 15:47:11 +0000 (17:47 +0200)
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c

index 596a9ebe5088b5e4a88b3a0f44d2e503bd54edf9..a501d840b25986aa4d3b251520ea855e6bc6003f 100644 (file)
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -895,3 +895,35 @@ ac_get_harvested_configs(struct radeon_info *info,
                 }
         }
  }
+
+unsigned ac_get_compute_resource_limits(struct radeon_info *info,
+                                       unsigned waves_per_threadgroup,
+                                       unsigned max_waves_per_sh,
+                                       unsigned threadgroups_per_cu)
+{
+       unsigned compute_resource_limits =
+               S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
+
+       if (info->chip_class >= GFX7) {
+               unsigned num_cu_per_se = info->num_good_compute_units /
+                                        info->max_se;
+
+               /* Force even distribution on all SIMDs in CU if the workgroup
+                * size is 64. This has shown some good improvements if # of CUs
+                * per SE is not a multiple of 4.
+                */
+               if (num_cu_per_se % 4 && waves_per_threadgroup == 1)
+                       compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1);
+
+               assert(threadgroups_per_cu >= 1 && threadgroups_per_cu <= 8);
+               compute_resource_limits |= S_00B854_WAVES_PER_SH(max_waves_per_sh) |
+                                          S_00B854_CU_GROUP_COUNT(threadgroups_per_cu - 1);
+       } else {
+               /* GFX6 */
+               if (max_waves_per_sh) {
+                       unsigned limit_div16 = DIV_ROUND_UP(max_waves_per_sh, 16);
+                       compute_resource_limits |= S_00B854_WAVES_PER_SH_SI(limit_div16);
+               }
+       }
+       return compute_resource_limits;
+}
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h

index c42548f83524afb0fef01e4514a8da98da00057a..3ec3e44d665c9c2bf7f8f50709bddbb05352acda 100644 (file)
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -167,6 +167,10 @@ void ac_get_harvested_configs(struct radeon_info *info,
                               unsigned raster_config,
                               unsigned *cik_raster_config_1_p,
                               unsigned *raster_config_se);
+unsigned ac_get_compute_resource_limits(struct radeon_info *info,
+                                       unsigned waves_per_threadgroup,
+                                       unsigned max_waves_per_sh,
+                                       unsigned threadgroups_per_cu);
  
  static inline unsigned ac_get_max_simd_waves(enum radeon_family family)
  {
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c

index 0989181aba46fce0132c69292e78aceedd38a656..07b1293049f72bf11ea8e2223d33e293ce9eb1c2 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -772,38 +772,6 @@ static void si_setup_tgsi_user_data(struct si_context *sctx,
         }
  }
  
-unsigned si_get_compute_resource_limits(struct si_screen *sscreen,
-                                       unsigned waves_per_threadgroup,
-                                       unsigned max_waves_per_sh,
-                                       unsigned threadgroups_per_cu)
-{
-       unsigned compute_resource_limits =
-               S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
-
-       if (sscreen->info.chip_class >= GFX7) {
-               unsigned num_cu_per_se = sscreen->info.num_good_compute_units /
-                                        sscreen->info.max_se;
-
-               /* Force even distribution on all SIMDs in CU if the workgroup
-                * size is 64. This has shown some good improvements if # of CUs
-                * per SE is not a multiple of 4.
-                */
-               if (num_cu_per_se % 4 && waves_per_threadgroup == 1)
-                       compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1);
-
-               assert(threadgroups_per_cu >= 1 && threadgroups_per_cu <= 8);
-               compute_resource_limits |= S_00B854_WAVES_PER_SH(max_waves_per_sh) |
-                                          S_00B854_CU_GROUP_COUNT(threadgroups_per_cu - 1);
-       } else {
-               /* GFX6 */
-               if (max_waves_per_sh) {
-                       unsigned limit_div16 = DIV_ROUND_UP(max_waves_per_sh, 16);
-                       compute_resource_limits |= S_00B854_WAVES_PER_SH_SI(limit_div16);
-               }
-       }
-       return compute_resource_limits;
-}
-
  static void si_emit_dispatch_packets(struct si_context *sctx,
                                       const struct pipe_grid_info *info)
  {
@@ -820,7 +788,8 @@ static void si_emit_dispatch_packets(struct si_context *sctx,
                 threadgroups_per_cu = 2;
  
         radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
-                         si_get_compute_resource_limits(sscreen, waves_per_threadgroup,
+                         ac_get_compute_resource_limits(&sscreen->info,
+                                                        waves_per_threadgroup,
                                                          sctx->cs_max_waves_per_sh,
                                                          threadgroups_per_cu));
  
diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c

index e16c0791a279826f357116eab9eaedf2ad018ffd..ad33c8de1c5cf51d9184c2276f36091933713413 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
+++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
@@ -1426,8 +1426,10 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
                                 S_00B84C_LDS_SIZE(shader->config.lds_size));
  
                 radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
-                       si_get_compute_resource_limits(sctx->screen, WAVES_PER_TG,
-                                                      MAX_WAVES_PER_SH, THREADGROUPS_PER_CU));
+                       ac_get_compute_resource_limits(&sctx->screen->info,
+                                                      WAVES_PER_TG,
+                                                      MAX_WAVES_PER_SH,
+                                                      THREADGROUPS_PER_CU));
                 sctx->compute_ib_last_shader = shader;
         }
  
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h

index cd8fb5d5df41a36142a32dcc96b2714f42a9581d..96a7fa4ebf27bbec87a9a6c9e8c85a53e0a409bd 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1396,10 +1396,6 @@ unsigned si_end_counter(struct si_screen *sscreen, unsigned type,
  
  /* si_compute.c */
  void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf *cs);
-unsigned si_get_compute_resource_limits(struct si_screen *sscreen,
-                                       unsigned waves_per_threadgroup,
-                                       unsigned max_waves_per_sh,
-                                       unsigned threadgroups_per_cu);
  void si_init_compute_functions(struct si_context *sctx);
  
  /* si_compute_prim_discard.c */
author	Samuel Pitoiset <samuel.pitoiset@gmail.com>
	Fri, 12 Jul 2019 10:17:11 +0000 (12:17 +0200)
committer	Samuel Pitoiset <samuel.pitoiset@gmail.com>
	Fri, 12 Jul 2019 15:47:11 +0000 (17:47 +0200)
src/amd/common/ac_gpu_info.c		patch | blob | history
src/amd/common/ac_gpu_info.h		patch | blob | history
src/gallium/drivers/radeonsi/si_compute.c		patch | blob | history
src/gallium/drivers/radeonsi/si_compute_prim_discard.c		patch | blob | history
src/gallium/drivers/radeonsi/si_pipe.h		patch | blob | history