radeonsi: always use Wave64 for HS/GS/VS shader stages (except GS fast launch)
author Marek Olšák <marek.olsak@amd.com>
Tue, 16 Jun 2020 18:53:03 +0000 (14:53 -0400)
committer Marge Bot <eric+marge@anholt.net>
Tue, 30 Jun 2020 10:56:41 +0000 (10:56 +0000)
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5524>

src/gallium/drivers/radeonsi/si_pipe.c

index 7364277c96253d3a567f683d0bc030aab7fa5157..27b3e9200a51f32674242c53cd75eeab2b12b21f 100644 (file)
@@ -1215,11 +1215,15 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
    sscreen->compute_wave_size = 64;
 
    if (sscreen->info.chip_class >= GFX10) {
-      /* Pixels shaders: Wave64 is recommended.
-       * Compute shaders: There are piglit failures with Wave32.
+      /* Pixel shaders: Wave64 is always fastest.
+       * Vertex shaders: Wave64 is probably better, because:
+       * - greater chance of L0 cache hits, because more threads are assigned
+       *   to the same CU
+       * - scalar instructions are only executed once for 64 threads instead of twice
+       * - VGPR allocation granularity is half of Wave32, so 1 Wave64 can
+       *   sometimes use fewer VGPRs than 2 Wave32
+       * - TessMark X64 with NGG culling is faster with Wave64
        */
-      sscreen->ge_wave_size = 32;
-
       if (sscreen->debug_flags & DBG(W32_GE))
          sscreen->ge_wave_size = 32;
       if (sscreen->debug_flags & DBG(W32_PS))