radv/gfx10: add Wave32 support for vertex, tessellation and geometry shaders

author Samuel Pitoiset <samuel.pitoiset@gmail.com>

Thu, 1 Aug 2019 08:43:42 +0000 (10:43 +0200)

committer Samuel Pitoiset <samuel.pitoiset@gmail.com>

Fri, 2 Aug 2019 07:37:36 +0000 (09:37 +0200)
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 1 Aug 2019 08:43:42 +0000 (10:43 +0200)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 2 Aug 2019 07:37:36 +0000 (09:37 +0200)
diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h

index 65dbec6e90d43527825e6d176a4a79d9e029db1b..ef5b331d188447da9499e0b497f58505b12faf13 100644 (file)
--- a/src/amd/vulkan/radv_debug.h
+++ b/src/amd/vulkan/radv_debug.h
@@ -66,6 +66,7 @@ enum {
         RADV_PERFTEST_TC_COMPAT_CMASK = 0x80,
         RADV_PERFTEST_CS_WAVE_32     = 0x100,
         RADV_PERFTEST_PS_WAVE_32     = 0x200,
+       RADV_PERFTEST_GE_WAVE_32     = 0x400,
  };
  
  bool
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c

index b66b15edf7323c93ef04b6fa0cb84ad1808e31f9..fc961040b6e047c6661cc7a5a6df4717f6745b8d 100644 (file)
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -386,6 +386,7 @@ radv_physical_device_init(struct radv_physical_device *device,
         /* Determine the number of threads per wave for all stages. */
         device->cs_wave_size = 64;
         device->ps_wave_size = 64;
+       device->ge_wave_size = 64;
  
         if (device->rad_info.chip_class >= GFX10) {
                 if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
@@ -394,6 +395,9 @@ radv_physical_device_init(struct radv_physical_device *device,
                 /* For pixel shaders, wave64 is recommended. */
                 if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
                         device->ps_wave_size = 32;
+
+               if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
+                       device->ge_wave_size = 32;
         }
  
         radv_physical_device_init_mem_types(device);
@@ -509,6 +513,7 @@ static const struct debug_control radv_perftest_options[] = {
         {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
         {"cswave32", RADV_PERFTEST_CS_WAVE_32},
         {"pswave32", RADV_PERFTEST_PS_WAVE_32},
+       {"gewave32", RADV_PERFTEST_GE_WAVE_32},
         {NULL, 0}
  };
  
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c

index bba5849b152cc5de915d055e59f4f6c68305ee92..91251aa69bdba79ba819515053640d1fe83e8a64 100644 (file)
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -295,7 +295,7 @@ get_tcs_num_patches(struct radv_shader_context *ctx)
  
         /* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */
         if (ctx->options->chip_class == GFX6) {
-               unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
+               unsigned one_wave = ctx->options->ge_wave_size / MAX2(num_tcs_input_cp, num_tcs_output_cp);
                 num_patches = MIN2(num_patches, one_wave);
         }
         return num_patches;
@@ -3038,7 +3038,8 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
                 LLVMValueRef wave_idx = ac_unpack_param(&ctx->ac, ctx->merged_wave_info, 24, 4);
                 vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx,
                                          LLVMBuildMul(ctx->ac.builder, wave_idx,
-                                                     LLVMConstInt(ctx->ac.i32, 64, false), ""), "");
+                                                     LLVMConstInt(ctx->ac.i32,
+                                                                  ctx->ac.wave_size, false), ""), "");
                 lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx,
                                         LLVMConstInt(ctx->ac.i32, itemsize_dw, 0), "");
         }
@@ -3140,7 +3141,7 @@ static LLVMValueRef get_thread_id_in_tg(struct radv_shader_context *ctx)
         LLVMBuilderRef builder = ctx->ac.builder;
         LLVMValueRef tmp;
         tmp = LLVMBuildMul(builder, get_wave_id_in_tg(ctx),
-                          LLVMConstInt(ctx->ac.i32, 64, false), "");
+                          LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, false), "");
         return LLVMBuildAdd(builder, tmp, ac_get_thread_id(&ctx->ac), "");
  }
  
@@ -4190,7 +4191,7 @@ ac_setup_rings(struct radv_shader_context *ctx)
                  */
                 LLVMTypeRef v2i64 = LLVMVectorType(ctx->ac.i64, 2);
                 uint64_t stream_offset = 0;
-               unsigned num_records = 64;
+               unsigned num_records = ctx->ac.wave_size;
                 LLVMValueRef base_ring;
  
                 base_ring =
@@ -4223,7 +4224,7 @@ ac_setup_rings(struct radv_shader_context *ctx)
                         ring = LLVMBuildInsertElement(ctx->ac.builder,
                                                       ring, tmp, ctx->ac.i32_0, "");
  
-                       stream_offset += stride * 64;
+                       stream_offset += stride * ctx->ac.wave_size;
  
                         ring = LLVMBuildBitCast(ctx->ac.builder, ring,
                                                 ctx->ac.v4i32, "");
@@ -4325,7 +4326,7 @@ radv_nir_shader_wave_size(struct nir_shader *const *shaders, int shader_count,
                 return options->cs_wave_size;
         else if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT)
                 return options->ps_wave_size;
-       return 64;
+       return options->ge_wave_size;
  }
  
  static
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c

index dbfe261c98278e966e5669def1d8fd0a97596924..d722d558a434b4fca4045156e8c258c3e4f2bf91 100644 (file)
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1782,7 +1782,7 @@ calculate_ngg_info(const VkGraphicsPipelineCreateInfo *pCreateInfo,
  
         /* Round up towards full wave sizes for better ALU utilization. */
         if (!max_vert_out_per_gs_instance) {
-               const unsigned wavesize = 64;
+               const unsigned wavesize = pipeline->device->physical_device->ge_wave_size;
                 unsigned orig_max_esverts;
                 unsigned orig_max_gsprims;
                 do {
@@ -4125,6 +4125,14 @@ radv_compute_vgt_shader_stages_en(const struct radv_pipeline *pipeline)
         if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
                 stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
  
+       if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 &&
+           pipeline->device->physical_device->ge_wave_size == 32) {
+               /* legacy GS only supports Wave64 */
+               stages |= S_028B54_HS_W32_EN(1) |
+                         S_028B54_GS_W32_EN(radv_pipeline_has_ngg(pipeline)) |
+                         S_028B54_VS_W32_EN(1);
+       }
+
         return stages;
  }
  
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h

index a1347060190021a2988a0934f609d53921b26a4a..1a0b22d63b85ab822ba57761f8ba6e88cc4930c5 100644 (file)
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -304,6 +304,7 @@ struct radv_physical_device {
         /* Number of threads per wave. */
         uint8_t ps_wave_size;
         uint8_t cs_wave_size;
+       uint8_t ge_wave_size;
  
         /* This is the drivers on-disk cache used as a fallback as opposed to
          * the pipeline cache defined by apps.
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c

index 48ed86c99b1f425ac3697b0cd4f06e73e4cfae40..97fa80b348c19f6582d558e26f9381650f605c09 100644 (file)
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -675,7 +675,7 @@ radv_get_shader_wave_size(const struct radv_physical_device *pdevice,
                 return pdevice->cs_wave_size;
         else if (stage == MESA_SHADER_FRAGMENT)
                 return pdevice->ps_wave_size;
-       return 64;
+       return pdevice->ge_wave_size;
  }
  
  static void radv_postprocess_config(const struct radv_physical_device *pdevice,
@@ -1144,6 +1144,7 @@ shader_variant_compile(struct radv_device *device,
         options->address32_hi = device->physical_device->rad_info.address32_hi;
         options->cs_wave_size = device->physical_device->cs_wave_size;
         options->ps_wave_size = device->physical_device->ps_wave_size;
+       options->ge_wave_size = device->physical_device->ge_wave_size;
  
         if (options->supports_spill)
                 tm_options |= AC_TM_SUPPORTS_SPILL;
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h

index 0ef49628b5d38a7003ade088a1e6294c80b902bf..0ab7db20181498ae79f5bf547a7a3fd0fe283018 100644 (file)
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -130,6 +130,7 @@ struct radv_nir_compiler_options {
         uint32_t address32_hi;
         uint8_t cs_wave_size;
         uint8_t ps_wave_size;
+       uint8_t ge_wave_size;
  };
  
  enum radv_ud_index {
author	Samuel Pitoiset <samuel.pitoiset@gmail.com>
	Thu, 1 Aug 2019 08:43:42 +0000 (10:43 +0200)
committer	Samuel Pitoiset <samuel.pitoiset@gmail.com>
	Fri, 2 Aug 2019 07:37:36 +0000 (09:37 +0200)
src/amd/vulkan/radv_debug.h		patch \| blob \| history
src/amd/vulkan/radv_device.c		patch \| blob \| history
src/amd/vulkan/radv_nir_to_llvm.c		patch \| blob \| history
src/amd/vulkan/radv_pipeline.c		patch \| blob \| history
src/amd/vulkan/radv_private.h		patch \| blob \| history
src/amd/vulkan/radv_shader.c		patch \| blob \| history
src/amd/vulkan/radv_shader.h		patch \| blob \| history