radv: Get max workgroup size without nir.
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Sat, 1 Jun 2019 18:25:47 +0000 (20:25 +0200)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Mon, 12 Aug 2019 21:00:24 +0000 (23:00 +0200)
Reviewed-by: Dave Airlie <airlied@redhat.com>
src/amd/vulkan/radv_nir_to_llvm.c
src/amd/vulkan/radv_shader.c
src/amd/vulkan/radv_shader.h

index 3f343cf6544f2b8c714bcdb1b4c2e57dc89d07ed..7c3e840104d9ce6e1a3d845be5e6f857ff3b35fe 100644 (file)
@@ -4258,23 +4258,8 @@ radv_nir_get_max_workgroup_size(enum chip_class chip_class,
                                gl_shader_stage stage,
                                const struct nir_shader *nir)
 {
-       switch (stage) {
-       case MESA_SHADER_TESS_CTRL:
-               return chip_class >= GFX7 ? 128 : 64;
-       case MESA_SHADER_GEOMETRY:
-               return chip_class >= GFX9 ? 128 : 64;
-       case MESA_SHADER_COMPUTE:
-               break;
-       default:
-               return 0;
-       }
-
-       if (!nir)
-               return chip_class >= GFX9 ? 128 : 64;
-       unsigned max_workgroup_size = nir->info.cs.local_size[0] *
-               nir->info.cs.local_size[1] *
-               nir->info.cs.local_size[2];
-       return max_workgroup_size;
+       const unsigned backup_sizes[] = {chip_class >= GFX9 ? 128 : 64, 1, 1};
+       return radv_get_max_workgroup_size(chip_class, stage, nir ? nir->info.cs.local_size : backup_sizes);
 }
 
 /* Fixup the HW not emitting the TCS regs if there are no HS threads. */
index 0f0703d66d158a59f04883ad02e7f4684eafcc95..d06abb3d648a422e3b9b3cc5b6b56f029b96b2a9 100644 (file)
@@ -1279,6 +1279,25 @@ radv_get_shader_name(struct radv_shader_variant_info *info,
        };
 }
 
+unsigned
+radv_get_max_workgroup_size(enum chip_class chip_class,
+                            gl_shader_stage stage,
+                            const unsigned *sizes)
+{
+       switch (stage) {
+       case MESA_SHADER_TESS_CTRL:
+               return chip_class >= GFX7 ? 128 : 64;
+       case MESA_SHADER_GEOMETRY:
+               return chip_class >= GFX9 ? 128 : 64;
+       case MESA_SHADER_COMPUTE:
+               break;
+       default:
+               return 0;
+       }
+
+       unsigned max_workgroup_size = sizes[0] * sizes[1] * sizes[2];
+       return max_workgroup_size;
+}
 
 unsigned
 radv_get_max_waves(struct radv_device *device,
@@ -1300,7 +1319,7 @@ radv_get_max_waves(struct radv_device *device,
                                     lds_increment);
        } else if (stage == MESA_SHADER_COMPUTE) {
                unsigned max_workgroup_size =
-                       radv_nir_get_max_workgroup_size(chip_class, stage, variant->nir);
+                       radv_get_max_workgroup_size(chip_class, stage, variant->info.cs.block_size);
                lds_per_wave = (conf->lds_size * lds_increment) /
                               DIV_ROUND_UP(max_workgroup_size, wave_size);
        }
@@ -1409,7 +1428,7 @@ radv_GetShaderInfoAMD(VkDevice _device,
                        statistics.numAvailableSgprs = statistics.numPhysicalSgprs;
 
                        if (stage == MESA_SHADER_COMPUTE) {
-                               unsigned *local_size = variant->nir->info.cs.local_size;
+                               unsigned *local_size = variant->info.cs.block_size;
                                unsigned workgroup_size = local_size[0] * local_size[1] * local_size[2];
 
                                statistics.numAvailableVgprs = statistics.numPhysicalVgprs /
index f93b1ec0fe2263c6aa155a95bd44bed41220cb1d..af097215f53c212489fd36ed000291da8c7c8099 100644 (file)
@@ -419,6 +419,11 @@ radv_get_max_waves(struct radv_device *device,
                    struct radv_shader_variant *variant,
                    gl_shader_stage stage);
 
+unsigned
+radv_get_max_workgroup_size(enum chip_class chip_class,
+                            gl_shader_stage stage,
+                            const unsigned *sizes);
+
 const char *
 radv_get_shader_name(struct radv_shader_variant_info *info,
                     gl_shader_stage stage);