ac: align num_vgprs for gfx10.3
author: Marek Olšák <marek.olsak@amd.com>
Wed, 29 Jan 2020 04:35:49 +0000 (23:35 -0500)
committer: Marge Bot <eric+marge@anholt.net>
Tue, 9 Jun 2020 16:17:36 +0000 (16:17 +0000)
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5383>

src/amd/common/ac_binary.c
src/amd/common/ac_binary.h
src/amd/common/ac_rtld.c
src/amd/common/ac_rtld.h
src/amd/vulkan/radv_shader.c
src/gallium/drivers/radeonsi/si_shader_llvm.c

index 8761422bd6b63d4a3b2b9bc55f9ee5a132015c0d..4651c064abd5731f807f8af08ecf243226790da6 100644 (file)
@@ -21,6 +21,7 @@
  * SOFTWARE.
  */
 
+#include "ac_gpu_info.h"
 #include "ac_binary.h"
 
 #include "util/u_math.h"
@@ -39,6 +40,7 @@
 void ac_parse_shader_binary_config(const char *data, size_t nbytes,
                                   unsigned wave_size,
                                   bool really_needs_scratch,
+                                  const struct radeon_info *info,
                                   struct ac_shader_config *conf)
 {
        uint32_t scratch_size = 0;
@@ -127,6 +129,16 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes,
                conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(scratch_size) * 256 * 4;
        }
 
+       /* GFX 10.3 internally:
+        * - aligns VGPRS to 16 for Wave32 and 8 for Wave64
+        * - aligns LDS to 1024
+        *
+        * For shader-db stats, set num_vgprs that the hw actually uses.
+        */
+       if (info->chip_class >= GFX10_3) {
+               conf->num_vgprs = align(conf->num_vgprs, wave_size == 32 ? 16 : 8);
+       }
+
        /* Enable 64-bit and 16-bit denormals, because there is no performance
         * cost.
         *
index d7dbb2db32e60f0b4d313a829c1eb8085216234e..0d981423696a061e37935e816d0e4f378e4cf94f 100644 (file)
@@ -32,6 +32,8 @@
 extern "C" {
 #endif
 
+struct radeon_info;
+
 struct ac_shader_config {
        unsigned num_sgprs;
        unsigned num_vgprs;
@@ -51,6 +53,7 @@ struct ac_shader_config {
 void ac_parse_shader_binary_config(const char *data, size_t nbytes,
                                   unsigned wave_size,
                                   bool really_needs_scratch,
+                                  const struct radeon_info *info,
                                   struct ac_shader_config *conf);
 
 #ifdef __cplusplus
index e1552ec30826ce12887b548b9d034fa24ab281ec..e512b8f73275686b228297602b94c6b876667443 100644 (file)
@@ -514,7 +514,8 @@ bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name
        return get_section_by_name(&binary->parts[0], name, data, nbytes);
 }
 
-bool ac_rtld_read_config(struct ac_rtld_binary *binary,
+bool ac_rtld_read_config(const struct radeon_info *info,
+                        struct ac_rtld_binary *binary,
                         struct ac_shader_config *config)
 {
        for (unsigned i = 0; i < binary->num_parts; ++i) {
@@ -529,7 +530,7 @@ bool ac_rtld_read_config(struct ac_rtld_binary *binary,
                /* TODO: be precise about scratch use? */
                struct ac_shader_config c = {};
                ac_parse_shader_binary_config(config_data, config_nbytes,
-                                             binary->wave_size, true, &c);
+                                             binary->wave_size, true, info, &c);
 
                config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
                config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
index a4343d9b3ef50a95d23a06f92efef204a149030a..2470a5243f1ea4b5bd077d2ffede7d28fc0b1e89 100644 (file)
@@ -116,7 +116,8 @@ void ac_rtld_close(struct ac_rtld_binary *binary);
 bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name,
                                 const char **data, size_t *nbytes);
 
-bool ac_rtld_read_config(struct ac_rtld_binary *binary,
+bool ac_rtld_read_config(const struct radeon_info *info,
+                        struct ac_rtld_binary *binary,
                         struct ac_shader_config *config);
 
 struct ac_rtld_upload_info {
index dbce0d75aeebedb59dbcf1ce18aca0b85bbd8ab9..a6fb13a60e7ac119f33aec5ebbba3c4265edbb84 100644 (file)
@@ -982,7 +982,8 @@ radv_shader_variant_create(struct radv_device *device,
                        return NULL;
                }
 
-               if (!ac_rtld_read_config(&rtld_binary, &config)) {
+               if (!ac_rtld_read_config(&device->physical_device->rad_info,
+                                        &rtld_binary, &config)) {
                        ac_rtld_close(&rtld_binary);
                        free(variant);
                        return NULL;
index 538c0ffde82326e704515b17f2af57811ff129db..18ce02e151caec772a95dbf368c9c02332473430 100644 (file)
@@ -121,7 +121,7 @@ bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary,
                                .elf_sizes = &binary->elf_size}))
       return false;
 
-   bool ok = ac_rtld_read_config(&rtld, conf);
+   bool ok = ac_rtld_read_config(&sscreen->info, &rtld, conf);
    ac_rtld_close(&rtld);
    return ok;
 }