From: Marek Olšák Date: Wed, 29 Jan 2020 04:35:49 +0000 (-0500) Subject: ac: align num_vgprs for gfx10.3 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=789cdab3b6188aa8c075eb311dbd8c05d6531d3e;p=mesa.git ac: align num_vgprs for gfx10.3 Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- diff --git a/src/amd/common/ac_binary.c b/src/amd/common/ac_binary.c index 8761422bd6b..4651c064abd 100644 --- a/src/amd/common/ac_binary.c +++ b/src/amd/common/ac_binary.c @@ -21,6 +21,7 @@ * SOFTWARE. */ +#include "ac_gpu_info.h" #include "ac_binary.h" #include "util/u_math.h" @@ -39,6 +40,7 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wave_size, bool really_needs_scratch, + const struct radeon_info *info, struct ac_shader_config *conf) { uint32_t scratch_size = 0; @@ -127,6 +129,16 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes, conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(scratch_size) * 256 * 4; } + /* GFX 10.3 internally: + * - aligns VGPRS to 16 for Wave32 and 8 for Wave64 + * - aligns LDS to 1024 + * + * For shader-db stats, set num_vgprs that the hw actually uses. + */ + if (info->chip_class >= GFX10_3) { + conf->num_vgprs = align(conf->num_vgprs, wave_size == 32 ? 16 : 8); + } + /* Enable 64-bit and 16-bit denormals, because there is no performance * cost. * diff --git a/src/amd/common/ac_binary.h b/src/amd/common/ac_binary.h index d7dbb2db32e..0d981423696 100644 --- a/src/amd/common/ac_binary.h +++ b/src/amd/common/ac_binary.h @@ -32,6 +32,8 @@ extern "C" { #endif +struct radeon_info; + struct ac_shader_config { unsigned num_sgprs; unsigned num_vgprs; @@ -51,6 +53,7 @@ struct ac_shader_config { void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wave_size, bool really_needs_scratch, + const struct radeon_info *info, struct ac_shader_config *conf); #ifdef __cplusplus diff --git a/src/amd/common/ac_rtld.c b/src/amd/common/ac_rtld.c index e1552ec3082..e512b8f7327 100644 --- a/src/amd/common/ac_rtld.c +++ b/src/amd/common/ac_rtld.c @@ -514,7 +514,8 @@ bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name return get_section_by_name(&binary->parts[0], name, data, nbytes); } -bool ac_rtld_read_config(struct ac_rtld_binary *binary, +bool ac_rtld_read_config(const struct radeon_info *info, + struct ac_rtld_binary *binary, struct ac_shader_config *config) { for (unsigned i = 0; i < binary->num_parts; ++i) { @@ -529,7 +530,7 @@ bool ac_rtld_read_config(struct ac_rtld_binary *binary, /* TODO: be precise about scratch use? */ struct ac_shader_config c = {}; ac_parse_shader_binary_config(config_data, config_nbytes, - binary->wave_size, true, &c); + binary->wave_size, true, info, &c); config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs); config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs); diff --git a/src/amd/common/ac_rtld.h b/src/amd/common/ac_rtld.h index a4343d9b3ef..2470a5243f1 100644 --- a/src/amd/common/ac_rtld.h +++ b/src/amd/common/ac_rtld.h @@ -116,7 +116,8 @@ void ac_rtld_close(struct ac_rtld_binary *binary); bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data, size_t *nbytes); -bool ac_rtld_read_config(struct ac_rtld_binary *binary, +bool ac_rtld_read_config(const struct radeon_info *info, + struct ac_rtld_binary *binary, struct ac_shader_config *config); struct ac_rtld_upload_info { diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index dbce0d75aee..a6fb13a60e7 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -982,7 +982,8 @@ radv_shader_variant_create(struct radv_device *device, return NULL; } - if (!ac_rtld_read_config(&rtld_binary, &config)) { + if (!ac_rtld_read_config(&device->physical_device->rad_info, + &rtld_binary, &config)) { ac_rtld_close(&rtld_binary); free(variant); return NULL; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 538c0ffde82..18ce02e151c 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -121,7 +121,7 @@ bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary, .elf_sizes = &binary->elf_size})) return false; - bool ok = ac_rtld_read_config(&rtld, conf); + bool ok = ac_rtld_read_config(&sscreen->info, &rtld, conf); ac_rtld_close(&rtld); return ok; }