Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5383>
+#include "ac_gpu_info.h"
#include "ac_binary.h"
#include "util/u_math.h"
#include "ac_binary.h"
#include "util/u_math.h"
void ac_parse_shader_binary_config(const char *data, size_t nbytes,
unsigned wave_size,
bool really_needs_scratch,
void ac_parse_shader_binary_config(const char *data, size_t nbytes,
unsigned wave_size,
bool really_needs_scratch,
+ const struct radeon_info *info,
struct ac_shader_config *conf)
{
uint32_t scratch_size = 0;
struct ac_shader_config *conf)
{
uint32_t scratch_size = 0;
conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(scratch_size) * 256 * 4;
}
conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(scratch_size) * 256 * 4;
}
+ /* GFX 10.3 hardware internally:
+ * - aligns VGPRs to 16 for Wave32 and to 8 for Wave64
+ * - aligns LDS to 1024
+ *
+ * For shader-db stats, report the number of VGPRs the hw actually uses.
+ */
+ if (info->chip_class >= GFX10_3) {
+ conf->num_vgprs = align(conf->num_vgprs, wave_size == 32 ? 16 : 8);
+ }
+
/* Enable 64-bit and 16-bit denormals, because there is no performance
* cost.
*
/* Enable 64-bit and 16-bit denormals, because there is no performance
* cost.
*
struct ac_shader_config {
unsigned num_sgprs;
unsigned num_vgprs;
struct ac_shader_config {
unsigned num_sgprs;
unsigned num_vgprs;
void ac_parse_shader_binary_config(const char *data, size_t nbytes,
unsigned wave_size,
bool really_needs_scratch,
void ac_parse_shader_binary_config(const char *data, size_t nbytes,
unsigned wave_size,
bool really_needs_scratch,
+ const struct radeon_info *info,
struct ac_shader_config *conf);
#ifdef __cplusplus
struct ac_shader_config *conf);
#ifdef __cplusplus
return get_section_by_name(&binary->parts[0], name, data, nbytes);
}
return get_section_by_name(&binary->parts[0], name, data, nbytes);
}
-bool ac_rtld_read_config(struct ac_rtld_binary *binary,
+bool ac_rtld_read_config(const struct radeon_info *info,
+ struct ac_rtld_binary *binary,
struct ac_shader_config *config)
{
for (unsigned i = 0; i < binary->num_parts; ++i) {
struct ac_shader_config *config)
{
for (unsigned i = 0; i < binary->num_parts; ++i) {
/* TODO: be precise about scratch use? */
struct ac_shader_config c = {};
ac_parse_shader_binary_config(config_data, config_nbytes,
/* TODO: be precise about scratch use? */
struct ac_shader_config c = {};
ac_parse_shader_binary_config(config_data, config_nbytes,
- binary->wave_size, true, &c);
+ binary->wave_size, true, info, &c);
config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name,
const char **data, size_t *nbytes);
bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name,
const char **data, size_t *nbytes);
-bool ac_rtld_read_config(struct ac_rtld_binary *binary,
+bool ac_rtld_read_config(const struct radeon_info *info,
+ struct ac_rtld_binary *binary,
struct ac_shader_config *config);
struct ac_rtld_upload_info {
struct ac_shader_config *config);
struct ac_rtld_upload_info {
- if (!ac_rtld_read_config(&rtld_binary, &config)) {
+ if (!ac_rtld_read_config(&device->physical_device->rad_info,
+ &rtld_binary, &config)) {
ac_rtld_close(&rtld_binary);
free(variant);
return NULL;
ac_rtld_close(&rtld_binary);
free(variant);
return NULL;
.elf_sizes = &binary->elf_size}))
return false;
.elf_sizes = &binary->elf_size}))
return false;
- bool ok = ac_rtld_read_config(&rtld, conf);
+ bool ok = ac_rtld_read_config(&sscreen->info, &rtld, conf);
ac_rtld_close(&rtld);
return ok;
}
ac_rtld_close(&rtld);
return ok;
}