From d7b0d9a8d8a5a7f3b26a30c8e7005c2fb3f2f4af Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Mon, 11 Nov 2019 13:41:32 +0000
Subject: [PATCH] radv: enable FP16/FP64 denormals earlier and only for LLVM

ACO sets this itself and will have to set it differently in the future
to support shaderDenormFlushToZeroFloat64.

Signed-off-by: Rhys Perry
Reviewed-by: Samuel Pitoiset
---
 src/amd/vulkan/radv_shader.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index c2562c028c4..1e550526f56 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -700,20 +700,6 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice,
 	config_out->num_sgprs = num_sgprs;
 	config_out->num_shared_vgprs = num_shared_vgprs;
 
-	/* Enable 64-bit and 16-bit denormals, because there is no performance
-	 * cost.
-	 *
-	 * If denormals are enabled, all floating-point output modifiers are
-	 * ignored.
-	 *
-	 * Don't enable denormals for 32-bit floats, because:
-	 * - Floating-point output modifiers would be ignored by the hw.
-	 * - Some opcodes don't support denormals, such as v_mad_f32. We would
-	 *   have to stop using those.
-	 * - GFX6 & GFX7 would be very slow.
-	 */
-	config_out->float_mode |= V_00B028_FP_64_DENORMS;
-
 	config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) |
 			    S_00B12C_SCRATCH_EN(scratch_enabled);
 
@@ -971,6 +957,20 @@ radv_shader_variant_create(struct radv_device *device,
 			return NULL;
 		}
 
+		/* Enable 64-bit and 16-bit denormals, because there is no performance
+		 * cost.
+		 *
+		 * If denormals are enabled, all floating-point output modifiers are
+		 * ignored.
+		 *
+		 * Don't enable denormals for 32-bit floats, because:
+		 * - Floating-point output modifiers would be ignored by the hw.
+		 * - Some opcodes don't support denormals, such as v_mad_f32. We would
+		 *   have to stop using those.
+		 * - GFX6 & GFX7 would be very slow.
+		 */
+		config.float_mode |= V_00B028_FP_64_DENORMS;
+
 		if (rtld_binary.lds_size > 0) {
 			unsigned alloc_granularity = device->physical_device->rad_info.chip_class >= GFX7 ? 512 : 256;
 			config.lds_size = align(rtld_binary.lds_size, alloc_granularity) / alloc_granularity;
-- 
2.30.2