From c84f11e7b67cfa3c01780210ca31665b658e5ebd Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 23 Jun 2020 18:29:22 +0200 Subject: [PATCH] radv: lower 64-bit drcp/dsqrt/drsq for fixing precision issues MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The hardware precision of v_rcp_f64, v_sqrt_f64 and v_rsq_f64 is less than what Vulkan requires. This lowers them using Goldschmidt's algorithm to improve precision. Fixes dEQP-VK.glsl.builtin.precision_double.* on both compiler backends. Signed-off-by: Samuel Pitoiset Reviewed-by: Daniel Schürmann Part-of: --- .gitlab-ci/deqp-radv-fiji-aco-fails.txt | 27 ------------------- .gitlab-ci/deqp-radv-navi10-aco-fails.txt | 27 ------------------- .gitlab-ci/deqp-radv-pitcairn-aco-fails.txt | 27 ------------------- .gitlab-ci/deqp-radv-polaris10-aco-fails.txt | 27 ------------------- .gitlab-ci/deqp-radv-raven-aco-fails.txt | 27 ------------------- .gitlab-ci/deqp-radv-vega10-aco-fails.txt | 27 ------------------- .../compiler/aco_instruction_selection.cpp | 3 +++ src/amd/vulkan/radv_shader.c | 13 +++++++++ 8 files changed, 16 insertions(+), 162 deletions(-) diff --git a/.gitlab-ci/deqp-radv-fiji-aco-fails.txt b/.gitlab-ci/deqp-radv-fiji-aco-fails.txt index 884d2e4ca72..65bce8df814 100644 --- a/.gitlab-ci/deqp-radv-fiji-aco-fails.txt +++ b/.gitlab-ci/deqp-radv-fiji-aco-fails.txt @@ -18,33 +18,6 @@ dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_8.d32_sfloat_ dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_8.d32_sfloat_s8_uint_separate_layouts.stencil_min dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_8.d32_sfloat_s8_uint_separate_layouts.stencil_zero -dEQP-VK.glsl.builtin.precision_double.distance.compute.scalar -dEQP-VK.glsl.builtin.precision_double.distance.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.distance.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.distance.compute.vec4 
-dEQP-VK.glsl.builtin.precision_double.inverse.compute.mat2 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.scalar -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.length.compute.scalar -dEQP-VK.glsl.builtin.precision_double.length.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.length.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.length.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.refract.compute.scalar -dEQP-VK.glsl.builtin.precision_double.refract.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.refract.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.refract.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.scalar -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.scalar -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec4 dEQP-VK.rasterization.flatshading.line_strip_wide dEQP-VK.rasterization.flatshading.non_strict_line_strip_wide dEQP-VK.rasterization.flatshading.non_strict_lines_wide diff --git a/.gitlab-ci/deqp-radv-navi10-aco-fails.txt b/.gitlab-ci/deqp-radv-navi10-aco-fails.txt index db1428482f7..a8c614c83ea 100644 --- a/.gitlab-ci/deqp-radv-navi10-aco-fails.txt +++ b/.gitlab-ci/deqp-radv-navi10-aco-fails.txt @@ -2,33 +2,6 @@ dEQP-VK.transform_feedback.simple.multistreams_1 dEQP-VK.transform_feedback.simple.multistreams_3 -dEQP-VK.glsl.builtin.precision_double.distance.compute.scalar 
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.distance.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.distance.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.inverse.compute.mat2 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.scalar -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.length.compute.scalar -dEQP-VK.glsl.builtin.precision_double.length.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.length.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.length.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.refract.compute.scalar -dEQP-VK.glsl.builtin.precision_double.refract.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.refract.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.refract.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.scalar -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.scalar -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec4 dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_local.image.guard_nonlocal.workgroup.comp dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_nonlocal.image.guard_nonlocal.workgroup.comp dEQP-VK.rasterization.flatshading.line_strip_wide diff --git a/.gitlab-ci/deqp-radv-pitcairn-aco-fails.txt 
b/.gitlab-ci/deqp-radv-pitcairn-aco-fails.txt index 6c482799d4e..72549cb804c 100644 --- a/.gitlab-ci/deqp-radv-pitcairn-aco-fails.txt +++ b/.gitlab-ci/deqp-radv-pitcairn-aco-fails.txt @@ -1,34 +1,7 @@ -dEQP-VK.glsl.builtin.precision_double.distance.compute.scalar -dEQP-VK.glsl.builtin.precision_double.distance.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.distance.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.distance.compute.vec4 dEQP-VK.glsl.builtin.precision_double.floor.compute.scalar dEQP-VK.glsl.builtin.precision_double.floor.compute.vec2 dEQP-VK.glsl.builtin.precision_double.floor.compute.vec3 dEQP-VK.glsl.builtin.precision_double.floor.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.inverse.compute.mat2 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.scalar -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.length.compute.scalar -dEQP-VK.glsl.builtin.precision_double.length.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.length.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.length.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.refract.compute.scalar -dEQP-VK.glsl.builtin.precision_double.refract.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.refract.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.refract.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.scalar -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.scalar -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec2 
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec4 dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_local.image.guard_nonlocal.workgroup.comp dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_nonlocal.image.guard_nonlocal.workgroup.comp dEQP-VK.pipeline.depth.format.d16_unorm.compare_ops.never_zerodepthbounds_depthdisabled_stencilenabled diff --git a/.gitlab-ci/deqp-radv-polaris10-aco-fails.txt b/.gitlab-ci/deqp-radv-polaris10-aco-fails.txt index 9a2ded72a31..5c77255ae6f 100644 --- a/.gitlab-ci/deqp-radv-polaris10-aco-fails.txt +++ b/.gitlab-ci/deqp-radv-polaris10-aco-fails.txt @@ -1,30 +1,3 @@ -dEQP-VK.glsl.builtin.precision_double.distance.compute.scalar -dEQP-VK.glsl.builtin.precision_double.distance.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.distance.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.distance.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.inverse.compute.mat2 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.scalar -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.length.compute.scalar -dEQP-VK.glsl.builtin.precision_double.length.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.length.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.length.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.refract.compute.scalar -dEQP-VK.glsl.builtin.precision_double.refract.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.refract.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.refract.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.scalar 
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.scalar -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec4 dEQP-VK.rasterization.flatshading.line_strip_wide dEQP-VK.rasterization.flatshading.non_strict_line_strip_wide dEQP-VK.rasterization.flatshading.non_strict_lines_wide diff --git a/.gitlab-ci/deqp-radv-raven-aco-fails.txt b/.gitlab-ci/deqp-radv-raven-aco-fails.txt index db1428482f7..a8c614c83ea 100644 --- a/.gitlab-ci/deqp-radv-raven-aco-fails.txt +++ b/.gitlab-ci/deqp-radv-raven-aco-fails.txt @@ -2,33 +2,6 @@ dEQP-VK.transform_feedback.simple.multistreams_1 dEQP-VK.transform_feedback.simple.multistreams_3 -dEQP-VK.glsl.builtin.precision_double.distance.compute.scalar -dEQP-VK.glsl.builtin.precision_double.distance.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.distance.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.distance.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.inverse.compute.mat2 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.scalar -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.length.compute.scalar -dEQP-VK.glsl.builtin.precision_double.length.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.length.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.length.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.refract.compute.scalar -dEQP-VK.glsl.builtin.precision_double.refract.compute.vec2 
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.refract.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.scalar -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.scalar -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec4 dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_local.image.guard_nonlocal.workgroup.comp dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_nonlocal.image.guard_nonlocal.workgroup.comp dEQP-VK.rasterization.flatshading.line_strip_wide diff --git a/.gitlab-ci/deqp-radv-vega10-aco-fails.txt b/.gitlab-ci/deqp-radv-vega10-aco-fails.txt index 0f23adc5907..78423d7d59f 100644 --- a/.gitlab-ci/deqp-radv-vega10-aco-fails.txt +++ b/.gitlab-ci/deqp-radv-vega10-aco-fails.txt @@ -2,33 +2,6 @@ dEQP-VK.transform_feedback.simple.multistreams_1 dEQP-VK.transform_feedback.simple.multistreams_3 -dEQP-VK.glsl.builtin.precision_double.distance.compute.scalar -dEQP-VK.glsl.builtin.precision_double.distance.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.distance.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.distance.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.inverse.compute.mat2 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.scalar -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.length.compute.scalar -dEQP-VK.glsl.builtin.precision_double.length.compute.vec2 
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.length.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.refract.compute.scalar -dEQP-VK.glsl.builtin.precision_double.refract.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.refract.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.refract.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.scalar -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec4 -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.scalar -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec2 -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec3 -dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec4 dEQP-VK.rasterization.flatshading.line_strip_wide dEQP-VK.rasterization.flatshading.non_strict_line_strip_wide dEQP-VK.rasterization.flatshading.non_strict_lines_wide diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 0af920b5629..7f4da877068 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -1907,6 +1907,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) } else if (dst.regClass() == v1) { emit_rsq(ctx, bld, Definition(dst), src); } else if (dst.regClass() == v2) { + /* Lowered at NIR level for precision reasons. 
*/ emit_vop1_instruction(ctx, instr, aco_opcode::v_rsq_f64, dst); } else { fprintf(stderr, "Unimplemented NIR instr bit size: "); @@ -1998,6 +1999,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) } else if (dst.regClass() == v1) { emit_rcp(ctx, bld, Definition(dst), src); } else if (dst.regClass() == v2) { + /* Lowered at NIR level for precision reasons. */ emit_vop1_instruction(ctx, instr, aco_opcode::v_rcp_f64, dst); } else { fprintf(stderr, "Unimplemented NIR instr bit size: "); @@ -2025,6 +2027,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) } else if (dst.regClass() == v1) { emit_sqrt(ctx, bld, Definition(dst), src); } else if (dst.regClass() == v2) { + /* Lowered at NIR level for precision reasons. */ emit_vop1_instruction(ctx, instr, aco_opcode::v_sqrt_f64, dst); } else { fprintf(stderr, "Unimplemented NIR instr bit size: "); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 78227890b46..a53100fd48b 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -86,6 +86,10 @@ static const struct nir_shader_compiler_options nir_options_llvm = { nir_lower_divmod64 | nir_lower_minmax64 | nir_lower_iabs64, + .lower_doubles_options = nir_lower_drcp | + nir_lower_dsqrt | + nir_lower_drsq | + nir_lower_ddiv, }; static const struct nir_shader_compiler_options nir_options_aco = { @@ -122,6 +126,10 @@ static const struct nir_shader_compiler_options nir_options_aco = { nir_lower_divmod64 | nir_lower_minmax64 | nir_lower_iabs64, + .lower_doubles_options = nir_lower_drcp | + nir_lower_dsqrt | + nir_lower_drsq | + nir_lower_ddiv, }; bool @@ -466,6 +474,11 @@ radv_shader_compile_to_nir(struct radv_device *device, NIR_PASS_V(nir, radv_nir_lower_ycbcr_textures, layout); if (device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE) NIR_PASS_V(nir, nir_lower_discard_to_demote); + + nir_lower_doubles_options lower_doubles = + nir->options->lower_doubles_options; + + NIR_PASS_V(nir, 
nir_lower_doubles, NULL, lower_doubles); } /* Vulkan uses the separate-shader linking model */ -- 2.30.2