radv: lower 64-bit drcp/dsqrt/drsq for fixing precision issues
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tue, 23 Jun 2020 16:29:22 +0000 (18:29 +0200)
committer: Marge Bot <eric+marge@anholt.net>
Thu, 25 Jun 2020 12:09:08 +0000 (12:09 +0000)
The hardware precision of v_rcp_f64, v_sqrt_f64 and v_rsq_f64
is less than what Vulkan requires.

This lowers these operations using Goldschmidt's algorithm to improve precision.

Fixes dEQP-VK.glsl.builtin.precision_double.* on both compiler
backends.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5609>

.gitlab-ci/deqp-radv-fiji-aco-fails.txt
.gitlab-ci/deqp-radv-navi10-aco-fails.txt
.gitlab-ci/deqp-radv-pitcairn-aco-fails.txt
.gitlab-ci/deqp-radv-polaris10-aco-fails.txt
.gitlab-ci/deqp-radv-raven-aco-fails.txt
.gitlab-ci/deqp-radv-vega10-aco-fails.txt
src/amd/compiler/aco_instruction_selection.cpp
src/amd/vulkan/radv_shader.c

index 884d2e4ca727152baf23829167b052e5a4bc9622..65bce8df81496b9d7f3aca8eac672bba55bbf2ac 100644 (file)
@@ -18,33 +18,6 @@ dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_8.d32_sfloat_
 dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_8.d32_sfloat_s8_uint_separate_layouts.stencil_min
 dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_8.d32_sfloat_s8_uint_separate_layouts.stencil_zero
 
-dEQP-VK.glsl.builtin.precision_double.distance.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.inverse.compute.mat2
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.length.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.refract.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec4
 dEQP-VK.rasterization.flatshading.line_strip_wide
 dEQP-VK.rasterization.flatshading.non_strict_line_strip_wide
 dEQP-VK.rasterization.flatshading.non_strict_lines_wide
index db1428482f71da28b9f323e8cbd34308d411bfc3..a8c614c83ea5a356cf0c4bc910e893da77dd2593 100644 (file)
@@ -2,33 +2,6 @@
 dEQP-VK.transform_feedback.simple.multistreams_1
 dEQP-VK.transform_feedback.simple.multistreams_3
 
-dEQP-VK.glsl.builtin.precision_double.distance.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.inverse.compute.mat2
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.length.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.refract.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec4
 dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_local.image.guard_nonlocal.workgroup.comp
 dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_nonlocal.image.guard_nonlocal.workgroup.comp
 dEQP-VK.rasterization.flatshading.line_strip_wide
index 6c482799d4efa52ad010c6cdcd5d1d2c25000d5e..72549cb804cf3f4fa6f439231dabbdbc7cfe23b9 100644 (file)
@@ -1,34 +1,7 @@
-dEQP-VK.glsl.builtin.precision_double.distance.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec4
 dEQP-VK.glsl.builtin.precision_double.floor.compute.scalar
 dEQP-VK.glsl.builtin.precision_double.floor.compute.vec2
 dEQP-VK.glsl.builtin.precision_double.floor.compute.vec3
 dEQP-VK.glsl.builtin.precision_double.floor.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.inverse.compute.mat2
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.length.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.refract.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec4
 dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_local.image.guard_nonlocal.workgroup.comp
 dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_nonlocal.image.guard_nonlocal.workgroup.comp
 dEQP-VK.pipeline.depth.format.d16_unorm.compare_ops.never_zerodepthbounds_depthdisabled_stencilenabled
index 9a2ded72a31ae7d514110fe3ae7fe2f49557d1e9..5c77255ae6f78c8f8d50357be39405cf182d92cc 100644 (file)
@@ -1,30 +1,3 @@
-dEQP-VK.glsl.builtin.precision_double.distance.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.inverse.compute.mat2
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.length.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.refract.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec4
 dEQP-VK.rasterization.flatshading.line_strip_wide
 dEQP-VK.rasterization.flatshading.non_strict_line_strip_wide
 dEQP-VK.rasterization.flatshading.non_strict_lines_wide
index db1428482f71da28b9f323e8cbd34308d411bfc3..a8c614c83ea5a356cf0c4bc910e893da77dd2593 100644 (file)
@@ -2,33 +2,6 @@
 dEQP-VK.transform_feedback.simple.multistreams_1
 dEQP-VK.transform_feedback.simple.multistreams_3
 
-dEQP-VK.glsl.builtin.precision_double.distance.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.inverse.compute.mat2
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.length.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.refract.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec4
 dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_local.image.guard_nonlocal.workgroup.comp
 dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_nonlocal.image.guard_nonlocal.workgroup.comp
 dEQP-VK.rasterization.flatshading.line_strip_wide
index 0f23adc590731c7037f4f1069e5be2a42e53d626..78423d7d59ff0bb703a632834a95391ae451e2fd 100644 (file)
@@ -2,33 +2,6 @@
 dEQP-VK.transform_feedback.simple.multistreams_1
 dEQP-VK.transform_feedback.simple.multistreams_3
 
-dEQP-VK.glsl.builtin.precision_double.distance.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.distance.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.inverse.compute.mat2
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.inversesqrt.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.length.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.length.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.normalize.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.refract.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.refract.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.smoothstep.compute.vec4
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.scalar
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec2
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec3
-dEQP-VK.glsl.builtin.precision_double.sqrt.compute.vec4
 dEQP-VK.rasterization.flatshading.line_strip_wide
 dEQP-VK.rasterization.flatshading.non_strict_line_strip_wide
 dEQP-VK.rasterization.flatshading.non_strict_lines_wide
index 0af920b562952f7500cb98431bf90ddc2a83e2e8..7f4da8770680bc747a9507c02dc34fbc11c504bf 100644 (file)
@@ -1907,6 +1907,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
       } else if (dst.regClass() == v1) {
          emit_rsq(ctx, bld, Definition(dst), src);
       } else if (dst.regClass() == v2) {
+         /* Lowered at NIR level for precision reasons. */
          emit_vop1_instruction(ctx, instr, aco_opcode::v_rsq_f64, dst);
       } else {
          fprintf(stderr, "Unimplemented NIR instr bit size: ");
@@ -1998,6 +1999,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
       } else if (dst.regClass() == v1) {
          emit_rcp(ctx, bld, Definition(dst), src);
       } else if (dst.regClass() == v2) {
+         /* Lowered at NIR level for precision reasons. */
          emit_vop1_instruction(ctx, instr, aco_opcode::v_rcp_f64, dst);
       } else {
          fprintf(stderr, "Unimplemented NIR instr bit size: ");
@@ -2025,6 +2027,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
       } else if (dst.regClass() == v1) {
          emit_sqrt(ctx, bld, Definition(dst), src);
       } else if (dst.regClass() == v2) {
+         /* Lowered at NIR level for precision reasons. */
          emit_vop1_instruction(ctx, instr, aco_opcode::v_sqrt_f64, dst);
       } else {
          fprintf(stderr, "Unimplemented NIR instr bit size: ");
index 78227890b4603916eaa872d09c4123d450230321..a53100fd48bd6685a30788895f2543d675a64a48 100644 (file)
@@ -86,6 +86,10 @@ static const struct nir_shader_compiler_options nir_options_llvm = {
                                nir_lower_divmod64 |
                                nir_lower_minmax64 |
                                nir_lower_iabs64,
+       .lower_doubles_options = nir_lower_drcp |
+                                nir_lower_dsqrt |
+                                nir_lower_drsq |
+                                nir_lower_ddiv,
 };
 
 static const struct nir_shader_compiler_options nir_options_aco = {
@@ -122,6 +126,10 @@ static const struct nir_shader_compiler_options nir_options_aco = {
                                nir_lower_divmod64 |
                                nir_lower_minmax64 |
                                nir_lower_iabs64,
+       .lower_doubles_options = nir_lower_drcp |
+                                nir_lower_dsqrt |
+                                nir_lower_drsq |
+                                nir_lower_ddiv,
 };
 
 bool
@@ -466,6 +474,11 @@ radv_shader_compile_to_nir(struct radv_device *device,
                NIR_PASS_V(nir, radv_nir_lower_ycbcr_textures, layout);
                if (device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE)
                        NIR_PASS_V(nir, nir_lower_discard_to_demote);
+
+               nir_lower_doubles_options lower_doubles =
+                       nir->options->lower_doubles_options;
+
+               NIR_PASS_V(nir, nir_lower_doubles, NULL, lower_doubles);
        }
 
        /* Vulkan uses the separate-shader linking model */