i965: Add an INTEL_PRECISE_TRIG=1 option to fix SIN/COS output range.
authorKenneth Graunke <kenneth@whitecape.org>
Wed, 27 Jan 2016 20:21:04 +0000 (12:21 -0800)
committerKenneth Graunke <kenneth@whitecape.org>
Mon, 4 Apr 2016 18:35:16 +0000 (11:35 -0700)
The SIN and COS instructions on Intel hardware can produce values
slightly outside of the [-1.0, 1.0] range for a small set of values.
Obviously, this can break everyone's expectations about trig functions.

According to an internal presentation, the COS instruction can produce
a value up to 1.000027 for inputs in the range (0.08296, 0.09888).  One
suggested workaround is to multiply by 0.99997, scaling down the
amplitude slightly.  Apparently this also minimizes the error function,
reducing the maximum error from 0.00006 to about 0.00003.

When enabled, fixes 16 dEQP precision tests

   dEQP-GLES31.functional.shaders.builtin_functions.precision.
   {cos,sin}.{highp,mediump}_compute.{scalar,vec2,vec4,vec4}.

at the cost of making every sin and cos call more expensive (about
twice the number of cycles on recent hardware).  Enabling this
option has been shown to reduce GPUTest Volplosion performance by
about 10%.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
src/mesa/drivers/dri/i965/brw_compiler.c
src/mesa/drivers/dri/i965/brw_compiler.h
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp

index 3da6aac2cbfbaf79b49a2d270e348fe907b8a4e8..6509267a52e7e3fdf1f3b7f93ed16198986efea6 100644 (file)
@@ -147,6 +147,8 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
    brw_fs_alloc_reg_sets(compiler);
    brw_vec4_alloc_reg_set(compiler);
 
+   compiler->precise_trig = env_var_as_boolean("INTEL_PRECISE_TRIG", false);
+
    compiler->scalar_stage[MESA_SHADER_VERTEX] =
       devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
    compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false;
index 27a95a3c6612cb1701883a7d6b13842f070131c0..231e0001d545e1e9ded26b870d702bda5b6f3992 100644 (file)
@@ -92,6 +92,12 @@ struct brw_compiler {
 
    bool scalar_stage[MESA_SHADER_STAGES];
    struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
+
+   /**
+    * Apply workarounds for SIN and COS output range problems.
+    * This can negatively impact performance.
+    */
+   bool precise_trig;
 };
 
 
index 7839428c52e644dc205695c479ddc79f2d9472ec..5cca91ec5b44544b4555afd6073ec846eaad8b80 100644 (file)
@@ -775,12 +775,24 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
       break;
 
    case nir_op_fsin:
-      inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]);
+      if (!compiler->precise_trig) {
+         inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]);
+      } else {
+         fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
+         inst = bld.emit(SHADER_OPCODE_SIN, tmp, op[0]);
+         inst = bld.MUL(result, tmp, brw_imm_f(0.99997));
+      }
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_fcos:
-      inst = bld.emit(SHADER_OPCODE_COS, result, op[0]);
+      if (!compiler->precise_trig) {
+         inst = bld.emit(SHADER_OPCODE_COS, result, op[0]);
+      } else {
+         fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
+         inst = bld.emit(SHADER_OPCODE_COS, tmp, op[0]);
+         inst = bld.MUL(result, tmp, brw_imm_f(0.99997));
+      }
       inst->saturate = instr->dest.saturate;
       break;
 
index ee6929b16a2ed16c4f7f0cfdca9321f4413fbc8a..6c8fd06fb5e8b48e9440b3f0880030c21d1e99db 100644 (file)
@@ -1101,12 +1101,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       break;
 
    case nir_op_fsin:
-      inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
+      if (!compiler->precise_trig) {
+         inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
+      } else {
+         src_reg tmp = src_reg(this, glsl_type::vec4_type);
+         inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]);
+         inst = emit(MUL(dst, tmp, brw_imm_f(0.99997)));
+      }
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_fcos:
-      inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
+      if (!compiler->precise_trig) {
+         inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
+      } else {
+         src_reg tmp = src_reg(this, glsl_type::vec4_type);
+         inst = emit_math(SHADER_OPCODE_COS, dst_reg(tmp), op[0]);
+         inst = emit(MUL(dst, tmp, brw_imm_f(0.99997)));
+      }
       inst->saturate = instr->dest.saturate;
       break;