i965: Port INTEL_PRECISE_TRIG=1 to NIR.

author Kenneth Graunke <kenneth@whitecape.org>

Thu, 7 Apr 2016 22:04:35 +0000 (15:04 -0700)

committer Kenneth Graunke <kenneth@whitecape.org>

Tue, 12 Apr 2016 01:44:17 +0000 (18:44 -0700)
author Kenneth Graunke <kenneth@whitecape.org>
Thu, 7 Apr 2016 22:04:35 +0000 (15:04 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Tue, 12 Apr 2016 01:44:17 +0000 (18:44 -0700)
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am

index 0db5a51e7259ec413d62fb0bf5e2f71ebfb87556..a41c8305a80966dca73354408ef25fa988880065 100644 (file)
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -33,6 +33,7 @@ AM_CFLAGS = \
         -I$(top_srcdir)/src/mesa/drivers/dri/common \
         -I$(top_srcdir)/src/mesa/drivers/dri/intel/server \
         -I$(top_srcdir)/src/gtest/include \
+       -I$(top_srcdir)/src/compiler/nir \
         -I$(top_builddir)/src/compiler/nir \
         -I$(top_builddir)/src/mesa/drivers/dri/common \
         $(DEFINES) \
@@ -41,6 +42,10 @@ AM_CFLAGS = \
  
  AM_CXXFLAGS = $(AM_CFLAGS)
  
+brw_nir_trig_workarounds.c: brw_nir_trig_workarounds.py $(top_srcdir)/src/compiler/nir/nir_algebraic.py
+	$(MKDIR_GEN)
+	$(AM_V_GEN) PYTHONPATH=$(top_srcdir)/src/compiler/nir $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_nir_trig_workarounds.py > $@ || ($(RM) $@; false)
+
  noinst_LTLIBRARIES = libi965_dri.la libi965_compiler.la
  libi965_dri_la_SOURCES = $(i965_FILES)
  libi965_dri_la_LIBADD = libi965_compiler.la $(INTEL_LIBS)
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources

index 468958824140a6f40c8c919f1bc1868701fdf973..2619e4360bc4ba1a69be828b883574ce412078c7 100644 (file)
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -44,6 +44,7 @@ i965_compiler_FILES = \
         brw_nir.c \
         brw_nir_analyze_boolean_resolves.c \
         brw_nir_attribute_workarounds.c \
+       brw_nir_trig_workarounds.c \
         brw_nir_opt_peephole_ffma.c \
         brw_nir_uniforms.cpp \
         brw_packed_float.c \
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp

index 90b878913b3700b325a720c73280be710acdbc59..bd6314a3e37e35802141b93456acaaf4c48a11c0 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -775,24 +775,12 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
        break;
  
     case nir_op_fsin:
-      if (!compiler->precise_trig) {
-         inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]);
-      } else {
-         fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
-         inst = bld.emit(SHADER_OPCODE_SIN, tmp, op[0]);
-         inst = bld.MUL(result, tmp, brw_imm_f(0.99997));
-      }
+      inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]);
        inst->saturate = instr->dest.saturate;
        break;
  
     case nir_op_fcos:
-      if (!compiler->precise_trig) {
-         inst = bld.emit(SHADER_OPCODE_COS, result, op[0]);
-      } else {
-         fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
-         inst = bld.emit(SHADER_OPCODE_COS, tmp, op[0]);
-         inst = bld.MUL(result, tmp, brw_imm_f(0.99997));
-      }
+      inst = bld.emit(SHADER_OPCODE_COS, result, op[0]);
        inst->saturate = instr->dest.saturate;
        break;
  
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c

index 1821c0d7d230452c3c841a1df3d7536745188e9e..932979a77199f8b2539c31e371867297db5afd9a 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -447,6 +447,9 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
     if (nir->stage == MESA_SHADER_GEOMETRY)
        OPT(nir_lower_gs_intrinsics);
  
+   if (compiler->precise_trig)
+      OPT(brw_nir_apply_trig_workarounds);
+
     static const nir_lower_tex_options tex_options = {
        .lower_txp = ~0,
     };
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h

index b10c0838fe003ea5e77e19c3f095ce65a06b0b8f..2711606511d5173393d7d172d5606d3f24270a20 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -106,6 +106,8 @@ bool brw_nir_apply_attribute_workarounds(nir_shader *nir,
                                           bool use_legacy_snorm_formula,
                                           const uint8_t *attrib_wa_flags);
  
+bool brw_nir_apply_trig_workarounds(nir_shader *nir);
+
  nir_shader *brw_nir_apply_sampler_key(nir_shader *nir,
                                        const struct brw_device_info *devinfo,
                                        const struct brw_sampler_prog_key_data *key,
diff --git a/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py b/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py

new file mode 100755 (executable)

index 0000000..67dab9a
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py
@@ -0,0 +1,43 @@
+#! /usr/bin/env python
+#
+# Copyright (C) 2016 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+import nir_algebraic  # located through the PYTHONPATH set by the Makefile.am rule
+
+# The SIN and COS instructions on Intel hardware can produce values
+# slightly outside of the [-1.0, 1.0] range for a small set of values.
+# Obviously, this can break everyone's expectations about trig functions.
+#
+# According to an internal presentation, the COS instruction can produce
+# a value up to 1.000027 for inputs in the range (0.08296, 0.09888).  One
+# suggested workaround is to multiply by 0.99997, scaling down the
+# amplitude slightly.  Apparently this also minimizes the error function,
+# reducing the maximum error from 0.00006 to about 0.00003.
+
+trig_workarounds = [  # each entry: (expression to match, replacement)
+   (('fsin', 'x'), ('fmul', ('fsin', 'x'), 0.99997)),  # sin(x) * 0.99997
+   (('fcos', 'x'), ('fmul', ('fcos', 'x'), 0.99997)),  # cos(x) * 0.99997
+]
+
+print '#include "brw_nir.h"'  # header declaring brw_nir_apply_trig_workarounds()
+print nir_algebraic.AlgebraicPass("brw_nir_apply_trig_workarounds",
+                                  trig_workarounds).render()  # C source goes to stdout
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp

index d9f96c583793c9a7dc804d3cc9063b1bd5e85a7e..e4e8c38e703392873e768882bb1d2a0d57f12c22 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1101,24 +1101,12 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
        break;
  
     case nir_op_fsin:
-      if (!compiler->precise_trig) {
-         inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
-      } else {
-         src_reg tmp = src_reg(this, glsl_type::vec4_type);
-         inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]);
-         inst = emit(MUL(dst, tmp, brw_imm_f(0.99997)));
-      }
+      inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
        inst->saturate = instr->dest.saturate;
        break;
  
     case nir_op_fcos:
-      if (!compiler->precise_trig) {
-         inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
-      } else {
-         src_reg tmp = src_reg(this, glsl_type::vec4_type);
-         inst = emit_math(SHADER_OPCODE_COS, dst_reg(tmp), op[0]);
-         inst = emit(MUL(dst, tmp, brw_imm_f(0.99997)));
-      }
+      inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
        inst->saturate = instr->dest.saturate;
        break;
author	Kenneth Graunke <kenneth@whitecape.org>
	Thu, 7 Apr 2016 22:04:35 +0000 (15:04 -0700)
committer	Kenneth Graunke <kenneth@whitecape.org>
	Tue, 12 Apr 2016 01:44:17 +0000 (18:44 -0700)
src/mesa/drivers/dri/i965/Makefile.am		patch \| blob \| history
src/mesa/drivers/dri/i965/Makefile.sources		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_nir.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_nir.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_nir.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py	[new file with mode: 0755]	patch \| blob
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp		patch \| blob \| history