-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/intel/server \
-I$(top_srcdir)/src/gtest/include \
+ -I$(top_srcdir)/src/compiler/nir \
-I$(top_builddir)/src/compiler/nir \
-I$(top_builddir)/src/mesa/drivers/dri/common \
$(DEFINES) \
AM_CXXFLAGS = $(AM_CFLAGS)
+brw_nir_trig_workarounds.c: brw_nir_trig_workarounds.py $(top_srcdir)/src/compiler/nir/nir_algebraic.py
+ $(MKDIR_GEN)
+ $(AM_V_GEN) PYTHONPATH=$(top_srcdir)/src/compiler/nir $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_nir_trig_workarounds.py > $@ || ($(RM) $@; false)
+
noinst_LTLIBRARIES = libi965_dri.la libi965_compiler.la
libi965_dri_la_SOURCES = $(i965_FILES)
libi965_dri_la_LIBADD = libi965_compiler.la $(INTEL_LIBS)
brw_nir.c \
brw_nir_analyze_boolean_resolves.c \
brw_nir_attribute_workarounds.c \
+ brw_nir_trig_workarounds.c \
brw_nir_opt_peephole_ffma.c \
brw_nir_uniforms.cpp \
brw_packed_float.c \
break;
case nir_op_fsin:
- if (!compiler->precise_trig) {
- inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]);
- } else {
- fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
- inst = bld.emit(SHADER_OPCODE_SIN, tmp, op[0]);
- inst = bld.MUL(result, tmp, brw_imm_f(0.99997));
- }
+ inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fcos:
- if (!compiler->precise_trig) {
- inst = bld.emit(SHADER_OPCODE_COS, result, op[0]);
- } else {
- fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
- inst = bld.emit(SHADER_OPCODE_COS, tmp, op[0]);
- inst = bld.MUL(result, tmp, brw_imm_f(0.99997));
- }
+ inst = bld.emit(SHADER_OPCODE_COS, result, op[0]);
inst->saturate = instr->dest.saturate;
break;
if (nir->stage == MESA_SHADER_GEOMETRY)
OPT(nir_lower_gs_intrinsics);
+ if (compiler->precise_trig)
+ OPT(brw_nir_apply_trig_workarounds);
+
static const nir_lower_tex_options tex_options = {
.lower_txp = ~0,
};
bool use_legacy_snorm_formula,
const uint8_t *attrib_wa_flags);
+bool brw_nir_apply_trig_workarounds(nir_shader *nir);
+
nir_shader *brw_nir_apply_sampler_key(nir_shader *nir,
const struct brw_device_info *devinfo,
const struct brw_sampler_prog_key_data *key,
--- /dev/null
+#! /usr/bin/env python
+#
+# Copyright (C) 2016 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+import nir_algebraic
+
+# The SIN and COS instructions on Intel hardware can produce values
+# slightly outside of the [-1.0, 1.0] range for a small set of values.
+# Obviously, this can break everyone's expectations about trig functions.
+#
+# According to an internal presentation, the COS instruction can produce
+# a value up to 1.000027 for inputs in the range (0.08296, 0.09888). One
+# suggested workaround is to multiply by 0.99997, scaling down the
+# amplitude slightly. Apparently this also minimizes the error function,
+# reducing the maximum error from 0.00006 to about 0.00003.
+
+trig_workarounds = [
+ (('fsin', 'x'), ('fmul', ('fsin', 'x'), 0.99997)),
+ (('fcos', 'x'), ('fmul', ('fcos', 'x'), 0.99997)),
+]
+
+print '#include "brw_nir.h"'
+print nir_algebraic.AlgebraicPass("brw_nir_apply_trig_workarounds",
+ trig_workarounds).render()
break;
case nir_op_fsin:
- if (!compiler->precise_trig) {
- inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
- } else {
- src_reg tmp = src_reg(this, glsl_type::vec4_type);
- inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]);
- inst = emit(MUL(dst, tmp, brw_imm_f(0.99997)));
- }
+ inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fcos:
- if (!compiler->precise_trig) {
- inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
- } else {
- src_reg tmp = src_reg(this, glsl_type::vec4_type);
- inst = emit_math(SHADER_OPCODE_COS, dst_reg(tmp), op[0]);
- inst = emit(MUL(dst, tmp, brw_imm_f(0.99997)));
- }
+ inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
inst->saturate = instr->dest.saturate;
break;