i965: Implement the WaPreventHSTessLevelsInterference workaround.
authorKenneth Graunke <kenneth@whitecape.org>
Wed, 8 Jun 2016 01:18:49 +0000 (18:18 -0700)
committerKenneth Graunke <kenneth@whitecape.org>
Thu, 18 Aug 2016 07:46:55 +0000 (00:46 -0700)
Fixes several GL44-CTS.tessellation_shader (and GL45 and ES31) subcases:
- vertex_spacing
- tessellation_shader_point_mode.points_verification
- tessellation_shader_quads_tessellation.inner_tessellation_level_rounding

Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
src/mesa/drivers/dri/i965/Makefile.sources
src/mesa/drivers/dri/i965/brw_compiler.h
src/mesa/drivers/dri/i965/brw_nir.h
src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c [new file with mode: 0644]
src/mesa/drivers/dri/i965/brw_tcs.c
src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp

index 02705a1b775a978c56fc43292f7b5f7e59295196..0eccef21bd9cf65769679e02b8e9e9ad1e0e7188 100644 (file)
@@ -48,6 +48,7 @@ i965_compiler_FILES = \
        brw_nir_attribute_workarounds.c \
        brw_nir_intrinsics.c \
        brw_nir_opt_peephole_ffma.c \
+       brw_nir_tcs_workarounds.c \
        brw_packed_float.c \
        brw_predicated_break.cpp \
        brw_reg.h \
index 10e9f4744bebb84bfc8b7135625d0f31108f0e06..7d15c281a3535b2bf364a6f899777ac99115b867 100644 (file)
@@ -220,6 +220,8 @@ struct brw_tcs_prog_key
    /** A bitfield of per-vertex outputs written. */
    uint64_t outputs_written;
 
+   bool quads_workaround;
+
    struct brw_sampler_prog_key_data tex;
 };
 
index 74c354f5c82c1f1d5de29f8fcd93ab89e20b6539..618531096539971a03cd8d95c6068013f11fcfbf 100644 (file)
@@ -117,6 +117,8 @@ bool brw_nir_apply_attribute_workarounds(nir_shader *nir,
 
 bool brw_nir_apply_trig_workarounds(nir_shader *nir);
 
+void brw_nir_apply_tcs_quads_workaround(nir_shader *nir);
+
 nir_shader *brw_nir_apply_sampler_key(nir_shader *nir,
                                       const struct brw_device_info *devinfo,
                                       const struct brw_sampler_prog_key_data *key,
diff --git a/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c b/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c
new file mode 100644 (file)
index 0000000..0626981
--- /dev/null
@@ -0,0 +1,152 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "compiler/nir/nir_builder.h"
+#include "brw_nir.h"
+
+/**
+ * Implements the WaPreventHSTessLevelsInterference workaround (for Gen7-8).
+ *
+ * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the
+ * definition of the patch header layouts):
+ *
+ *    "HW Bug: The Tessellation stage will incorrectly add domain points
+ *     along patch edges under the following conditions, which may result
+ *     in conformance failures and/or cracking artifacts:
+ *
+ *       * QUAD domain
+ *       * INTEGER partitioning
+ *       * All three TessFactors in a given U or V direction (e.g., V
+ *         direction: UEQ0, InsideV, UEQ1) are all exactly 1.0
+ *       * All three TessFactors in the other direction are > 1.0 and all
+ *         round up to the same integer value (e.g, U direction:
+ *         VEQ0 = 3.1, InsideU = 3.7, VEQ1 = 3.4)
+ *
+ *     The suggested workaround (to be implemented as part of the postamble
+ *     to the HS shader in the HS kernel) is:
+ *
+ *     if (
+ *        (TF[UEQ0] > 1.0) ||
+ *        (TF[VEQ0] > 1.0) ||
+ *        (TF[UEQ1] > 1.0) ||
+ *        (TF[VEQ1] > 1.0) ||
+ *        (TF[INSIDE_U] > 1.0) ||
+ *        (TF[INSIDE_V] > 1.0) )
+ *     {
+ *        TF[INSIDE_U] = (TF[INSIDE_U] == 1.0) ? 2.0 : TF[INSIDE_U];
+ *        TF[INSIDE_V] = (TF[INSIDE_V] == 1.0) ? 2.0 : TF[INSIDE_V];
+ *     }"
+ *
+ * There's a subtlety here.  Intel internal HSD-ES bug 1208668495 notes
+ * that the above workaround fails to fix certain GL/ES CTS tests which
+ * have inside tessellation factors of -1.0.  This can be explained by
+ * a quote from the ARB_tessellation_shader specification:
+ *
+ *    "If "equal_spacing" is used, the floating-point tessellation level is
+ *     first clamped to the range [1,<max>], where <max> is implementation-
+ *     dependent maximum tessellation level (MAX_TESS_GEN_LEVEL)."
+ *
+ * In other words, the actual inner tessellation factor used is
+ * clamp(TF[INSIDE_*], 1.0, 64.0).  So we want to compare the clamped
+ * value against 1.0.  To accomplish this, we change the comparison from
+ * (TF[INSIDE_*] == 1.0) to (TF[INSIDE_*] <= 1.0).
+ */
+
+static inline nir_ssa_def *
+load_output(nir_builder *b, int num_components, int offset)
+{
+   nir_intrinsic_instr *load =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_output);
+   nir_ssa_dest_init(&load->instr, &load->dest, num_components, 32, NULL);
+   load->num_components = num_components;
+   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
+   nir_intrinsic_set_base(load, offset);
+
+   nir_builder_instr_insert(b, &load->instr);
+
+   return &load->dest.ssa;
+}
+
+static inline void
+store_output(nir_builder *b, nir_ssa_def *value, int offset, unsigned comps)
+{
+   nir_intrinsic_instr *store =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+   store->num_components = comps;
+   nir_intrinsic_set_write_mask(store, (1u << comps) - 1);
+   store->src[0] = nir_src_for_ssa(value);
+   store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+   nir_builder_instr_insert(b, &store->instr);
+}
+
+static void
+emit_quads_workaround(nir_builder *b, nir_block *block)
+{
+   /* We're going to insert a new if-statement in a predecessor of the end
+    * block.  This would normally create a new block (after the if) which
+    * would then become the predecessor of the end block, causing our set
+    * walking to get screwed up.  To avoid this, just emit a constant at
+    * the end of our current block, and insert the if before that.
+    */
+   b->cursor = nir_after_block_before_jump(block);
+   b->cursor = nir_before_instr(nir_imm_int(b, 0)->parent_instr);
+
+   nir_ssa_def *inner = load_output(b, 2, 0);
+   nir_ssa_def *outer = load_output(b, 4, 1);
+
+   nir_ssa_def *any_greater_than_1 =
+       nir_ior(b, nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), outer)),
+                  nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), inner)));
+
+   nir_if *if_stmt = nir_if_create(b->shader);
+   if_stmt->condition = nir_src_for_ssa(any_greater_than_1);
+   nir_builder_cf_insert(b, &if_stmt->cf_node);
+
+   /* Fill out the new then-block */
+   b->cursor = nir_after_cf_list(&if_stmt->then_list);
+
+   store_output(b, nir_bcsel(b, nir_fge(b, nir_imm_float(b, 1.0f), inner),
+                                nir_imm_float(b, 2.0f), inner), 0, 2);
+}
+
+void
+brw_nir_apply_tcs_quads_workaround(nir_shader *nir)
+{
+   assert(nir->stage == MESA_SHADER_TESS_CTRL);
+
+   nir_foreach_function(func, nir) {
+      if (!func->impl)
+         continue;
+
+      nir_builder b;
+      nir_builder_init(&b, func->impl);
+
+      struct set_entry *entry;
+      set_foreach(func->impl->end_block->predecessors, entry) {
+         nir_block *pred = (nir_block *) entry->key;
+         emit_quads_workaround(&b, pred);
+      }
+
+      nir_metadata_preserve(func->impl, 0);
+   }
+}
index da94bf275c96b1ad0e89becb8b0524d1b2a7cf63..2a4c775418202dbcdcefb5712c62feb2c8b14a04 100644 (file)
@@ -153,6 +153,8 @@ brw_tcs_debug_recompile(struct brw_context *brw,
                       key->patch_outputs_written);
    found |= key_debug(brw, "TES primitive mode", old_key->tes_primitive_mode,
                       key->tes_primitive_mode);
+   found |= key_debug(brw, "quads and equal_spacing workaround",
+                      old_key->quads_workaround, key->quads_workaround);
    found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
 
    if (!found) {
@@ -346,6 +348,9 @@ brw_upload_tcs_prog(struct brw_context *brw,
     * based on the domain the DS is expecting to tessellate.
     */
    key.tes_primitive_mode = tep->program.PrimitiveMode;
+   key.quads_workaround = brw->gen < 9 &&
+                          tep->program.PrimitiveMode == GL_QUADS &&
+                          tep->program.Spacing == GL_EQUAL;
 
    if (tcp) {
       key.program_string_id = tcp->id;
@@ -382,6 +387,8 @@ brw_tcs_precompile(struct gl_context *ctx,
 
    struct gl_tess_ctrl_program *tcp = (struct gl_tess_ctrl_program *)prog;
    struct brw_tess_ctrl_program *btcp = brw_tess_ctrl_program(tcp);
+   const struct gl_linked_shader *tes =
+      shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
 
    memset(&key, 0, sizeof(key));
 
@@ -394,9 +401,14 @@ brw_tcs_precompile(struct gl_context *ctx,
          _LinkedShaders[MESA_SHADER_TESS_CTRL]->info.TessCtrl.VerticesOut;
    }
 
-   key.tes_primitive_mode = shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]
-      ? shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]->info.TessEval.PrimitiveMode
-      : GL_TRIANGLES;
+   if (tes) {
+      key.tes_primitive_mode = tes->info.TessEval.PrimitiveMode;
+      key.quads_workaround = brw->gen < 9 &&
+                             tes->info.TessEval.PrimitiveMode == GL_QUADS &&
+                             tes->info.TessEval.Spacing == GL_EQUAL;
+   } else {
+      key.tes_primitive_mode = GL_TRIANGLES;
+   }
 
    key.outputs_written = prog->OutputsWritten;
    key.patch_outputs_written = prog->PatchOutputsWritten;
index 30c81c546f7fbae26da3856f181a79f93abd7f28..9944803f31b9deef809b10eb3f5e54e0408da417 100644 (file)
@@ -471,6 +471,9 @@ brw_compile_tcs(const struct brw_compiler *compiler,
    nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
    brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map);
    brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map);
+   if (key->quads_workaround)
+      brw_nir_apply_tcs_quads_workaround(nir);
+
    nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
 
    if (is_scalar)