i965: Enable EGL_KHR_gl_texture_3D_image
[mesa.git] / src / mesa / drivers / dri / i965 / brw_nir_tcs_workarounds.c
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "compiler/nir/nir_builder.h"
25 #include "brw_nir.h"
26
27 /**
28 * Implements the WaPreventHSTessLevelsInterference workaround (for Gen7-8).
29 *
30 * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the
31 * definition of the patch header layouts):
32 *
33 * "HW Bug: The Tessellation stage will incorrectly add domain points
34 * along patch edges under the following conditions, which may result
35 * in conformance failures and/or cracking artifacts:
36 *
37 * * QUAD domain
38 * * INTEGER partitioning
39 * * All three TessFactors in a given U or V direction (e.g., V
40 * direction: UEQ0, InsideV, UEQ1) are all exactly 1.0
41 * * All three TessFactors in the other direction are > 1.0 and all
42 * round up to the same integer value (e.g, U direction:
43 * VEQ0 = 3.1, InsideU = 3.7, VEQ1 = 3.4)
44 *
45 * The suggested workaround (to be implemented as part of the postamble
46 * to the HS shader in the HS kernel) is:
47 *
48 * if (
49 * (TF[UEQ0] > 1.0) ||
50 * (TF[VEQ0] > 1.0) ||
51 * (TF[UEQ1] > 1.0) ||
52 * (TF[VEQ1] > 1.0) ||
53 * (TF[INSIDE_U] > 1.0) ||
54 * (TF[INSIDE_V] > 1.0) )
55 * {
56 * TF[INSIDE_U] = (TF[INSIDE_U] == 1.0) ? 2.0 : TF[INSIDE_U];
57 * TF[INSIDE_V] = (TF[INSIDE_V] == 1.0) ? 2.0 : TF[INSIDE_V];
58 * }"
59 *
60 * There's a subtlety here. Intel internal HSD-ES bug 1208668495 notes
61 * that the above workaround fails to fix certain GL/ES CTS tests which
62 * have inside tessellation factors of -1.0. This can be explained by
63 * a quote from the ARB_tessellation_shader specification:
64 *
65 * "If "equal_spacing" is used, the floating-point tessellation level is
66 * first clamped to the range [1,<max>], where <max> is implementation-
67 * dependent maximum tessellation level (MAX_TESS_GEN_LEVEL)."
68 *
69 * In other words, the actual inner tessellation factor used is
70 * clamp(TF[INSIDE_*], 1.0, 64.0). So we want to compare the clamped
71 * value against 1.0. To accomplish this, we change the comparison from
72 * (TF[INSIDE_*] == 1.0) to (TF[INSIDE_*] <= 1.0).
73 */
74
75 static inline nir_ssa_def *
76 load_output(nir_builder *b, int num_components, int offset)
77 {
78 nir_intrinsic_instr *load =
79 nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_output);
80 nir_ssa_dest_init(&load->instr, &load->dest, num_components, 32, NULL);
81 load->num_components = num_components;
82 load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
83 nir_intrinsic_set_base(load, offset);
84
85 nir_builder_instr_insert(b, &load->instr);
86
87 return &load->dest.ssa;
88 }
89
90 static inline void
91 store_output(nir_builder *b, nir_ssa_def *value, int offset, unsigned comps)
92 {
93 nir_intrinsic_instr *store =
94 nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
95 store->num_components = comps;
96 nir_intrinsic_set_write_mask(store, (1u << comps) - 1);
97 store->src[0] = nir_src_for_ssa(value);
98 store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
99 nir_builder_instr_insert(b, &store->instr);
100 }
101
102 static void
103 emit_quads_workaround(nir_builder *b, nir_block *block)
104 {
105 b->cursor = nir_after_block_before_jump(block);
106
107 nir_ssa_def *inner = load_output(b, 2, 0);
108 nir_ssa_def *outer = load_output(b, 4, 1);
109
110 nir_ssa_def *any_greater_than_1 =
111 nir_ior(b, nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), outer)),
112 nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), inner)));
113
114 nir_if *if_stmt = nir_if_create(b->shader);
115 if_stmt->condition = nir_src_for_ssa(any_greater_than_1);
116 nir_builder_cf_insert(b, &if_stmt->cf_node);
117
118 /* Fill out the new then-block */
119 b->cursor = nir_after_cf_list(&if_stmt->then_list);
120
121 store_output(b, nir_bcsel(b, nir_fge(b, nir_imm_float(b, 1.0f), inner),
122 nir_imm_float(b, 2.0f), inner), 0, 2);
123 }
124
125 void
126 brw_nir_apply_tcs_quads_workaround(nir_shader *nir)
127 {
128 assert(nir->stage == MESA_SHADER_TESS_CTRL);
129
130 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
131
132 nir_builder b;
133 nir_builder_init(&b, impl);
134
135 /* emit_quads_workaround() inserts an if statement into each block,
136 * which splits it in two. This changes the set of predecessors of
137 * the end block. We want to process the original set, so to be safe,
138 * save it off to an array first.
139 */
140 const unsigned num_end_preds = impl->end_block->predecessors->entries;
141 nir_block *end_preds[num_end_preds];
142 unsigned i = 0;
143 struct set_entry *entry;
144
145 set_foreach(impl->end_block->predecessors, entry) {
146 end_preds[i++] = (nir_block *) entry->key;
147 }
148
149 for (i = 0; i < num_end_preds; i++) {
150 emit_quads_workaround(&b, end_preds[i]);
151 }
152
153 nir_metadata_preserve(impl, 0);
154 }