2 * Copyright © 2016 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "compiler/nir/nir_builder.h"
/**
 * Implements the WaPreventHSTessLevelsInterference workaround (for Gen7-8).
 *
 * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the
 * definition of the patch header layouts):
 *
 *    "HW Bug: The Tessellation stage will incorrectly add domain points
 *     along patch edges under the following conditions, which may result
 *     in conformance failures and/or cracking artifacts:
 *
 *       * QUAD domain
 *       * INTEGER partitioning
 *       * All three TessFactors in a given U or V direction (e.g., V
 *         direction: UEQ0, InsideV, UEQ1) are all exactly 1.0
 *       * All three TessFactors in the other direction are > 1.0 and all
 *         round up to the same integer value (e.g, U direction:
 *         VEQ0 = 3.1, InsideU = 3.7, VEQ1 = 3.4)
 *
 *     The suggested workaround (to be implemented as part of the postamble
 *     to the HS shader in the HS kernel) is:
 *
 *     if (
 *        (TF[UEQ0] > 1.0) ||
 *        (TF[VEQ0] > 1.0) ||
 *        (TF[UEQ1] > 1.0) ||
 *        (TF[VEQ1] > 1.0) ||
 *        (TF[INSIDE_U] > 1.0) ||
 *        (TF[INSIDE_V] > 1.0) )
 *     {
 *        TF[INSIDE_U] = (TF[INSIDE_U] == 1.0) ? 2.0 : TF[INSIDE_U];
 *        TF[INSIDE_V] = (TF[INSIDE_V] == 1.0) ? 2.0 : TF[INSIDE_V];
 *     }"
 *
 * There's a subtlety here.  Intel internal HSD-ES bug 1208668495 notes
 * that the above workaround fails to fix certain GL/ES CTS tests which
 * have inside tessellation factors of -1.0.  This can be explained by
 * a quote from the ARB_tessellation_shader specification:
 *
 *    "If "equal_spacing" is used, the floating-point tessellation level is
 *     first clamped to the range [1,<max>], where <max> is implementation-
 *     dependent maximum tessellation level (MAX_TESS_GEN_LEVEL)."
 *
 * In other words, the actual inner tessellation factor used is
 * clamp(TF[INSIDE_*], 1.0, 64.0).  So we want to compare the clamped
 * value against 1.0.  To accomplish this, we change the comparison from
 * (TF[INSIDE_*] == 1.0) to (TF[INSIDE_*] <= 1.0).
 */
75 static inline nir_ssa_def
*
76 load_output(nir_builder
*b
, int num_components
, int offset
)
78 nir_intrinsic_instr
*load
=
79 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_load_output
);
80 nir_ssa_dest_init(&load
->instr
, &load
->dest
, num_components
, 32, NULL
);
81 load
->num_components
= num_components
;
82 load
->src
[0] = nir_src_for_ssa(nir_imm_int(b
, 0));
83 nir_intrinsic_set_base(load
, offset
);
85 nir_builder_instr_insert(b
, &load
->instr
);
87 return &load
->dest
.ssa
;
91 store_output(nir_builder
*b
, nir_ssa_def
*value
, int offset
, unsigned comps
)
93 nir_intrinsic_instr
*store
=
94 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_store_output
);
95 store
->num_components
= comps
;
96 nir_intrinsic_set_write_mask(store
, (1u << comps
) - 1);
97 store
->src
[0] = nir_src_for_ssa(value
);
98 store
->src
[1] = nir_src_for_ssa(nir_imm_int(b
, 0));
99 nir_builder_instr_insert(b
, &store
->instr
);
103 emit_quads_workaround(nir_builder
*b
, nir_block
*block
)
105 /* We're going to insert a new if-statement in a predecessor of the end
106 * block. This would normally create a new block (after the if) which
107 * would then become the predecessor of the end block, causing our set
108 * walking to get screwed up. To avoid this, just emit a constant at
109 * the end of our current block, and insert the if before that.
111 b
->cursor
= nir_after_block_before_jump(block
);
112 b
->cursor
= nir_before_instr(nir_imm_int(b
, 0)->parent_instr
);
114 nir_ssa_def
*inner
= load_output(b
, 2, 0);
115 nir_ssa_def
*outer
= load_output(b
, 4, 1);
117 nir_ssa_def
*any_greater_than_1
=
118 nir_ior(b
, nir_bany(b
, nir_flt(b
, nir_imm_float(b
, 1.0f
), outer
)),
119 nir_bany(b
, nir_flt(b
, nir_imm_float(b
, 1.0f
), inner
)));
121 nir_if
*if_stmt
= nir_if_create(b
->shader
);
122 if_stmt
->condition
= nir_src_for_ssa(any_greater_than_1
);
123 nir_builder_cf_insert(b
, &if_stmt
->cf_node
);
125 /* Fill out the new then-block */
126 b
->cursor
= nir_after_cf_list(&if_stmt
->then_list
);
128 store_output(b
, nir_bcsel(b
, nir_fge(b
, nir_imm_float(b
, 1.0f
), inner
),
129 nir_imm_float(b
, 2.0f
), inner
), 0, 2);
133 brw_nir_apply_tcs_quads_workaround(nir_shader
*nir
)
135 assert(nir
->stage
== MESA_SHADER_TESS_CTRL
);
137 nir_foreach_function(func
, nir
) {
142 nir_builder_init(&b
, func
->impl
);
144 struct set_entry
*entry
;
145 set_foreach(func
->impl
->end_block
->predecessors
, entry
) {
146 nir_block
*pred
= (nir_block
*) entry
->key
;
147 emit_quads_workaround(&b
, pred
);
150 nir_metadata_preserve(func
->impl
, 0);