/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
#include "ac_llvm_cull.h"
#include <llvm-c/Core.h>
29 struct ac_position_w_info
{
30 /* If a primitive intersects the W=0 plane, it causes a reflection
31 * of the determinant used for face culling. Every vertex behind
32 * the W=0 plane negates the determinant, so having 2 vertices behind
33 * the plane has no effect. This is i1 true if the determinant should be
36 LLVMValueRef w_reflection
;
38 /* If we simplify the "-w <= p <= w" view culling equation, we get
39 * "-w <= w", which can't be satisfied when w is negative.
40 * In perspective projection, a negative W means that the primitive
41 * is behind the viewer, but the equation is independent of the type
44 * w_accepted is false when all W are negative and therefore
45 * the primitive is invisible.
47 LLVMValueRef w_accepted
;
49 LLVMValueRef all_w_positive
;
50 LLVMValueRef any_w_negative
;
53 static void ac_analyze_position_w(struct ac_llvm_context
*ctx
,
54 LLVMValueRef pos
[3][4],
55 struct ac_position_w_info
*w
)
57 LLVMBuilderRef builder
= ctx
->builder
;
58 LLVMValueRef all_w_negative
= ctx
->i1true
;
60 w
->w_reflection
= ctx
->i1false
;
61 w
->any_w_negative
= ctx
->i1false
;
63 for (unsigned i
= 0; i
< 3; i
++) {
66 neg_w
= LLVMBuildFCmp(builder
, LLVMRealOLT
, pos
[i
][3], ctx
->f32_0
, "");
67 /* If neg_w is true, negate w_reflection. */
68 w
->w_reflection
= LLVMBuildXor(builder
, w
->w_reflection
, neg_w
, "");
69 w
->any_w_negative
= LLVMBuildOr(builder
, w
->any_w_negative
, neg_w
, "");
70 all_w_negative
= LLVMBuildAnd(builder
, all_w_negative
, neg_w
, "");
72 w
->all_w_positive
= LLVMBuildNot(builder
, w
->any_w_negative
, "");
73 w
->w_accepted
= LLVMBuildNot(builder
, all_w_negative
, "");
76 /* Perform front/back face culling and return true if the primitive is accepted. */
77 static LLVMValueRef
ac_cull_face(struct ac_llvm_context
*ctx
,
78 LLVMValueRef pos
[3][4],
79 struct ac_position_w_info
*w
,
84 LLVMBuilderRef builder
= ctx
->builder
;
86 if (cull_front
&& cull_back
)
89 if (!cull_front
&& !cull_back
&& !cull_zero_area
)
92 /* Front/back face culling. Also if the determinant == 0, the triangle
95 LLVMValueRef det_t0
= LLVMBuildFSub(builder
, pos
[2][0], pos
[0][0], "");
96 LLVMValueRef det_t1
= LLVMBuildFSub(builder
, pos
[1][1], pos
[0][1], "");
97 LLVMValueRef det_t2
= LLVMBuildFSub(builder
, pos
[0][0], pos
[1][0], "");
98 LLVMValueRef det_t3
= LLVMBuildFSub(builder
, pos
[0][1], pos
[2][1], "");
99 LLVMValueRef det_p0
= LLVMBuildFMul(builder
, det_t0
, det_t1
, "");
100 LLVMValueRef det_p1
= LLVMBuildFMul(builder
, det_t2
, det_t3
, "");
101 LLVMValueRef det
= LLVMBuildFSub(builder
, det_p0
, det_p1
, "");
103 /* Negative W negates the determinant. */
104 det
= LLVMBuildSelect(builder
, w
->w_reflection
,
105 LLVMBuildFNeg(builder
, det
, ""),
108 LLVMValueRef accepted
= NULL
;
110 LLVMRealPredicate cond
= cull_zero_area
? LLVMRealOGT
: LLVMRealOGE
;
111 accepted
= LLVMBuildFCmp(builder
, cond
, det
, ctx
->f32_0
, "");
112 } else if (cull_back
) {
113 LLVMRealPredicate cond
= cull_zero_area
? LLVMRealOLT
: LLVMRealOLE
;
114 accepted
= LLVMBuildFCmp(builder
, cond
, det
, ctx
->f32_0
, "");
115 } else if (cull_zero_area
) {
116 accepted
= LLVMBuildFCmp(builder
, LLVMRealONE
, det
, ctx
->f32_0
, "");
121 /* Perform view culling and small primitive elimination and return true
122 * if the primitive is accepted and initially_accepted == true. */
123 static LLVMValueRef
cull_bbox(struct ac_llvm_context
*ctx
,
124 LLVMValueRef pos
[3][4],
125 LLVMValueRef initially_accepted
,
126 struct ac_position_w_info
*w
,
127 LLVMValueRef vp_scale
[2],
128 LLVMValueRef vp_translate
[2],
129 LLVMValueRef small_prim_precision
,
131 bool cull_view_near_z
,
132 bool cull_view_far_z
,
133 bool cull_small_prims
,
134 bool use_halfz_clip_space
)
136 LLVMBuilderRef builder
= ctx
->builder
;
138 if (!cull_view_xy
&& !cull_view_near_z
&& !cull_view_far_z
&& !cull_small_prims
)
141 /* Skip the culling if the primitive has already been rejected or
142 * if any W is negative. The bounding box culling doesn't work when
145 LLVMValueRef cond
= LLVMBuildAnd(builder
, initially_accepted
,
146 w
->all_w_positive
, "");
147 LLVMValueRef accepted_var
= ac_build_alloca_undef(ctx
, ctx
->i1
, "");
148 LLVMBuildStore(builder
, initially_accepted
, accepted_var
);
150 ac_build_ifcc(ctx
, cond
, 10000000 /* does this matter? */);
152 LLVMValueRef bbox_min
[3], bbox_max
[3];
153 LLVMValueRef accepted
= initially_accepted
;
155 /* Compute the primitive bounding box for easy culling. */
156 for (unsigned chan
= 0; chan
< 3; chan
++) {
157 bbox_min
[chan
] = ac_build_fmin(ctx
, pos
[0][chan
], pos
[1][chan
]);
158 bbox_min
[chan
] = ac_build_fmin(ctx
, bbox_min
[chan
], pos
[2][chan
]);
160 bbox_max
[chan
] = ac_build_fmax(ctx
, pos
[0][chan
], pos
[1][chan
]);
161 bbox_max
[chan
] = ac_build_fmax(ctx
, bbox_max
[chan
], pos
[2][chan
]);
165 if (cull_view_xy
|| cull_view_near_z
|| cull_view_far_z
) {
166 for (unsigned chan
= 0; chan
< 3; chan
++) {
167 LLVMValueRef visible
;
169 if ((cull_view_xy
&& chan
<= 1) ||
170 (cull_view_near_z
&& chan
== 2)) {
171 float t
= chan
== 2 && use_halfz_clip_space
? 0 : -1;
172 visible
= LLVMBuildFCmp(builder
, LLVMRealOGE
, bbox_max
[chan
],
173 LLVMConstReal(ctx
->f32
, t
), "");
174 accepted
= LLVMBuildAnd(builder
, accepted
, visible
, "");
177 if ((cull_view_xy
&& chan
<= 1) ||
178 (cull_view_far_z
&& chan
== 2)) {
179 visible
= LLVMBuildFCmp(builder
, LLVMRealOLE
, bbox_min
[chan
],
181 accepted
= LLVMBuildAnd(builder
, accepted
, visible
, "");
186 /* Small primitive elimination. */
187 if (cull_small_prims
) {
188 /* Assuming a sample position at (0.5, 0.5), if we round
189 * the bounding box min/max extents and the results of
190 * the rounding are equal in either the X or Y direction,
191 * the bounding box does not intersect the sample.
193 * See these GDC slides for pictures:
194 * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf
196 LLVMValueRef min
, max
, not_equal
[2], visible
;
198 for (unsigned chan
= 0; chan
< 2; chan
++) {
199 /* Convert the position to screen-space coordinates. */
200 min
= ac_build_fmad(ctx
, bbox_min
[chan
],
201 vp_scale
[chan
], vp_translate
[chan
]);
202 max
= ac_build_fmad(ctx
, bbox_max
[chan
],
203 vp_scale
[chan
], vp_translate
[chan
]);
204 /* Scale the bounding box according to the precision of
205 * the rasterizer and the number of MSAA samples. */
206 min
= LLVMBuildFSub(builder
, min
, small_prim_precision
, "");
207 max
= LLVMBuildFAdd(builder
, max
, small_prim_precision
, "");
209 /* Determine if the bbox intersects the sample point.
210 * It also works for MSAA, but vp_scale, vp_translate,
211 * and small_prim_precision are computed differently.
213 min
= ac_build_round(ctx
, min
);
214 max
= ac_build_round(ctx
, max
);
215 not_equal
[chan
] = LLVMBuildFCmp(builder
, LLVMRealONE
, min
, max
, "");
217 visible
= LLVMBuildAnd(builder
, not_equal
[0], not_equal
[1], "");
218 accepted
= LLVMBuildAnd(builder
, accepted
, visible
, "");
221 LLVMBuildStore(builder
, accepted
, accepted_var
);
223 ac_build_endif(ctx
, 10000000);
225 return LLVMBuildLoad(builder
, accepted_var
, "");
229 * Return i1 true if the primitive is accepted (not culled).
231 * \param pos Vertex positions 3x vec4
232 * \param initially_accepted AND'ed with the result. Some computations can be
233 * skipped if this is false.
234 * \param vp_scale Viewport scale XY.
235 * For MSAA, multiply them by the number of samples.
236 * \param vp_translate Viewport translation XY.
237 * For MSAA, multiply them by the number of samples.
238 * \param small_prim_precision Precision of small primitive culling. This should
239 * be the same as or greater than the precision of
240 * the rasterizer. Set to num_samples / 2^subpixel_bits.
241 * subpixel_bits are defined by the quantization mode.
242 * \param options See ac_cull_options.
244 LLVMValueRef
ac_cull_triangle(struct ac_llvm_context
*ctx
,
245 LLVMValueRef pos
[3][4],
246 LLVMValueRef initially_accepted
,
247 LLVMValueRef vp_scale
[2],
248 LLVMValueRef vp_translate
[2],
249 LLVMValueRef small_prim_precision
,
250 struct ac_cull_options
*options
)
252 struct ac_position_w_info w
;
253 ac_analyze_position_w(ctx
, pos
, &w
);
256 LLVMValueRef accepted
= options
->cull_w
? w
.w_accepted
: ctx
->i1true
;
257 accepted
= LLVMBuildAnd(ctx
->builder
, accepted
, initially_accepted
, "");
260 accepted
= LLVMBuildAnd(ctx
->builder
, accepted
,
261 ac_cull_face(ctx
, pos
, &w
,
264 options
->cull_zero_area
), "");
266 /* View culling and small primitive elimination. */
267 accepted
= cull_bbox(ctx
, pos
, accepted
, &w
, vp_scale
, vp_translate
,
268 small_prim_precision
,
269 options
->cull_view_xy
,
270 options
->cull_view_near_z
,
271 options
->cull_view_far_z
,
272 options
->cull_small_prims
,
273 options
->use_halfz_clip_space
);