/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
26 #include "ac_llvm_cull.h"
28 #include <llvm-c/Core.h>
30 struct ac_position_w_info
{
31 /* If a primitive intersects the W=0 plane, it causes a reflection
32 * of the determinant used for face culling. Every vertex behind
33 * the W=0 plane negates the determinant, so having 2 vertices behind
34 * the plane has no effect. This is i1 true if the determinant should be
37 LLVMValueRef w_reflection
;
39 /* If we simplify the "-w <= p <= w" view culling equation, we get
40 * "-w <= w", which can't be satisfied when w is negative.
41 * In perspective projection, a negative W means that the primitive
42 * is behind the viewer, but the equation is independent of the type
45 * w_accepted is false when all W are negative and therefore
46 * the primitive is invisible.
48 LLVMValueRef w_accepted
;
50 LLVMValueRef all_w_positive
;
51 LLVMValueRef any_w_negative
;
54 static void ac_analyze_position_w(struct ac_llvm_context
*ctx
, LLVMValueRef pos
[3][4],
55 struct ac_position_w_info
*w
)
57 LLVMBuilderRef builder
= ctx
->builder
;
58 LLVMValueRef all_w_negative
= ctx
->i1true
;
60 w
->w_reflection
= ctx
->i1false
;
61 w
->any_w_negative
= ctx
->i1false
;
63 for (unsigned i
= 0; i
< 3; i
++) {
66 neg_w
= LLVMBuildFCmp(builder
, LLVMRealOLT
, pos
[i
][3], ctx
->f32_0
, "");
67 /* If neg_w is true, negate w_reflection. */
68 w
->w_reflection
= LLVMBuildXor(builder
, w
->w_reflection
, neg_w
, "");
69 w
->any_w_negative
= LLVMBuildOr(builder
, w
->any_w_negative
, neg_w
, "");
70 all_w_negative
= LLVMBuildAnd(builder
, all_w_negative
, neg_w
, "");
72 w
->all_w_positive
= LLVMBuildNot(builder
, w
->any_w_negative
, "");
73 w
->w_accepted
= LLVMBuildNot(builder
, all_w_negative
, "");
76 /* Perform front/back face culling and return true if the primitive is accepted. */
77 static LLVMValueRef
ac_cull_face(struct ac_llvm_context
*ctx
, LLVMValueRef pos
[3][4],
78 struct ac_position_w_info
*w
, bool cull_front
, bool cull_back
,
81 LLVMBuilderRef builder
= ctx
->builder
;
83 if (cull_front
&& cull_back
)
86 if (!cull_front
&& !cull_back
&& !cull_zero_area
)
89 /* Front/back face culling. Also if the determinant == 0, the triangle
92 LLVMValueRef det_t0
= LLVMBuildFSub(builder
, pos
[2][0], pos
[0][0], "");
93 LLVMValueRef det_t1
= LLVMBuildFSub(builder
, pos
[1][1], pos
[0][1], "");
94 LLVMValueRef det_t2
= LLVMBuildFSub(builder
, pos
[0][0], pos
[1][0], "");
95 LLVMValueRef det_t3
= LLVMBuildFSub(builder
, pos
[0][1], pos
[2][1], "");
96 LLVMValueRef det_p0
= LLVMBuildFMul(builder
, det_t0
, det_t1
, "");
97 LLVMValueRef det_p1
= LLVMBuildFMul(builder
, det_t2
, det_t3
, "");
98 LLVMValueRef det
= LLVMBuildFSub(builder
, det_p0
, det_p1
, "");
100 /* Negative W negates the determinant. */
101 det
= LLVMBuildSelect(builder
, w
->w_reflection
, LLVMBuildFNeg(builder
, det
, ""), det
, "");
103 LLVMValueRef accepted
= NULL
;
105 LLVMRealPredicate cond
= cull_zero_area
? LLVMRealOGT
: LLVMRealOGE
;
106 accepted
= LLVMBuildFCmp(builder
, cond
, det
, ctx
->f32_0
, "");
107 } else if (cull_back
) {
108 LLVMRealPredicate cond
= cull_zero_area
? LLVMRealOLT
: LLVMRealOLE
;
109 accepted
= LLVMBuildFCmp(builder
, cond
, det
, ctx
->f32_0
, "");
110 } else if (cull_zero_area
) {
111 accepted
= LLVMBuildFCmp(builder
, LLVMRealONE
, det
, ctx
->f32_0
, "");
116 /* Perform view culling and small primitive elimination and return true
117 * if the primitive is accepted and initially_accepted == true. */
118 static LLVMValueRef
cull_bbox(struct ac_llvm_context
*ctx
, LLVMValueRef pos
[3][4],
119 LLVMValueRef initially_accepted
, struct ac_position_w_info
*w
,
120 LLVMValueRef vp_scale
[2], LLVMValueRef vp_translate
[2],
121 LLVMValueRef small_prim_precision
, bool cull_view_xy
,
122 bool cull_view_near_z
, bool cull_view_far_z
, bool cull_small_prims
,
123 bool use_halfz_clip_space
)
125 LLVMBuilderRef builder
= ctx
->builder
;
127 if (!cull_view_xy
&& !cull_view_near_z
&& !cull_view_far_z
&& !cull_small_prims
)
128 return initially_accepted
;
130 /* Skip the culling if the primitive has already been rejected or
131 * if any W is negative. The bounding box culling doesn't work when
134 LLVMValueRef cond
= LLVMBuildAnd(builder
, initially_accepted
, w
->all_w_positive
, "");
135 LLVMValueRef accepted_var
= ac_build_alloca_undef(ctx
, ctx
->i1
, "");
136 LLVMBuildStore(builder
, initially_accepted
, accepted_var
);
138 ac_build_ifcc(ctx
, cond
, 10000000 /* does this matter? */);
140 LLVMValueRef bbox_min
[3], bbox_max
[3];
141 LLVMValueRef accepted
= initially_accepted
;
143 /* Compute the primitive bounding box for easy culling. */
144 for (unsigned chan
= 0; chan
< (cull_view_near_z
|| cull_view_far_z
? 3 : 2); chan
++) {
145 bbox_min
[chan
] = ac_build_fmin(ctx
, pos
[0][chan
], pos
[1][chan
]);
146 bbox_min
[chan
] = ac_build_fmin(ctx
, bbox_min
[chan
], pos
[2][chan
]);
148 bbox_max
[chan
] = ac_build_fmax(ctx
, pos
[0][chan
], pos
[1][chan
]);
149 bbox_max
[chan
] = ac_build_fmax(ctx
, bbox_max
[chan
], pos
[2][chan
]);
153 if (cull_view_xy
|| cull_view_near_z
|| cull_view_far_z
) {
154 for (unsigned chan
= 0; chan
< 3; chan
++) {
155 LLVMValueRef visible
;
157 if ((cull_view_xy
&& chan
<= 1) || (cull_view_near_z
&& chan
== 2)) {
158 float t
= chan
== 2 && use_halfz_clip_space
? 0 : -1;
159 visible
= LLVMBuildFCmp(builder
, LLVMRealOGE
, bbox_max
[chan
],
160 LLVMConstReal(ctx
->f32
, t
), "");
161 accepted
= LLVMBuildAnd(builder
, accepted
, visible
, "");
164 if ((cull_view_xy
&& chan
<= 1) || (cull_view_far_z
&& chan
== 2)) {
165 visible
= LLVMBuildFCmp(builder
, LLVMRealOLE
, bbox_min
[chan
], ctx
->f32_1
, "");
166 accepted
= LLVMBuildAnd(builder
, accepted
, visible
, "");
171 /* Small primitive elimination. */
172 if (cull_small_prims
) {
173 /* Assuming a sample position at (0.5, 0.5), if we round
174 * the bounding box min/max extents and the results of
175 * the rounding are equal in either the X or Y direction,
176 * the bounding box does not intersect the sample.
178 * See these GDC slides for pictures:
179 * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf
181 LLVMValueRef min
, max
, not_equal
[2], visible
;
183 for (unsigned chan
= 0; chan
< 2; chan
++) {
184 /* Convert the position to screen-space coordinates. */
185 min
= ac_build_fmad(ctx
, bbox_min
[chan
], vp_scale
[chan
], vp_translate
[chan
]);
186 max
= ac_build_fmad(ctx
, bbox_max
[chan
], vp_scale
[chan
], vp_translate
[chan
]);
187 /* Scale the bounding box according to the precision of
188 * the rasterizer and the number of MSAA samples. */
189 min
= LLVMBuildFSub(builder
, min
, small_prim_precision
, "");
190 max
= LLVMBuildFAdd(builder
, max
, small_prim_precision
, "");
192 /* Determine if the bbox intersects the sample point.
193 * It also works for MSAA, but vp_scale, vp_translate,
194 * and small_prim_precision are computed differently.
196 min
= ac_build_round(ctx
, min
);
197 max
= ac_build_round(ctx
, max
);
198 not_equal
[chan
] = LLVMBuildFCmp(builder
, LLVMRealONE
, min
, max
, "");
200 visible
= LLVMBuildAnd(builder
, not_equal
[0], not_equal
[1], "");
201 accepted
= LLVMBuildAnd(builder
, accepted
, visible
, "");
204 LLVMBuildStore(builder
, accepted
, accepted_var
);
206 ac_build_endif(ctx
, 10000000);
208 return LLVMBuildLoad(builder
, accepted_var
, "");
/**
 * Return i1 true if the primitive is accepted (not culled).
 *
 * \param pos                   Vertex positions 3x vec4
 * \param initially_accepted    AND'ed with the result. Some computations can be
 *                              skipped if this is false.
 * \param vp_scale              Viewport scale XY.
 *                              For MSAA, multiply them by the number of samples.
 * \param vp_translate          Viewport translation XY.
 *                              For MSAA, multiply them by the number of samples.
 * \param small_prim_precision  Precision of small primitive culling. This should
 *                              be the same as or greater than the precision of
 *                              the rasterizer. Set to num_samples / 2^subpixel_bits.
 *                              subpixel_bits are defined by the quantization mode.
 * \param options               See ac_cull_options.
 */
227 LLVMValueRef
ac_cull_triangle(struct ac_llvm_context
*ctx
, LLVMValueRef pos
[3][4],
228 LLVMValueRef initially_accepted
, LLVMValueRef vp_scale
[2],
229 LLVMValueRef vp_translate
[2], LLVMValueRef small_prim_precision
,
230 struct ac_cull_options
*options
)
232 struct ac_position_w_info w
;
233 ac_analyze_position_w(ctx
, pos
, &w
);
236 LLVMValueRef accepted
= options
->cull_w
? w
.w_accepted
: ctx
->i1true
;
237 accepted
= LLVMBuildAnd(ctx
->builder
, accepted
, initially_accepted
, "");
240 accepted
= LLVMBuildAnd(
241 ctx
->builder
, accepted
,
242 ac_cull_face(ctx
, pos
, &w
, options
->cull_front
, options
->cull_back
, options
->cull_zero_area
),
245 /* View culling and small primitive elimination. */
246 accepted
= cull_bbox(ctx
, pos
, accepted
, &w
, vp_scale
, vp_translate
, small_prim_precision
,
247 options
->cull_view_xy
, options
->cull_view_near_z
, options
->cull_view_far_z
,
248 options
->cull_small_prims
, options
->use_halfz_clip_space
);