ac: add LLVM code for triangle culling
[mesa.git] / src / amd / common / ac_llvm_cull.c
1 /*
2 * Copyright 2019 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 * USE OR OTHER DEALINGS IN THE SOFTWARE.
19 *
20 * The above copyright notice and this permission notice (including the
21 * next paragraph) shall be included in all copies or substantial portions
22 * of the Software.
23 *
24 */
25
26 #include "ac_llvm_cull.h"
27 #include <llvm-c/Core.h>
28
29 struct ac_position_w_info {
30 /* If a primitive intersects the W=0 plane, it causes a reflection
31 * of the determinant used for face culling. Every vertex behind
32 * the W=0 plane negates the determinant, so having 2 vertices behind
33 * the plane has no effect. This is i1 true if the determinant should be
34 * negated.
35 */
36 LLVMValueRef w_reflection;
37
38 /* If we simplify the "-w <= p <= w" view culling equation, we get
39 * "-w <= w", which can't be satisfied when w is negative.
40 * In perspective projection, a negative W means that the primitive
41 * is behind the viewer, but the equation is independent of the type
42 * of projection.
43 *
44 * w_accepted is false when all W are negative and therefore
45 * the primitive is invisible.
46 */
47 LLVMValueRef w_accepted;
48
49 LLVMValueRef all_w_positive;
50 LLVMValueRef any_w_negative;
51 };
52
53 static void ac_analyze_position_w(struct ac_llvm_context *ctx,
54 LLVMValueRef pos[3][4],
55 struct ac_position_w_info *w)
56 {
57 LLVMBuilderRef builder = ctx->builder;
58 LLVMValueRef all_w_negative = ctx->i1true;
59
60 w->w_reflection = ctx->i1false;
61 w->any_w_negative = ctx->i1false;
62
63 for (unsigned i = 0; i < 3; i++) {
64 LLVMValueRef neg_w;
65
66 neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, "");
67 /* If neg_w is true, negate w_reflection. */
68 w->w_reflection = LLVMBuildXor(builder, w->w_reflection, neg_w, "");
69 w->any_w_negative = LLVMBuildOr(builder, w->any_w_negative, neg_w, "");
70 all_w_negative = LLVMBuildAnd(builder, all_w_negative, neg_w, "");
71 }
72 w->all_w_positive = LLVMBuildNot(builder, w->any_w_negative, "");
73 w->w_accepted = LLVMBuildNot(builder, all_w_negative, "");
74 }
75
76 /* Perform front/back face culling and return true if the primitive is accepted. */
77 static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx,
78 LLVMValueRef pos[3][4],
79 struct ac_position_w_info *w,
80 bool cull_front,
81 bool cull_back,
82 bool cull_zero_area)
83 {
84 LLVMBuilderRef builder = ctx->builder;
85
86 if (cull_front && cull_back)
87 return ctx->i1false;
88
89 if (!cull_front && !cull_back && !cull_zero_area)
90 return ctx->i1true;
91
92 /* Front/back face culling. Also if the determinant == 0, the triangle
93 * area is 0.
94 */
95 LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], "");
96 LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], "");
97 LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], "");
98 LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], "");
99 LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, "");
100 LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, "");
101 LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, "");
102
103 /* Negative W negates the determinant. */
104 det = LLVMBuildSelect(builder, w->w_reflection,
105 LLVMBuildFNeg(builder, det, ""),
106 det, "");
107
108 LLVMValueRef accepted = NULL;
109 if (cull_front) {
110 LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE;
111 accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
112 } else if (cull_back) {
113 LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE;
114 accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
115 } else if (cull_zero_area) {
116 accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, "");
117 }
118 return accepted;
119 }
120
121 /* Perform view culling and small primitive elimination and return true
122 * if the primitive is accepted and initially_accepted == true. */
123 static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx,
124 LLVMValueRef pos[3][4],
125 LLVMValueRef initially_accepted,
126 struct ac_position_w_info *w,
127 LLVMValueRef vp_scale[2],
128 LLVMValueRef vp_translate[2],
129 LLVMValueRef small_prim_precision,
130 bool cull_view_xy,
131 bool cull_view_near_z,
132 bool cull_view_far_z,
133 bool cull_small_prims,
134 bool use_halfz_clip_space)
135 {
136 LLVMBuilderRef builder = ctx->builder;
137
138 if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims)
139 return ctx->i1true;
140
141 /* Skip the culling if the primitive has already been rejected or
142 * if any W is negative. The bounding box culling doesn't work when
143 * W is negative.
144 */
145 LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted,
146 w->all_w_positive, "");
147 LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, "");
148 LLVMBuildStore(builder, initially_accepted, accepted_var);
149
150 ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */);
151 {
152 LLVMValueRef bbox_min[3], bbox_max[3];
153 LLVMValueRef accepted = initially_accepted;
154
155 /* Compute the primitive bounding box for easy culling. */
156 for (unsigned chan = 0; chan < 3; chan++) {
157 bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);
158 bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
159
160 bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);
161 bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
162 }
163
164 /* View culling. */
165 if (cull_view_xy || cull_view_near_z || cull_view_far_z) {
166 for (unsigned chan = 0; chan < 3; chan++) {
167 LLVMValueRef visible;
168
169 if ((cull_view_xy && chan <= 1) ||
170 (cull_view_near_z && chan == 2)) {
171 float t = chan == 2 && use_halfz_clip_space ? 0 : -1;
172 visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan],
173 LLVMConstReal(ctx->f32, t), "");
174 accepted = LLVMBuildAnd(builder, accepted, visible, "");
175 }
176
177 if ((cull_view_xy && chan <= 1) ||
178 (cull_view_far_z && chan == 2)) {
179 visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan],
180 ctx->f32_1, "");
181 accepted = LLVMBuildAnd(builder, accepted, visible, "");
182 }
183 }
184 }
185
186 /* Small primitive elimination. */
187 if (cull_small_prims) {
188 /* Assuming a sample position at (0.5, 0.5), if we round
189 * the bounding box min/max extents and the results of
190 * the rounding are equal in either the X or Y direction,
191 * the bounding box does not intersect the sample.
192 *
193 * See these GDC slides for pictures:
194 * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf
195 */
196 LLVMValueRef min, max, not_equal[2], visible;
197
198 for (unsigned chan = 0; chan < 2; chan++) {
199 /* Convert the position to screen-space coordinates. */
200 min = ac_build_fmad(ctx, bbox_min[chan],
201 vp_scale[chan], vp_translate[chan]);
202 max = ac_build_fmad(ctx, bbox_max[chan],
203 vp_scale[chan], vp_translate[chan]);
204 /* Scale the bounding box according to the precision of
205 * the rasterizer and the number of MSAA samples. */
206 min = LLVMBuildFSub(builder, min, small_prim_precision, "");
207 max = LLVMBuildFAdd(builder, max, small_prim_precision, "");
208
209 /* Determine if the bbox intersects the sample point.
210 * It also works for MSAA, but vp_scale, vp_translate,
211 * and small_prim_precision are computed differently.
212 */
213 min = ac_build_round(ctx, min);
214 max = ac_build_round(ctx, max);
215 not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, "");
216 }
217 visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], "");
218 accepted = LLVMBuildAnd(builder, accepted, visible, "");
219 }
220
221 LLVMBuildStore(builder, accepted, accepted_var);
222 }
223 ac_build_endif(ctx, 10000000);
224
225 return LLVMBuildLoad(builder, accepted_var, "");
226 }
227
228 /**
229 * Return i1 true if the primitive is accepted (not culled).
230 *
231 * \param pos Vertex positions 3x vec4
232 * \param initially_accepted AND'ed with the result. Some computations can be
233 * skipped if this is false.
234 * \param vp_scale Viewport scale XY.
235 * For MSAA, multiply them by the number of samples.
236 * \param vp_translate Viewport translation XY.
237 * For MSAA, multiply them by the number of samples.
238 * \param small_prim_precision Precision of small primitive culling. This should
239 * be the same as or greater than the precision of
240 * the rasterizer. Set to num_samples / 2^subpixel_bits.
241 * subpixel_bits are defined by the quantization mode.
242 * \param options See ac_cull_options.
243 */
244 LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx,
245 LLVMValueRef pos[3][4],
246 LLVMValueRef initially_accepted,
247 LLVMValueRef vp_scale[2],
248 LLVMValueRef vp_translate[2],
249 LLVMValueRef small_prim_precision,
250 struct ac_cull_options *options)
251 {
252 struct ac_position_w_info w;
253 ac_analyze_position_w(ctx, pos, &w);
254
255 /* W culling. */
256 LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true;
257 accepted = LLVMBuildAnd(ctx->builder, accepted, initially_accepted, "");
258
259 /* Face culling. */
260 accepted = LLVMBuildAnd(ctx->builder, accepted,
261 ac_cull_face(ctx, pos, &w,
262 options->cull_front,
263 options->cull_back,
264 options->cull_zero_area), "");
265
266 /* View culling and small primitive elimination. */
267 accepted = cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate,
268 small_prim_precision,
269 options->cull_view_xy,
270 options->cull_view_near_z,
271 options->cull_view_far_z,
272 options->cull_small_prims,
273 options->use_halfz_clip_space);
274 return accepted;
275 }