gallivm/llvmpipe: simplify front/back stencil ref value handling
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_depth.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Depth/stencil testing to LLVM IR translation.
31 *
32 * To be done accurately/efficiently the depth/stencil test must be done with
33 * the same type/format of the depth/stencil buffer, which implies massaging
34 * the incoming depths to fit into place. Using a more straightforward
35 * type/format for depth/stencil values internally and only convert when
36 * flushing would avoid this, but it would most likely result in depth fighting
37 * artifacts.
38 *
39 * We are free to use a different pixel layout though. Since our basic
40 * processing unit is a quad (2x2 pixel block) we store the depth/stencil
41 * values tiled, a quad at a time. That is, a depth buffer containing
42 *
43 * Z11 Z12 Z13 Z14 ...
44 * Z21 Z22 Z23 Z24 ...
45 * Z31 Z32 Z33 Z34 ...
46 * Z41 Z42 Z43 Z44 ...
47 * ... ... ... ... ...
48 *
49 * will actually be stored in memory as
50 *
51 * Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
52 * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
53 * ... ... ... ... ... ... ... ... ...
54 *
55 *
56 * Stencil test:
57 * Two-sided stencil test is supported but probably not as efficient as
58 * it could be. Currently, we use if/then/else constructs to do the
59 * operations for front vs. back-facing polygons. We could probably do
60 * both the front and back arithmetic then use a Select() instruction to
61 * choose the result depending on polygon orientation. We'd have to
62 * measure performance both ways and see which is better.
63 *
64 * @author Jose Fonseca <jfonseca@vmware.com>
65 */
66
67 #include "pipe/p_state.h"
68 #include "util/u_format.h"
69
70 #include "lp_bld_type.h"
71 #include "lp_bld_arit.h"
72 #include "lp_bld_const.h"
73 #include "lp_bld_logic.h"
74 #include "lp_bld_flow.h"
75 #include "lp_bld_debug.h"
76 #include "lp_bld_depth.h"
77 #include "lp_bld_swizzle.h"
78
79
/**
 * Names the three stencil operator fields of pipe_stencil_state, so a
 * caller can say which one (fail_op, zfail_op or zpass_op) to apply.
 */
enum stencil_op {
   S_FAIL_OP = 0,   /**< selects pipe_stencil_state::fail_op */
   Z_FAIL_OP = 1,   /**< selects pipe_stencil_state::zfail_op */
   Z_PASS_OP = 2    /**< selects pipe_stencil_state::zpass_op */
};
86
87
88
89 /**
90 * Do the stencil test comparison (compare FB stencil values against ref value).
91 * This will be used twice when generating two-sided stencil code.
92 * \param stencil the front/back stencil state
93 * \param stencilRef the stencil reference value, replicated as a vector
94 * \param stencilVals vector of stencil values from framebuffer
95 * \return vector mask of pass/fail values (~0 or 0)
96 */
97 static LLVMValueRef
98 lp_build_stencil_test_single(struct lp_build_context *bld,
99 const struct pipe_stencil_state *stencil,
100 LLVMValueRef stencilRef,
101 LLVMValueRef stencilVals)
102 {
103 const unsigned stencilMax = 255; /* XXX fix */
104 struct lp_type type = bld->type;
105 LLVMValueRef res;
106
107 assert(stencil->enabled);
108
109 if (stencil->valuemask != stencilMax) {
110 /* compute stencilRef = stencilRef & valuemask */
111 LLVMValueRef valuemask = lp_build_const_int_vec(type, stencil->valuemask);
112 stencilRef = LLVMBuildAnd(bld->builder, stencilRef, valuemask, "");
113 /* compute stencilVals = stencilVals & valuemask */
114 stencilVals = LLVMBuildAnd(bld->builder, stencilVals, valuemask, "");
115 }
116
117 res = lp_build_cmp(bld, stencil->func, stencilVals, stencilRef);
118
119 return res;
120 }
121
122
123 /**
124 * Do the one or two-sided stencil test comparison.
125 * \sa lp_build_stencil_test_single
126 * \param face an integer indicating front (+) or back (-) facing polygon.
127 * If NULL, assume front-facing.
128 */
129 static LLVMValueRef
130 lp_build_stencil_test(struct lp_build_context *bld,
131 const struct pipe_stencil_state stencil[2],
132 LLVMValueRef stencilRefs[2],
133 LLVMValueRef stencilVals,
134 LLVMValueRef face)
135 {
136 LLVMValueRef res;
137
138 assert(stencil[0].enabled);
139
140 if (stencil[1].enabled && face) {
141 /* do two-sided test */
142 struct lp_build_flow_context *flow_ctx;
143 struct lp_build_if_state if_ctx;
144 LLVMValueRef front_facing;
145 LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
146 LLVMValueRef result = NULL;
147
148 flow_ctx = lp_build_flow_create(bld->builder);
149 lp_build_flow_scope_begin(flow_ctx);
150
151 lp_build_flow_scope_declare(flow_ctx, &result);
152
153 /* front_facing = face > 0.0 */
154 front_facing = lp_build_cmp(bld, PIPE_FUNC_GREATER, face, zero);
155
156 lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing);
157 {
158 result = lp_build_stencil_test_single(bld, &stencil[0],
159 stencilRefs[0], stencilVals);
160 }
161 lp_build_else(&if_ctx);
162 {
163 result = lp_build_stencil_test_single(bld, &stencil[1],
164 stencilRefs[1], stencilVals);
165 }
166 lp_build_endif(&if_ctx);
167
168 lp_build_flow_scope_end(flow_ctx);
169 lp_build_flow_destroy(flow_ctx);
170
171 res = result;
172 }
173 else {
174 /* do single-side test */
175 res = lp_build_stencil_test_single(bld, &stencil[0],
176 stencilRefs[0], stencilVals);
177 }
178
179 return res;
180 }
181
182
183 /**
184 * Apply the stencil operator (add/sub/keep/etc) to the given vector
185 * of stencil values.
186 * \return new stencil values vector
187 */
188 static LLVMValueRef
189 lp_build_stencil_op_single(struct lp_build_context *bld,
190 const struct pipe_stencil_state *stencil,
191 enum stencil_op op,
192 LLVMValueRef stencilRef,
193 LLVMValueRef stencilVals,
194 LLVMValueRef mask)
195
196 {
197 const unsigned stencilMax = 255; /* XXX fix */
198 struct lp_type type = bld->type;
199 LLVMValueRef res;
200 LLVMValueRef max = lp_build_const_int_vec(type, stencilMax);
201 unsigned stencil_op;
202
203 switch (op) {
204 case S_FAIL_OP:
205 stencil_op = stencil->fail_op;
206 break;
207 case Z_FAIL_OP:
208 stencil_op = stencil->zfail_op;
209 break;
210 case Z_PASS_OP:
211 stencil_op = stencil->zpass_op;
212 break;
213 default:
214 assert(0 && "Invalid stencil_op mode");
215 stencil_op = PIPE_STENCIL_OP_KEEP;
216 }
217
218 switch (stencil_op) {
219 case PIPE_STENCIL_OP_KEEP:
220 res = stencilVals;
221 /* we can return early for this case */
222 return res;
223 case PIPE_STENCIL_OP_ZERO:
224 res = bld->zero;
225 break;
226 case PIPE_STENCIL_OP_REPLACE:
227 res = stencilRef;
228 break;
229 case PIPE_STENCIL_OP_INCR:
230 res = lp_build_add(bld, stencilVals, bld->one);
231 res = lp_build_min(bld, res, max);
232 break;
233 case PIPE_STENCIL_OP_DECR:
234 res = lp_build_sub(bld, stencilVals, bld->one);
235 res = lp_build_max(bld, res, bld->zero);
236 break;
237 case PIPE_STENCIL_OP_INCR_WRAP:
238 res = lp_build_add(bld, stencilVals, bld->one);
239 res = LLVMBuildAnd(bld->builder, res, max, "");
240 break;
241 case PIPE_STENCIL_OP_DECR_WRAP:
242 res = lp_build_sub(bld, stencilVals, bld->one);
243 res = LLVMBuildAnd(bld->builder, res, max, "");
244 break;
245 case PIPE_STENCIL_OP_INVERT:
246 res = LLVMBuildNot(bld->builder, stencilVals, "");
247 break;
248 default:
249 assert(0 && "bad stencil op mode");
250 res = NULL;
251 }
252
253 if (stencil->writemask != stencilMax) {
254 /* compute res = (res & mask) | (stencilVals & ~mask) */
255 LLVMValueRef mask = lp_build_const_int_vec(type, stencil->writemask);
256 LLVMValueRef cmask = LLVMBuildNot(bld->builder, mask, "notWritemask");
257 LLVMValueRef t1 = LLVMBuildAnd(bld->builder, res, mask, "t1");
258 LLVMValueRef t2 = LLVMBuildAnd(bld->builder, stencilVals, cmask, "t2");
259 res = LLVMBuildOr(bld->builder, t1, t2, "t1_or_t2");
260 }
261
262 /* only the update the vector elements enabled by 'mask' */
263 res = lp_build_select(bld, mask, res, stencilVals);
264
265 return res;
266 }
267
268
269 /**
270 * Do the one or two-sided stencil test op/update.
271 */
272 static LLVMValueRef
273 lp_build_stencil_op(struct lp_build_context *bld,
274 const struct pipe_stencil_state stencil[2],
275 enum stencil_op op,
276 LLVMValueRef stencilRefs[2],
277 LLVMValueRef stencilVals,
278 LLVMValueRef mask,
279 LLVMValueRef face)
280
281 {
282 assert(stencil[0].enabled);
283
284 if (stencil[1].enabled && face) {
285 /* do two-sided op */
286 struct lp_build_flow_context *flow_ctx;
287 struct lp_build_if_state if_ctx;
288 LLVMValueRef front_facing;
289 LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
290 LLVMValueRef result = NULL;
291
292 flow_ctx = lp_build_flow_create(bld->builder);
293 lp_build_flow_scope_begin(flow_ctx);
294
295 lp_build_flow_scope_declare(flow_ctx, &result);
296
297 /* front_facing = face > 0.0 */
298 front_facing = lp_build_cmp(bld, PIPE_FUNC_GREATER, face, zero);
299
300 lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing);
301 {
302 result = lp_build_stencil_op_single(bld, &stencil[0], op,
303 stencilRefs[0], stencilVals, mask);
304 }
305 lp_build_else(&if_ctx);
306 {
307 result = lp_build_stencil_op_single(bld, &stencil[1], op,
308 stencilRefs[1], stencilVals, mask);
309 }
310 lp_build_endif(&if_ctx);
311
312 lp_build_flow_scope_end(flow_ctx);
313 lp_build_flow_destroy(flow_ctx);
314
315 return result;
316 }
317 else {
318 /* do single-sided op */
319 return lp_build_stencil_op_single(bld, &stencil[0], op,
320 stencilRefs[0], stencilVals, mask);
321 }
322 }
323
324
325
326 /**
327 * Return a type appropriate for depth/stencil testing.
328 */
329 struct lp_type
330 lp_depth_type(const struct util_format_description *format_desc,
331 unsigned length)
332 {
333 struct lp_type type;
334 unsigned swizzle;
335
336 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
337 assert(format_desc->block.width == 1);
338 assert(format_desc->block.height == 1);
339
340 swizzle = format_desc->swizzle[0];
341 assert(swizzle < 4);
342
343 memset(&type, 0, sizeof type);
344 type.width = format_desc->block.bits;
345
346 if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
347 type.floating = TRUE;
348 assert(swizzle == 0);
349 assert(format_desc->channel[swizzle].size == format_desc->block.bits);
350 }
351 else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
352 assert(format_desc->block.bits <= 32);
353 if(format_desc->channel[swizzle].normalized)
354 type.norm = TRUE;
355 }
356 else
357 assert(0);
358
359 assert(type.width <= length);
360 type.length = length / type.width;
361
362 return type;
363 }
364
365
366 /**
367 * Generate code for performing depth and/or stencil tests.
368 * We operate on a vector of values (typically a 2x2 quad).
369 *
370 * \param type the data type of the fragment depth/stencil values
371 * \param format_desc description of the depth/stencil surface
372 * \param mask the alive/dead pixel mask for the quad
373 * \param src the incoming depth/stencil values (a 2x2 quad)
374 * \param dst_ptr the outgoing/updated depth/stencil values
375 */
376 void
377 lp_build_depth_stencil_test(LLVMBuilderRef builder,
378 const struct pipe_depth_state *depth,
379 const struct pipe_stencil_state stencil[2],
380 struct lp_type type,
381 const struct util_format_description *format_desc,
382 struct lp_build_mask_context *mask,
383 LLVMValueRef stencil_refs[2],
384 LLVMValueRef z_src,
385 LLVMValueRef zs_dst_ptr)
386 {
387 struct lp_build_context bld;
388 unsigned z_swizzle, s_swizzle;
389 LLVMValueRef zs_dst, z_dst = NULL;
390 LLVMValueRef stencil_vals = NULL;
391 LLVMValueRef z_bitmask = NULL, s_bitmask = NULL;
392 LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
393 LLVMValueRef orig_mask = mask->value;
394 LLVMValueRef face = NULL;
395
396 assert(depth->enabled || stencil[0].enabled);
397
398 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
399 assert(format_desc->block.width == 1);
400 assert(format_desc->block.height == 1);
401
402 z_swizzle = format_desc->swizzle[0];
403 s_swizzle = format_desc->swizzle[1];
404
405 assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
406 s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
407
408 /* Sanity checking */
409 assert(z_swizzle < 4);
410 assert(format_desc->block.bits == type.width);
411 if(type.floating) {
412 assert(z_swizzle == 0);
413 assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT);
414 assert(format_desc->channel[z_swizzle].size == format_desc->block.bits);
415 }
416 else {
417 assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
418 assert(format_desc->channel[z_swizzle].normalized);
419 assert(!type.fixed);
420 assert(!type.sign);
421 assert(type.norm);
422 }
423
424 /* Setup build context */
425 lp_build_context_init(&bld, builder, type);
426
427 /* Load current z/stencil value from z/stencil buffer */
428 zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
429
430 lp_build_name(zs_dst, "zsbufval");
431
432 /* Align the source depth bits with the destination's, and mask out any
433 * stencil or padding bits from both */
434 if(format_desc->channel[z_swizzle].size == format_desc->block.bits) {
435 assert(z_swizzle == 0);
436 z_dst = zs_dst;
437 }
438 else {
439 /* shift/mask bits to right-justify the Z bits */
440 unsigned padding_left;
441 unsigned padding_right;
442 unsigned chan;
443
444 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
445 assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
446 assert(format_desc->channel[z_swizzle].size <= format_desc->block.bits);
447 assert(format_desc->channel[z_swizzle].normalized);
448
449 padding_right = 0;
450 for(chan = 0; chan < z_swizzle; ++chan)
451 padding_right += format_desc->channel[chan].size;
452 padding_left = format_desc->block.bits -
453 (padding_right + format_desc->channel[z_swizzle].size);
454
455 if(padding_left || padding_right) {
456 const unsigned long long mask_left = (1ULL << (format_desc->block.bits - padding_left)) - 1;
457 const unsigned long long mask_right = (1ULL << (padding_right)) - 1;
458 z_bitmask = lp_build_const_int_vec(type, mask_left ^ mask_right);
459 }
460
461 s_bitmask = LLVMBuildNot(builder, z_bitmask, "");
462
463 stencil_vals = LLVMBuildAnd(builder, zs_dst, s_bitmask, "");
464
465 if(padding_left)
466 z_src = LLVMBuildLShr(builder, z_src,
467 lp_build_const_int_vec(type, padding_left), "");
468 if(padding_right)
469 z_src = LLVMBuildAnd(builder, z_src, z_bitmask, "");
470 if(padding_left || padding_right)
471 z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "");
472 else
473 z_dst = zs_dst;
474 }
475
476 lp_build_name(z_dst, "zsbuf.z");
477
478 /*
479 printf("build depth %d stencil %d\n",
480 depth->enabled,
481 stencil[0].enabled);
482 */
483
484 if (stencil[0].enabled) {
485 /* convert scalar stencil refs into vectors */
486 stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]);
487 stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]);
488
489
490 s_pass_mask = lp_build_stencil_test(&bld, stencil,
491 stencil_refs, stencil_vals, face);
492
493 /* apply stencil-fail operator */
494 {
495 LLVMValueRef s_fail_mask = lp_build_andc(&bld, orig_mask, s_pass_mask);
496 stencil_vals = lp_build_stencil_op(&bld, stencil, S_FAIL_OP,
497 stencil_refs, stencil_vals,
498 s_fail_mask, face);
499 }
500 }
501
502 if (depth->enabled) {
503 /* compare src Z to dst Z, returning 'pass' mask */
504 z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst);
505
506 if (!stencil[0].enabled) {
507 /* We can potentially skip all remaining operations here, but only
508 * if stencil is disabled because we still need to update the stencil
509 * buffer values. Don't need to update Z buffer values.
510 */
511 lp_build_mask_update(mask, z_pass);
512 }
513
514 if (depth->writemask) {
515 if(z_bitmask)
516 z_bitmask = LLVMBuildAnd(builder, mask->value, z_bitmask, "");
517 else
518 z_bitmask = mask->value;
519
520 z_dst = lp_build_select(&bld, z_bitmask, z_src, z_dst);
521 }
522
523 if (stencil[0].enabled) {
524 /* update stencil buffer values according to z pass/fail result */
525 LLVMValueRef z_fail_mask, z_pass_mask;
526
527 /* apply Z-fail operator */
528 z_fail_mask = lp_build_andc(&bld, orig_mask, z_pass);
529 stencil_vals = lp_build_stencil_op(&bld, stencil, Z_FAIL_OP,
530 stencil_refs, stencil_vals,
531 z_fail_mask, face);
532
533 /* apply Z-pass operator */
534 z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, "");
535 stencil_vals = lp_build_stencil_op(&bld, stencil, Z_PASS_OP,
536 stencil_refs, stencil_vals,
537 z_pass_mask, face);
538 }
539 }
540 else {
541 /* No depth test: apply Z-pass operator to stencil buffer values which
542 * passed the stencil test.
543 */
544 s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, "");
545 stencil_vals = lp_build_stencil_op(&bld, stencil, Z_PASS_OP, stencil_refs,
546 stencil_vals, s_pass_mask, face);
547 }
548
549 /* Finally, merge/store the z/stencil values */
550 if ((depth->enabled && depth->writemask) ||
551 (stencil[0].enabled && stencil[0].writemask)) {
552
553 if (z_dst && stencil_vals)
554 zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, "");
555 else if (z_dst)
556 zs_dst = z_dst;
557 else
558 zs_dst = stencil_vals;
559
560 LLVMBuildStore(builder, zs_dst, zs_dst_ptr);
561 }
562
563 if (s_pass_mask)
564 lp_build_mask_update(mask, s_pass_mask);
565
566 if (depth->enabled && stencil[0].enabled)
567 lp_build_mask_update(mask, z_pass);
568 }