gallivm: fix broken INCR/DECR stencil modes
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_depth.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Depth/stencil testing to LLVM IR translation.
31 *
32 * To be done accurately/efficiently the depth/stencil test must be done with
33 * the same type/format of the depth/stencil buffer, which implies massaging
34 * the incoming depths to fit into place. Using a more straightforward
35 * type/format for depth/stencil values internally and only convert when
36 * flushing would avoid this, but it would most likely result in depth fighting
37 * artifacts.
38 *
39 * We are free to use a different pixel layout though. Since our basic
40 * processing unit is a quad (2x2 pixel block) we store the depth/stencil
41 * values tiled, a quad at a time. That is, a depth buffer containing
42 *
43 * Z11 Z12 Z13 Z14 ...
44 * Z21 Z22 Z23 Z24 ...
45 * Z31 Z32 Z33 Z34 ...
46 * Z41 Z42 Z43 Z44 ...
47 * ... ... ... ... ...
48 *
49 * will actually be stored in memory as
50 *
51 * Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
52 * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
53 * ... ... ... ... ... ... ... ... ...
54 *
55 *
56 * Stencil test:
57 * Two-sided stencil test is supported but probably not as efficient as
58 * it could be. Currently, we use if/then/else constructs to do the
59 * operations for front vs. back-facing polygons. We could probably do
60 * both the front and back arithmetic then use a Select() instruction to
61 * choose the result depending on polygon orientation. We'd have to
62 * measure performance both ways and see which is better.
63 *
64 * @author Jose Fonseca <jfonseca@vmware.com>
65 */
66
67 #include "pipe/p_state.h"
68 #include "util/u_format.h"
69
70 #include "lp_bld_type.h"
71 #include "lp_bld_arit.h"
72 #include "lp_bld_const.h"
73 #include "lp_bld_logic.h"
74 #include "lp_bld_flow.h"
75 #include "lp_bld_debug.h"
76 #include "lp_bld_depth.h"
77 #include "lp_bld_swizzle.h"
78
79
/**
 * Names the operator field of pipe_stencil_state that a stencil update
 * should use.
 */
enum stencil_op {
   S_FAIL_OP = 0,   /**< use pipe_stencil_state::fail_op */
   Z_FAIL_OP = 1,   /**< use pipe_stencil_state::zfail_op */
   Z_PASS_OP = 2    /**< use pipe_stencil_state::zpass_op */
};
86
87
88
89 /**
90 * Do the stencil test comparison (compare FB stencil values against ref value).
91 * This will be used twice when generating two-sided stencil code.
92 * \param stencil the front/back stencil state
93 * \param stencilRef the stencil reference value, replicated as a vector
94 * \param stencilVals vector of stencil values from framebuffer
95 * \return vector mask of pass/fail values (~0 or 0)
96 */
97 static LLVMValueRef
98 lp_build_stencil_test_single(struct lp_build_context *bld,
99 const struct pipe_stencil_state *stencil,
100 LLVMValueRef stencilRef,
101 LLVMValueRef stencilVals)
102 {
103 const unsigned stencilMax = 255; /* XXX fix */
104 struct lp_type type = bld->type;
105 LLVMValueRef res;
106
107 assert(type.sign);
108
109 assert(stencil->enabled);
110
111 if (stencil->valuemask != stencilMax) {
112 /* compute stencilRef = stencilRef & valuemask */
113 LLVMValueRef valuemask = lp_build_const_int_vec(type, stencil->valuemask);
114 stencilRef = LLVMBuildAnd(bld->builder, stencilRef, valuemask, "");
115 /* compute stencilVals = stencilVals & valuemask */
116 stencilVals = LLVMBuildAnd(bld->builder, stencilVals, valuemask, "");
117 }
118
119 res = lp_build_cmp(bld, stencil->func, stencilVals, stencilRef);
120
121 return res;
122 }
123
124
125 /**
126 * Do the one or two-sided stencil test comparison.
127 * \sa lp_build_stencil_test_single
128 * \param face an integer indicating front (+) or back (-) facing polygon.
129 * If NULL, assume front-facing.
130 */
131 static LLVMValueRef
132 lp_build_stencil_test(struct lp_build_context *bld,
133 const struct pipe_stencil_state stencil[2],
134 LLVMValueRef stencilRefs[2],
135 LLVMValueRef stencilVals,
136 LLVMValueRef face)
137 {
138 LLVMValueRef res;
139
140 assert(stencil[0].enabled);
141
142 if (stencil[1].enabled && face) {
143 /* do two-sided test */
144 struct lp_build_flow_context *flow_ctx;
145 struct lp_build_if_state if_ctx;
146 LLVMValueRef front_facing;
147 LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
148 LLVMValueRef result = bld->undef;
149
150 flow_ctx = lp_build_flow_create(bld->builder);
151 lp_build_flow_scope_begin(flow_ctx);
152
153 lp_build_flow_scope_declare(flow_ctx, &result);
154
155 /* front_facing = face > 0.0 */
156 front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, "");
157
158 lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing);
159 {
160 result = lp_build_stencil_test_single(bld, &stencil[0],
161 stencilRefs[0], stencilVals);
162 }
163 lp_build_else(&if_ctx);
164 {
165 result = lp_build_stencil_test_single(bld, &stencil[1],
166 stencilRefs[1], stencilVals);
167 }
168 lp_build_endif(&if_ctx);
169
170 lp_build_flow_scope_end(flow_ctx);
171 lp_build_flow_destroy(flow_ctx);
172
173 res = result;
174 }
175 else {
176 /* do single-side test */
177 res = lp_build_stencil_test_single(bld, &stencil[0],
178 stencilRefs[0], stencilVals);
179 }
180
181 return res;
182 }
183
184
185 /**
186 * Apply the stencil operator (add/sub/keep/etc) to the given vector
187 * of stencil values.
188 * \return new stencil values vector
189 */
190 static LLVMValueRef
191 lp_build_stencil_op_single(struct lp_build_context *bld,
192 const struct pipe_stencil_state *stencil,
193 enum stencil_op op,
194 LLVMValueRef stencilRef,
195 LLVMValueRef stencilVals,
196 LLVMValueRef mask)
197
198 {
199 const unsigned stencilMax = 255; /* XXX fix */
200 struct lp_type type = bld->type;
201 LLVMValueRef res;
202 LLVMValueRef max = lp_build_const_int_vec(type, stencilMax);
203 unsigned stencil_op;
204
205 assert(type.sign);
206
207 switch (op) {
208 case S_FAIL_OP:
209 stencil_op = stencil->fail_op;
210 break;
211 case Z_FAIL_OP:
212 stencil_op = stencil->zfail_op;
213 break;
214 case Z_PASS_OP:
215 stencil_op = stencil->zpass_op;
216 break;
217 default:
218 assert(0 && "Invalid stencil_op mode");
219 stencil_op = PIPE_STENCIL_OP_KEEP;
220 }
221
222 switch (stencil_op) {
223 case PIPE_STENCIL_OP_KEEP:
224 res = stencilVals;
225 /* we can return early for this case */
226 return res;
227 case PIPE_STENCIL_OP_ZERO:
228 res = bld->zero;
229 break;
230 case PIPE_STENCIL_OP_REPLACE:
231 res = stencilRef;
232 break;
233 case PIPE_STENCIL_OP_INCR:
234 res = lp_build_add(bld, stencilVals, bld->one);
235 res = lp_build_min(bld, res, max);
236 break;
237 case PIPE_STENCIL_OP_DECR:
238 res = lp_build_sub(bld, stencilVals, bld->one);
239 res = lp_build_max(bld, res, bld->zero);
240 break;
241 case PIPE_STENCIL_OP_INCR_WRAP:
242 res = lp_build_add(bld, stencilVals, bld->one);
243 res = LLVMBuildAnd(bld->builder, res, max, "");
244 break;
245 case PIPE_STENCIL_OP_DECR_WRAP:
246 res = lp_build_sub(bld, stencilVals, bld->one);
247 res = LLVMBuildAnd(bld->builder, res, max, "");
248 break;
249 case PIPE_STENCIL_OP_INVERT:
250 res = LLVMBuildNot(bld->builder, stencilVals, "");
251 res = LLVMBuildAnd(bld->builder, res, max, "");
252 break;
253 default:
254 assert(0 && "bad stencil op mode");
255 res = NULL;
256 }
257
258 if (stencil->writemask != stencilMax) {
259 /* compute res = (res & mask) | (stencilVals & ~mask) */
260 LLVMValueRef mask = lp_build_const_int_vec(type, stencil->writemask);
261 LLVMValueRef cmask = LLVMBuildNot(bld->builder, mask, "notWritemask");
262 LLVMValueRef t1 = LLVMBuildAnd(bld->builder, res, mask, "t1");
263 LLVMValueRef t2 = LLVMBuildAnd(bld->builder, stencilVals, cmask, "t2");
264 res = LLVMBuildOr(bld->builder, t1, t2, "t1_or_t2");
265 }
266
267 /* only the update the vector elements enabled by 'mask' */
268 res = lp_build_select(bld, mask, res, stencilVals);
269
270 return res;
271 }
272
273
274 /**
275 * Do the one or two-sided stencil test op/update.
276 */
277 static LLVMValueRef
278 lp_build_stencil_op(struct lp_build_context *bld,
279 const struct pipe_stencil_state stencil[2],
280 enum stencil_op op,
281 LLVMValueRef stencilRefs[2],
282 LLVMValueRef stencilVals,
283 LLVMValueRef mask,
284 LLVMValueRef face)
285
286 {
287 assert(stencil[0].enabled);
288
289 if (stencil[1].enabled && face) {
290 /* do two-sided op */
291 struct lp_build_flow_context *flow_ctx;
292 struct lp_build_if_state if_ctx;
293 LLVMValueRef front_facing;
294 LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
295 LLVMValueRef result = bld->undef;
296
297 flow_ctx = lp_build_flow_create(bld->builder);
298 lp_build_flow_scope_begin(flow_ctx);
299
300 lp_build_flow_scope_declare(flow_ctx, &result);
301
302 /* front_facing = face > 0.0 */
303 front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, "");
304
305 lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing);
306 {
307 result = lp_build_stencil_op_single(bld, &stencil[0], op,
308 stencilRefs[0], stencilVals, mask);
309 }
310 lp_build_else(&if_ctx);
311 {
312 result = lp_build_stencil_op_single(bld, &stencil[1], op,
313 stencilRefs[1], stencilVals, mask);
314 }
315 lp_build_endif(&if_ctx);
316
317 lp_build_flow_scope_end(flow_ctx);
318 lp_build_flow_destroy(flow_ctx);
319
320 return result;
321 }
322 else {
323 /* do single-sided op */
324 return lp_build_stencil_op_single(bld, &stencil[0], op,
325 stencilRefs[0], stencilVals, mask);
326 }
327 }
328
329
330
331 /**
332 * Return a type appropriate for depth/stencil testing.
333 */
334 struct lp_type
335 lp_depth_type(const struct util_format_description *format_desc,
336 unsigned length)
337 {
338 struct lp_type type;
339 unsigned swizzle;
340
341 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
342 assert(format_desc->block.width == 1);
343 assert(format_desc->block.height == 1);
344
345 swizzle = format_desc->swizzle[0];
346 assert(swizzle < 4);
347
348 memset(&type, 0, sizeof type);
349 type.width = format_desc->block.bits;
350
351 if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
352 type.floating = TRUE;
353 assert(swizzle == 0);
354 assert(format_desc->channel[swizzle].size == format_desc->block.bits);
355 }
356 else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
357 assert(format_desc->block.bits <= 32);
358 if(format_desc->channel[swizzle].normalized)
359 type.norm = TRUE;
360 }
361 else
362 assert(0);
363
364 assert(type.width <= length);
365 type.length = length / type.width;
366
367 return type;
368 }
369
370
371 /**
372 * Generate code for performing depth and/or stencil tests.
373 * We operate on a vector of values (typically a 2x2 quad).
374 *
375 * \param depth the depth test state
376 * \param stencil the front/back stencil state
377 * \param type the data type of the fragment depth/stencil values
378 * \param format_desc description of the depth/stencil surface
379 * \param mask the alive/dead pixel mask for the quad (vector)
380 * \param stencil_refs the front/back stencil ref values (scalar)
381 * \param z_src the incoming depth/stencil values (a 2x2 quad)
382 * \param zs_dst_ptr pointer to depth/stencil values in framebuffer
383 * \param facing contains float value indicating front/back facing polygon
384 */
385 void
386 lp_build_depth_stencil_test(LLVMBuilderRef builder,
387 const struct pipe_depth_state *depth,
388 const struct pipe_stencil_state stencil[2],
389 struct lp_type type,
390 const struct util_format_description *format_desc,
391 struct lp_build_mask_context *mask,
392 LLVMValueRef stencil_refs[2],
393 LLVMValueRef z_src,
394 LLVMValueRef zs_dst_ptr,
395 LLVMValueRef face)
396 {
397 struct lp_build_context bld;
398 struct lp_build_context sbld;
399 struct lp_type s_type;
400 unsigned z_swizzle, s_swizzle;
401 LLVMValueRef zs_dst, z_dst = NULL;
402 LLVMValueRef stencil_vals = NULL;
403 LLVMValueRef z_bitmask = NULL, s_bitmask = NULL;
404 LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
405 LLVMValueRef orig_mask = mask->value;
406
407 assert(depth->enabled || stencil[0].enabled);
408
409 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
410 assert(format_desc->block.width == 1);
411 assert(format_desc->block.height == 1);
412
413 z_swizzle = format_desc->swizzle[0];
414 s_swizzle = format_desc->swizzle[1];
415
416 assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
417 s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
418
419 /* Sanity checking */
420 assert(z_swizzle < 4);
421 assert(format_desc->block.bits == type.width);
422 if(type.floating) {
423 assert(z_swizzle == 0);
424 assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT);
425 assert(format_desc->channel[z_swizzle].size == format_desc->block.bits);
426 }
427 else {
428 assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
429 assert(format_desc->channel[z_swizzle].normalized);
430 assert(!type.fixed);
431 assert(!type.sign);
432 assert(type.norm);
433 }
434
435 /* Setup build context for Z vals */
436 lp_build_context_init(&bld, builder, type);
437
438 /* Setup build context for stencil vals */
439 s_type = lp_type_int_vec(type.width);
440 lp_build_context_init(&sbld, builder, s_type);
441
442 /* Load current z/stencil value from z/stencil buffer */
443 zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
444
445 lp_build_name(zs_dst, "zsbufval");
446
447 /* Align the source depth bits with the destination's, and mask out any
448 * stencil or padding bits from both */
449 if(format_desc->channel[z_swizzle].size == format_desc->block.bits) {
450 assert(z_swizzle == 0);
451 z_dst = zs_dst;
452 }
453 else {
454 /* shift/mask bits to right-justify the Z bits */
455 unsigned padding_left;
456 unsigned padding_right;
457 unsigned chan;
458
459 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
460 assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
461 assert(format_desc->channel[z_swizzle].size <= format_desc->block.bits);
462 assert(format_desc->channel[z_swizzle].normalized);
463
464 padding_right = 0;
465 for(chan = 0; chan < z_swizzle; ++chan)
466 padding_right += format_desc->channel[chan].size;
467 padding_left = format_desc->block.bits -
468 (padding_right + format_desc->channel[z_swizzle].size);
469
470 if(padding_left || padding_right) {
471 const unsigned long long mask_left = (1ULL << (format_desc->block.bits - padding_left)) - 1;
472 const unsigned long long mask_right = (1ULL << (padding_right)) - 1;
473 z_bitmask = lp_build_const_int_vec(type, mask_left ^ mask_right);
474 }
475
476 s_bitmask = LLVMBuildNot(builder, z_bitmask, "");
477
478 stencil_vals = LLVMBuildAnd(builder, zs_dst, s_bitmask, "");
479
480 if(padding_left)
481 z_src = LLVMBuildLShr(builder, z_src,
482 lp_build_const_int_vec(type, padding_left), "");
483 if(padding_right)
484 z_src = LLVMBuildAnd(builder, z_src, z_bitmask, "");
485 if(padding_left || padding_right)
486 z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "");
487 else
488 z_dst = zs_dst;
489 }
490
491 lp_build_name(z_dst, "zsbuf.z");
492
493 /*
494 printf("build depth %d stencil %d\n",
495 depth->enabled,
496 stencil[0].enabled);
497 */
498
499 if (stencil[0].enabled) {
500 /* convert scalar stencil refs into vectors */
501 stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]);
502 stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]);
503
504
505 s_pass_mask = lp_build_stencil_test(&sbld, stencil,
506 stencil_refs, stencil_vals, face);
507
508 /* apply stencil-fail operator */
509 {
510 LLVMValueRef s_fail_mask = lp_build_andc(&bld, orig_mask, s_pass_mask);
511 stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP,
512 stencil_refs, stencil_vals,
513 s_fail_mask, face);
514 }
515 }
516
517 if (depth->enabled) {
518 /* compare src Z to dst Z, returning 'pass' mask */
519 z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst);
520
521 if (!stencil[0].enabled) {
522 /* We can potentially skip all remaining operations here, but only
523 * if stencil is disabled because we still need to update the stencil
524 * buffer values. Don't need to update Z buffer values.
525 */
526 lp_build_mask_update(mask, z_pass);
527 }
528
529 if (depth->writemask) {
530 if(z_bitmask)
531 z_bitmask = LLVMBuildAnd(builder, mask->value, z_bitmask, "");
532 else
533 z_bitmask = mask->value;
534
535 z_dst = lp_build_select(&bld, z_bitmask, z_src, z_dst);
536 }
537
538 if (stencil[0].enabled) {
539 /* update stencil buffer values according to z pass/fail result */
540 LLVMValueRef z_fail_mask, z_pass_mask;
541
542 /* apply Z-fail operator */
543 z_fail_mask = lp_build_andc(&bld, orig_mask, z_pass);
544 stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP,
545 stencil_refs, stencil_vals,
546 z_fail_mask, face);
547
548 /* apply Z-pass operator */
549 z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, "");
550 stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
551 stencil_refs, stencil_vals,
552 z_pass_mask, face);
553 }
554 }
555 else {
556 /* No depth test: apply Z-pass operator to stencil buffer values which
557 * passed the stencil test.
558 */
559 s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, "");
560 stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, stencil_refs,
561 stencil_vals, s_pass_mask, face);
562 }
563
564 /* Finally, merge/store the z/stencil values */
565 if ((depth->enabled && depth->writemask) ||
566 (stencil[0].enabled && stencil[0].writemask)) {
567
568 if (z_dst && stencil_vals)
569 zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, "");
570 else if (z_dst)
571 zs_dst = z_dst;
572 else
573 zs_dst = stencil_vals;
574
575 LLVMBuildStore(builder, zs_dst, zs_dst_ptr);
576 }
577
578 if (s_pass_mask)
579 lp_build_mask_update(mask, s_pass_mask);
580
581 if (depth->enabled && stencil[0].enabled)
582 lp_build_mask_update(mask, z_pass);
583 }