1 /**************************************************************************
3 * Copyright 2009-2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Depth/stencil testing to LLVM IR translation.
32 * To be done accurately/efficiently the depth/stencil test must be done with
33 * the same type/format of the depth/stencil buffer, which implies massaging
34 * the incoming depths to fit into place. Using a more straightforward
35 * type/format for depth/stencil values internally and only convert when
36 flushing would avoid this, but it would most likely result in depth fighting artifacts.
39 * We are free to use a different pixel layout though. Since our basic
40 * processing unit is a quad (2x2 pixel block) we store the depth/stencil
41 * values tiled, a quad at time. That is, a depth buffer containing
49 * will actually be stored in memory as
51 * Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
52 * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
53 * ... ... ... ... ... ... ... ... ...
56 * @author Jose Fonseca <jfonseca@vmware.com>
57 * @author Brian Paul <brianp@vmware.com>
60 #include "pipe/p_state.h"
61 #include "util/u_format.h"
62 #include "util/u_cpu_detect.h"
64 #include "gallivm/lp_bld_type.h"
65 #include "gallivm/lp_bld_arit.h"
66 #include "gallivm/lp_bld_bitarit.h"
67 #include "gallivm/lp_bld_const.h"
68 #include "gallivm/lp_bld_conv.h"
69 #include "gallivm/lp_bld_logic.h"
70 #include "gallivm/lp_bld_flow.h"
71 #include "gallivm/lp_bld_intr.h"
72 #include "gallivm/lp_bld_debug.h"
73 #include "gallivm/lp_bld_swizzle.h"
75 #include "lp_bld_depth.h"
78 /** Used to select fields from pipe_stencil_state */
88 * Do the stencil test comparison (compare FB stencil values against ref value).
89 * This will be used twice when generating two-sided stencil code.
90 * \param stencil the front/back stencil state
91 * \param stencilRef the stencil reference value, replicated as a vector
92 * \param stencilVals vector of stencil values from framebuffer
93 * \return vector mask of pass/fail values (~0 or 0)
/**
 * Emit the stencil comparison for one face.
 *
 * Masks both the replicated reference value and the framebuffer stencil
 * values with stencil->valuemask (skipped when the mask already covers the
 * full 0..255 range) and then compares them with stencil->func via
 * lp_build_cmp(), producing a per-element pass/fail vector mask.
 *
 * NOTE(review): several original lines are elided in this view of the file
 * (the function body braces, the declaration of 'res', the signed/unsigned
 * type adjustment hinted at by the SSE2 comment, and the final return).
 */
96 lp_build_stencil_test_single(struct lp_build_context
*bld
,
97 const struct pipe_stencil_state
*stencil
,
98 LLVMValueRef stencilRef
,
99 LLVMValueRef stencilVals
)
101 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
/* Hard-coded 8-bit stencil maximum; the XXX marks it as a known limitation. */
102 const unsigned stencilMax
= 255; /* XXX fix */
103 struct lp_type type
= bld
->type
;
107 * SSE2 has intrinsics for signed comparisons, but not unsigned ones. Values
108 * are between 0..255 so ensure we generate the fastest comparisons for
111 if (type
.width
<= 8) {
117 assert(stencil
->enabled
);
/* Only apply the valuemask when it does not already select every bit. */
119 if (stencil
->valuemask
!= stencilMax
) {
120 /* compute stencilRef = stencilRef & valuemask */
121 LLVMValueRef valuemask
= lp_build_const_int_vec(bld
->gallivm
, type
, stencil
->valuemask
);
122 stencilRef
= LLVMBuildAnd(builder
, stencilRef
, valuemask
, "");
123 /* compute stencilVals = stencilVals & valuemask */
124 stencilVals
= LLVMBuildAnd(builder
, stencilVals
, valuemask
, "");
/* Vector comparison: res[i] = stencil->func(stencilRef[i], stencilVals[i]). */
127 res
= lp_build_cmp(bld
, stencil
->func
, stencilRef
, stencilVals
);
134 * Do the one or two-sided stencil test comparison.
135 * \sa lp_build_stencil_test_single
136 * \param front_facing an integer vector mask, indicating front (~0) or back
137 * (0) facing polygon. If NULL, assume front-facing.
/**
 * One- or two-sided stencil test.
 *
 * Always runs the front-face comparison; when back-face stencil is enabled
 * and a front_facing mask was supplied, also runs the back-face comparison
 * and selects per element between the two results using front_facing.
 *
 * NOTE(review): the opening/closing braces and the declaration/return of
 * 'res' are elided in this view of the file.
 */
140 lp_build_stencil_test(struct lp_build_context
*bld
,
141 const struct pipe_stencil_state stencil
[2],
142 LLVMValueRef stencilRefs
[2],
143 LLVMValueRef stencilVals
,
144 LLVMValueRef front_facing
)
148 assert(stencil
[0].enabled
);
150 /* do front face test */
151 res
= lp_build_stencil_test_single(bld
, &stencil
[0],
152 stencilRefs
[0], stencilVals
);
154 if (stencil
[1].enabled
&& front_facing
!= NULL
) {
155 /* do back face test */
156 LLVMValueRef back_res
;
158 back_res
= lp_build_stencil_test_single(bld
, &stencil
[1],
159 stencilRefs
[1], stencilVals
);
/* res[i] = front_facing[i] ? front_result[i] : back_result[i] */
161 res
= lp_build_select(bld
, front_facing
, res
, back_res
);
169 * Apply the stencil operator (add/sub/keep/etc) to the given vector
171 * \return new stencil values vector
/**
 * Apply one stencil operator (keep/zero/replace/incr/decr/wrap/invert) for
 * one face, producing the new stencil-value vector.
 *
 * The operator is selected from stencil->fail_op / zfail_op / zpass_op
 * according to which stage (stencil-fail, Z-fail, Z-pass) is being applied.
 * Saturating increment/decrement clamp against 0xff / 0, while the _WRAP
 * variants and INVERT mask the result back into 8 bits with AND 0xff.
 *
 * NOTE(review): this view elides the 'op' parameter line, the stage switch's
 * case labels, the per-case 'break' statements, the ZERO/REPLACE case bodies,
 * and the declaration/return of 'res'.
 */
174 lp_build_stencil_op_single(struct lp_build_context
*bld
,
175 const struct pipe_stencil_state
*stencil
,
177 LLVMValueRef stencilRef
,
178 LLVMValueRef stencilVals
)
181 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
182 struct lp_type type
= bld
->type
;
/* Per-element constant 0xff: the saturation/wrap limit for 8-bit stencil. */
184 LLVMValueRef max
= lp_build_const_int_vec(bld
->gallivm
, type
, 0xff);
/* Pick the operator for the stage being applied (fail / zfail / zpass). */
191 stencil_op
= stencil
->fail_op
;
194 stencil_op
= stencil
->zfail_op
;
197 stencil_op
= stencil
->zpass_op
;
200 assert(0 && "Invalid stencil_op mode");
201 stencil_op
= PIPE_STENCIL_OP_KEEP
;
204 switch (stencil_op
) {
205 case PIPE_STENCIL_OP_KEEP
:
207 /* we can return early for this case */
209 case PIPE_STENCIL_OP_ZERO
:
212 case PIPE_STENCIL_OP_REPLACE
:
/* Saturating increment: add one, then clamp to the 0xff maximum. */
215 case PIPE_STENCIL_OP_INCR
:
216 res
= lp_build_add(bld
, stencilVals
, bld
->one
);
217 res
= lp_build_min(bld
, res
, max
);
/* Saturating decrement: subtract one, then clamp at zero. */
219 case PIPE_STENCIL_OP_DECR
:
220 res
= lp_build_sub(bld
, stencilVals
, bld
->one
);
221 res
= lp_build_max(bld
, res
, bld
->zero
);
/* Wrapping increment: add one and mask back into 8 bits. */
223 case PIPE_STENCIL_OP_INCR_WRAP
:
224 res
= lp_build_add(bld
, stencilVals
, bld
->one
);
225 res
= LLVMBuildAnd(builder
, res
, max
, "");
/* Wrapping decrement: subtract one and mask back into 8 bits. */
227 case PIPE_STENCIL_OP_DECR_WRAP
:
228 res
= lp_build_sub(bld
, stencilVals
, bld
->one
);
229 res
= LLVMBuildAnd(builder
, res
, max
, "");
/* Bitwise invert, masked to the 8-bit stencil range. */
231 case PIPE_STENCIL_OP_INVERT
:
232 res
= LLVMBuildNot(builder
, stencilVals
, "");
233 res
= LLVMBuildAnd(builder
, res
, max
, "");
236 assert(0 && "bad stencil op mode");
245 * Do the one or two-sided stencil test op/update.
/**
 * One- or two-sided stencil update for one stage (fail / zfail / zpass).
 *
 * Computes the front-face result, optionally the back-face result (selected
 * per element by front_facing), then applies the per-face writemask and the
 * live-pixel mask so that only masked-in elements of the stencil buffer are
 * replaced: res = (res & mask) | (stencilVals & ~mask).
 *
 * NOTE(review): the 'op' stage parameter line, the 'mask' parameter line,
 * braces, the declaration of 'res' and the return are elided in this view.
 */
248 lp_build_stencil_op(struct lp_build_context
*bld
,
249 const struct pipe_stencil_state stencil
[2],
251 LLVMValueRef stencilRefs
[2],
252 LLVMValueRef stencilVals
,
254 LLVMValueRef front_facing
)
257 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
260 assert(stencil
[0].enabled
);
262 /* do front face op */
263 res
= lp_build_stencil_op_single(bld
, &stencil
[0], op
,
264 stencilRefs
[0], stencilVals
);
266 if (stencil
[1].enabled
&& front_facing
!= NULL
) {
267 /* do back face op */
268 LLVMValueRef back_res
;
270 back_res
= lp_build_stencil_op_single(bld
, &stencil
[1], op
,
271 stencilRefs
[1], stencilVals
);
/* res[i] = front_facing[i] ? front_result[i] : back_result[i] */
273 res
= lp_build_select(bld
, front_facing
, res
, back_res
);
/* Only pay for writemask handling when some face's mask is partial. */
276 if (stencil
[0].writemask
!= 0xff ||
277 (stencil
[1].enabled
&& front_facing
!= NULL
&& stencil
[1].writemask
!= 0xff)) {
278 /* mask &= stencil[0].writemask */
279 LLVMValueRef writemask
= lp_build_const_int_vec(bld
->gallivm
, bld
->type
,
280 stencil
[0].writemask
);
/* If the back face uses a different writemask, blend the two per element. */
281 if (stencil
[1].enabled
&& stencil
[1].writemask
!= stencil
[0].writemask
&& front_facing
!= NULL
) {
282 LLVMValueRef back_writemask
= lp_build_const_int_vec(bld
->gallivm
, bld
->type
,
283 stencil
[1].writemask
);
284 writemask
= lp_build_select(bld
, front_facing
, writemask
, back_writemask
);
287 mask
= LLVMBuildAnd(builder
, mask
, writemask
, "");
288 /* res = (res & mask) | (stencilVals & ~mask) */
289 res
= lp_build_select_bitwise(bld
, mask
, res
, stencilVals
);
292 /* res = mask ? res : stencilVals */
293 res
= lp_build_select(bld
, mask
, res
, stencilVals
);
302 * Return a type that matches the depth/stencil format.
/**
 * Build an lp_type describing how depth/stencil values of the given format
 * are processed: the element width is the format's full block width, the
 * element is floating-point for float Z formats, and the vector length is
 * the caller-supplied 'length'.
 *
 * NOTE(review): this view elides the 'length' parameter line, the
 * declarations of 'type' and 'swizzle', closing braces and the return.
 */
305 lp_depth_type(const struct util_format_description
*format_desc
,
311 assert(format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
);
312 assert(format_desc
->block
.width
== 1);
313 assert(format_desc
->block
.height
== 1);
/* Channel 0 of the swizzle identifies where the Z component lives. */
315 swizzle
= format_desc
->swizzle
[0];
318 memset(&type
, 0, sizeof type
);
/* Operate on the full block width (e.g. 32 bits for Z24S8). */
319 type
.width
= format_desc
->block
.bits
;
321 if(format_desc
->channel
[swizzle
].type
== UTIL_FORMAT_TYPE_FLOAT
) {
322 type
.floating
= TRUE
;
/* Float Z must be channel 0 and fill the whole block. */
323 assert(swizzle
== 0);
324 assert(format_desc
->channel
[swizzle
].size
== format_desc
->block
.bits
);
326 else if(format_desc
->channel
[swizzle
].type
== UTIL_FORMAT_TYPE_UNSIGNED
) {
327 assert(format_desc
->block
.bits
<= 32);
328 assert(format_desc
->channel
[swizzle
].normalized
);
329 if (format_desc
->channel
[swizzle
].size
< format_desc
->block
.bits
) {
330 /* Prefer signed integers when possible, as SSE has less support
331 * for unsigned comparison;
/* Vector length comes from the caller (matches the fragment vector size). */
339 type
.length
= length
;
346 * Compute bitmask and bit shift to apply to the incoming fragment Z values
347 * and the Z buffer values needed before doing the Z comparison.
349 * Note that we leave the Z bits in the position that we find them
350 * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us
351 * get by with fewer bit twiddling steps.
/**
 * Compute where the Z bits live inside a depth/stencil block: returns the
 * Z channel width, the right-shift needed to align its LSB, and a bitmask
 * selecting the Z bits in place (per the note above, Z is compared where it
 * sits in the buffer, e.g. 0xffffff00 or 0x00ffffff).
 *
 * NOTE(review): this view elides the declarations of 'z_swizzle'/'chan',
 * the early-return for SWIZZLE_NONE, the 'padding_right = 0;' and
 * 'padding_left =' initializations, the all-ones-mask else branch, and the
 * boolean return statements.
 */
354 get_z_shift_and_mask(const struct util_format_description
*format_desc
,
355 unsigned *shift
, unsigned *width
, unsigned *mask
)
357 const unsigned total_bits
= format_desc
->block
.bits
;
360 unsigned padding_left
, padding_right
;
362 assert(format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
);
363 assert(format_desc
->block
.width
== 1);
364 assert(format_desc
->block
.height
== 1);
/* Swizzle channel 0 locates the Z component; NONE means no Z at all. */
366 z_swizzle
= format_desc
->swizzle
[0];
368 if (z_swizzle
== UTIL_FORMAT_SWIZZLE_NONE
)
371 *width
= format_desc
->channel
[z_swizzle
].size
;
/* Bits of channels preceding Z become right-hand padding. */
374 for (chan
= 0; chan
< z_swizzle
; ++chan
)
375 padding_right
+= format_desc
->channel
[chan
].size
;
378 total_bits
- (padding_right
+ *width
);
/* Build the in-place Z mask: ones over [padding_right, padding_right+width). */
380 if (padding_left
|| padding_right
) {
381 unsigned long long mask_left
= (1ULL << (total_bits
- padding_left
)) - 1;
382 unsigned long long mask_right
= (1ULL << (padding_right
)) - 1;
383 *mask
= mask_left
^ mask_right
;
389 *shift
= padding_right
;
396 * Compute bitmask and bit shift to apply to the framebuffer pixel values
397 * to put the stencil bits in the least significant position.
/**
 * Compute the right-shift and mask that move the stencil bits of a combined
 * depth/stencil block into the least-significant position (per the comment
 * above this function).
 *
 * NOTE(review): this view elides the declarations of 's_swizzle'/'chan'/'sz',
 * the '*shift = 0;' initialization, the SWIZZLE_NONE early return, and the
 * final boolean return.
 */
401 get_s_shift_and_mask(const struct util_format_description
*format_desc
,
402 unsigned *shift
, unsigned *mask
)
/* Swizzle channel 1 locates the stencil component; NONE means no stencil. */
407 s_swizzle
= format_desc
->swizzle
[1];
409 if (s_swizzle
== UTIL_FORMAT_SWIZZLE_NONE
)
/* Sum the widths of the channels that precede stencil to get the shift. */
413 for (chan
= 0; chan
< s_swizzle
; chan
++)
414 *shift
+= format_desc
->channel
[chan
].size
;
/* Mask of 'sz' low bits once the stencil value has been shifted down. */
416 sz
= format_desc
->channel
[s_swizzle
].size
;
417 *mask
= (1U << sz
) - 1U;
424 * Perform the occlusion test and increase the counter.
425 * Test the depth mask. Add the number of channels which have a non-zero
426 * mask into the occlusion counter. E.g. if maskvalue is {-1, -1, -1, -1},
427 * the counter will increase by 4.
429 * \param type holds element type of the mask vector.
430 * \param maskvalue is the depth test mask.
431 * \param counter is a pointer of the uint32 counter.
/**
 * Accumulate the number of live (non-zero-mask) elements into *counter.
 *
 * Fast paths: with SSE and a 4-wide vector, or AVX and an 8-wide vector,
 * extract the sign bits with a movmsk intrinsic and popcount the resulting
 * i32. Generic path: AND the mask with 1 per element, gather one byte per
 * element via a shuffle, bitcast the bytes into a single wide integer and
 * popcount that (llvm.ctpop.i32/i64/i128 chosen by vector length).
 *
 * NOTE(review): this view elides the 'type' parameter line, braces, the
 * generic-path 'unsigned i;' declaration and the switch's case labels and
 * default branch.
 */
434 lp_build_occlusion_count(struct gallivm_state
*gallivm
,
436 LLVMValueRef maskvalue
,
437 LLVMValueRef counter
)
439 LLVMBuilderRef builder
= gallivm
->builder
;
440 LLVMContextRef context
= gallivm
->context
;
441 LLVMValueRef countmask
= lp_build_const_int_vec(gallivm
, type
, 1);
442 LLVMValueRef count
, newcount
;
444 assert(type
.length
<= 16);
445 assert(type
.floating
);
/* SSE fast path: movmsk.ps + 32-bit popcount for 4-wide float vectors. */
447 if(util_cpu_caps
.has_sse
&& type
.length
== 4) {
448 const char *movmskintr
= "llvm.x86.sse.movmsk.ps";
449 const char *popcntintr
= "llvm.ctpop.i32";
450 LLVMValueRef bits
= LLVMBuildBitCast(builder
, maskvalue
,
451 lp_build_vec_type(gallivm
, type
), "");
452 bits
= lp_build_intrinsic_unary(builder
, movmskintr
,
453 LLVMInt32TypeInContext(context
), bits
);
454 count
= lp_build_intrinsic_unary(builder
, popcntintr
,
455 LLVMInt32TypeInContext(context
), bits
);
/* AVX fast path: 256-bit movmsk + 32-bit popcount for 8-wide vectors. */
457 else if(util_cpu_caps
.has_avx
&& type
.length
== 8) {
458 const char *movmskintr
= "llvm.x86.avx.movmsk.ps.256";
459 const char *popcntintr
= "llvm.ctpop.i32";
460 LLVMValueRef bits
= LLVMBuildBitCast(builder
, maskvalue
,
461 lp_build_vec_type(gallivm
, type
), "");
462 bits
= lp_build_intrinsic_unary(builder
, movmskintr
,
463 LLVMInt32TypeInContext(context
), bits
);
464 count
= lp_build_intrinsic_unary(builder
, popcntintr
,
465 LLVMInt32TypeInContext(context
), bits
);
/* Generic path: reduce each lane to 0/1, then popcount all lanes at once. */
469 LLVMValueRef countv
= LLVMBuildAnd(builder
, maskvalue
, countmask
, "countv");
470 LLVMTypeRef counttype
= LLVMIntTypeInContext(context
, type
.length
* 8);
471 LLVMTypeRef i8vntype
= LLVMVectorType(LLVMInt8TypeInContext(context
), type
.length
* 4);
472 LLVMValueRef shufflev
, countd
;
473 LLVMValueRef shuffles
[16];
474 const char *popcntintr
= NULL
;
476 countv
= LLVMBuildBitCast(builder
, countv
, i8vntype
, "");
/* Pick the low byte of each 32-bit lane (indices 0, 4, 8, ...). */
478 for (i
= 0; i
< type
.length
; i
++) {
479 shuffles
[i
] = lp_build_const_int32(gallivm
, 4*i
);
482 shufflev
= LLVMConstVector(shuffles
, type
.length
);
483 countd
= LLVMBuildShuffleVector(builder
, countv
, LLVMGetUndef(i8vntype
), shufflev
, "");
/* View the gathered bytes as one integer of length*8 bits for ctpop. */
484 countd
= LLVMBuildBitCast(builder
, countd
, counttype
, "countd");
488 * this is bad on cpus without popcount (on x86 supported by intel
489 * nehalem, amd barcelona, and up - not tied to sse42).
490 * Would be much faster to just sum the 4 elements of the vector with
491 * some horizontal add (shuffle/add/shuffle/add after the initial and).
493 switch (type
.length
) {
495 popcntintr
= "llvm.ctpop.i32";
498 popcntintr
= "llvm.ctpop.i64";
501 popcntintr
= "llvm.ctpop.i128";
506 count
= lp_build_intrinsic_unary(builder
, popcntintr
, counttype
, countd
);
/* The counter is 32-bit: narrow wide popcount results before the add. */
508 if (type
.length
> 4) {
509 count
= LLVMBuildTrunc(builder
, count
, LLVMIntTypeInContext(context
, 32), "");
/* Read-modify-write the occlusion counter in memory. */
512 newcount
= LLVMBuildLoad(builder
, counter
, "origcount");
513 newcount
= LLVMBuildAdd(builder
, newcount
, count
, "newcount");
514 LLVMBuildStore(builder
, newcount
, counter
);
520 * Generate code for performing depth and/or stencil tests.
521 * We operate on a vector of values (typically n 2x2 quads).
523 * \param depth the depth test state
524 * \param stencil the front/back stencil state
525 * \param type the data type of the fragment depth/stencil values
526 * \param format_desc description of the depth/stencil surface
527 * \param mask the alive/dead pixel mask for the quad (vector)
528 * \param stencil_refs the front/back stencil ref values (scalar)
529 * \param z_src the incoming depth/stencil values (n 2x2 quad values, float32)
530 * \param zs_dst_ptr pointer to depth/stencil values in framebuffer
531 * \param face contains boolean value indicating front/back facing polygon
/**
 * Generate the combined depth/stencil test for a vector of fragments.
 *
 * Visible flow: pick matching lp_types for the buffer and for the in-flight
 * Z values, load the current Z/stencil block, split it into Z and stencil
 * lanes via get_z_shift_and_mask()/get_s_shift_and_mask(), run the stencil
 * test and its fail operator, convert the incoming fragment Z to buffer
 * layout, compare with depth->func, apply the Z-fail/Z-pass stencil
 * operators, merge the surviving Z and stencil bits back together and update
 * the live-pixel mask.
 *
 * NOTE(review): this is the large entry point of the file and many original
 * lines are elided in this view (the 'z_src', 'face' and 'counter'
 * parameters, braces, else-branches, the final store of *zs_value and the
 * depth-write plumbing). Comments below only describe what is visible.
 */
534 lp_build_depth_stencil_test(struct gallivm_state
*gallivm
,
535 const struct pipe_depth_state
*depth
,
536 const struct pipe_stencil_state stencil
[2],
537 struct lp_type z_src_type
,
538 const struct util_format_description
*format_desc
,
539 struct lp_build_mask_context
*mask
,
540 LLVMValueRef stencil_refs
[2],
542 LLVMValueRef zs_dst_ptr
,
544 LLVMValueRef
*zs_value
,
547 LLVMBuilderRef builder
= gallivm
->builder
;
548 struct lp_type zs_type
;
549 struct lp_type z_type
;
550 struct lp_build_context z_bld
;
551 struct lp_build_context s_bld
;
552 struct lp_type s_type
;
553 unsigned z_shift
= 0, z_width
= 0, z_mask
= 0;
554 LLVMValueRef zs_dst
, z_dst
= NULL
;
555 LLVMValueRef stencil_vals
= NULL
;
556 LLVMValueRef z_bitmask
= NULL
, stencil_shift
= NULL
;
557 LLVMValueRef z_pass
= NULL
, s_pass_mask
= NULL
;
558 LLVMValueRef orig_mask
= lp_build_mask_value(mask
);
559 LLVMValueRef front_facing
= NULL
;
563 * Depths are expected to be between 0 and 1, even if they are stored in
564 * floats. Setting these bits here will ensure that the lp_build_conv() call
565 * below won't try to unnecessarily clamp the incoming values.
567 if(z_src_type
.floating
) {
568 z_src_type
.sign
= FALSE
;
569 z_src_type
.norm
= TRUE
;
572 assert(!z_src_type
.sign
);
573 assert(z_src_type
.norm
);
576 /* Pick the type matching the depth-stencil format. */
577 zs_type
= lp_depth_type(format_desc
, z_src_type
.length
);
579 /* Pick the intermediate type for depth operations. */
581 /* FIXME: Cope with a depth test type with higher bit width. */
582 assert(zs_type
.width
<= z_src_type
.width
);
583 z_type
.width
= z_src_type
.width
;
584 assert(z_type
.length
== z_src_type
.length
);
586 /* FIXME: for non-float depth/stencil might generate better code
587 * if we'd always split it up to use 128bit operations.
588 * For stencil we'd almost certainly want to pack to 8xi16 values,
589 * for z just run twice.
592 /* Sanity checking */
594 const unsigned z_swizzle
= format_desc
->swizzle
[0];
595 const unsigned s_swizzle
= format_desc
->swizzle
[1];
597 assert(z_swizzle
!= UTIL_FORMAT_SWIZZLE_NONE
||
598 s_swizzle
!= UTIL_FORMAT_SWIZZLE_NONE
);
600 assert(depth
->enabled
|| stencil
[0].enabled
);
602 assert(format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
);
603 assert(format_desc
->block
.width
== 1);
604 assert(format_desc
->block
.height
== 1);
/* Stencil is only supported on the two combined 24+8 formats. */
606 if (stencil
[0].enabled
) {
607 assert(format_desc
->format
== PIPE_FORMAT_Z24_UNORM_S8_UINT
||
608 format_desc
->format
== PIPE_FORMAT_S8_UINT_Z24_UNORM
);
611 assert(z_swizzle
< 4);
612 assert(format_desc
->block
.bits
<= z_type
.width
);
613 if (z_type
.floating
) {
614 assert(z_swizzle
== 0);
615 assert(format_desc
->channel
[z_swizzle
].type
==
616 UTIL_FORMAT_TYPE_FLOAT
);
617 assert(format_desc
->channel
[z_swizzle
].size
==
618 format_desc
->block
.bits
);
621 assert(format_desc
->channel
[z_swizzle
].type
==
622 UTIL_FORMAT_TYPE_UNSIGNED
);
623 assert(format_desc
->channel
[z_swizzle
].normalized
);
624 assert(!z_type
.fixed
);
629 /* Setup build context for Z vals */
630 lp_build_context_init(&z_bld
, gallivm
, z_type
);
632 /* Setup build context for stencil vals */
633 s_type
= lp_int_type(z_type
);
634 lp_build_context_init(&s_bld
, gallivm
, s_type
);
636 /* Load current z/stencil value from z/stencil buffer */
637 zs_dst_ptr
= LLVMBuildBitCast(builder
,
639 LLVMPointerType(lp_build_vec_type(gallivm
, zs_type
), 0), "");
640 zs_dst
= LLVMBuildLoad(builder
, zs_dst_ptr
, "");
641 if (format_desc
->block
.bits
< z_type
.width
) {
642 /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
643 zs_dst
= LLVMBuildZExt(builder
, zs_dst
, z_bld
.vec_type
, "");
646 lp_build_name(zs_dst
, "zs_dst");
649 /* Compute and apply the Z/stencil bitmasks and shifts.
652 unsigned s_shift
, s_mask
;
654 if (get_z_shift_and_mask(format_desc
, &z_shift
, &z_width
, &z_mask
)) {
655 if (z_mask
!= 0xffffffff) {
656 z_bitmask
= lp_build_const_int_vec(gallivm
, z_type
, z_mask
);
660 * Align the framebuffer Z 's LSB to the right.
663 LLVMValueRef shift
= lp_build_const_int_vec(gallivm
, z_type
, z_shift
);
664 z_dst
= LLVMBuildLShr(builder
, zs_dst
, shift
, "z_dst");
665 } else if (z_bitmask
) {
666 /* TODO: Instead of loading a mask from memory and ANDing, it's
667 * probably faster to just shake the bits with two shifts. */
668 z_dst
= LLVMBuildAnd(builder
, zs_dst
, z_bitmask
, "z_dst");
671 lp_build_name(z_dst
, "z_dst");
/* Extract the stencil lanes: shift down and mask off any Z bits. */
675 if (get_s_shift_and_mask(format_desc
, &s_shift
, &s_mask
)) {
677 LLVMValueRef shift
= lp_build_const_int_vec(gallivm
, s_type
, s_shift
);
678 stencil_vals
= LLVMBuildLShr(builder
, zs_dst
, shift
, "");
679 stencil_shift
= shift
; /* used below */
682 stencil_vals
= zs_dst
;
685 if (s_mask
!= 0xffffffff) {
686 LLVMValueRef mask
= lp_build_const_int_vec(gallivm
, s_type
, s_mask
);
687 stencil_vals
= LLVMBuildAnd(builder
, stencil_vals
, mask
, "");
690 lp_build_name(stencil_vals
, "s_dst");
694 if (stencil
[0].enabled
) {
697 LLVMValueRef zero
= lp_build_const_int32(gallivm
, 0);
699 /* front_facing = face != 0 ? ~0 : 0 */
700 front_facing
= LLVMBuildICmp(builder
, LLVMIntNE
, face
, zero
, "");
701 front_facing
= LLVMBuildSExt(builder
, front_facing
,
702 LLVMIntTypeInContext(gallivm
->context
,
703 s_bld
.type
.length
*s_bld
.type
.width
),
705 front_facing
= LLVMBuildBitCast(builder
, front_facing
,
706 s_bld
.int_vec_type
, "");
709 /* convert scalar stencil refs into vectors */
710 stencil_refs
[0] = lp_build_broadcast_scalar(&s_bld
, stencil_refs
[0]);
711 stencil_refs
[1] = lp_build_broadcast_scalar(&s_bld
, stencil_refs
[1]);
713 s_pass_mask
= lp_build_stencil_test(&s_bld
, stencil
,
714 stencil_refs
, stencil_vals
,
717 /* apply stencil-fail operator */
719 LLVMValueRef s_fail_mask
= lp_build_andnot(&s_bld
, orig_mask
, s_pass_mask
);
720 stencil_vals
= lp_build_stencil_op(&s_bld
, stencil
, S_FAIL_OP
,
721 stencil_refs
, stencil_vals
,
722 s_fail_mask
, front_facing
);
726 if (depth
->enabled
) {
728 * Convert fragment Z to the desired type, aligning the LSB to the right.
731 assert(z_type
.width
== z_src_type
.width
);
732 assert(z_type
.length
== z_src_type
.length
);
733 assert(lp_check_value(z_src_type
, z_src
));
734 if (z_src_type
.floating
) {
736 * Convert from floating point values
739 if (!z_type
.floating
) {
740 z_src
= lp_build_clamped_float_to_unsigned_norm(gallivm
,
747 * Convert from unsigned normalized values.
750 assert(!z_src_type
.sign
);
751 assert(!z_src_type
.fixed
);
752 assert(z_src_type
.norm
);
753 assert(!z_type
.floating
);
/* Drop the low source bits so the source width matches the buffer's Z. */
754 if (z_src_type
.width
> z_width
) {
755 LLVMValueRef shift
= lp_build_const_int_vec(gallivm
, z_src_type
,
756 z_src_type
.width
- z_width
);
757 z_src
= LLVMBuildLShr(builder
, z_src
, shift
, "");
760 assert(lp_check_value(z_type
, z_src
));
762 lp_build_name(z_src
, "z_src");
764 /* compare src Z to dst Z, returning 'pass' mask */
765 z_pass
= lp_build_cmp(&z_bld
, depth
->func
, z_src
, z_dst
);
767 if (!stencil
[0].enabled
) {
768 /* We can potentially skip all remaining operations here, but only
769 * if stencil is disabled because we still need to update the stencil
770 * buffer values. Don't need to update Z buffer values.
772 lp_build_mask_update(mask
, z_pass
);
775 lp_build_mask_check(mask
);
780 if (depth
->writemask
) {
781 LLVMValueRef zselectmask
;
783 /* mask off bits that failed Z test */
784 zselectmask
= LLVMBuildAnd(builder
, orig_mask
, z_pass
, "");
786 /* mask off bits that failed stencil test */
788 zselectmask
= LLVMBuildAnd(builder
, zselectmask
, s_pass_mask
, "");
791 /* Mix the old and new Z buffer values.
792 * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
794 z_dst
= lp_build_select(&z_bld
, zselectmask
, z_src
, z_dst
);
797 if (stencil
[0].enabled
) {
798 /* update stencil buffer values according to z pass/fail result */
799 LLVMValueRef z_fail_mask
, z_pass_mask
;
801 /* apply Z-fail operator */
802 z_fail_mask
= lp_build_andnot(&z_bld
, orig_mask
, z_pass
);
803 stencil_vals
= lp_build_stencil_op(&s_bld
, stencil
, Z_FAIL_OP
,
804 stencil_refs
, stencil_vals
,
805 z_fail_mask
, front_facing
);
807 /* apply Z-pass operator */
808 z_pass_mask
= LLVMBuildAnd(builder
, orig_mask
, z_pass
, "");
809 stencil_vals
= lp_build_stencil_op(&s_bld
, stencil
, Z_PASS_OP
,
810 stencil_refs
, stencil_vals
,
811 z_pass_mask
, front_facing
);
815 /* No depth test: apply Z-pass operator to stencil buffer values which
816 * passed the stencil test.
818 s_pass_mask
= LLVMBuildAnd(builder
, orig_mask
, s_pass_mask
, "");
819 stencil_vals
= lp_build_stencil_op(&s_bld
, stencil
, Z_PASS_OP
,
820 stencil_refs
, stencil_vals
,
821 s_pass_mask
, front_facing
);
824 /* Put Z and stencil bits in the right place */
825 if (z_dst
&& z_shift
) {
826 LLVMValueRef shift
= lp_build_const_int_vec(gallivm
, z_type
, z_shift
);
827 z_dst
= LLVMBuildShl(builder
, z_dst
, shift
, "");
829 if (stencil_vals
&& stencil_shift
)
830 stencil_vals
= LLVMBuildShl(builder
, stencil_vals
,
833 /* Finally, merge/store the z/stencil values */
834 if ((depth
->enabled
&& depth
->writemask
) ||
835 (stencil
[0].enabled
&& stencil
[0].writemask
)) {
837 if (z_dst
&& stencil_vals
)
838 zs_dst
= LLVMBuildOr(builder
, z_dst
, stencil_vals
, "");
842 zs_dst
= stencil_vals
;
/* Fold the stencil pass mask, then (when both tests ran) the Z pass mask,
 * into the live-pixel mask. */
848 lp_build_mask_update(mask
, s_pass_mask
);
850 if (depth
->enabled
&& stencil
[0].enabled
)
851 lp_build_mask_update(mask
, z_pass
);
854 lp_build_mask_check(mask
);
/**
 * Store already-merged Z/stencil values to the depth/stencil buffer,
 * truncating first when the buffer's block is narrower than the value type
 * (e.g. Z16_UNORM), then bitcasting the destination pointer to match.
 *
 * NOTE(review): braces and the function's storage-class/return-type line
 * are elided in this view of the file.
 */
860 lp_build_depth_write(struct gallivm_state
*gallivm
,
861 struct lp_type z_src_type
,
862 const struct util_format_description
*format_desc
,
863 LLVMValueRef zs_dst_ptr
,
864 LLVMValueRef zs_value
)
866 LLVMBuilderRef builder
= gallivm
->builder
;
868 if (format_desc
->block
.bits
< z_src_type
.width
) {
869 /* Truncate incoming ZS values (e.g., when writing to Z16_UNORM) */
870 LLVMTypeRef zs_type
= LLVMIntTypeInContext(gallivm
->context
, format_desc
->block
.bits
);
/* Vector case: truncate element-wise with a matching vector type. */
871 if (z_src_type
.length
> 1) {
872 zs_type
= LLVMVectorType(zs_type
, z_src_type
.length
);
874 zs_value
= LLVMBuildTrunc(builder
, zs_value
, zs_type
, "");
/* Reinterpret the destination pointer so the store type matches exactly. */
877 zs_dst_ptr
= LLVMBuildBitCast(builder
, zs_dst_ptr
,
878 LLVMPointerType(LLVMTypeOf(zs_value
), 0), "");
880 LLVMBuildStore(builder
, zs_value
, zs_dst_ptr
);
/**
 * Deferred depth write: load the current buffer values, select per element
 * between the new zs_value and the old contents using the live-pixel mask,
 * and store the blend back. Used when the Z write was postponed past the
 * depth/stencil test.
 *
 * NOTE(review): the declaration of 'z_dst', braces and the storage-class/
 * return-type line are elided in this view of the file.
 */
885 lp_build_deferred_depth_write(struct gallivm_state
*gallivm
,
886 struct lp_type z_src_type
,
887 const struct util_format_description
*format_desc
,
888 struct lp_build_mask_context
*mask
,
889 LLVMValueRef zs_dst_ptr
,
890 LLVMValueRef zs_value
)
892 struct lp_type z_type
;
893 struct lp_build_context z_bld
;
895 LLVMBuilderRef builder
= gallivm
->builder
;
897 /* XXX: pointlessly redo type logic:
899 z_type
= lp_depth_type(format_desc
, z_src_type
.length
);
900 lp_build_context_init(&z_bld
, gallivm
, z_type
);
/* View the destination as a vector of buffer-typed Z elements. */
902 zs_dst_ptr
= LLVMBuildBitCast(builder
, zs_dst_ptr
,
903 LLVMPointerType(z_bld
.vec_type
, 0), "");
905 z_dst
= LLVMBuildLoad(builder
, zs_dst_ptr
, "zsbufval");
/* Narrow the incoming value when the buffer type is narrower. */
907 if (z_type
.width
< z_src_type
.width
) {
908 zs_value
= LLVMBuildTrunc(builder
, zs_value
, z_bld
.vec_type
, "");
/* z_dst[i] = mask[i] ? zs_value[i] : z_dst[i] */
911 z_dst
= lp_build_select(&z_bld
, lp_build_mask_value(mask
), zs_value
, z_dst
);
913 LLVMBuildStore(builder
, z_dst
, zs_dst_ptr
);