1 /**************************************************************************
3 * Copyright 2009-2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Depth/stencil testing to LLVM IR translation.
32 * To be done accurately/efficiently the depth/stencil test must be done with
33 * the same type/format of the depth/stencil buffer, which implies massaging
34 * the incoming depths to fit into place. Using a more straightforward
35 * type/format for depth/stencil values internally and only converting when
36 * flushing would avoid this, but it would most likely result in depth fighting
39 * We are free to use a different pixel layout though. Since our basic
40 * processing unit is a quad (2x2 pixel block) we store the depth/stencil
41 * values tiled, a quad at a time. That is, a depth buffer containing
49 * will actually be stored in memory as
51 * Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
52 * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
53 * ... ... ... ... ... ... ... ... ...
56 * @author Jose Fonseca <jfonseca@vmware.com>
57 * @author Brian Paul <brianp@vmware.com>
60 #include "pipe/p_state.h"
61 #include "util/u_format.h"
62 #include "util/u_cpu_detect.h"
64 #include "gallivm/lp_bld_type.h"
65 #include "gallivm/lp_bld_arit.h"
66 #include "gallivm/lp_bld_bitarit.h"
67 #include "gallivm/lp_bld_const.h"
68 #include "gallivm/lp_bld_conv.h"
69 #include "gallivm/lp_bld_logic.h"
70 #include "gallivm/lp_bld_flow.h"
71 #include "gallivm/lp_bld_intr.h"
72 #include "gallivm/lp_bld_debug.h"
73 #include "gallivm/lp_bld_swizzle.h"
75 #include "lp_bld_depth.h"
78 /** Used to select fields from pipe_stencil_state */
88 * Do the stencil test comparison (compare FB stencil values against ref value).
89 * This will be used twice when generating two-sided stencil code.
90 * \param stencil the front/back stencil state
91 * \param stencilRef the stencil reference value, replicated as a vector
92 * \param stencilVals vector of stencil values from framebuffer
93 * \return vector mask of pass/fail values (~0 or 0)
// lp_build_stencil_test_single: emit the stencil comparison for one face.
// Visible steps: optionally AND both the reference vector and the framebuffer
// stencil vector with stencil->valuemask, then compare them with
// lp_build_cmp() using stencil->func (reference value is the first operand).
// NOTE(review): this listing is missing physical lines (return type, braces,
// the declaration and return of res, the body of the type.width branch);
// the comments below describe only the statements that are visible.
96 lp_build_stencil_test_single(struct lp_build_context
*bld
,
97 const struct pipe_stencil_state
*stencil
,
98 LLVMValueRef stencilRef
,
99 LLVMValueRef stencilVals
)
101 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
// Hard-coded 8-bit stencil maximum; the original author flagged it with XXX.
102 const unsigned stencilMax
= 255; /* XXX fix */
103 struct lp_type type
= bld
->type
;
107 * SSE2 has intrinsics for signed comparisons, but not unsigned ones. Values
108 * are between 0..255 so ensure we generate the fastest comparisons for
// NOTE(review): the statements guarded by this width check are not visible.
111 if (type
.width
<= 8) {
117 assert(stencil
->enabled
);
// Masking is only emitted when valuemask differs from stencilMax (255).
119 if (stencil
->valuemask
!= stencilMax
) {
120 /* compute stencilRef = stencilRef & valuemask */
121 LLVMValueRef valuemask
= lp_build_const_int_vec(bld
->gallivm
, type
, stencil
->valuemask
);
122 stencilRef
= LLVMBuildAnd(builder
, stencilRef
, valuemask
, "");
123 /* compute stencilVals = stencilVals & valuemask */
124 stencilVals
= LLVMBuildAnd(builder
, stencilVals
, valuemask
, "");
// Final comparison of the (possibly masked) ref against the stored values.
127 res
= lp_build_cmp(bld
, stencil
->func
, stencilRef
, stencilVals
);
134 * Do the one or two-sided stencil test comparison.
135 * \sa lp_build_stencil_test_single
136 * \param front_facing an integer vector mask, indicating front (~0) or back
137 * (0) facing polygon. If NULL, assume front-facing.
// lp_build_stencil_test: emit the one- or two-sided stencil comparison.
// The front-face test (stencil[0], stencilRefs[0]) is always emitted. When
// stencil[1] is enabled and a front_facing mask is supplied, the back-face
// test is emitted too and the two results are merged with lp_build_select()
// keyed on front_facing.
// NOTE(review): return type, braces and the declaration/return of res are
// not visible in this listing.
140 lp_build_stencil_test(struct lp_build_context
*bld
,
141 const struct pipe_stencil_state stencil
[2],
142 LLVMValueRef stencilRefs
[2],
143 LLVMValueRef stencilVals
,
144 LLVMValueRef front_facing
)
148 assert(stencil
[0].enabled
);
150 /* do front face test */
151 res
= lp_build_stencil_test_single(bld
, &stencil
[0],
152 stencilRefs
[0], stencilVals
);
// Two-sided path: needs both back-face state and a facing mask.
154 if (stencil
[1].enabled
&& front_facing
!= NULL
) {
155 /* do back face test */
156 LLVMValueRef back_res
;
158 back_res
= lp_build_stencil_test_single(bld
, &stencil
[1],
159 stencilRefs
[1], stencilVals
);
// front_facing selects the front result (res), otherwise the back result.
161 res
= lp_build_select(bld
, front_facing
, res
, back_res
);
169 * Apply the stencil operator (add/sub/keep/etc) to the given vector
171 * \return new stencil values vector
// lp_build_stencil_op_single: apply one stencil operator to a vector of
// stencil values for a single face.
// Visible steps: pick stencil_op from fail_op / zfail_op / zpass_op, then
// switch on it to build the new values. The 0xff constant vector (max) is
// used both as the saturation limit and as the wrap mask.
// NOTE(review): the selector that chooses between fail_op, zfail_op and
// zpass_op, one parameter between stencil and stencilRef, the bodies of the
// KEEP / ZERO / REPLACE cases and the return are not visible in this listing.
174 lp_build_stencil_op_single(struct lp_build_context
*bld
,
175 const struct pipe_stencil_state
*stencil
,
177 LLVMValueRef stencilRef
,
178 LLVMValueRef stencilVals
)
181 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
182 struct lp_type type
= bld
->type
;
// Constant vector of 0xff: clamp limit for INCR, bit mask for the wrap ops.
184 LLVMValueRef max
= lp_build_const_int_vec(bld
->gallivm
, type
, 0xff);
191 stencil_op
= stencil
->fail_op
;
194 stencil_op
= stencil
->zfail_op
;
197 stencil_op
= stencil
->zpass_op
;
// Fallback for an unrecognised selector: assert, then behave like KEEP.
200 assert(0 && "Invalid stencil_op mode");
201 stencil_op
= PIPE_STENCIL_OP_KEEP
;
204 switch (stencil_op
) {
205 case PIPE_STENCIL_OP_KEEP
:
207 /* we can return early for this case */
209 case PIPE_STENCIL_OP_ZERO
:
212 case PIPE_STENCIL_OP_REPLACE
:
// Saturating increment: add one, then clamp with min(res, 0xff).
215 case PIPE_STENCIL_OP_INCR
:
216 res
= lp_build_add(bld
, stencilVals
, bld
->one
);
217 res
= lp_build_min(bld
, res
, max
);
// Saturating decrement: subtract one, then clamp with max(res, 0).
219 case PIPE_STENCIL_OP_DECR
:
220 res
= lp_build_sub(bld
, stencilVals
, bld
->one
);
221 res
= lp_build_max(bld
, res
, bld
->zero
);
// Wrapping increment: add one and keep only the low 8 bits.
223 case PIPE_STENCIL_OP_INCR_WRAP
:
224 res
= lp_build_add(bld
, stencilVals
, bld
->one
);
225 res
= LLVMBuildAnd(builder
, res
, max
, "");
// Wrapping decrement: subtract one and keep only the low 8 bits.
227 case PIPE_STENCIL_OP_DECR_WRAP
:
228 res
= lp_build_sub(bld
, stencilVals
, bld
->one
);
229 res
= LLVMBuildAnd(builder
, res
, max
, "");
// Bitwise invert, restricted to the low 8 bits.
231 case PIPE_STENCIL_OP_INVERT
:
232 res
= LLVMBuildNot(builder
, stencilVals
, "");
233 res
= LLVMBuildAnd(builder
, res
, max
, "");
236 assert(0 && "bad stencil op mode");
245 * Do the one or two-sided stencil test op/update.
// lp_build_stencil_op: emit the one- or two-sided stencil update.
// Visible steps:
//   1. build the front-face result with lp_build_stencil_op_single();
//   2. if stencil[1] is enabled and front_facing is given, build the
//      back-face result and select per element on front_facing;
//   3. if a relevant writemask is not 0xff, AND the pixel mask with the
//      (per-face) writemask and merge bitwise with the old values;
//      the plain lp_build_select() at the end is the non-writemask merge.
// NOTE(review): the op and mask parameters, the declaration/return of res
// and the else keyword between the two merges are not visible here.
248 lp_build_stencil_op(struct lp_build_context
*bld
,
249 const struct pipe_stencil_state stencil
[2],
251 LLVMValueRef stencilRefs
[2],
252 LLVMValueRef stencilVals
,
254 LLVMValueRef front_facing
)
257 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
260 assert(stencil
[0].enabled
);
262 /* do front face op */
263 res
= lp_build_stencil_op_single(bld
, &stencil
[0], op
,
264 stencilRefs
[0], stencilVals
);
266 if (stencil
[1].enabled
&& front_facing
!= NULL
) {
267 /* do back face op */
268 LLVMValueRef back_res
;
270 back_res
= lp_build_stencil_op_single(bld
, &stencil
[1], op
,
271 stencilRefs
[1], stencilVals
);
273 res
= lp_build_select(bld
, front_facing
, res
, back_res
);
// Writemask path: taken when the front writemask is partial, or when the
// back face participates and its writemask is partial.
276 if (stencil
[0].writemask
!= 0xff ||
277 (stencil
[1].enabled
&& front_facing
!= NULL
&& stencil
[1].writemask
!= 0xff)) {
278 /* mask &= stencil[0].writemask */
279 LLVMValueRef writemask
= lp_build_const_int_vec(bld
->gallivm
, bld
->type
,
280 stencil
[0].writemask
);
// Only build a per-face writemask when the two faces actually differ.
281 if (stencil
[1].enabled
&& stencil
[1].writemask
!= stencil
[0].writemask
&& front_facing
!= NULL
) {
282 LLVMValueRef back_writemask
= lp_build_const_int_vec(bld
->gallivm
, bld
->type
,
283 stencil
[1].writemask
);
284 writemask
= lp_build_select(bld
, front_facing
, writemask
, back_writemask
);
287 mask
= LLVMBuildAnd(builder
, mask
, writemask
, "");
288 /* res = (res & mask) | (stencilVals & ~mask) */
289 res
= lp_build_select_bitwise(bld
, mask
, res
, stencilVals
);
292 /* res = mask ? res : stencilVals */
293 res
= lp_build_select(bld
, mask
, res
, stencilVals
);
302 * Return a type appropriate for depth/stencil testing.
// lp_depth_type: build the lp_type used for depth/stencil testing.
// Visible steps: assert a ZS format with 1x1 blocks, read the Z channel
// swizzle (swizzle[0]), zero the type, set type.width to the block bit
// size, mark float formats as floating, sanity-check unsigned normalized
// formats, and set type.length = length / type.width.
// NOTE(review): the second parameter (length), the local declarations, the
// body of the channel-size branch and the return are not visible here.
305 lp_depth_type(const struct util_format_description
*format_desc
,
311 assert(format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
);
312 assert(format_desc
->block
.width
== 1);
313 assert(format_desc
->block
.height
== 1);
// Swizzle entry 0 names the channel that holds Z.
315 swizzle
= format_desc
->swizzle
[0];
318 memset(&type
, 0, sizeof type
);
// Element width is the whole pixel, not just the Z channel.
319 type
.width
= format_desc
->block
.bits
;
321 if(format_desc
->channel
[swizzle
].type
== UTIL_FORMAT_TYPE_FLOAT
) {
322 type
.floating
= TRUE
;
// A float Z channel must be channel 0 and fill the whole pixel.
323 assert(swizzle
== 0);
324 assert(format_desc
->channel
[swizzle
].size
== format_desc
->block
.bits
);
326 else if(format_desc
->channel
[swizzle
].type
== UTIL_FORMAT_TYPE_UNSIGNED
) {
327 assert(format_desc
->block
.bits
<= 32);
328 assert(format_desc
->channel
[swizzle
].normalized
);
// Z narrower than the pixel (for example 24 of 32 bits) leaves spare bits.
329 if (format_desc
->channel
[swizzle
].size
< format_desc
->block
.bits
) {
330 /* Prefer signed integers when possible, as SSE has less support
331 * for unsigned comparison;
// length is a total bit count; dividing by the element width gives the
// number of vector elements.
339 assert(type
.width
<= length
);
340 type
.length
= length
/ type
.width
;
347 * Compute bitmask and bit shift to apply to the incoming fragment Z values
348 * and the Z buffer values needed before doing the Z comparison.
350 * Note that we leave the Z bits in the position that we find them
351 * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us
352 * get by with fewer bit twiddling steps.
// get_z_shift_and_mask: report where the Z bits sit inside a pixel.
// Outputs: *width = size of the Z channel in bits, *shift = number of bits
// below the Z channel (padding_right), *mask = bitmask covering the Z bits
// when there is padding on either side.
// NOTE(review): the return type and return statements, the initialisation
// of padding_right, the left-hand side of the padding_left assignment and
// the branch taken when there is no padding are not visible here.
355 get_z_shift_and_mask(const struct util_format_description
*format_desc
,
356 unsigned *shift
, unsigned *width
, unsigned *mask
)
358 const unsigned total_bits
= format_desc
->block
.bits
;
361 unsigned padding_left
, padding_right
;
363 assert(format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
);
364 assert(format_desc
->block
.width
== 1);
365 assert(format_desc
->block
.height
== 1);
367 z_swizzle
= format_desc
->swizzle
[0];
// No Z channel in this format.
369 if (z_swizzle
== UTIL_FORMAT_SWIZZLE_NONE
)
372 *width
= format_desc
->channel
[z_swizzle
].size
;
// Sum the sizes of all channels stored below the Z channel.
375 for (chan
= 0; chan
< z_swizzle
; ++chan
)
376 padding_right
+= format_desc
->channel
[chan
].size
;
// Remaining bits above the Z channel.
379 total_bits
- (padding_right
+ *width
);
381 if (padding_left
|| padding_right
) {
// 64-bit intermediates keep the shifts defined for 32-bit pixels.
// XOR of the two low-bit masks leaves exactly the Z bit range set.
382 unsigned long long mask_left
= (1ULL << (total_bits
- padding_left
)) - 1;
383 unsigned long long mask_right
= (1ULL << (padding_right
)) - 1;
384 *mask
= mask_left
^ mask_right
;
390 *shift
= padding_right
;
397 * Compute bitmask and bit shift to apply to the framebuffer pixel values
398 * to put the stencil bits in the least significant position.
// get_s_shift_and_mask: report where the stencil bits sit inside a pixel.
// Outputs: *shift = sum of the sizes of the channels below the stencil
// channel, *mask = (1 << stencil channel size) - 1, i.e. a mask for the
// stencil value once it has been shifted down to bit 0.
// NOTE(review): the return type and return statements, local declarations
// and the initialisation of *shift are not visible in this listing.
// NOTE(review): 1U << sz is only defined for sz below 32 - confirm no
// format reaches this with a 32-bit stencil channel.
402 get_s_shift_and_mask(const struct util_format_description
*format_desc
,
403 unsigned *shift
, unsigned *mask
)
// Swizzle entry 1 names the channel that holds stencil.
408 s_swizzle
= format_desc
->swizzle
[1];
410 if (s_swizzle
== UTIL_FORMAT_SWIZZLE_NONE
)
414 for (chan
= 0; chan
< s_swizzle
; chan
++)
415 *shift
+= format_desc
->channel
[chan
].size
;
417 sz
= format_desc
->channel
[s_swizzle
].size
;
418 *mask
= (1U << sz
) - 1U;
425 * Perform the occlusion test and increase the counter.
426 * Test the depth mask. Add the number of channels which have a non-zero mask
427 * to the occlusion counter, e.g. if maskvalue is {-1, -1, -1, -1},
428 * the counter is incremented by 4.
430 * \param type holds element type of the mask vector.
431 * \param maskvalue is the depth test mask.
432 * \param counter is a pointer of the uint32 counter.
// lp_build_occlusion_count: add the number of live mask elements to the
// counter that counter points to.
// Three code paths are visible:
//   - SSE, 4 elements: movmsk.ps gathers the sign bits, ctpop.i32 counts them;
//   - AVX, 8 elements: same with the 256-bit movmsk;
//   - generic: AND each element with 1, pick one byte per element with a
//     shuffle, view the bytes as one wide integer and popcount it.
// The count is then added to the value loaded from counter and stored back.
// NOTE(review): the return type, the type parameter, the declaration of i,
// the else keyword and the case labels of the switch are not visible here.
435 lp_build_occlusion_count(struct gallivm_state
*gallivm
,
437 LLVMValueRef maskvalue
,
438 LLVMValueRef counter
)
440 LLVMBuilderRef builder
= gallivm
->builder
;
441 LLVMContextRef context
= gallivm
->context
;
// Vector of ones, used by the generic path to reduce each mask element to 0/1.
442 LLVMValueRef countmask
= lp_build_const_int_vec(gallivm
, type
, 1);
443 LLVMValueRef count
, newcount
;
// shuffles[] below has 16 entries, hence the length limit.
445 assert(type
.length
<= 16);
446 assert(type
.floating
);
448 if(util_cpu_caps
.has_sse
&& type
.length
== 4) {
449 const char *movmskintr
= "llvm.x86.sse.movmsk.ps";
450 const char *popcntintr
= "llvm.ctpop.i32";
// Reinterpret the mask as the vector type described by type before movmsk.
451 LLVMValueRef bits
= LLVMBuildBitCast(builder
, maskvalue
,
452 lp_build_vec_type(gallivm
, type
), "");
453 bits
= lp_build_intrinsic_unary(builder
, movmskintr
,
454 LLVMInt32TypeInContext(context
), bits
);
455 count
= lp_build_intrinsic_unary(builder
, popcntintr
,
456 LLVMInt32TypeInContext(context
), bits
);
458 else if(util_cpu_caps
.has_avx
&& type
.length
== 8) {
459 const char *movmskintr
= "llvm.x86.avx.movmsk.ps.256";
460 const char *popcntintr
= "llvm.ctpop.i32";
461 LLVMValueRef bits
= LLVMBuildBitCast(builder
, maskvalue
,
462 lp_build_vec_type(gallivm
, type
), "");
463 bits
= lp_build_intrinsic_unary(builder
, movmskintr
,
464 LLVMInt32TypeInContext(context
), bits
);
465 count
= lp_build_intrinsic_unary(builder
, popcntintr
,
466 LLVMInt32TypeInContext(context
), bits
);
// Generic path: each element becomes 0 or 1 after the AND with countmask.
470 LLVMValueRef countv
= LLVMBuildAnd(builder
, maskvalue
, countmask
, "countv");
// One byte per element: an integer of length * 8 bits holds the packed bytes,
// and the i8 vector of length * 4 is the byte view of the source vector.
471 LLVMTypeRef counttype
= LLVMIntTypeInContext(context
, type
.length
* 8);
472 LLVMTypeRef i8vntype
= LLVMVectorType(LLVMInt8TypeInContext(context
), type
.length
* 4);
473 LLVMValueRef shufflev
, countd
;
474 LLVMValueRef shuffles
[16];
475 const char *popcntintr
= NULL
;
477 countv
= LLVMBuildBitCast(builder
, countv
, i8vntype
, "");
// Select byte 4*i of the byte view, i.e. one byte out of every element.
479 for (i
= 0; i
< type
.length
; i
++) {
480 shuffles
[i
] = lp_build_const_int32(gallivm
, 4*i
);
483 shufflev
= LLVMConstVector(shuffles
, type
.length
);
484 countd
= LLVMBuildShuffleVector(builder
, countv
, LLVMGetUndef(i8vntype
), shufflev
, "");
485 countd
= LLVMBuildBitCast(builder
, countd
, counttype
, "countd");
489 * this is bad on cpus without popcount (on x86 supported by intel
490 * nehalem, amd barcelona, and up - not tied to sse42).
491 * Would be much faster to just sum the 4 elements of the vector with
492 * some horizontal add (shuffle/add/shuffle/add after the initial and).
// Pick the popcount intrinsic whose width matches counttype.
494 switch (type
.length
) {
496 popcntintr
= "llvm.ctpop.i32";
499 popcntintr
= "llvm.ctpop.i64";
502 popcntintr
= "llvm.ctpop.i128";
507 count
= lp_build_intrinsic_unary(builder
, popcntintr
, counttype
, countd
);
// Wider popcounts are truncated to 32 bits before the add below.
509 if (type
.length
> 4) {
510 count
= LLVMBuildTrunc(builder
, count
, LLVMIntTypeInContext(context
, 32), "");
// Read-modify-write of the counter: load, add the new count, store.
513 newcount
= LLVMBuildLoad(builder
, counter
, "origcount");
514 newcount
= LLVMBuildAdd(builder
, newcount
, count
, "newcount");
515 LLVMBuildStore(builder
, newcount
, counter
);
521 * Generate code for performing depth and/or stencil tests.
522 * We operate on a vector of values (typically n 2x2 quads).
524 * \param depth the depth test state
525 * \param stencil the front/back stencil state
526 * \param type the data type of the fragment depth/stencil values
527 * \param format_desc description of the depth/stencil surface
528 * \param mask the alive/dead pixel mask for the quad (vector)
529 * \param stencil_refs the front/back stencil ref values (scalar)
530 * \param z_src the incoming depth/stencil values (n 2x2 quad values, float32)
531 * \param zs_dst_ptr pointer to depth/stencil values in framebuffer
532 * \param face contains boolean value indicating front/back facing polygon
// lp_build_depth_stencil_test: emit the combined depth and stencil test for
// a vector of fragments.
// Visible phases, in order:
//   1. normalise z_src_type and pick the Z type with lp_depth_type();
//   2. sanity-check the surface format against the enabled tests;
//   3. load the packed Z/stencil vector and split it into z_dst and
//      stencil_vals using the shifts and masks from the two helpers above;
//   4. stencil test plus stencil-fail operator;
//   5. depth compare, producing z_pass;
//   6. select new Z values, apply the Z-fail / Z-pass stencil operators;
//   7. shift the fields back in place, OR them together, update the mask.
// NOTE(review): this listing is missing physical lines, including the return
// type, the z_src / face / do_branch parameters, most braces and else
// keywords, several call arguments and the final write to *zs_value. The
// comments describe only the statements that are visible.
535 lp_build_depth_stencil_test(struct gallivm_state
*gallivm
,
536 const struct pipe_depth_state
*depth
,
537 const struct pipe_stencil_state stencil
[2],
538 struct lp_type z_src_type
,
539 const struct util_format_description
*format_desc
,
540 struct lp_build_mask_context
*mask
,
541 LLVMValueRef stencil_refs
[2],
543 LLVMValueRef zs_dst_ptr
,
545 LLVMValueRef
*zs_value
,
548 LLVMBuilderRef builder
= gallivm
->builder
;
549 struct lp_type z_type
;
550 struct lp_build_context z_bld
;
551 struct lp_build_context s_bld
;
552 struct lp_type s_type
;
553 unsigned z_shift
= 0, z_width
= 0, z_mask
= 0;
554 LLVMValueRef zs_dst
, z_dst
= NULL
;
555 LLVMValueRef stencil_vals
= NULL
;
556 LLVMValueRef z_bitmask
= NULL
, stencil_shift
= NULL
;
557 LLVMValueRef z_pass
= NULL
, s_pass_mask
= NULL
;
// Snapshot of the live-pixel mask on entry; all later masks derive from it.
558 LLVMValueRef orig_mask
= lp_build_mask_value(mask
);
559 LLVMValueRef front_facing
= NULL
;
563 * Depths are expected to be between 0 and 1, even if they are stored in
564 * floats. Setting these bits here will ensure that the lp_build_conv() call
565 * below won't try to unnecessarily clamp the incoming values.
567 if(z_src_type
.floating
) {
568 z_src_type
.sign
= FALSE
;
569 z_src_type
.norm
= TRUE
;
572 assert(!z_src_type
.sign
);
573 assert(z_src_type
.norm
);
576 /* Pick the depth type. */
// The second argument is the total vector size in bits.
577 z_type
= lp_depth_type(format_desc
, z_src_type
.width
*z_src_type
.length
);
579 /* FIXME: Cope with a depth test type with a different bit width. */
580 assert(z_type
.width
== z_src_type
.width
);
581 assert(z_type
.length
== z_src_type
.length
);
583 /* FIXME: for non-float depth/stencil might generate better code
584 * if we'd always split it up to use 128bit operations.
585 * For stencil we'd almost certainly want to pack to 8xi16 values,
586 * for z just run twice.
589 /* Sanity checking */
591 const unsigned z_swizzle
= format_desc
->swizzle
[0];
592 const unsigned s_swizzle
= format_desc
->swizzle
[1];
// The format must carry at least one of Z or stencil.
594 assert(z_swizzle
!= UTIL_FORMAT_SWIZZLE_NONE
||
595 s_swizzle
!= UTIL_FORMAT_SWIZZLE_NONE
);
597 assert(depth
->enabled
|| stencil
[0].enabled
);
599 assert(format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
);
600 assert(format_desc
->block
.width
== 1);
601 assert(format_desc
->block
.height
== 1);
// Stencil is only accepted on the two packed 24/8 formats.
603 if (stencil
[0].enabled
) {
604 assert(format_desc
->format
== PIPE_FORMAT_Z24_UNORM_S8_UINT
||
605 format_desc
->format
== PIPE_FORMAT_S8_UINT_Z24_UNORM
);
608 assert(z_swizzle
< 4);
609 assert(format_desc
->block
.bits
== z_type
.width
);
610 if (z_type
.floating
) {
611 assert(z_swizzle
== 0);
612 assert(format_desc
->channel
[z_swizzle
].type
==
613 UTIL_FORMAT_TYPE_FLOAT
);
614 assert(format_desc
->channel
[z_swizzle
].size
==
615 format_desc
->block
.bits
);
618 assert(format_desc
->channel
[z_swizzle
].type
==
619 UTIL_FORMAT_TYPE_UNSIGNED
);
620 assert(format_desc
->channel
[z_swizzle
].normalized
);
621 assert(!z_type
.fixed
);
626 /* Setup build context for Z vals */
627 lp_build_context_init(&z_bld
, gallivm
, z_type
);
629 /* Setup build context for stencil vals */
// Stencil math uses the integer counterpart of the Z type.
630 s_type
= lp_int_type(z_type
);
631 lp_build_context_init(&s_bld
, gallivm
, s_type
);
633 /* Load current z/stencil value from z/stencil buffer */
634 zs_dst_ptr
= LLVMBuildBitCast(builder
,
636 LLVMPointerType(z_bld
.vec_type
, 0), "");
637 zs_dst
= LLVMBuildLoad(builder
, zs_dst_ptr
, "");
639 lp_build_name(zs_dst
, "zs_dst");
642 /* Compute and apply the Z/stencil bitmasks and shifts.
645 unsigned s_shift
, s_mask
;
647 if (get_z_shift_and_mask(format_desc
, &z_shift
, &z_width
, &z_mask
)) {
// An all-ones mask means Z fills the pixel, so no bitmask is needed.
648 if (z_mask
!= 0xffffffff) {
649 z_bitmask
= lp_build_const_int_vec(gallivm
, z_type
, z_mask
);
653 * Align the framebuffer Z's LSB to the right.
656 LLVMValueRef shift
= lp_build_const_int_vec(gallivm
, z_type
, z_shift
);
657 z_dst
= LLVMBuildLShr(builder
, zs_dst
, shift
, "z_dst");
658 } else if (z_bitmask
) {
659 /* TODO: Instead of loading a mask from memory and ANDing, it's
660 * probably faster to just shake the bits with two shifts. */
661 z_dst
= LLVMBuildAnd(builder
, zs_dst
, z_bitmask
, "z_dst");
664 lp_build_name(z_dst
, "z_dst");
668 if (get_s_shift_and_mask(format_desc
, &s_shift
, &s_mask
)) {
// Move the stencil field down to bit 0; the shift is kept for the
// reverse move before the final merge.
670 LLVMValueRef shift
= lp_build_const_int_vec(gallivm
, s_type
, s_shift
);
671 stencil_vals
= LLVMBuildLShr(builder
, zs_dst
, shift
, "");
672 stencil_shift
= shift
; /* used below */
675 stencil_vals
= zs_dst
;
678 if (s_mask
!= 0xffffffff) {
679 LLVMValueRef mask
= lp_build_const_int_vec(gallivm
, s_type
, s_mask
);
680 stencil_vals
= LLVMBuildAnd(builder
, stencil_vals
, mask
, "");
683 lp_build_name(stencil_vals
, "s_dst");
// ---- Stencil test and stencil-fail operator ----
687 if (stencil
[0].enabled
) {
690 LLVMValueRef zero
= lp_build_const_int32(gallivm
, 0);
692 /* front_facing = face != 0 ? ~0 : 0 */
// The i1 compare result is sign-extended to the full vector bit width and
// then bitcast to the integer vector type, giving an all-ones or all-zeros
// mask.
693 front_facing
= LLVMBuildICmp(builder
, LLVMIntNE
, face
, zero
, "");
694 front_facing
= LLVMBuildSExt(builder
, front_facing
,
695 LLVMIntTypeInContext(gallivm
->context
,
696 s_bld
.type
.length
*s_bld
.type
.width
),
698 front_facing
= LLVMBuildBitCast(builder
, front_facing
,
699 s_bld
.int_vec_type
, "");
702 /* convert scalar stencil refs into vectors */
// Note: this overwrites the entries of the stencil_refs array in place.
703 stencil_refs
[0] = lp_build_broadcast_scalar(&s_bld
, stencil_refs
[0]);
704 stencil_refs
[1] = lp_build_broadcast_scalar(&s_bld
, stencil_refs
[1]);
706 s_pass_mask
= lp_build_stencil_test(&s_bld
, stencil
,
707 stencil_refs
, stencil_vals
,
710 /* apply stencil-fail operator */
// Live pixels (orig_mask) that did not pass the stencil test.
712 LLVMValueRef s_fail_mask
= lp_build_andnot(&s_bld
, orig_mask
, s_pass_mask
);
713 stencil_vals
= lp_build_stencil_op(&s_bld
, stencil
, S_FAIL_OP
,
714 stencil_refs
, stencil_vals
,
715 s_fail_mask
, front_facing
);
// ---- Depth test ----
719 if (depth
->enabled
) {
721 * Convert fragment Z to the desired type, aligning the LSB to the right.
724 assert(z_type
.width
== z_src_type
.width
);
725 assert(z_type
.length
== z_src_type
.length
);
726 assert(lp_check_value(z_src_type
, z_src
));
727 if (z_src_type
.floating
) {
729 * Convert from floating point values
732 if (!z_type
.floating
) {
733 z_src
= lp_build_clamped_float_to_unsigned_norm(gallivm
,
740 * Convert from unsigned normalized values.
743 assert(!z_src_type
.sign
);
744 assert(!z_src_type
.fixed
);
745 assert(z_src_type
.norm
);
746 assert(!z_type
.floating
);
// Drop the low bits when the source has more precision than the buffer.
747 if (z_src_type
.width
> z_width
) {
748 LLVMValueRef shift
= lp_build_const_int_vec(gallivm
, z_src_type
,
749 z_src_type
.width
- z_width
);
750 z_src
= LLVMBuildLShr(builder
, z_src
, shift
, "");
753 assert(lp_check_value(z_type
, z_src
));
755 lp_build_name(z_src
, "z_src");
757 /* compare src Z to dst Z, returning 'pass' mask */
758 z_pass
= lp_build_cmp(&z_bld
, depth
->func
, z_src
, z_dst
);
760 if (!stencil
[0].enabled
) {
761 /* We can potentially skip all remaining operations here, but only
762 * if stencil is disabled because we still need to update the stencil
763 * buffer values. Don't need to update Z buffer values.
765 lp_build_mask_update(mask
, z_pass
);
768 lp_build_mask_check(mask
);
// ---- Depth write: choose new or old Z per element ----
773 if (depth
->writemask
) {
774 LLVMValueRef zselectmask
;
776 /* mask off bits that failed Z test */
777 zselectmask
= LLVMBuildAnd(builder
, orig_mask
, z_pass
, "");
779 /* mask off bits that failed stencil test */
781 zselectmask
= LLVMBuildAnd(builder
, zselectmask
, s_pass_mask
, "");
784 /* Mix the old and new Z buffer values.
785 * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
787 z_dst
= lp_build_select(&z_bld
, zselectmask
, z_src
, z_dst
);
// ---- Stencil updates that depend on the depth result ----
790 if (stencil
[0].enabled
) {
791 /* update stencil buffer values according to z pass/fail result */
792 LLVMValueRef z_fail_mask
, z_pass_mask
;
794 /* apply Z-fail operator */
795 z_fail_mask
= lp_build_andnot(&z_bld
, orig_mask
, z_pass
);
796 stencil_vals
= lp_build_stencil_op(&s_bld
, stencil
, Z_FAIL_OP
,
797 stencil_refs
, stencil_vals
,
798 z_fail_mask
, front_facing
);
800 /* apply Z-pass operator */
801 z_pass_mask
= LLVMBuildAnd(builder
, orig_mask
, z_pass
, "");
802 stencil_vals
= lp_build_stencil_op(&s_bld
, stencil
, Z_PASS_OP
,
803 stencil_refs
, stencil_vals
,
804 z_pass_mask
, front_facing
);
808 /* No depth test: apply Z-pass operator to stencil buffer values which
809 * passed the stencil test.
811 s_pass_mask
= LLVMBuildAnd(builder
, orig_mask
, s_pass_mask
, "");
812 stencil_vals
= lp_build_stencil_op(&s_bld
, stencil
, Z_PASS_OP
,
813 stencil_refs
, stencil_vals
,
814 s_pass_mask
, front_facing
);
817 /* Put Z and stencil bits in the right place */
// Undo the right-alignment shifts applied when the fields were extracted.
818 if (z_dst
&& z_shift
) {
819 LLVMValueRef shift
= lp_build_const_int_vec(gallivm
, z_type
, z_shift
);
820 z_dst
= LLVMBuildShl(builder
, z_dst
, shift
, "");
822 if (stencil_vals
&& stencil_shift
)
823 stencil_vals
= LLVMBuildShl(builder
, stencil_vals
,
826 /* Finally, merge/store the z/stencil values */
827 if ((depth
->enabled
&& depth
->writemask
) ||
828 (stencil
[0].enabled
&& stencil
[0].writemask
)) {
// Both fields present: OR them back into one packed value.
830 if (z_dst
&& stencil_vals
)
831 zs_dst
= LLVMBuildOr(builder
, z_dst
, stencil_vals
, "");
835 zs_dst
= stencil_vals
;
// ---- Fold the test results into the live-pixel mask ----
841 lp_build_mask_update(mask
, s_pass_mask
);
// With stencil disabled the z_pass update was already emitted above.
843 if (depth
->enabled
&& stencil
[0].enabled
)
844 lp_build_mask_update(mask
, z_pass
);
847 lp_build_mask_check(mask
);
// lp_build_depth_write: store zs_value to the depth/stencil buffer.
// The destination pointer is first bitcast to a pointer (address space 0)
// to the LLVM type of zs_value, then a plain store is emitted.
// format_desc is not referenced by any visible statement.
// NOTE(review): the return type and the braces are not visible here.
853 lp_build_depth_write(LLVMBuilderRef builder
,
854 const struct util_format_description
*format_desc
,
855 LLVMValueRef zs_dst_ptr
,
856 LLVMValueRef zs_value
)
858 zs_dst_ptr
= LLVMBuildBitCast(builder
, zs_dst_ptr
,
859 LLVMPointerType(LLVMTypeOf(zs_value
), 0), "");
861 LLVMBuildStore(builder
, zs_value
, zs_dst_ptr
);
// lp_build_deferred_depth_write: masked write-back of depth/stencil values.
// Visible steps: recompute the Z type and build context, bitcast the
// destination pointer to the Z vector type, load the current buffer
// contents, select zs_value where the current mask value is set and the
// loaded value elsewhere, then store the result.
// NOTE(review): the return type, the braces and the declaration of z_dst
// are not visible in this listing.
866 lp_build_deferred_depth_write(struct gallivm_state
*gallivm
,
867 struct lp_type z_src_type
,
868 const struct util_format_description
*format_desc
,
869 struct lp_build_mask_context
*mask
,
870 LLVMValueRef zs_dst_ptr
,
871 LLVMValueRef zs_value
)
873 struct lp_type z_type
;
874 struct lp_build_context z_bld
;
876 LLVMBuilderRef builder
= gallivm
->builder
;
878 /* XXX: pointlessly redo type logic:
// Same type derivation as in lp_build_depth_stencil_test.
880 z_type
= lp_depth_type(format_desc
, z_src_type
.width
*z_src_type
.length
);
881 lp_build_context_init(&z_bld
, gallivm
, z_type
);
883 zs_dst_ptr
= LLVMBuildBitCast(builder
, zs_dst_ptr
,
884 LLVMPointerType(z_bld
.vec_type
, 0), "");
// Read-modify-write: elements whose mask is clear keep the buffer value.
886 z_dst
= LLVMBuildLoad(builder
, zs_dst_ptr
, "zsbufval");
887 z_dst
= lp_build_select(&z_bld
, lp_build_mask_value(mask
), zs_value
, z_dst
);
889 LLVMBuildStore(builder
, z_dst
, zs_dst_ptr
);