1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
/**
 * @file
 * Depth/stencil testing to LLVM IR translation.
 *
 * To be done accurately/efficiently the depth/stencil test must be done with
 * the same type/format of the depth/stencil buffer, which implies massaging
 * the incoming depths to fit into place. Using a more straightforward
 * type/format for depth/stencil values internally and only converting when
 * flushing would avoid this, but it would most likely result in depth fighting
 * artifacts.
 *
 * We are free to use a different pixel layout though. Since our basic
 * processing unit is a quad (2x2 pixel block) we store the depth/stencil
 * values tiled, a quad at a time. That is, a depth buffer containing
 *
 *  Z11 Z12 Z13 Z14 ...
 *  Z21 Z22 Z23 Z24 ...
 *  Z31 Z32 Z33 Z34 ...
 *  Z41 Z42 Z43 Z44 ...
 *  ... ... ... ... ...
 *
 * will actually be stored in memory as
 *
 *  Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
 *  Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
 *  ... ... ... ... ... ... ... ... ...
 *
 * FIXME: Code generate stencil test
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
60 #include "pipe/p_state.h"
61 #include "util/u_format.h"
63 #include "lp_bld_type.h"
64 #include "lp_bld_arit.h"
65 #include "lp_bld_const.h"
66 #include "lp_bld_logic.h"
67 #include "lp_bld_flow.h"
68 #include "lp_bld_debug.h"
69 #include "lp_bld_depth.h"
70 #include "lp_bld_swizzle.h"
75 * Do the stencil test comparison (compare fb Z values against ref value.
76 * \param stencilVals vector of stencil values from framebuffer
77 * \param stencilRef the stencil reference value, replicated as a vector
78 * \return mask of pass/fail values
81 lp_build_stencil_test(struct lp_build_context
*bld
,
82 const struct pipe_stencil_state
*stencil
,
83 LLVMValueRef stencilRef
,
84 LLVMValueRef stencilVals
)
86 const unsigned stencilMax
= 255; /* XXX fix */
87 struct lp_type type
= bld
->type
;
90 assert(stencil
->enabled
);
92 if (stencil
->valuemask
!= stencilMax
) {
93 /* compute stencilRef = stencilRef & valuemask */
94 LLVMValueRef valuemask
= lp_build_const_int_vec(type
, stencil
->valuemask
);
95 stencilRef
= LLVMBuildAnd(bld
->builder
, stencilRef
, valuemask
, "");
96 /* compute stencilVals = stencilVals & valuemask */
97 stencilVals
= LLVMBuildAnd(bld
->builder
, stencilVals
, valuemask
, "");
100 res
= lp_build_cmp(bld
, stencil
->func
, stencilVals
, stencilRef
);
107 * Apply the stencil operator (add/sub/keep/etc) to the given vector
109 * \return new stencil values vector
112 lp_build_stencil_op(struct lp_build_context
*bld
,
113 const struct pipe_stencil_state
*stencil
,
115 LLVMValueRef stencilRef
,
116 LLVMValueRef stencilVals
,
120 const unsigned stencilMax
= 255; /* XXX fix */
121 struct lp_type type
= bld
->type
;
123 LLVMValueRef max
= lp_build_const_int_vec(type
, stencilMax
);
125 switch (stencil_op
) {
126 case PIPE_STENCIL_OP_KEEP
:
128 /* we can return early for this case */
130 case PIPE_STENCIL_OP_ZERO
:
133 case PIPE_STENCIL_OP_REPLACE
:
136 case PIPE_STENCIL_OP_INCR
:
137 res
= lp_build_add(bld
, stencilVals
, bld
->one
);
138 res
= lp_build_min(bld
, res
, max
);
140 case PIPE_STENCIL_OP_DECR
:
141 res
= lp_build_sub(bld
, stencilVals
, bld
->one
);
142 res
= lp_build_max(bld
, res
, bld
->zero
);
144 case PIPE_STENCIL_OP_INCR_WRAP
:
145 res
= lp_build_add(bld
, stencilVals
, bld
->one
);
146 res
= LLVMBuildAnd(bld
->builder
, res
, max
, "");
148 case PIPE_STENCIL_OP_DECR_WRAP
:
149 res
= lp_build_sub(bld
, stencilVals
, bld
->one
);
150 res
= LLVMBuildAnd(bld
->builder
, res
, max
, "");
152 case PIPE_STENCIL_OP_INVERT
:
153 res
= LLVMBuildNot(bld
->builder
, stencilVals
, "");
156 assert(0 && "bad stencil op mode");
160 if (stencil
->writemask
!= stencilMax
) {
161 /* compute res = (res & mask) | (stencilVals & ~mask) */
162 LLVMValueRef mask
= lp_build_const_int_vec(type
, stencil
->writemask
);
163 LLVMValueRef cmask
= LLVMBuildNot(bld
->builder
, mask
, "notWritemask");
164 LLVMValueRef t1
= LLVMBuildAnd(bld
->builder
, res
, mask
, "t1");
165 LLVMValueRef t2
= LLVMBuildAnd(bld
->builder
, stencilVals
, cmask
, "t2");
166 res
= LLVMBuildOr(bld
->builder
, t1
, t2
, "t1_or_t2");
169 /* only the update the vector elements enabled by 'mask' */
170 res
= lp_build_select(bld
, mask
, res
, stencilVals
);
177 * Return a type appropriate for depth/stencil testing.
180 lp_depth_type(const struct util_format_description
*format_desc
,
186 assert(format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
);
187 assert(format_desc
->block
.width
== 1);
188 assert(format_desc
->block
.height
== 1);
190 swizzle
= format_desc
->swizzle
[0];
193 memset(&type
, 0, sizeof type
);
194 type
.width
= format_desc
->block
.bits
;
196 if(format_desc
->channel
[swizzle
].type
== UTIL_FORMAT_TYPE_FLOAT
) {
197 type
.floating
= TRUE
;
198 assert(swizzle
== 0);
199 assert(format_desc
->channel
[swizzle
].size
== format_desc
->block
.bits
);
201 else if(format_desc
->channel
[swizzle
].type
== UTIL_FORMAT_TYPE_UNSIGNED
) {
202 assert(format_desc
->block
.bits
<= 32);
203 if(format_desc
->channel
[swizzle
].normalized
)
209 assert(type
.width
<= length
);
210 type
.length
= length
/ type
.width
;
217 lp_build_get_stencil_ref(struct lp_build_context
*bld
,
218 struct lp_type type
, LLVMValueRef stencil_refs_ptr
)
220 LLVMValueRef indexes
[2], ptr
, ref
, ref_vec
;
222 /* load 0th element of the array */
223 indexes
[0] = indexes
[1] = LLVMConstInt(LLVMInt32Type(), 0, 0);
224 ptr
= LLVMBuildGEP(bld
->builder
, stencil_refs_ptr
, indexes
, 2, "");
225 ref
= LLVMBuildLoad(bld
->builder
, ptr
, "");
227 /* convert int8 value to i32 */
228 ref
= LLVMBuildZExt(bld
->builder
, ref
, LLVMIntType(type
.width
), "");
230 /* make scalar into vector */
231 ref_vec
= lp_build_broadcast_scalar(bld
, ref
);
238 * Generate code for performing depth and/or stencil tests.
239 * We operate on a vector of values (typically a 2x2 quad).
241 * \param type the data type of the fragment depth/stencil values
242 * \param format_desc description of the depth/stencil surface
243 * \param mask the alive/dead pixel mask for the quad
244 * \param src the incoming depth/stencil values (a 2x2 quad)
245 * \param dst_ptr the outgoing/updated depth/stencil values
248 lp_build_depth_stencil_test(LLVMBuilderRef builder
,
249 const struct pipe_depth_state
*depth
,
250 const struct pipe_stencil_state stencil
[2],
252 const struct util_format_description
*format_desc
,
253 struct lp_build_mask_context
*mask
,
254 LLVMValueRef stencil_refs
,
256 LLVMValueRef zs_dst_ptr
)
258 struct lp_build_context bld
;
259 unsigned z_swizzle
, s_swizzle
;
260 LLVMValueRef zs_dst
, z_dst
= NULL
;
261 LLVMValueRef stencil_vals
= NULL
;
262 LLVMValueRef z_bitmask
= NULL
, s_bitmask
= NULL
;
263 LLVMValueRef z_pass
= NULL
, s_pass_mask
= NULL
;
264 LLVMValueRef orig_mask
= mask
->value
;
266 assert(depth
->enabled
|| stencil
[0].enabled
);
268 assert(format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
);
269 assert(format_desc
->block
.width
== 1);
270 assert(format_desc
->block
.height
== 1);
272 z_swizzle
= format_desc
->swizzle
[0];
273 s_swizzle
= format_desc
->swizzle
[1];
275 assert(z_swizzle
!= UTIL_FORMAT_SWIZZLE_NONE
||
276 s_swizzle
!= UTIL_FORMAT_SWIZZLE_NONE
);
278 /* Sanity checking */
279 assert(z_swizzle
< 4);
280 assert(format_desc
->block
.bits
== type
.width
);
282 assert(z_swizzle
== 0);
283 assert(format_desc
->channel
[z_swizzle
].type
== UTIL_FORMAT_TYPE_FLOAT
);
284 assert(format_desc
->channel
[z_swizzle
].size
== format_desc
->block
.bits
);
287 assert(format_desc
->channel
[z_swizzle
].type
== UTIL_FORMAT_TYPE_UNSIGNED
);
288 assert(format_desc
->channel
[z_swizzle
].normalized
);
294 /* Setup build context */
295 lp_build_context_init(&bld
, builder
, type
);
297 /* Load current z/stencil value from z/stencil buffer */
298 zs_dst
= LLVMBuildLoad(builder
, zs_dst_ptr
, "");
300 lp_build_name(zs_dst
, "zsbufval");
302 /* Align the source depth bits with the destination's, and mask out any
303 * stencil or padding bits from both */
304 if(format_desc
->channel
[z_swizzle
].size
== format_desc
->block
.bits
) {
305 assert(z_swizzle
== 0);
309 /* shift/mask bits to right-justify the Z bits */
310 unsigned padding_left
;
311 unsigned padding_right
;
314 assert(format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
315 assert(format_desc
->channel
[z_swizzle
].type
== UTIL_FORMAT_TYPE_UNSIGNED
);
316 assert(format_desc
->channel
[z_swizzle
].size
<= format_desc
->block
.bits
);
317 assert(format_desc
->channel
[z_swizzle
].normalized
);
320 for(chan
= 0; chan
< z_swizzle
; ++chan
)
321 padding_right
+= format_desc
->channel
[chan
].size
;
322 padding_left
= format_desc
->block
.bits
-
323 (padding_right
+ format_desc
->channel
[z_swizzle
].size
);
325 if(padding_left
|| padding_right
) {
326 const unsigned long long mask_left
= (1ULL << (format_desc
->block
.bits
- padding_left
)) - 1;
327 const unsigned long long mask_right
= (1ULL << (padding_right
)) - 1;
328 z_bitmask
= lp_build_const_int_vec(type
, mask_left
^ mask_right
);
331 s_bitmask
= LLVMBuildNot(builder
, z_bitmask
, "");
333 stencil_vals
= LLVMBuildAnd(builder
, zs_dst
, s_bitmask
, "");
336 z_src
= LLVMBuildLShr(builder
, z_src
,
337 lp_build_const_int_vec(type
, padding_left
), "");
339 z_src
= LLVMBuildAnd(builder
, z_src
, z_bitmask
, "");
340 if(padding_left
|| padding_right
)
341 z_dst
= LLVMBuildAnd(builder
, zs_dst
, z_bitmask
, "");
346 lp_build_name(z_dst
, "zsbuf.z");
349 printf("build depth %d stencil %d\n",
354 if (stencil
[0].enabled
) {
355 /* Incoming stencil_refs is ptr to int8[2]. Get/convert to int32[4]. */
356 stencil_refs
= lp_build_get_stencil_ref(&bld
, type
, stencil_refs
);
358 s_pass_mask
= lp_build_stencil_test(&bld
, stencil
,
359 stencil_refs
, stencil_vals
);
361 /* apply stencil-fail operator */
363 LLVMValueRef s_fail_mask
= lp_build_andc(&bld
, orig_mask
, s_pass_mask
);
364 stencil_vals
= lp_build_stencil_op(&bld
, stencil
, stencil
[0].fail_op
,
365 stencil_refs
, stencil_vals
,
370 if (depth
->enabled
) {
371 /* compare src Z to dst Z, returning 'pass' mask */
372 z_pass
= lp_build_cmp(&bld
, depth
->func
, z_src
, z_dst
);
374 if (!stencil
[0].enabled
) {
375 /* We can potentially skip all remaining operations here, but only
376 * if stencil is disabled because we still need to update the stencil
377 * buffer values. Don't need to update Z buffer values.
379 lp_build_mask_update(mask
, z_pass
);
382 if (depth
->writemask
) {
384 z_bitmask
= LLVMBuildAnd(builder
, mask
->value
, z_bitmask
, "");
386 z_bitmask
= mask
->value
;
388 z_dst
= lp_build_select(&bld
, z_bitmask
, z_src
, z_dst
);
391 if (stencil
[0].enabled
) {
392 /* update stencil buffer values according to z pass/fail result */
393 LLVMValueRef z_fail_mask
, z_pass_mask
;
395 /* apply Z-fail operator */
396 z_fail_mask
= lp_build_andc(&bld
, orig_mask
, z_pass
);
397 stencil_vals
= lp_build_stencil_op(&bld
, stencil
, stencil
[0].zfail_op
,
398 stencil_refs
, stencil_vals
,
401 /* apply Z-pass operator */
402 z_pass_mask
= LLVMBuildAnd(bld
.builder
, orig_mask
, z_pass
, "");
403 stencil_vals
= lp_build_stencil_op(&bld
, stencil
, stencil
[0].zpass_op
,
404 stencil_refs
, stencil_vals
,
409 /* No depth test: apply Z-pass operator to stencil buffer values which
410 * passed the stencil test.
412 s_pass_mask
= LLVMBuildAnd(bld
.builder
, orig_mask
, s_pass_mask
, "");
413 stencil_vals
= lp_build_stencil_op(&bld
, stencil
, stencil
[0].zpass_op
,
414 stencil_refs
, stencil_vals
, s_pass_mask
);
417 /* Finally, merge/store the z/stencil values */
418 if ((depth
->enabled
&& depth
->writemask
) ||
419 (stencil
[0].enabled
&& stencil
[0].writemask
)) {
421 if (z_dst
&& stencil_vals
)
422 zs_dst
= LLVMBuildOr(bld
.builder
, z_dst
, stencil_vals
, "");
426 zs_dst
= stencil_vals
;
428 LLVMBuildStore(builder
, zs_dst
, zs_dst_ptr
);
432 lp_build_mask_update(mask
, s_pass_mask
);
434 if (depth
->enabled
&& stencil
[0].enabled
)
435 lp_build_mask_update(mask
, z_pass
);