llvmpipe: remove misleading debug string
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_depth.c
1 /**************************************************************************
2 *
3 * Copyright 2009-2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Depth/stencil testing to LLVM IR translation.
31 *
32 * To be done accurately/efficiently the depth/stencil test must be done with
33 * the same type/format of the depth/stencil buffer, which implies massaging
34 * the incoming depths to fit into place. Using a more straightforward
35 * type/format for depth/stencil values internally and only converting when
36 * flushing would avoid this, but it would most likely result in depth fighting
37 * artifacts.
38 *
39 * We are free to use a different pixel layout though. Since our basic
40 * processing unit is a quad (2x2 pixel block) we store the depth/stencil
41 * values tiled, a quad at a time. That is, a depth buffer containing
42 *
43 * Z11 Z12 Z13 Z14 ...
44 * Z21 Z22 Z23 Z24 ...
45 * Z31 Z32 Z33 Z34 ...
46 * Z41 Z42 Z43 Z44 ...
47 * ... ... ... ... ...
48 *
49 * will actually be stored in memory as
50 *
51 * Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
52 * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
53 * ... ... ... ... ... ... ... ... ...
54 *
55 *
56 * @author Jose Fonseca <jfonseca@vmware.com>
57 * @author Brian Paul <jfonseca@vmware.com>
58 */
59
60 #include "pipe/p_state.h"
61 #include "util/u_format.h"
62
63 #include "gallivm/lp_bld_type.h"
64 #include "gallivm/lp_bld_arit.h"
65 #include "gallivm/lp_bld_bitarit.h"
66 #include "gallivm/lp_bld_const.h"
67 #include "gallivm/lp_bld_conv.h"
68 #include "gallivm/lp_bld_logic.h"
69 #include "gallivm/lp_bld_flow.h"
70 #include "gallivm/lp_bld_intr.h"
71 #include "gallivm/lp_bld_debug.h"
72 #include "gallivm/lp_bld_swizzle.h"
73
74 #include "lp_bld_depth.h"
75
76
/**
 * Selects which operator field of pipe_stencil_state to apply
 * (see lp_build_stencil_op_single()).
 */
enum stencil_op {
   S_FAIL_OP = 0,   /**< use pipe_stencil_state::fail_op */
   Z_FAIL_OP = 1,   /**< use pipe_stencil_state::zfail_op */
   Z_PASS_OP = 2    /**< use pipe_stencil_state::zpass_op */
};
83
84
85
86 /**
87 * Do the stencil test comparison (compare FB stencil values against ref value).
88 * This will be used twice when generating two-sided stencil code.
89 * \param stencil the front/back stencil state
90 * \param stencilRef the stencil reference value, replicated as a vector
91 * \param stencilVals vector of stencil values from framebuffer
92 * \return vector mask of pass/fail values (~0 or 0)
93 */
94 static LLVMValueRef
95 lp_build_stencil_test_single(struct lp_build_context *bld,
96 const struct pipe_stencil_state *stencil,
97 LLVMValueRef stencilRef,
98 LLVMValueRef stencilVals)
99 {
100 const unsigned stencilMax = 255; /* XXX fix */
101 struct lp_type type = bld->type;
102 LLVMValueRef res;
103
104 assert(type.sign);
105
106 assert(stencil->enabled);
107
108 if (stencil->valuemask != stencilMax) {
109 /* compute stencilRef = stencilRef & valuemask */
110 LLVMValueRef valuemask = lp_build_const_int_vec(type, stencil->valuemask);
111 stencilRef = LLVMBuildAnd(bld->builder, stencilRef, valuemask, "");
112 /* compute stencilVals = stencilVals & valuemask */
113 stencilVals = LLVMBuildAnd(bld->builder, stencilVals, valuemask, "");
114 }
115
116 res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);
117
118 return res;
119 }
120
121
122 /**
123 * Do the one or two-sided stencil test comparison.
124 * \sa lp_build_stencil_test_single
125 * \param front_facing an integer vector mask, indicating front (~0) or back
126 * (0) facing polygon. If NULL, assume front-facing.
127 */
128 static LLVMValueRef
129 lp_build_stencil_test(struct lp_build_context *bld,
130 const struct pipe_stencil_state stencil[2],
131 LLVMValueRef stencilRefs[2],
132 LLVMValueRef stencilVals,
133 LLVMValueRef front_facing)
134 {
135 LLVMValueRef res;
136
137 assert(stencil[0].enabled);
138
139 /* do front face test */
140 res = lp_build_stencil_test_single(bld, &stencil[0],
141 stencilRefs[0], stencilVals);
142
143 if (stencil[1].enabled && front_facing) {
144 /* do back face test */
145 LLVMValueRef back_res;
146
147 back_res = lp_build_stencil_test_single(bld, &stencil[1],
148 stencilRefs[1], stencilVals);
149
150 res = lp_build_select(bld, front_facing, res, back_res);
151 }
152
153 return res;
154 }
155
156
157 /**
158 * Apply the stencil operator (add/sub/keep/etc) to the given vector
159 * of stencil values.
160 * \return new stencil values vector
161 */
162 static LLVMValueRef
163 lp_build_stencil_op_single(struct lp_build_context *bld,
164 const struct pipe_stencil_state *stencil,
165 enum stencil_op op,
166 LLVMValueRef stencilRef,
167 LLVMValueRef stencilVals)
168
169 {
170 struct lp_type type = bld->type;
171 LLVMValueRef res;
172 LLVMValueRef max = lp_build_const_int_vec(type, 0xff);
173 unsigned stencil_op;
174
175 assert(type.sign);
176
177 switch (op) {
178 case S_FAIL_OP:
179 stencil_op = stencil->fail_op;
180 break;
181 case Z_FAIL_OP:
182 stencil_op = stencil->zfail_op;
183 break;
184 case Z_PASS_OP:
185 stencil_op = stencil->zpass_op;
186 break;
187 default:
188 assert(0 && "Invalid stencil_op mode");
189 stencil_op = PIPE_STENCIL_OP_KEEP;
190 }
191
192 switch (stencil_op) {
193 case PIPE_STENCIL_OP_KEEP:
194 res = stencilVals;
195 /* we can return early for this case */
196 return res;
197 case PIPE_STENCIL_OP_ZERO:
198 res = bld->zero;
199 break;
200 case PIPE_STENCIL_OP_REPLACE:
201 res = stencilRef;
202 break;
203 case PIPE_STENCIL_OP_INCR:
204 res = lp_build_add(bld, stencilVals, bld->one);
205 res = lp_build_min(bld, res, max);
206 break;
207 case PIPE_STENCIL_OP_DECR:
208 res = lp_build_sub(bld, stencilVals, bld->one);
209 res = lp_build_max(bld, res, bld->zero);
210 break;
211 case PIPE_STENCIL_OP_INCR_WRAP:
212 res = lp_build_add(bld, stencilVals, bld->one);
213 res = LLVMBuildAnd(bld->builder, res, max, "");
214 break;
215 case PIPE_STENCIL_OP_DECR_WRAP:
216 res = lp_build_sub(bld, stencilVals, bld->one);
217 res = LLVMBuildAnd(bld->builder, res, max, "");
218 break;
219 case PIPE_STENCIL_OP_INVERT:
220 res = LLVMBuildNot(bld->builder, stencilVals, "");
221 res = LLVMBuildAnd(bld->builder, res, max, "");
222 break;
223 default:
224 assert(0 && "bad stencil op mode");
225 res = bld->undef;
226 }
227
228 return res;
229 }
230
231
232 /**
233 * Do the one or two-sided stencil test op/update.
234 */
235 static LLVMValueRef
236 lp_build_stencil_op(struct lp_build_context *bld,
237 const struct pipe_stencil_state stencil[2],
238 enum stencil_op op,
239 LLVMValueRef stencilRefs[2],
240 LLVMValueRef stencilVals,
241 LLVMValueRef mask,
242 LLVMValueRef front_facing)
243
244 {
245 LLVMValueRef res;
246
247 assert(stencil[0].enabled);
248
249 /* do front face op */
250 res = lp_build_stencil_op_single(bld, &stencil[0], op,
251 stencilRefs[0], stencilVals);
252
253 if (stencil[1].enabled && front_facing) {
254 /* do back face op */
255 LLVMValueRef back_res;
256
257 back_res = lp_build_stencil_op_single(bld, &stencil[1], op,
258 stencilRefs[1], stencilVals);
259
260 res = lp_build_select(bld, front_facing, res, back_res);
261 }
262
263 if (stencil->writemask != 0xff) {
264 /* mask &= stencil->writemask */
265 LLVMValueRef writemask = lp_build_const_int_vec(bld->type, stencil->writemask);
266 mask = LLVMBuildAnd(bld->builder, mask, writemask, "");
267 /* res = (res & mask) | (stencilVals & ~mask) */
268 res = lp_build_select_bitwise(bld, writemask, res, stencilVals);
269 }
270 else {
271 /* res = mask ? res : stencilVals */
272 res = lp_build_select(bld, mask, res, stencilVals);
273 }
274
275 return res;
276 }
277
278
279
280 /**
281 * Return a type appropriate for depth/stencil testing.
282 */
283 struct lp_type
284 lp_depth_type(const struct util_format_description *format_desc,
285 unsigned length)
286 {
287 struct lp_type type;
288 unsigned swizzle;
289
290 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
291 assert(format_desc->block.width == 1);
292 assert(format_desc->block.height == 1);
293
294 swizzle = format_desc->swizzle[0];
295 assert(swizzle < 4);
296
297 memset(&type, 0, sizeof type);
298 type.width = format_desc->block.bits;
299
300 if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
301 type.floating = TRUE;
302 assert(swizzle == 0);
303 assert(format_desc->channel[swizzle].size == format_desc->block.bits);
304 }
305 else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
306 assert(format_desc->block.bits <= 32);
307 assert(format_desc->channel[swizzle].normalized);
308 if (format_desc->channel[swizzle].size < format_desc->block.bits) {
309 /* Prefer signed integers when possible, as SSE has less support
310 * for unsigned comparison;
311 */
312 type.sign = TRUE;
313 }
314 }
315 else
316 assert(0);
317
318 assert(type.width <= length);
319 type.length = length / type.width;
320
321 return type;
322 }
323
324
325 /**
326 * Compute bitmask and bit shift to apply to the incoming fragment Z values
327 * and the Z buffer values needed before doing the Z comparison.
328 *
329 * Note that we leave the Z bits in the position that we find them
330 * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us
331 * get by with fewer bit twiddling steps.
332 */
333 static boolean
334 get_z_shift_and_mask(const struct util_format_description *format_desc,
335 unsigned *shift, unsigned *width, unsigned *mask)
336 {
337 const unsigned total_bits = format_desc->block.bits;
338 unsigned z_swizzle;
339 unsigned chan;
340 unsigned padding_left, padding_right;
341
342 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
343 assert(format_desc->block.width == 1);
344 assert(format_desc->block.height == 1);
345
346 z_swizzle = format_desc->swizzle[0];
347
348 if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
349 return FALSE;
350
351 *width = format_desc->channel[z_swizzle].size;
352
353 padding_right = 0;
354 for (chan = 0; chan < z_swizzle; ++chan)
355 padding_right += format_desc->channel[chan].size;
356
357 padding_left =
358 total_bits - (padding_right + *width);
359
360 if (padding_left || padding_right) {
361 unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1;
362 unsigned long long mask_right = (1ULL << (padding_right)) - 1;
363 *mask = mask_left ^ mask_right;
364 }
365 else {
366 *mask = 0xffffffff;
367 }
368
369 *shift = padding_right;
370
371 return TRUE;
372 }
373
374
375 /**
376 * Compute bitmask and bit shift to apply to the framebuffer pixel values
377 * to put the stencil bits in the least significant position.
378 * (i.e. 0x000000ff)
379 */
380 static boolean
381 get_s_shift_and_mask(const struct util_format_description *format_desc,
382 unsigned *shift, unsigned *mask)
383 {
384 unsigned s_swizzle;
385 unsigned chan, sz;
386
387 s_swizzle = format_desc->swizzle[1];
388
389 if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
390 return FALSE;
391
392 *shift = 0;
393 for (chan = 0; chan < s_swizzle; chan++)
394 *shift += format_desc->channel[chan].size;
395
396 sz = format_desc->channel[s_swizzle].size;
397 *mask = (1U << sz) - 1U;
398
399 return TRUE;
400 }
401
402
403 /**
404 * Perform the occlusion test and increase the counter.
405 * Test the depth mask. Add the number of channel which has none zero mask
406 * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
407 * The counter will add 4.
408 *
409 * \param type holds element type of the mask vector.
410 * \param maskvalue is the depth test mask.
411 * \param counter is a pointer of the uint32 counter.
412 */
413 void
414 lp_build_occlusion_count(LLVMBuilderRef builder,
415 struct lp_type type,
416 LLVMValueRef maskvalue,
417 LLVMValueRef counter)
418 {
419 LLVMValueRef countmask = lp_build_const_int_vec(type, 1);
420 LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
421 LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8Type(), 16);
422 LLVMValueRef counti = LLVMBuildBitCast(builder, countv, i8v16, "counti");
423 LLVMValueRef maskarray[4] = {
424 LLVMConstInt(LLVMInt32Type(), 0, 0),
425 LLVMConstInt(LLVMInt32Type(), 4, 0),
426 LLVMConstInt(LLVMInt32Type(), 8, 0),
427 LLVMConstInt(LLVMInt32Type(), 12, 0),
428 };
429 LLVMValueRef shufflemask = LLVMConstVector(maskarray, 4);
430 LLVMValueRef shufflev = LLVMBuildShuffleVector(builder, counti, LLVMGetUndef(i8v16), shufflemask, "shufflev");
431 LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32Type(), "shuffle");
432 LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32Type(), shuffle);
433 LLVMValueRef orig = LLVMBuildLoad(builder, counter, "orig");
434 LLVMValueRef incr = LLVMBuildAdd(builder, orig, count, "incr");
435 LLVMBuildStore(builder, incr, counter);
436 }
437
438
439
440 /**
441 * Generate code for performing depth and/or stencil tests.
442 * We operate on a vector of values (typically a 2x2 quad).
443 *
444 * \param depth the depth test state
445 * \param stencil the front/back stencil state
446 * \param type the data type of the fragment depth/stencil values
447 * \param format_desc description of the depth/stencil surface
448 * \param mask the alive/dead pixel mask for the quad (vector)
449 * \param stencil_refs the front/back stencil ref values (scalar)
450 * \param z_src the incoming depth/stencil values (a 2x2 quad, float32)
451 * \param zs_dst_ptr pointer to depth/stencil values in framebuffer
452 * \param facing contains boolean value indicating front/back facing polygon
453 */
454 void
455 lp_build_depth_stencil_test(LLVMBuilderRef builder,
456 const struct pipe_depth_state *depth,
457 const struct pipe_stencil_state stencil[2],
458 struct lp_type z_src_type,
459 const struct util_format_description *format_desc,
460 struct lp_build_mask_context *mask,
461 LLVMValueRef stencil_refs[2],
462 LLVMValueRef z_src,
463 LLVMValueRef zs_dst_ptr,
464 LLVMValueRef face,
465 LLVMValueRef *zs_value,
466 boolean do_branch)
467 {
468 struct lp_type z_type;
469 struct lp_build_context z_bld;
470 struct lp_build_context s_bld;
471 struct lp_type s_type;
472 unsigned z_shift = 0, z_width = 0, z_mask = 0;
473 LLVMValueRef zs_dst, z_dst = NULL;
474 LLVMValueRef stencil_vals = NULL;
475 LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
476 LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
477 LLVMValueRef orig_mask = lp_build_mask_value(mask);
478 LLVMValueRef front_facing = NULL;
479
480
481 /*
482 * Depths are expected to be between 0 and 1, even if they are stored in
483 * floats. Setting these bits here will ensure that the lp_build_conv() call
484 * below won't try to unnecessarily clamp the incoming values.
485 */
486 if(z_src_type.floating) {
487 z_src_type.sign = FALSE;
488 z_src_type.norm = TRUE;
489 }
490 else {
491 assert(!z_src_type.sign);
492 assert(z_src_type.norm);
493 }
494
495 /* Pick the depth type. */
496 z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
497
498 /* FIXME: Cope with a depth test type with a different bit width. */
499 assert(z_type.width == z_src_type.width);
500 assert(z_type.length == z_src_type.length);
501
502 /* Sanity checking */
503 {
504 const unsigned z_swizzle = format_desc->swizzle[0];
505 const unsigned s_swizzle = format_desc->swizzle[1];
506
507 assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
508 s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
509
510 assert(depth->enabled || stencil[0].enabled);
511
512 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
513 assert(format_desc->block.width == 1);
514 assert(format_desc->block.height == 1);
515
516 if (stencil[0].enabled) {
517 assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED ||
518 format_desc->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM);
519 }
520
521 assert(z_swizzle < 4);
522 assert(format_desc->block.bits == z_type.width);
523 if (z_type.floating) {
524 assert(z_swizzle == 0);
525 assert(format_desc->channel[z_swizzle].type ==
526 UTIL_FORMAT_TYPE_FLOAT);
527 assert(format_desc->channel[z_swizzle].size ==
528 format_desc->block.bits);
529 }
530 else {
531 assert(format_desc->channel[z_swizzle].type ==
532 UTIL_FORMAT_TYPE_UNSIGNED);
533 assert(format_desc->channel[z_swizzle].normalized);
534 assert(!z_type.fixed);
535 }
536 }
537
538
539 /* Setup build context for Z vals */
540 lp_build_context_init(&z_bld, builder, z_type);
541
542 /* Setup build context for stencil vals */
543 s_type = lp_type_int_vec(z_type.width);
544 lp_build_context_init(&s_bld, builder, s_type);
545
546 /* Load current z/stencil value from z/stencil buffer */
547 zs_dst_ptr = LLVMBuildBitCast(builder,
548 zs_dst_ptr,
549 LLVMPointerType(z_bld.vec_type, 0), "");
550 zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
551
552 lp_build_name(zs_dst, "zs_dst");
553
554
555 /* Compute and apply the Z/stencil bitmasks and shifts.
556 */
557 {
558 unsigned s_shift, s_mask;
559
560 if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) {
561 if (z_mask != 0xffffffff) {
562 z_bitmask = lp_build_const_int_vec(z_type, z_mask);
563 }
564
565 /*
566 * Align the framebuffer Z 's LSB to the right.
567 */
568 if (z_shift) {
569 LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift);
570 z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst");
571 } else if (z_bitmask) {
572 /* TODO: Instead of loading a mask from memory and ANDing, it's
573 * probably faster to just shake the bits with two shifts. */
574 z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst");
575 } else {
576 z_dst = zs_dst;
577 lp_build_name(z_dst, "z_dst");
578 }
579 }
580
581 if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
582 if (s_shift) {
583 LLVMValueRef shift = lp_build_const_int_vec(s_type, s_shift);
584 stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
585 stencil_shift = shift; /* used below */
586 }
587 else {
588 stencil_vals = zs_dst;
589 }
590
591 if (s_mask != 0xffffffff) {
592 LLVMValueRef mask = lp_build_const_int_vec(s_type, s_mask);
593 stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
594 }
595
596 lp_build_name(stencil_vals, "s_dst");
597 }
598 }
599
600 if (stencil[0].enabled) {
601
602 if (face) {
603 LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
604
605 /* front_facing = face != 0 ? ~0 : 0 */
606 front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
607 front_facing = LLVMBuildSExt(builder, front_facing,
608 LLVMIntType(s_bld.type.length*s_bld.type.width),
609 "");
610 front_facing = LLVMBuildBitCast(builder, front_facing,
611 s_bld.int_vec_type, "");
612 }
613
614 /* convert scalar stencil refs into vectors */
615 stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
616 stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);
617
618 s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
619 stencil_refs, stencil_vals,
620 front_facing);
621
622 /* apply stencil-fail operator */
623 {
624 LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
625 stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
626 stencil_refs, stencil_vals,
627 s_fail_mask, front_facing);
628 }
629 }
630
631 if (depth->enabled) {
632 /*
633 * Convert fragment Z to the desired type, aligning the LSB to the right.
634 */
635
636 assert(z_type.width == z_src_type.width);
637 assert(z_type.length == z_src_type.length);
638 assert(lp_check_value(z_src_type, z_src));
639 if (z_src_type.floating) {
640 /*
641 * Convert from floating point values
642 */
643
644 if (!z_type.floating) {
645 z_src = lp_build_clamped_float_to_unsigned_norm(builder,
646 z_src_type,
647 z_width,
648 z_src);
649 }
650 } else {
651 /*
652 * Convert from unsigned normalized values.
653 */
654
655 assert(!z_src_type.sign);
656 assert(!z_src_type.fixed);
657 assert(z_src_type.norm);
658 assert(!z_type.floating);
659 if (z_src_type.width > z_width) {
660 LLVMValueRef shift = lp_build_const_int_vec(z_src_type,
661 z_src_type.width - z_width);
662 z_src = LLVMBuildLShr(builder, z_src, shift, "");
663 }
664 }
665 assert(lp_check_value(z_type, z_src));
666
667 lp_build_name(z_src, "z_src");
668
669 /* compare src Z to dst Z, returning 'pass' mask */
670 z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);
671
672 if (!stencil[0].enabled) {
673 /* We can potentially skip all remaining operations here, but only
674 * if stencil is disabled because we still need to update the stencil
675 * buffer values. Don't need to update Z buffer values.
676 */
677 lp_build_mask_update(mask, z_pass);
678
679 if (do_branch) {
680 lp_build_mask_check(mask);
681 do_branch = FALSE;
682 }
683 }
684
685 if (depth->writemask) {
686 LLVMValueRef zselectmask;
687
688 /* mask off bits that failed Z test */
689 zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
690
691 /* mask off bits that failed stencil test */
692 if (s_pass_mask) {
693 zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
694 }
695
696 /* Mix the old and new Z buffer values.
697 * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
698 */
699 z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
700 }
701
702 if (stencil[0].enabled) {
703 /* update stencil buffer values according to z pass/fail result */
704 LLVMValueRef z_fail_mask, z_pass_mask;
705
706 /* apply Z-fail operator */
707 z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass);
708 stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
709 stencil_refs, stencil_vals,
710 z_fail_mask, front_facing);
711
712 /* apply Z-pass operator */
713 z_pass_mask = LLVMBuildAnd(z_bld.builder, orig_mask, z_pass, "");
714 stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
715 stencil_refs, stencil_vals,
716 z_pass_mask, front_facing);
717 }
718 }
719 else {
720 /* No depth test: apply Z-pass operator to stencil buffer values which
721 * passed the stencil test.
722 */
723 s_pass_mask = LLVMBuildAnd(s_bld.builder, orig_mask, s_pass_mask, "");
724 stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
725 stencil_refs, stencil_vals,
726 s_pass_mask, front_facing);
727 }
728
729 /* Put Z and ztencil bits in the right place */
730 if (z_dst && z_shift) {
731 LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift);
732 z_dst = LLVMBuildShl(builder, z_dst, shift, "");
733 }
734 if (stencil_vals && stencil_shift)
735 stencil_vals = LLVMBuildShl(s_bld.builder, stencil_vals,
736 stencil_shift, "");
737
738 /* Finally, merge/store the z/stencil values */
739 if ((depth->enabled && depth->writemask) ||
740 (stencil[0].enabled && stencil[0].writemask)) {
741
742 if (z_dst && stencil_vals)
743 zs_dst = LLVMBuildOr(z_bld.builder, z_dst, stencil_vals, "");
744 else if (z_dst)
745 zs_dst = z_dst;
746 else
747 zs_dst = stencil_vals;
748
749 *zs_value = zs_dst;
750 }
751
752 if (s_pass_mask)
753 lp_build_mask_update(mask, s_pass_mask);
754
755 if (depth->enabled && stencil[0].enabled)
756 lp_build_mask_update(mask, z_pass);
757
758 if (do_branch)
759 lp_build_mask_check(mask);
760
761 }
762
763
764 void
765 lp_build_depth_write(LLVMBuilderRef builder,
766 const struct util_format_description *format_desc,
767 LLVMValueRef zs_dst_ptr,
768 LLVMValueRef zs_value)
769 {
770 zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
771 LLVMPointerType(LLVMTypeOf(zs_value), 0), "");
772
773 LLVMBuildStore(builder, zs_value, zs_dst_ptr);
774 }
775
776
777 void
778 lp_build_deferred_depth_write(LLVMBuilderRef builder,
779 struct lp_type z_src_type,
780 const struct util_format_description *format_desc,
781 struct lp_build_mask_context *mask,
782 LLVMValueRef zs_dst_ptr,
783 LLVMValueRef zs_value)
784 {
785 struct lp_type z_type;
786 struct lp_build_context z_bld;
787 LLVMValueRef z_dst;
788
789 /* XXX: pointlessly redo type logic:
790 */
791 z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
792 lp_build_context_init(&z_bld, builder, z_type);
793
794 zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
795 LLVMPointerType(z_bld.vec_type, 0), "");
796
797 z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
798 z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst);
799
800 LLVMBuildStore(builder, z_dst, zs_dst_ptr);
801 }