svga: Don't advertise pixel shader addr register support.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_depth.c
1 /**************************************************************************
2 *
3 * Copyright 2009-2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Depth/stencil testing to LLVM IR translation.
31 *
32 * To be done accurately/efficiently the depth/stencil test must be done with
33 * the same type/format of the depth/stencil buffer, which implies massaging
34 * the incoming depths to fit into place. Using a more straightforward
35 * type/format for depth/stencil values internally and only convert when
36 * flushing would avoid this, but it would most likely result in depth fighting
37 * artifacts.
38 *
39 * We are free to use a different pixel layout though. Since our basic
40 * processing unit is a quad (2x2 pixel block) we store the depth/stencil
41 * values tiled, a quad at a time. That is, a depth buffer containing
42 *
43 * Z11 Z12 Z13 Z14 ...
44 * Z21 Z22 Z23 Z24 ...
45 * Z31 Z32 Z33 Z34 ...
46 * Z41 Z42 Z43 Z44 ...
47 * ... ... ... ... ...
48 *
49 * will actually be stored in memory as
50 *
51 * Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
52 * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
53 * ... ... ... ... ... ... ... ... ...
54 *
55 *
56 * @author Jose Fonseca <jfonseca@vmware.com>
57 * @author Brian Paul <brianp@vmware.com>
58 */
59
60 #include "pipe/p_state.h"
61 #include "util/u_format.h"
62
63 #include "gallivm/lp_bld_type.h"
64 #include "gallivm/lp_bld_arit.h"
65 #include "gallivm/lp_bld_bitarit.h"
66 #include "gallivm/lp_bld_const.h"
67 #include "gallivm/lp_bld_conv.h"
68 #include "gallivm/lp_bld_logic.h"
69 #include "gallivm/lp_bld_flow.h"
70 #include "gallivm/lp_bld_intr.h"
71 #include "gallivm/lp_bld_debug.h"
72 #include "gallivm/lp_bld_swizzle.h"
73
74 #include "lp_bld_depth.h"
75
76
/**
 * Selects which operation field of a pipe_stencil_state is applied:
 * the stencil-fail, depth-fail or depth-pass operation.
 */
enum stencil_op {
   S_FAIL_OP = 0,
   Z_FAIL_OP = 1,
   Z_PASS_OP = 2
};
83
84
85
86 /**
87 * Do the stencil test comparison (compare FB stencil values against ref value).
88 * This will be used twice when generating two-sided stencil code.
89 * \param stencil the front/back stencil state
90 * \param stencilRef the stencil reference value, replicated as a vector
91 * \param stencilVals vector of stencil values from framebuffer
92 * \return vector mask of pass/fail values (~0 or 0)
93 */
94 static LLVMValueRef
95 lp_build_stencil_test_single(struct lp_build_context *bld,
96 const struct pipe_stencil_state *stencil,
97 LLVMValueRef stencilRef,
98 LLVMValueRef stencilVals)
99 {
100 LLVMBuilderRef builder = bld->gallivm->builder;
101 const unsigned stencilMax = 255; /* XXX fix */
102 struct lp_type type = bld->type;
103 LLVMValueRef res;
104
105 assert(type.sign);
106
107 assert(stencil->enabled);
108
109 if (stencil->valuemask != stencilMax) {
110 /* compute stencilRef = stencilRef & valuemask */
111 LLVMValueRef valuemask = lp_build_const_int_vec(bld->gallivm, type, stencil->valuemask);
112 stencilRef = LLVMBuildAnd(builder, stencilRef, valuemask, "");
113 /* compute stencilVals = stencilVals & valuemask */
114 stencilVals = LLVMBuildAnd(builder, stencilVals, valuemask, "");
115 }
116
117 res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);
118
119 return res;
120 }
121
122
123 /**
124 * Do the one or two-sided stencil test comparison.
125 * \sa lp_build_stencil_test_single
126 * \param front_facing an integer vector mask, indicating front (~0) or back
127 * (0) facing polygon. If NULL, assume front-facing.
128 */
129 static LLVMValueRef
130 lp_build_stencil_test(struct lp_build_context *bld,
131 const struct pipe_stencil_state stencil[2],
132 LLVMValueRef stencilRefs[2],
133 LLVMValueRef stencilVals,
134 LLVMValueRef front_facing)
135 {
136 LLVMValueRef res;
137
138 assert(stencil[0].enabled);
139
140 /* do front face test */
141 res = lp_build_stencil_test_single(bld, &stencil[0],
142 stencilRefs[0], stencilVals);
143
144 if (stencil[1].enabled && front_facing) {
145 /* do back face test */
146 LLVMValueRef back_res;
147
148 back_res = lp_build_stencil_test_single(bld, &stencil[1],
149 stencilRefs[1], stencilVals);
150
151 res = lp_build_select(bld, front_facing, res, back_res);
152 }
153
154 return res;
155 }
156
157
158 /**
159 * Apply the stencil operator (add/sub/keep/etc) to the given vector
160 * of stencil values.
161 * \return new stencil values vector
162 */
163 static LLVMValueRef
164 lp_build_stencil_op_single(struct lp_build_context *bld,
165 const struct pipe_stencil_state *stencil,
166 enum stencil_op op,
167 LLVMValueRef stencilRef,
168 LLVMValueRef stencilVals)
169
170 {
171 LLVMBuilderRef builder = bld->gallivm->builder;
172 struct lp_type type = bld->type;
173 LLVMValueRef res;
174 LLVMValueRef max = lp_build_const_int_vec(bld->gallivm, type, 0xff);
175 unsigned stencil_op;
176
177 assert(type.sign);
178
179 switch (op) {
180 case S_FAIL_OP:
181 stencil_op = stencil->fail_op;
182 break;
183 case Z_FAIL_OP:
184 stencil_op = stencil->zfail_op;
185 break;
186 case Z_PASS_OP:
187 stencil_op = stencil->zpass_op;
188 break;
189 default:
190 assert(0 && "Invalid stencil_op mode");
191 stencil_op = PIPE_STENCIL_OP_KEEP;
192 }
193
194 switch (stencil_op) {
195 case PIPE_STENCIL_OP_KEEP:
196 res = stencilVals;
197 /* we can return early for this case */
198 return res;
199 case PIPE_STENCIL_OP_ZERO:
200 res = bld->zero;
201 break;
202 case PIPE_STENCIL_OP_REPLACE:
203 res = stencilRef;
204 break;
205 case PIPE_STENCIL_OP_INCR:
206 res = lp_build_add(bld, stencilVals, bld->one);
207 res = lp_build_min(bld, res, max);
208 break;
209 case PIPE_STENCIL_OP_DECR:
210 res = lp_build_sub(bld, stencilVals, bld->one);
211 res = lp_build_max(bld, res, bld->zero);
212 break;
213 case PIPE_STENCIL_OP_INCR_WRAP:
214 res = lp_build_add(bld, stencilVals, bld->one);
215 res = LLVMBuildAnd(builder, res, max, "");
216 break;
217 case PIPE_STENCIL_OP_DECR_WRAP:
218 res = lp_build_sub(bld, stencilVals, bld->one);
219 res = LLVMBuildAnd(builder, res, max, "");
220 break;
221 case PIPE_STENCIL_OP_INVERT:
222 res = LLVMBuildNot(builder, stencilVals, "");
223 res = LLVMBuildAnd(builder, res, max, "");
224 break;
225 default:
226 assert(0 && "bad stencil op mode");
227 res = bld->undef;
228 }
229
230 return res;
231 }
232
233
234 /**
235 * Do the one or two-sided stencil test op/update.
236 */
237 static LLVMValueRef
238 lp_build_stencil_op(struct lp_build_context *bld,
239 const struct pipe_stencil_state stencil[2],
240 enum stencil_op op,
241 LLVMValueRef stencilRefs[2],
242 LLVMValueRef stencilVals,
243 LLVMValueRef mask,
244 LLVMValueRef front_facing)
245
246 {
247 LLVMBuilderRef builder = bld->gallivm->builder;
248 LLVMValueRef res;
249
250 assert(stencil[0].enabled);
251
252 /* do front face op */
253 res = lp_build_stencil_op_single(bld, &stencil[0], op,
254 stencilRefs[0], stencilVals);
255
256 if (stencil[1].enabled && front_facing) {
257 /* do back face op */
258 LLVMValueRef back_res;
259
260 back_res = lp_build_stencil_op_single(bld, &stencil[1], op,
261 stencilRefs[1], stencilVals);
262
263 res = lp_build_select(bld, front_facing, res, back_res);
264 }
265
266 if (stencil->writemask != 0xff) {
267 /* mask &= stencil->writemask */
268 LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
269 stencil->writemask);
270 mask = LLVMBuildAnd(builder, mask, writemask, "");
271 /* res = (res & mask) | (stencilVals & ~mask) */
272 res = lp_build_select_bitwise(bld, mask, res, stencilVals);
273 }
274 else {
275 /* res = mask ? res : stencilVals */
276 res = lp_build_select(bld, mask, res, stencilVals);
277 }
278
279 return res;
280 }
281
282
283
284 /**
285 * Return a type appropriate for depth/stencil testing.
286 */
287 struct lp_type
288 lp_depth_type(const struct util_format_description *format_desc,
289 unsigned length)
290 {
291 struct lp_type type;
292 unsigned swizzle;
293
294 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
295 assert(format_desc->block.width == 1);
296 assert(format_desc->block.height == 1);
297
298 swizzle = format_desc->swizzle[0];
299 assert(swizzle < 4);
300
301 memset(&type, 0, sizeof type);
302 type.width = format_desc->block.bits;
303
304 if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
305 type.floating = TRUE;
306 assert(swizzle == 0);
307 assert(format_desc->channel[swizzle].size == format_desc->block.bits);
308 }
309 else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
310 assert(format_desc->block.bits <= 32);
311 assert(format_desc->channel[swizzle].normalized);
312 if (format_desc->channel[swizzle].size < format_desc->block.bits) {
313 /* Prefer signed integers when possible, as SSE has less support
314 * for unsigned comparison;
315 */
316 type.sign = TRUE;
317 }
318 }
319 else
320 assert(0);
321
322 assert(type.width <= length);
323 type.length = length / type.width;
324
325 return type;
326 }
327
328
329 /**
330 * Compute bitmask and bit shift to apply to the incoming fragment Z values
331 * and the Z buffer values needed before doing the Z comparison.
332 *
333 * Note that we leave the Z bits in the position that we find them
334 * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us
335 * get by with fewer bit twiddling steps.
336 */
337 static boolean
338 get_z_shift_and_mask(const struct util_format_description *format_desc,
339 unsigned *shift, unsigned *width, unsigned *mask)
340 {
341 const unsigned total_bits = format_desc->block.bits;
342 unsigned z_swizzle;
343 unsigned chan;
344 unsigned padding_left, padding_right;
345
346 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
347 assert(format_desc->block.width == 1);
348 assert(format_desc->block.height == 1);
349
350 z_swizzle = format_desc->swizzle[0];
351
352 if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
353 return FALSE;
354
355 *width = format_desc->channel[z_swizzle].size;
356
357 padding_right = 0;
358 for (chan = 0; chan < z_swizzle; ++chan)
359 padding_right += format_desc->channel[chan].size;
360
361 padding_left =
362 total_bits - (padding_right + *width);
363
364 if (padding_left || padding_right) {
365 unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1;
366 unsigned long long mask_right = (1ULL << (padding_right)) - 1;
367 *mask = mask_left ^ mask_right;
368 }
369 else {
370 *mask = 0xffffffff;
371 }
372
373 *shift = padding_right;
374
375 return TRUE;
376 }
377
378
379 /**
380 * Compute bitmask and bit shift to apply to the framebuffer pixel values
381 * to put the stencil bits in the least significant position.
382 * (i.e. 0x000000ff)
383 */
384 static boolean
385 get_s_shift_and_mask(const struct util_format_description *format_desc,
386 unsigned *shift, unsigned *mask)
387 {
388 unsigned s_swizzle;
389 unsigned chan, sz;
390
391 s_swizzle = format_desc->swizzle[1];
392
393 if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
394 return FALSE;
395
396 *shift = 0;
397 for (chan = 0; chan < s_swizzle; chan++)
398 *shift += format_desc->channel[chan].size;
399
400 sz = format_desc->channel[s_swizzle].size;
401 *mask = (1U << sz) - 1U;
402
403 return TRUE;
404 }
405
406
407 /**
408 * Perform the occlusion test and increase the counter.
409 * Test the depth mask. Add the number of channel which has none zero mask
410 * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
411 * The counter will add 4.
412 *
413 * \param type holds element type of the mask vector.
414 * \param maskvalue is the depth test mask.
415 * \param counter is a pointer of the uint32 counter.
416 */
417 void
418 lp_build_occlusion_count(struct gallivm_state *gallivm,
419 struct lp_type type,
420 LLVMValueRef maskvalue,
421 LLVMValueRef counter)
422 {
423 LLVMBuilderRef builder = gallivm->builder;
424 LLVMContextRef context = gallivm->context;
425 LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1);
426 LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
427 LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8TypeInContext(context), 16);
428 LLVMValueRef counti = LLVMBuildBitCast(builder, countv, i8v16, "counti");
429 LLVMValueRef maskarray[4] = {
430 lp_build_const_int32(gallivm, 0),
431 lp_build_const_int32(gallivm, 4),
432 lp_build_const_int32(gallivm, 8),
433 lp_build_const_int32(gallivm, 12)
434 };
435 LLVMValueRef shufflemask = LLVMConstVector(maskarray, 4);
436 LLVMValueRef shufflev = LLVMBuildShuffleVector(builder, counti, LLVMGetUndef(i8v16), shufflemask, "shufflev");
437 LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32TypeInContext(context), "shuffle");
438 LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32TypeInContext(context), shuffle);
439 LLVMValueRef orig = LLVMBuildLoad(builder, counter, "orig");
440 LLVMValueRef incr = LLVMBuildAdd(builder, orig, count, "incr");
441 LLVMBuildStore(builder, incr, counter);
442 }
443
444
445
446 /**
447 * Generate code for performing depth and/or stencil tests.
448 * We operate on a vector of values (typically a 2x2 quad).
449 *
450 * \param depth the depth test state
451 * \param stencil the front/back stencil state
452 * \param type the data type of the fragment depth/stencil values
453 * \param format_desc description of the depth/stencil surface
454 * \param mask the alive/dead pixel mask for the quad (vector)
455 * \param stencil_refs the front/back stencil ref values (scalar)
456 * \param z_src the incoming depth/stencil values (a 2x2 quad, float32)
457 * \param zs_dst_ptr pointer to depth/stencil values in framebuffer
458 * \param facing contains boolean value indicating front/back facing polygon
459 */
460 void
461 lp_build_depth_stencil_test(struct gallivm_state *gallivm,
462 const struct pipe_depth_state *depth,
463 const struct pipe_stencil_state stencil[2],
464 struct lp_type z_src_type,
465 const struct util_format_description *format_desc,
466 struct lp_build_mask_context *mask,
467 LLVMValueRef stencil_refs[2],
468 LLVMValueRef z_src,
469 LLVMValueRef zs_dst_ptr,
470 LLVMValueRef face,
471 LLVMValueRef *zs_value,
472 boolean do_branch)
473 {
474 LLVMBuilderRef builder = gallivm->builder;
475 struct lp_type z_type;
476 struct lp_build_context z_bld;
477 struct lp_build_context s_bld;
478 struct lp_type s_type;
479 unsigned z_shift = 0, z_width = 0, z_mask = 0;
480 LLVMValueRef zs_dst, z_dst = NULL;
481 LLVMValueRef stencil_vals = NULL;
482 LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
483 LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
484 LLVMValueRef orig_mask = lp_build_mask_value(mask);
485 LLVMValueRef front_facing = NULL;
486
487
488 /*
489 * Depths are expected to be between 0 and 1, even if they are stored in
490 * floats. Setting these bits here will ensure that the lp_build_conv() call
491 * below won't try to unnecessarily clamp the incoming values.
492 */
493 if(z_src_type.floating) {
494 z_src_type.sign = FALSE;
495 z_src_type.norm = TRUE;
496 }
497 else {
498 assert(!z_src_type.sign);
499 assert(z_src_type.norm);
500 }
501
502 /* Pick the depth type. */
503 z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
504
505 /* FIXME: Cope with a depth test type with a different bit width. */
506 assert(z_type.width == z_src_type.width);
507 assert(z_type.length == z_src_type.length);
508
509 /* Sanity checking */
510 {
511 const unsigned z_swizzle = format_desc->swizzle[0];
512 const unsigned s_swizzle = format_desc->swizzle[1];
513
514 assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
515 s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
516
517 assert(depth->enabled || stencil[0].enabled);
518
519 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
520 assert(format_desc->block.width == 1);
521 assert(format_desc->block.height == 1);
522
523 if (stencil[0].enabled) {
524 assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED ||
525 format_desc->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM);
526 }
527
528 assert(z_swizzle < 4);
529 assert(format_desc->block.bits == z_type.width);
530 if (z_type.floating) {
531 assert(z_swizzle == 0);
532 assert(format_desc->channel[z_swizzle].type ==
533 UTIL_FORMAT_TYPE_FLOAT);
534 assert(format_desc->channel[z_swizzle].size ==
535 format_desc->block.bits);
536 }
537 else {
538 assert(format_desc->channel[z_swizzle].type ==
539 UTIL_FORMAT_TYPE_UNSIGNED);
540 assert(format_desc->channel[z_swizzle].normalized);
541 assert(!z_type.fixed);
542 }
543 }
544
545
546 /* Setup build context for Z vals */
547 lp_build_context_init(&z_bld, gallivm, z_type);
548
549 /* Setup build context for stencil vals */
550 s_type = lp_type_int_vec(z_type.width);
551 lp_build_context_init(&s_bld, gallivm, s_type);
552
553 /* Load current z/stencil value from z/stencil buffer */
554 zs_dst_ptr = LLVMBuildBitCast(builder,
555 zs_dst_ptr,
556 LLVMPointerType(z_bld.vec_type, 0), "");
557 zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
558
559 lp_build_name(zs_dst, "zs_dst");
560
561
562 /* Compute and apply the Z/stencil bitmasks and shifts.
563 */
564 {
565 unsigned s_shift, s_mask;
566
567 if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) {
568 if (z_mask != 0xffffffff) {
569 z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
570 }
571
572 /*
573 * Align the framebuffer Z 's LSB to the right.
574 */
575 if (z_shift) {
576 LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
577 z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst");
578 } else if (z_bitmask) {
579 /* TODO: Instead of loading a mask from memory and ANDing, it's
580 * probably faster to just shake the bits with two shifts. */
581 z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst");
582 } else {
583 z_dst = zs_dst;
584 lp_build_name(z_dst, "z_dst");
585 }
586 }
587
588 if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
589 if (s_shift) {
590 LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
591 stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
592 stencil_shift = shift; /* used below */
593 }
594 else {
595 stencil_vals = zs_dst;
596 }
597
598 if (s_mask != 0xffffffff) {
599 LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
600 stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
601 }
602
603 lp_build_name(stencil_vals, "s_dst");
604 }
605 }
606
607 if (stencil[0].enabled) {
608
609 if (face) {
610 LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
611
612 /* front_facing = face != 0 ? ~0 : 0 */
613 front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
614 front_facing = LLVMBuildSExt(builder, front_facing,
615 LLVMIntTypeInContext(gallivm->context,
616 s_bld.type.length*s_bld.type.width),
617 "");
618 front_facing = LLVMBuildBitCast(builder, front_facing,
619 s_bld.int_vec_type, "");
620 }
621
622 /* convert scalar stencil refs into vectors */
623 stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
624 stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);
625
626 s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
627 stencil_refs, stencil_vals,
628 front_facing);
629
630 /* apply stencil-fail operator */
631 {
632 LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
633 stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
634 stencil_refs, stencil_vals,
635 s_fail_mask, front_facing);
636 }
637 }
638
639 if (depth->enabled) {
640 /*
641 * Convert fragment Z to the desired type, aligning the LSB to the right.
642 */
643
644 assert(z_type.width == z_src_type.width);
645 assert(z_type.length == z_src_type.length);
646 assert(lp_check_value(z_src_type, z_src));
647 if (z_src_type.floating) {
648 /*
649 * Convert from floating point values
650 */
651
652 if (!z_type.floating) {
653 z_src = lp_build_clamped_float_to_unsigned_norm(gallivm,
654 z_src_type,
655 z_width,
656 z_src);
657 }
658 } else {
659 /*
660 * Convert from unsigned normalized values.
661 */
662
663 assert(!z_src_type.sign);
664 assert(!z_src_type.fixed);
665 assert(z_src_type.norm);
666 assert(!z_type.floating);
667 if (z_src_type.width > z_width) {
668 LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type,
669 z_src_type.width - z_width);
670 z_src = LLVMBuildLShr(builder, z_src, shift, "");
671 }
672 }
673 assert(lp_check_value(z_type, z_src));
674
675 lp_build_name(z_src, "z_src");
676
677 /* compare src Z to dst Z, returning 'pass' mask */
678 z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);
679
680 if (!stencil[0].enabled) {
681 /* We can potentially skip all remaining operations here, but only
682 * if stencil is disabled because we still need to update the stencil
683 * buffer values. Don't need to update Z buffer values.
684 */
685 lp_build_mask_update(mask, z_pass);
686
687 if (do_branch) {
688 lp_build_mask_check(mask);
689 do_branch = FALSE;
690 }
691 }
692
693 if (depth->writemask) {
694 LLVMValueRef zselectmask;
695
696 /* mask off bits that failed Z test */
697 zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
698
699 /* mask off bits that failed stencil test */
700 if (s_pass_mask) {
701 zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
702 }
703
704 /* Mix the old and new Z buffer values.
705 * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
706 */
707 z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
708 }
709
710 if (stencil[0].enabled) {
711 /* update stencil buffer values according to z pass/fail result */
712 LLVMValueRef z_fail_mask, z_pass_mask;
713
714 /* apply Z-fail operator */
715 z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass);
716 stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
717 stencil_refs, stencil_vals,
718 z_fail_mask, front_facing);
719
720 /* apply Z-pass operator */
721 z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
722 stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
723 stencil_refs, stencil_vals,
724 z_pass_mask, front_facing);
725 }
726 }
727 else {
728 /* No depth test: apply Z-pass operator to stencil buffer values which
729 * passed the stencil test.
730 */
731 s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, "");
732 stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
733 stencil_refs, stencil_vals,
734 s_pass_mask, front_facing);
735 }
736
737 /* Put Z and ztencil bits in the right place */
738 if (z_dst && z_shift) {
739 LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
740 z_dst = LLVMBuildShl(builder, z_dst, shift, "");
741 }
742 if (stencil_vals && stencil_shift)
743 stencil_vals = LLVMBuildShl(builder, stencil_vals,
744 stencil_shift, "");
745
746 /* Finally, merge/store the z/stencil values */
747 if ((depth->enabled && depth->writemask) ||
748 (stencil[0].enabled && stencil[0].writemask)) {
749
750 if (z_dst && stencil_vals)
751 zs_dst = LLVMBuildOr(builder, z_dst, stencil_vals, "");
752 else if (z_dst)
753 zs_dst = z_dst;
754 else
755 zs_dst = stencil_vals;
756
757 *zs_value = zs_dst;
758 }
759
760 if (s_pass_mask)
761 lp_build_mask_update(mask, s_pass_mask);
762
763 if (depth->enabled && stencil[0].enabled)
764 lp_build_mask_update(mask, z_pass);
765
766 if (do_branch)
767 lp_build_mask_check(mask);
768
769 }
770
771
772 void
773 lp_build_depth_write(LLVMBuilderRef builder,
774 const struct util_format_description *format_desc,
775 LLVMValueRef zs_dst_ptr,
776 LLVMValueRef zs_value)
777 {
778 zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
779 LLVMPointerType(LLVMTypeOf(zs_value), 0), "");
780
781 LLVMBuildStore(builder, zs_value, zs_dst_ptr);
782 }
783
784
785 void
786 lp_build_deferred_depth_write(struct gallivm_state *gallivm,
787 struct lp_type z_src_type,
788 const struct util_format_description *format_desc,
789 struct lp_build_mask_context *mask,
790 LLVMValueRef zs_dst_ptr,
791 LLVMValueRef zs_value)
792 {
793 struct lp_type z_type;
794 struct lp_build_context z_bld;
795 LLVMValueRef z_dst;
796 LLVMBuilderRef builder = gallivm->builder;
797
798 /* XXX: pointlessly redo type logic:
799 */
800 z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
801 lp_build_context_init(&z_bld, gallivm, z_type);
802
803 zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
804 LLVMPointerType(z_bld.vec_type, 0), "");
805
806 z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
807 z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst);
808
809 LLVMBuildStore(builder, z_dst, zs_dst_ptr);
810 }