src/gallium/drivers/llvmpipe/lp_bld_depth.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009-2010 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Depth/stencil testing to LLVM IR translation.
  31  *
  32  * To be done accurately/efficiently the depth/stencil test must be done with
  33  * the same type/format of the depth/stencil buffer, which implies massaging
  34  * the incoming depths to fit into place. Using a more straightforward
  35  * type/format for depth/stencil values internally and only convert when
  36  * flushing would avoid this, but it would most likely result in depth fighting
  37  * artifacts.
  38  *
  39  * We are free to use a different pixel layout though. Since our basic
  40  * processing unit is a quad (2x2 pixel block) we store the depth/stencil
  41  * values tiled, a quad at time. That is, a depth buffer containing
  42  *
  43  *  Z11 Z12 Z13 Z14 ...
  44  *  Z21 Z22 Z23 Z24 ...
  45  *  Z31 Z32 Z33 Z34 ...
  46  *  Z41 Z42 Z43 Z44 ...
  47  *  ... ... ... ... ...
  48  *
  49  * will actually be stored in memory as
  50  *
  51  *  Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
  52  *  Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
  53  *  ... ... ... ... ... ... ... ... ...
  54  *
  55  *
  56  * @author Jose Fonseca <jfonseca@vmware.com>
  57  * @author Brian Paul <jfonseca@vmware.com>
  58  */
  59
  60 #include "pipe/p_state.h"
  61 #include "util/u_format.h"
  62 #include "util/u_cpu_detect.h"
  63
  64 #include "gallivm/lp_bld_type.h"
  65 #include "gallivm/lp_bld_arit.h"
  66 #include "gallivm/lp_bld_bitarit.h"
  67 #include "gallivm/lp_bld_const.h"
  68 #include "gallivm/lp_bld_conv.h"
  69 #include "gallivm/lp_bld_logic.h"
  70 #include "gallivm/lp_bld_flow.h"
  71 #include "gallivm/lp_bld_intr.h"
  72 #include "gallivm/lp_bld_debug.h"
  73 #include "gallivm/lp_bld_swizzle.h"
  74
  75 #include "lp_bld_depth.h"
  76
  77
  78 /** Used to select fields from pipe_stencil_state */
  79 enum stencil_op {
  80    S_FAIL_OP,
  81    Z_FAIL_OP,
  82    Z_PASS_OP
  83 };
  84
  85
  86
  87 /**
  88  * Do the stencil test comparison (compare FB stencil values against ref value).
  89  * This will be used twice when generating two-sided stencil code.
  90  * \param stencil  the front/back stencil state
  91  * \param stencilRef  the stencil reference value, replicated as a vector
  92  * \param stencilVals  vector of stencil values from framebuffer
  93  * \return vector mask of pass/fail values (~0 or 0)
  94  */
  95 static LLVMValueRef
  96 lp_build_stencil_test_single(struct lp_build_context *bld,
  97                              const struct pipe_stencil_state *stencil,
  98                              LLVMValueRef stencilRef,
  99                              LLVMValueRef stencilVals)
 100 {
 101    LLVMBuilderRef builder = bld->gallivm->builder;
 102    const unsigned stencilMax = 255; /* XXX fix */
 103    struct lp_type type = bld->type;
 104    LLVMValueRef res;
 105
 106    /*
 107     * SSE2 has intrinsics for signed comparisons, but not unsigned ones. Values
 108     * are between 0..255 so ensure we generate the fastest comparisons for
 109     * wider elements.
 110     */
 111    if (type.width <= 8) {
 112       assert(!type.sign);
 113    } else {
 114       assert(type.sign);
 115    }
 116
 117    assert(stencil->enabled);
 118
 119    if (stencil->valuemask != stencilMax) {
 120       /* compute stencilRef = stencilRef & valuemask */
 121       LLVMValueRef valuemask = lp_build_const_int_vec(bld->gallivm, type, stencil->valuemask);
 122       stencilRef = LLVMBuildAnd(builder, stencilRef, valuemask, "");
 123       /* compute stencilVals = stencilVals & valuemask */
 124       stencilVals = LLVMBuildAnd(builder, stencilVals, valuemask, "");
 125    }
 126
 127    res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);
 128
 129    return res;
 130 }
 131
 132
 133 /**
 134  * Do the one or two-sided stencil test comparison.
 135  * \sa lp_build_stencil_test_single
 136  * \param front_facing  an integer vector mask, indicating front (~0) or back
 137  *                      (0) facing polygon. If NULL, assume front-facing.
 138  */
 139 static LLVMValueRef
 140 lp_build_stencil_test(struct lp_build_context *bld,
 141                       const struct pipe_stencil_state stencil[2],
 142                       LLVMValueRef stencilRefs[2],
 143                       LLVMValueRef stencilVals,
 144                       LLVMValueRef front_facing)
 145 {
 146    LLVMValueRef res;
 147
 148    assert(stencil[0].enabled);
 149
 150    /* do front face test */
 151    res = lp_build_stencil_test_single(bld, &stencil[0],
 152                                       stencilRefs[0], stencilVals);
 153
 154    if (stencil[1].enabled && front_facing != NULL) {
 155       /* do back face test */
 156       LLVMValueRef back_res;
 157
 158       back_res = lp_build_stencil_test_single(bld, &stencil[1],
 159                                               stencilRefs[1], stencilVals);
 160
 161       res = lp_build_select(bld, front_facing, res, back_res);
 162    }
 163
 164    return res;
 165 }
 166
 167
 168 /**
 169  * Apply the stencil operator (add/sub/keep/etc) to the given vector
 170  * of stencil values.
 171  * \return  new stencil values vector
 172  */
 173 static LLVMValueRef
 174 lp_build_stencil_op_single(struct lp_build_context *bld,
 175                            const struct pipe_stencil_state *stencil,
 176                            enum stencil_op op,
 177                            LLVMValueRef stencilRef,
 178                            LLVMValueRef stencilVals)
 179
 180 {
 181    LLVMBuilderRef builder = bld->gallivm->builder;
 182    struct lp_type type = bld->type;
 183    LLVMValueRef res;
 184    LLVMValueRef max = lp_build_const_int_vec(bld->gallivm, type, 0xff);
 185    unsigned stencil_op;
 186
 187    assert(type.sign);
 188
 189    switch (op) {
 190    case S_FAIL_OP:
 191       stencil_op = stencil->fail_op;
 192       break;
 193    case Z_FAIL_OP:
 194       stencil_op = stencil->zfail_op;
 195       break;
 196    case Z_PASS_OP:
 197       stencil_op = stencil->zpass_op;
 198       break;
 199    default:
 200       assert(0 && "Invalid stencil_op mode");
 201       stencil_op = PIPE_STENCIL_OP_KEEP;
 202    }
 203
 204    switch (stencil_op) {
 205    case PIPE_STENCIL_OP_KEEP:
 206       res = stencilVals;
 207       /* we can return early for this case */
 208       return res;
 209    case PIPE_STENCIL_OP_ZERO:
 210       res = bld->zero;
 211       break;
 212    case PIPE_STENCIL_OP_REPLACE:
 213       res = stencilRef;
 214       break;
 215    case PIPE_STENCIL_OP_INCR:
 216       res = lp_build_add(bld, stencilVals, bld->one);
 217       res = lp_build_min(bld, res, max);
 218       break;
 219    case PIPE_STENCIL_OP_DECR:
 220       res = lp_build_sub(bld, stencilVals, bld->one);
 221       res = lp_build_max(bld, res, bld->zero);
 222       break;
 223    case PIPE_STENCIL_OP_INCR_WRAP:
 224       res = lp_build_add(bld, stencilVals, bld->one);
 225       res = LLVMBuildAnd(builder, res, max, "");
 226       break;
 227    case PIPE_STENCIL_OP_DECR_WRAP:
 228       res = lp_build_sub(bld, stencilVals, bld->one);
 229       res = LLVMBuildAnd(builder, res, max, "");
 230       break;
 231    case PIPE_STENCIL_OP_INVERT:
 232       res = LLVMBuildNot(builder, stencilVals, "");
 233       res = LLVMBuildAnd(builder, res, max, "");
 234       break;
 235    default:
 236       assert(0 && "bad stencil op mode");
 237       res = bld->undef;
 238    }
 239
 240    return res;
 241 }
 242
 243
 244 /**
 245  * Do the one or two-sided stencil test op/update.
 246  */
 247 static LLVMValueRef
 248 lp_build_stencil_op(struct lp_build_context *bld,
 249                     const struct pipe_stencil_state stencil[2],
 250                     enum stencil_op op,
 251                     LLVMValueRef stencilRefs[2],
 252                     LLVMValueRef stencilVals,
 253                     LLVMValueRef mask,
 254                     LLVMValueRef front_facing)
 255
 256 {
 257    LLVMBuilderRef builder = bld->gallivm->builder;
 258    LLVMValueRef res;
 259
 260    assert(stencil[0].enabled);
 261
 262    /* do front face op */
 263    res = lp_build_stencil_op_single(bld, &stencil[0], op,
 264                                      stencilRefs[0], stencilVals);
 265
 266    if (stencil[1].enabled && front_facing != NULL) {
 267       /* do back face op */
 268       LLVMValueRef back_res;
 269
 270       back_res = lp_build_stencil_op_single(bld, &stencil[1], op,
 271                                             stencilRefs[1], stencilVals);
 272
 273       res = lp_build_select(bld, front_facing, res, back_res);
 274    }
 275
 276    if (stencil[0].writemask != 0xff ||
 277        (stencil[1].enabled && front_facing != NULL && stencil[1].writemask != 0xff)) {
 278       /* mask &= stencil[0].writemask */
 279       LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
 280                                                       stencil[0].writemask);
 281       if (stencil[1].enabled && stencil[1].writemask != stencil[0].writemask && front_facing != NULL) {
 282          LLVMValueRef back_writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
 283                                                          stencil[1].writemask);
 284          writemask = lp_build_select(bld, front_facing, writemask, back_writemask);
 285       }
 286
 287       mask = LLVMBuildAnd(builder, mask, writemask, "");
 288       /* res = (res & mask) | (stencilVals & ~mask) */
 289       res = lp_build_select_bitwise(bld, mask, res, stencilVals);
 290    }
 291    else {
 292       /* res = mask ? res : stencilVals */
 293       res = lp_build_select(bld, mask, res, stencilVals);
 294    }
 295
 296    return res;
 297 }
 298
 299
 300
 301 /**
 302  * Return a type that matches the depth/stencil format.
 303  */
 304 struct lp_type
 305 lp_depth_type(const struct util_format_description *format_desc,
 306               unsigned length)
 307 {
 308    struct lp_type type;
 309    unsigned swizzle;
 310
 311    assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
 312    assert(format_desc->block.width == 1);
 313    assert(format_desc->block.height == 1);
 314
 315    swizzle = format_desc->swizzle[0];
 316    assert(swizzle < 4);
 317
 318    memset(&type, 0, sizeof type);
 319    type.width = format_desc->block.bits;
 320
 321    if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
 322       type.floating = TRUE;
 323       assert(swizzle == 0);
 324       assert(format_desc->channel[swizzle].size == format_desc->block.bits);
 325    }
 326    else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
 327       assert(format_desc->block.bits <= 32);
 328       assert(format_desc->channel[swizzle].normalized);
 329       if (format_desc->channel[swizzle].size < format_desc->block.bits) {
 330          /* Prefer signed integers when possible, as SSE has less support
 331           * for unsigned comparison;
 332           */
 333          type.sign = TRUE;
 334       }
 335    }
 336    else
 337       assert(0);
 338
 339    type.length = length;
 340
 341    return type;
 342 }
 343
 344
 345 /**
 346  * Compute bitmask and bit shift to apply to the incoming fragment Z values
 347  * and the Z buffer values needed before doing the Z comparison.
 348  *
 349  * Note that we leave the Z bits in the position that we find them
 350  * in the Z buffer (typically 0xffffff00 or 0x00ffffff).  That lets us
 351  * get by with fewer bit twiddling steps.
 352  */
 353 static boolean
 354 get_z_shift_and_mask(const struct util_format_description *format_desc,
 355                      unsigned *shift, unsigned *width, unsigned *mask)
 356 {
 357    const unsigned total_bits = format_desc->block.bits;
 358    unsigned z_swizzle;
 359    unsigned chan;
 360    unsigned padding_left, padding_right;
 361
 362    assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
 363    assert(format_desc->block.width == 1);
 364    assert(format_desc->block.height == 1);
 365
 366    z_swizzle = format_desc->swizzle[0];
 367
 368    if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
 369       return FALSE;
 370
 371    *width = format_desc->channel[z_swizzle].size;
 372
 373    padding_right = 0;
 374    for (chan = 0; chan < z_swizzle; ++chan)
 375       padding_right += format_desc->channel[chan].size;
 376
 377    padding_left =
 378       total_bits - (padding_right + *width);
 379
 380    if (padding_left || padding_right) {
 381       unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1;
 382       unsigned long long mask_right = (1ULL << (padding_right)) - 1;
 383       *mask = mask_left ^ mask_right;
 384    }
 385    else {
 386       *mask = 0xffffffff;
 387    }
 388
 389    *shift = padding_right;
 390
 391    return TRUE;
 392 }
 393
 394
 395 /**
 396  * Compute bitmask and bit shift to apply to the framebuffer pixel values
 397  * to put the stencil bits in the least significant position.
 398  * (i.e. 0x000000ff)
 399  */
 400 static boolean
 401 get_s_shift_and_mask(const struct util_format_description *format_desc,
 402                      unsigned *shift, unsigned *mask)
 403 {
 404    unsigned s_swizzle;
 405    unsigned chan, sz;
 406
 407    s_swizzle = format_desc->swizzle[1];
 408
 409    if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
 410       return FALSE;
 411
 412    *shift = 0;
 413    for (chan = 0; chan < s_swizzle; chan++)
 414       *shift += format_desc->channel[chan].size;
 415
 416    sz = format_desc->channel[s_swizzle].size;
 417    *mask = (1U << sz) - 1U;
 418
 419    return TRUE;
 420 }
 421
 422
 423 /**
 424  * Perform the occlusion test and increase the counter.
 425  * Test the depth mask. Add the number of channel which has none zero mask
 426  * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
 427  * The counter will add 4.
 428  *
 429  * \param type holds element type of the mask vector.
 430  * \param maskvalue is the depth test mask.
 431  * \param counter is a pointer of the uint32 counter.
 432  */
 433 void
 434 lp_build_occlusion_count(struct gallivm_state *gallivm,
 435                          struct lp_type type,
 436                          LLVMValueRef maskvalue,
 437                          LLVMValueRef counter)
 438 {
 439    LLVMBuilderRef builder = gallivm->builder;
 440    LLVMContextRef context = gallivm->context;
 441    LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1);
 442    LLVMValueRef count, newcount;
 443
 444    assert(type.length <= 16);
 445    assert(type.floating);
 446
 447    if(util_cpu_caps.has_sse && type.length == 4) {
 448       const char *movmskintr = "llvm.x86.sse.movmsk.ps";
 449       const char *popcntintr = "llvm.ctpop.i32";
 450       LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
 451                                            lp_build_vec_type(gallivm, type), "");
 452       bits = lp_build_intrinsic_unary(builder, movmskintr,
 453                                       LLVMInt32TypeInContext(context), bits);
 454       count = lp_build_intrinsic_unary(builder, popcntintr,
 455                                        LLVMInt32TypeInContext(context), bits);
 456    }
 457    else if(util_cpu_caps.has_avx && type.length == 8) {
 458       const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
 459       const char *popcntintr = "llvm.ctpop.i32";
 460       LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
 461                                            lp_build_vec_type(gallivm, type), "");
 462       bits = lp_build_intrinsic_unary(builder, movmskintr,
 463                                       LLVMInt32TypeInContext(context), bits);
 464       count = lp_build_intrinsic_unary(builder, popcntintr,
 465                                        LLVMInt32TypeInContext(context), bits);
 466    }
 467    else {
 468       unsigned i;
 469       LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
 470       LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8);
 471       LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4);
 472       LLVMValueRef shufflev, countd;
 473       LLVMValueRef shuffles[16];
 474       const char *popcntintr = NULL;
 475
 476       countv = LLVMBuildBitCast(builder, countv, i8vntype, "");
 477
 478        for (i = 0; i < type.length; i++) {
 479           shuffles[i] = lp_build_const_int32(gallivm, 4*i);
 480        }
 481
 482        shufflev = LLVMConstVector(shuffles, type.length);
 483        countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, "");
 484        countd = LLVMBuildBitCast(builder, countd, counttype, "countd");
 485
 486        /*
 487         * XXX FIXME
 488         * this is bad on cpus without popcount (on x86 supported by intel
 489         * nehalem, amd barcelona, and up - not tied to sse42).
 490         * Would be much faster to just sum the 4 elements of the vector with
 491         * some horizontal add (shuffle/add/shuffle/add after the initial and).
 492         */
 493        switch (type.length) {
 494        case 4:
 495           popcntintr = "llvm.ctpop.i32";
 496           break;
 497        case 8:
 498           popcntintr = "llvm.ctpop.i64";
 499           break;
 500        case 16:
 501           popcntintr = "llvm.ctpop.i128";
 502           break;
 503        default:
 504           assert(0);
 505        }
 506        count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd);
 507
 508        if (type.length > 4) {
 509           count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 32), "");
 510        }
 511    }
 512    newcount = LLVMBuildLoad(builder, counter, "origcount");
 513    newcount = LLVMBuildAdd(builder, newcount, count, "newcount");
 514    LLVMBuildStore(builder, newcount, counter);
 515 }
 516
 517
 518
 519 /**
 520  * Generate code for performing depth and/or stencil tests.
 521  * We operate on a vector of values (typically n 2x2 quads).
 522  *
 523  * \param depth  the depth test state
 524  * \param stencil  the front/back stencil state
 525  * \param type  the data type of the fragment depth/stencil values
 526  * \param format_desc  description of the depth/stencil surface
 527  * \param mask  the alive/dead pixel mask for the quad (vector)
 528  * \param stencil_refs  the front/back stencil ref values (scalar)
 529  * \param z_src  the incoming depth/stencil values (n 2x2 quad values, float32)
 530  * \param zs_dst_ptr  pointer to depth/stencil values in framebuffer
 531  * \param face  contains boolean value indicating front/back facing polygon
 532  */
 533 void
 534 lp_build_depth_stencil_test(struct gallivm_state *gallivm,
 535                             const struct pipe_depth_state *depth,
 536                             const struct pipe_stencil_state stencil[2],
 537                             struct lp_type z_src_type,
 538                             const struct util_format_description *format_desc,
 539                             struct lp_build_mask_context *mask,
 540                             LLVMValueRef stencil_refs[2],
 541                             LLVMValueRef z_src,
 542                             LLVMValueRef zs_dst_ptr,
 543                             LLVMValueRef face,
 544                             LLVMValueRef *zs_value,
 545                             boolean do_branch)
 546 {
 547    LLVMBuilderRef builder = gallivm->builder;
 548    struct lp_type zs_type;
 549    struct lp_type z_type;
 550    struct lp_build_context z_bld;
 551    struct lp_build_context s_bld;
 552    struct lp_type s_type;
 553    unsigned z_shift = 0, z_width = 0, z_mask = 0;
 554    LLVMValueRef zs_dst, z_dst = NULL;
 555    LLVMValueRef stencil_vals = NULL;
 556    LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
 557    LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
 558    LLVMValueRef orig_mask = lp_build_mask_value(mask);
 559    LLVMValueRef front_facing = NULL;
 560
 561
 562    /*
 563     * Depths are expected to be between 0 and 1, even if they are stored in
 564     * floats. Setting these bits here will ensure that the lp_build_conv() call
 565     * below won't try to unnecessarily clamp the incoming values.
 566     */
 567    if(z_src_type.floating) {
 568       z_src_type.sign = FALSE;
 569       z_src_type.norm = TRUE;
 570    }
 571    else {
 572       assert(!z_src_type.sign);
 573       assert(z_src_type.norm);
 574    }
 575
 576    /* Pick the type matching the depth-stencil format. */
 577    zs_type = lp_depth_type(format_desc, z_src_type.length);
 578
 579    /* Pick the intermediate type for depth operations. */
 580    z_type = zs_type;
 581    /* FIXME: Cope with a depth test type with higher bit width. */
 582    assert(zs_type.width <= z_src_type.width);
 583    z_type.width = z_src_type.width;
 584    assert(z_type.length == z_src_type.length);
 585
 586    /* FIXME: for non-float depth/stencil might generate better code
 587     * if we'd always split it up to use 128bit operations.
 588     * For stencil we'd almost certainly want to pack to 8xi16 values,
 589     * for z just run twice.
 590     */
 591
 592    /* Sanity checking */
 593    {
 594       const unsigned z_swizzle = format_desc->swizzle[0];
 595       const unsigned s_swizzle = format_desc->swizzle[1];
 596
 597       assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
 598              s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
 599
 600       assert(depth->enabled || stencil[0].enabled);
 601
 602       assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
 603       assert(format_desc->block.width == 1);
 604       assert(format_desc->block.height == 1);
 605
 606       if (stencil[0].enabled) {
 607          assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
 608                 format_desc->format == PIPE_FORMAT_S8_UINT_Z24_UNORM);
 609       }
 610
 611       assert(z_swizzle < 4);
 612       assert(format_desc->block.bits <= z_type.width);
 613       if (z_type.floating) {
 614          assert(z_swizzle == 0);
 615          assert(format_desc->channel[z_swizzle].type ==
 616                 UTIL_FORMAT_TYPE_FLOAT);
 617          assert(format_desc->channel[z_swizzle].size ==
 618                 format_desc->block.bits);
 619       }
 620       else {
 621          assert(format_desc->channel[z_swizzle].type ==
 622                 UTIL_FORMAT_TYPE_UNSIGNED);
 623          assert(format_desc->channel[z_swizzle].normalized);
 624          assert(!z_type.fixed);
 625       }
 626    }
 627
 628
 629    /* Setup build context for Z vals */
 630    lp_build_context_init(&z_bld, gallivm, z_type);
 631
 632    /* Setup build context for stencil vals */
 633    s_type = lp_int_type(z_type);
 634    lp_build_context_init(&s_bld, gallivm, s_type);
 635
 636    /* Load current z/stencil value from z/stencil buffer */
 637    zs_dst_ptr = LLVMBuildBitCast(builder,
 638                                  zs_dst_ptr,
 639                                  LLVMPointerType(lp_build_vec_type(gallivm, zs_type), 0), "");
 640    zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
 641    if (format_desc->block.bits < z_type.width) {
 642       /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
 643       zs_dst = LLVMBuildZExt(builder, zs_dst, z_bld.vec_type, "");
 644    }
 645
 646    lp_build_name(zs_dst, "zs_dst");
 647
 648
 649    /* Compute and apply the Z/stencil bitmasks and shifts.
 650     */
 651    {
 652       unsigned s_shift, s_mask;
 653
 654       if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) {
 655          if (z_mask != 0xffffffff) {
 656             z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
 657          }
 658
 659          /*
 660           * Align the framebuffer Z 's LSB to the right.
 661           */
 662          if (z_shift) {
 663             LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
 664             z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst");
 665          } else if (z_bitmask) {
 666             /* TODO: Instead of loading a mask from memory and ANDing, it's
 667              * probably faster to just shake the bits with two shifts. */
 668             z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst");
 669          } else {
 670             z_dst = zs_dst;
 671             lp_build_name(z_dst, "z_dst");
 672          }
 673       }
 674
 675       if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
 676          if (s_shift) {
 677             LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
 678             stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
 679             stencil_shift = shift;  /* used below */
 680          }
 681          else {
 682             stencil_vals = zs_dst;
 683          }
 684
 685          if (s_mask != 0xffffffff) {
 686             LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
 687             stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
 688          }
 689
 690          lp_build_name(stencil_vals, "s_dst");
 691       }
 692    }
 693
 694    if (stencil[0].enabled) {
 695
 696       if (face) {
 697          LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
 698
 699          /* front_facing = face != 0 ? ~0 : 0 */
 700          front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
 701          front_facing = LLVMBuildSExt(builder, front_facing,
 702                                       LLVMIntTypeInContext(gallivm->context,
 703                                              s_bld.type.length*s_bld.type.width),
 704                                       "");
 705          front_facing = LLVMBuildBitCast(builder, front_facing,
 706                                          s_bld.int_vec_type, "");
 707       }
 708
 709       /* convert scalar stencil refs into vectors */
 710       stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
 711       stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);
 712
 713       s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
 714                                           stencil_refs, stencil_vals,
 715                                           front_facing);
 716
 717       /* apply stencil-fail operator */
 718       {
 719          LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
 720          stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
 721                                             stencil_refs, stencil_vals,
 722                                             s_fail_mask, front_facing);
 723       }
 724    }
 725
 726    if (depth->enabled) {
 727       /*
 728        * Convert fragment Z to the desired type, aligning the LSB to the right.
 729        */
 730
 731       assert(z_type.width == z_src_type.width);
 732       assert(z_type.length == z_src_type.length);
 733       assert(lp_check_value(z_src_type, z_src));
 734       if (z_src_type.floating) {
 735          /*
 736           * Convert from floating point values
 737           */
 738
 739          if (!z_type.floating) {
 740             z_src = lp_build_clamped_float_to_unsigned_norm(gallivm,
 741                                                             z_src_type,
 742                                                             z_width,
 743                                                             z_src);
 744          }
 745       } else {
 746          /*
 747           * Convert from unsigned normalized values.
 748           */
 749
 750          assert(!z_src_type.sign);
 751          assert(!z_src_type.fixed);
 752          assert(z_src_type.norm);
 753          assert(!z_type.floating);
 754          if (z_src_type.width > z_width) {
 755             LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type,
 756                                                         z_src_type.width - z_width);
 757             z_src = LLVMBuildLShr(builder, z_src, shift, "");
 758          }
 759       }
 760       assert(lp_check_value(z_type, z_src));
 761
 762       lp_build_name(z_src, "z_src");
 763
 764       /* compare src Z to dst Z, returning 'pass' mask */
 765       z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);
 766
 767       if (!stencil[0].enabled) {
 768          /* We can potentially skip all remaining operations here, but only
 769           * if stencil is disabled because we still need to update the stencil
 770           * buffer values.  Don't need to update Z buffer values.
 771           */
 772          lp_build_mask_update(mask, z_pass);
 773
 774          if (do_branch) {
 775             lp_build_mask_check(mask);
 776             do_branch = FALSE;
 777          }
 778       }
 779
 780       if (depth->writemask) {
 781          LLVMValueRef zselectmask;
 782
 783          /* mask off bits that failed Z test */
 784          zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
 785
 786          /* mask off bits that failed stencil test */
 787          if (s_pass_mask) {
 788             zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
 789          }
 790
 791          /* Mix the old and new Z buffer values.
 792           * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
 793           */
 794          z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
 795       }
 796
 797       if (stencil[0].enabled) {
 798          /* update stencil buffer values according to z pass/fail result */
 799          LLVMValueRef z_fail_mask, z_pass_mask;
 800
 801          /* apply Z-fail operator */
 802          z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass);
 803          stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
 804                                             stencil_refs, stencil_vals,
 805                                             z_fail_mask, front_facing);
 806
 807          /* apply Z-pass operator */
 808          z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
 809          stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
 810                                             stencil_refs, stencil_vals,
 811                                             z_pass_mask, front_facing);
 812       }
 813    }
 814    else {
 815       /* No depth test: apply Z-pass operator to stencil buffer values which
 816        * passed the stencil test.
 817        */
 818       s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, "");
 819       stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
 820                                          stencil_refs, stencil_vals,
 821                                          s_pass_mask, front_facing);
 822    }
 823
 824    /* Put Z and ztencil bits in the right place */
 825    if (z_dst && z_shift) {
 826       LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
 827       z_dst = LLVMBuildShl(builder, z_dst, shift, "");
 828    }
 829    if (stencil_vals && stencil_shift)
 830       stencil_vals = LLVMBuildShl(builder, stencil_vals,
 831                                   stencil_shift, "");
 832
 833    /* Finally, merge/store the z/stencil values */
 834    if ((depth->enabled && depth->writemask) ||
 835        (stencil[0].enabled && stencil[0].writemask)) {
 836
 837       if (z_dst && stencil_vals)
 838          zs_dst = LLVMBuildOr(builder, z_dst, stencil_vals, "");
 839       else if (z_dst)
 840          zs_dst = z_dst;
 841       else
 842          zs_dst = stencil_vals;
 843
 844       *zs_value = zs_dst;
 845    }
 846
 847    if (s_pass_mask)
 848       lp_build_mask_update(mask, s_pass_mask);
 849
 850    if (depth->enabled && stencil[0].enabled)
 851       lp_build_mask_update(mask, z_pass);
 852
 853    if (do_branch)
 854       lp_build_mask_check(mask);
 855
 856 }
 857
 858
 859 void
 860 lp_build_depth_write(struct gallivm_state *gallivm,
 861                      struct lp_type z_src_type,
 862                      const struct util_format_description *format_desc,
 863                      LLVMValueRef zs_dst_ptr,
 864                      LLVMValueRef zs_value)
 865 {
 866    LLVMBuilderRef builder = gallivm->builder;
 867
 868    if (format_desc->block.bits < z_src_type.width) {
 869       /* Truncate income ZS values (e.g., when writing to Z16_UNORM) */
 870       LLVMTypeRef zs_type = LLVMIntTypeInContext(gallivm->context, format_desc->block.bits);
 871       if (z_src_type.length > 1) {
 872          zs_type = LLVMVectorType(zs_type, z_src_type.length);
 873       }
 874       zs_value = LLVMBuildTrunc(builder, zs_value, zs_type, "");
 875    }
 876
 877    zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
 878                                  LLVMPointerType(LLVMTypeOf(zs_value), 0), "");
 879
 880    LLVMBuildStore(builder, zs_value, zs_dst_ptr);
 881 }
 882
 883
 884 void
 885 lp_build_deferred_depth_write(struct gallivm_state *gallivm,
 886                               struct lp_type z_src_type,
 887                               const struct util_format_description *format_desc,
 888                               struct lp_build_mask_context *mask,
 889                               LLVMValueRef zs_dst_ptr,
 890                               LLVMValueRef zs_value)
 891 {
 892    struct lp_type z_type;
 893    struct lp_build_context z_bld;
 894    LLVMValueRef z_dst;
 895    LLVMBuilderRef builder = gallivm->builder;
 896
 897    /* XXX: pointlessly redo type logic:
 898     */
 899    z_type = lp_depth_type(format_desc, z_src_type.length);
 900    lp_build_context_init(&z_bld, gallivm, z_type);
 901
 902    zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
 903                                  LLVMPointerType(z_bld.vec_type, 0), "");
 904
 905    z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
 906
 907    if (z_type.width < z_src_type.width) {
 908       zs_value = LLVMBuildTrunc(builder, zs_value, z_bld.vec_type, "");
 909    }
 910
 911    z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst);
 912
 913    LLVMBuildStore(builder, z_dst, zs_dst_ptr);
 914 }