src/gallium/drivers/llvmpipe/lp_quad_depth_test.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * \brief  Quad depth testing
  30  */
  31
  32 #include "pipe/p_defines.h"
  33 #include "util/u_memory.h"
  34 #include "tgsi/tgsi_scan.h"
  35 #include "lp_context.h"
  36 #include "lp_quad.h"
  37 #include "lp_surface.h"
  38 #include "lp_quad_pipe.h"
  39 #include "lp_tile_cache.h"
  40 #include "lp_state.h"           /* for lp_fragment_shader */
  41
  42
  43 struct depth_data {
  44    struct pipe_surface *ps;
  45    enum pipe_format format;
  46    unsigned bzzzz[QUAD_SIZE];  /**< Z values fetched from depth buffer */
  47    unsigned qzzzz[QUAD_SIZE];  /**< Z values from the quad */
  48    ubyte stencilVals[QUAD_SIZE];
  49    struct llvmpipe_cached_tile *tile;
  50 };
  51
  52
  53
  54 static void
  55 get_depth_stencil_values( struct depth_data *data,
  56                           const struct quad_header *quad )
  57 {
  58    unsigned j;
  59    const struct llvmpipe_cached_tile *tile = data->tile;
  60
  61    switch (data->format) {
  62    case PIPE_FORMAT_Z16_UNORM:
  63       for (j = 0; j < QUAD_SIZE; j++) {
  64          int x = quad->input.x0 % TILE_SIZE + (j & 1);
  65          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
  66          data->bzzzz[j] = tile->data.depth16[y][x];
  67       }
  68       break;
  69    case PIPE_FORMAT_Z32_UNORM:
  70       for (j = 0; j < QUAD_SIZE; j++) {
  71          int x = quad->input.x0 % TILE_SIZE + (j & 1);
  72          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
  73          data->bzzzz[j] = tile->data.depth32[y][x];
  74       }
  75       break;
  76    case PIPE_FORMAT_X8Z24_UNORM:
  77    case PIPE_FORMAT_S8Z24_UNORM:
  78       for (j = 0; j < QUAD_SIZE; j++) {
  79          int x = quad->input.x0 % TILE_SIZE + (j & 1);
  80          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
  81          data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
  82          data->stencilVals[j] = tile->data.depth32[y][x] >> 24;
  83       }
  84    break;
  85    case PIPE_FORMAT_Z24X8_UNORM:
  86    case PIPE_FORMAT_Z24S8_UNORM:
  87       for (j = 0; j < QUAD_SIZE; j++) {
  88          int x = quad->input.x0 % TILE_SIZE + (j & 1);
  89          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
  90          data->bzzzz[j] = tile->data.depth32[y][x] >> 8;
  91          data->stencilVals[j] = tile->data.depth32[y][x] & 0xff;
  92       }
  93       break;
  94    default:
  95       assert(0);
  96    }
  97 }
  98
  99 /* If the shader has not been run, interpolate the depth values
 100  * ourselves.
 101  */
 102 static void
 103 interpolate_quad_depth( struct quad_header *quad )
 104 {
 105    const float fx = (float) quad->input.x0;
 106    const float fy = (float) quad->input.y0;
 107    const float dzdx = quad->coef->dadx[0][2];
 108    const float dzdy = quad->coef->dady[0][2];
 109    const float z0 = quad->coef->a0[0][2] + dzdx * fx + dzdy * fy;
 110
 111    quad->output.depth[0] = z0;
 112    quad->output.depth[1] = z0 + dzdx;
 113    quad->output.depth[2] = z0 + dzdy;
 114    quad->output.depth[3] = z0 + dzdx + dzdy;
 115 }
 116
 117
 118 static void
 119 convert_quad_depth( struct depth_data *data,
 120                     const struct quad_header *quad )
 121 {
 122    unsigned j;
 123
 124    /* Convert quad's float depth values to int depth values (qzzzz).
 125     * If the Z buffer stores integer values, we _have_ to do the depth
 126     * compares with integers (not floats).  Otherwise, the float->int->float
 127     * conversion of Z values (which isn't an identity function) will cause
 128     * Z-fighting errors.
 129     */
 130    switch (data->format) {
 131    case PIPE_FORMAT_Z16_UNORM:
 132       {
 133          float scale = 65535.0;
 134
 135          for (j = 0; j < QUAD_SIZE; j++) {
 136             data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
 137          }
 138       }
 139       break;
 140    case PIPE_FORMAT_Z32_UNORM:
 141       {
 142          double scale = (double) (uint) ~0UL;
 143
 144          for (j = 0; j < QUAD_SIZE; j++) {
 145             data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
 146          }
 147       }
 148       break;
 149    case PIPE_FORMAT_X8Z24_UNORM:
 150    case PIPE_FORMAT_S8Z24_UNORM:
 151       {
 152          float scale = (float) ((1 << 24) - 1);
 153
 154          for (j = 0; j < QUAD_SIZE; j++) {
 155             data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
 156          }
 157       }
 158       break;
 159    case PIPE_FORMAT_Z24X8_UNORM:
 160    case PIPE_FORMAT_Z24S8_UNORM:
 161       {
 162          float scale = (float) ((1 << 24) - 1);
 163
 164          for (j = 0; j < QUAD_SIZE; j++) {
 165             data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
 166          }
 167       }
 168       break;
 169    default:
 170       assert(0);
 171    }
 172 }
 173
 174
 175
 176 static void
 177 write_depth_stencil_values( struct depth_data *data,
 178                             struct quad_header *quad )
 179 {
 180    struct llvmpipe_cached_tile *tile = data->tile;
 181    unsigned j;
 182
 183    /* put updated Z values back into cached tile */
 184    switch (data->format) {
 185    case PIPE_FORMAT_Z16_UNORM:
 186       for (j = 0; j < QUAD_SIZE; j++) {
 187          int x = quad->input.x0 % TILE_SIZE + (j & 1);
 188          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
 189          tile->data.depth16[y][x] = (ushort) data->bzzzz[j];
 190       }
 191       break;
 192    case PIPE_FORMAT_X8Z24_UNORM:
 193    case PIPE_FORMAT_Z32_UNORM:
 194       for (j = 0; j < QUAD_SIZE; j++) {
 195          int x = quad->input.x0 % TILE_SIZE + (j & 1);
 196          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
 197          tile->data.depth32[y][x] = data->bzzzz[j];
 198       }
 199       break;
 200    case PIPE_FORMAT_S8Z24_UNORM:
 201       for (j = 0; j < QUAD_SIZE; j++) {
 202          int x = quad->input.x0 % TILE_SIZE + (j & 1);
 203          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
 204          tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j];
 205       }
 206       break;
 207    case PIPE_FORMAT_Z24S8_UNORM:
 208       for (j = 0; j < QUAD_SIZE; j++) {
 209          int x = quad->input.x0 % TILE_SIZE + (j & 1);
 210          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
 211          tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j];
 212       }
 213       break;
 214    case PIPE_FORMAT_Z24X8_UNORM:
 215       for (j = 0; j < QUAD_SIZE; j++) {
 216          int x = quad->input.x0 % TILE_SIZE + (j & 1);
 217          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
 218          tile->data.depth32[y][x] = data->bzzzz[j] << 8;
 219       }
 220       break;
 221    default:
 222       assert(0);
 223    }
 224 }
 225
 226
 227
 228
 229 /** Only 8-bit stencil supported */
 230 #define STENCIL_MAX 0xff
 231
 232
 233 /**
 234  * Do the basic stencil test (compare stencil buffer values against the
 235  * reference value.
 236  *
 237  * \param data->stencilVals  the stencil values from the stencil buffer
 238  * \param func  the stencil func (PIPE_FUNC_x)
 239  * \param ref  the stencil reference value
 240  * \param valMask  the stencil value mask indicating which bits of the stencil
 241  *                 values and ref value are to be used.
 242  * \return mask indicating which pixels passed the stencil test
 243  */
 244 static unsigned
 245 do_stencil_test(struct depth_data *data,
 246                 unsigned func,
 247                 unsigned ref, unsigned valMask)
 248 {
 249    unsigned passMask = 0x0;
 250    unsigned j;
 251
 252    ref &= valMask;
 253
 254    switch (func) {
 255    case PIPE_FUNC_NEVER:
 256       /* passMask = 0x0 */
 257       break;
 258    case PIPE_FUNC_LESS:
 259       for (j = 0; j < QUAD_SIZE; j++) {
 260          if (ref < (data->stencilVals[j] & valMask)) {
 261             passMask |= (1 << j);
 262          }
 263       }
 264       break;
 265    case PIPE_FUNC_EQUAL:
 266       for (j = 0; j < QUAD_SIZE; j++) {
 267          if (ref == (data->stencilVals[j] & valMask)) {
 268             passMask |= (1 << j);
 269          }
 270       }
 271       break;
 272    case PIPE_FUNC_LEQUAL:
 273       for (j = 0; j < QUAD_SIZE; j++) {
 274          if (ref <= (data->stencilVals[j] & valMask)) {
 275             passMask |= (1 << j);
 276          }
 277       }
 278       break;
 279    case PIPE_FUNC_GREATER:
 280       for (j = 0; j < QUAD_SIZE; j++) {
 281          if (ref > (data->stencilVals[j] & valMask)) {
 282             passMask |= (1 << j);
 283          }
 284       }
 285       break;
 286    case PIPE_FUNC_NOTEQUAL:
 287       for (j = 0; j < QUAD_SIZE; j++) {
 288          if (ref != (data->stencilVals[j] & valMask)) {
 289             passMask |= (1 << j);
 290          }
 291       }
 292       break;
 293    case PIPE_FUNC_GEQUAL:
 294       for (j = 0; j < QUAD_SIZE; j++) {
 295          if (ref >= (data->stencilVals[j] & valMask)) {
 296             passMask |= (1 << j);
 297          }
 298       }
 299       break;
 300    case PIPE_FUNC_ALWAYS:
 301       passMask = MASK_ALL;
 302       break;
 303    default:
 304       assert(0);
 305    }
 306
 307    return passMask;
 308 }
 309
 310
 311 /**
 312  * Apply the stencil operator to stencil values.
 313  *
 314  * \param data->stencilVals  the stencil buffer values (read and written)
 315  * \param mask  indicates which pixels to update
 316  * \param op  the stencil operator (PIPE_STENCIL_OP_x)
 317  * \param ref  the stencil reference value
 318  * \param wrtMask  writemask controlling which bits are changed in the
 319  *                 stencil values
 320  */
 321 static void
 322 apply_stencil_op(struct depth_data *data,
 323                  unsigned mask, unsigned op, ubyte ref, ubyte wrtMask)
 324 {
 325    unsigned j;
 326    ubyte newstencil[QUAD_SIZE];
 327
 328    for (j = 0; j < QUAD_SIZE; j++) {
 329       newstencil[j] = data->stencilVals[j];
 330    }
 331
 332    switch (op) {
 333    case PIPE_STENCIL_OP_KEEP:
 334       /* no-op */
 335       break;
 336    case PIPE_STENCIL_OP_ZERO:
 337       for (j = 0; j < QUAD_SIZE; j++) {
 338          if (mask & (1 << j)) {
 339             newstencil[j] = 0;
 340          }
 341       }
 342       break;
 343    case PIPE_STENCIL_OP_REPLACE:
 344       for (j = 0; j < QUAD_SIZE; j++) {
 345          if (mask & (1 << j)) {
 346             newstencil[j] = ref;
 347          }
 348       }
 349       break;
 350    case PIPE_STENCIL_OP_INCR:
 351       for (j = 0; j < QUAD_SIZE; j++) {
 352          if (mask & (1 << j)) {
 353             if (data->stencilVals[j] < STENCIL_MAX) {
 354                newstencil[j] = data->stencilVals[j] + 1;
 355             }
 356          }
 357       }
 358       break;
 359    case PIPE_STENCIL_OP_DECR:
 360       for (j = 0; j < QUAD_SIZE; j++) {
 361          if (mask & (1 << j)) {
 362             if (data->stencilVals[j] > 0) {
 363                newstencil[j] = data->stencilVals[j] - 1;
 364             }
 365          }
 366       }
 367       break;
 368    case PIPE_STENCIL_OP_INCR_WRAP:
 369       for (j = 0; j < QUAD_SIZE; j++) {
 370          if (mask & (1 << j)) {
 371             newstencil[j] = data->stencilVals[j] + 1;
 372          }
 373       }
 374       break;
 375    case PIPE_STENCIL_OP_DECR_WRAP:
 376       for (j = 0; j < QUAD_SIZE; j++) {
 377          if (mask & (1 << j)) {
 378             newstencil[j] = data->stencilVals[j] - 1;
 379          }
 380       }
 381       break;
 382    case PIPE_STENCIL_OP_INVERT:
 383       for (j = 0; j < QUAD_SIZE; j++) {
 384          if (mask & (1 << j)) {
 385             newstencil[j] = ~data->stencilVals[j];
 386          }
 387       }
 388       break;
 389    default:
 390       assert(0);
 391    }
 392
 393    /*
 394     * update the stencil values
 395     */
 396    if (wrtMask != STENCIL_MAX) {
 397       /* apply bit-wise stencil buffer writemask */
 398       for (j = 0; j < QUAD_SIZE; j++) {
 399          data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]);
 400       }
 401    }
 402    else {
 403       for (j = 0; j < QUAD_SIZE; j++) {
 404          data->stencilVals[j] = newstencil[j];
 405       }
 406    }
 407 }
 408
 409
 410
 411 /*
 412  * To increase efficiency, we should probably have multiple versions
 413  * of this function that are specifically for Z16, Z32 and FP Z buffers.
 414  * Try to effectively do that with codegen...
 415  */
 416
 417 static boolean
 418 depth_test_quad(struct quad_stage *qs,
 419                 struct depth_data *data,
 420                 struct quad_header *quad)
 421 {
 422    struct llvmpipe_context *llvmpipe = qs->llvmpipe;
 423    unsigned zmask = 0;
 424    unsigned j;
 425
 426    switch (llvmpipe->depth_stencil->depth.func) {
 427    case PIPE_FUNC_NEVER:
 428       /* zmask = 0 */
 429       break;
 430    case PIPE_FUNC_LESS:
 431       /* Note this is pretty much a single sse or cell instruction.
 432        * Like this:  quad->mask &= (quad->outputs.depth < zzzz);
 433        */
 434       for (j = 0; j < QUAD_SIZE; j++) {
 435          if (data->qzzzz[j] < data->bzzzz[j])
 436             zmask |= 1 << j;
 437       }
 438       break;
 439    case PIPE_FUNC_EQUAL:
 440       for (j = 0; j < QUAD_SIZE; j++) {
 441          if (data->qzzzz[j] == data->bzzzz[j])
 442             zmask |= 1 << j;
 443       }
 444       break;
 445    case PIPE_FUNC_LEQUAL:
 446       for (j = 0; j < QUAD_SIZE; j++) {
 447          if (data->qzzzz[j] <= data->bzzzz[j])
 448             zmask |= (1 << j);
 449       }
 450       break;
 451    case PIPE_FUNC_GREATER:
 452       for (j = 0; j < QUAD_SIZE; j++) {
 453          if (data->qzzzz[j] > data->bzzzz[j])
 454             zmask |= (1 << j);
 455       }
 456       break;
 457    case PIPE_FUNC_NOTEQUAL:
 458       for (j = 0; j < QUAD_SIZE; j++) {
 459          if (data->qzzzz[j] != data->bzzzz[j])
 460             zmask |= (1 << j);
 461       }
 462       break;
 463    case PIPE_FUNC_GEQUAL:
 464       for (j = 0; j < QUAD_SIZE; j++) {
 465          if (data->qzzzz[j] >= data->bzzzz[j])
 466             zmask |= (1 << j);
 467       }
 468       break;
 469    case PIPE_FUNC_ALWAYS:
 470       zmask = MASK_ALL;
 471       break;
 472    default:
 473       assert(0);
 474    }
 475
 476    quad->inout.mask &= zmask;
 477    if (quad->inout.mask == 0)
 478       return FALSE;
 479
 480    /* Update our internal copy only if writemask set.  Even if
 481     * depth.writemask is FALSE, may still need to write out buffer
 482     * data due to stencil changes.
 483     */
 484    if (llvmpipe->depth_stencil->depth.writemask) {
 485       for (j = 0; j < QUAD_SIZE; j++) {
 486          if (quad->inout.mask & (1 << j)) {
 487             data->bzzzz[j] = data->qzzzz[j];
 488          }
 489       }
 490    }
 491
 492    return TRUE;
 493 }
 494
 495
 496
 497 /**
 498  * Do stencil (and depth) testing.  Stenciling depends on the outcome of
 499  * depth testing.
 500  */
 501 static boolean
 502 depth_stencil_test_quad(struct quad_stage *qs,
 503                         struct depth_data *data,
 504                         struct quad_header *quad)
 505 {
 506    struct llvmpipe_context *llvmpipe = qs->llvmpipe;
 507    unsigned func, zFailOp, zPassOp, failOp;
 508    ubyte ref, wrtMask, valMask;
 509    uint face = quad->input.facing;
 510
 511    if (!llvmpipe->depth_stencil->stencil[1].enabled) {
 512       /* single-sided stencil test, use front (face=0) state */
 513       face = 0;
 514    }
 515
 516    /* choose front or back face function, operator, etc */
 517    /* XXX we could do these initializations once per primitive */
 518    func    = llvmpipe->depth_stencil->stencil[face].func;
 519    failOp  = llvmpipe->depth_stencil->stencil[face].fail_op;
 520    zFailOp = llvmpipe->depth_stencil->stencil[face].zfail_op;
 521    zPassOp = llvmpipe->depth_stencil->stencil[face].zpass_op;
 522    ref     = llvmpipe->depth_stencil->stencil[face].ref_value;
 523    wrtMask = llvmpipe->depth_stencil->stencil[face].writemask;
 524    valMask = llvmpipe->depth_stencil->stencil[face].valuemask;
 525
 526
 527    /* do the stencil test first */
 528    {
 529       unsigned passMask, failMask;
 530       passMask = do_stencil_test(data, func, ref, valMask);
 531       failMask = quad->inout.mask & ~passMask;
 532       quad->inout.mask &= passMask;
 533
 534       if (failOp != PIPE_STENCIL_OP_KEEP) {
 535          apply_stencil_op(data, failMask, failOp, ref, wrtMask);
 536       }
 537    }
 538
 539    if (quad->inout.mask) {
 540       /* now the pixels that passed the stencil test are depth tested */
 541       if (llvmpipe->depth_stencil->depth.enabled) {
 542          const unsigned origMask = quad->inout.mask;
 543
 544          depth_test_quad(qs, data, quad);  /* quad->mask is updated */
 545
 546          /* update stencil buffer values according to z pass/fail result */
 547          if (zFailOp != PIPE_STENCIL_OP_KEEP) {
 548             const unsigned failMask = origMask & ~quad->inout.mask;
 549             apply_stencil_op(data, failMask, zFailOp, ref, wrtMask);
 550          }
 551
 552          if (zPassOp != PIPE_STENCIL_OP_KEEP) {
 553             const unsigned passMask = origMask & quad->inout.mask;
 554             apply_stencil_op(data, passMask, zPassOp, ref, wrtMask);
 555          }
 556       }
 557       else {
 558          /* no depth test, apply Zpass operator to stencil buffer values */
 559          apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask);
 560       }
 561    }
 562
 563    return quad->inout.mask != 0;
 564 }
 565
 566
 567
 568 static unsigned mask_count[0x8] =
 569 {
 570    0,                           /* 0x0 */
 571    1,                           /* 0x1 */
 572    1,                           /* 0x2 */
 573    2,                           /* 0x3 */
 574    1,                           /* 0x4 */
 575    2,                           /* 0x5 */
 576    2,                           /* 0x6 */
 577    3,                           /* 0x7 */
 578 };
 579
 580
 581
 582 static void
 583 depth_test_quads_fallback(struct quad_stage *qs,
 584                           struct quad_header *quads[],
 585                           unsigned nr)
 586 {
 587    unsigned i, pass = 0;
 588    const struct lp_fragment_shader *fs = qs->llvmpipe->fs;
 589    boolean interp_depth = !fs->info.writes_z;
 590    struct depth_data data;
 591
 592
 593    if (qs->llvmpipe->framebuffer.zsbuf &&
 594        (qs->llvmpipe->depth_stencil->depth.enabled ||
 595         qs->llvmpipe->depth_stencil->stencil[0].enabled)) {
 596
 597       data.ps = qs->llvmpipe->framebuffer.zsbuf;
 598       data.format = data.ps->format;
 599       data.tile = lp_get_cached_tile(qs->llvmpipe->zsbuf_cache,
 600                                      quads[0]->input.x0,
 601                                      quads[0]->input.y0);
 602
 603       for (i = 0; i < nr; i++) {
 604          if(!quads[i]->inout.mask)
 605             continue;
 606
 607          get_depth_stencil_values(&data, quads[i]);
 608
 609          if (qs->llvmpipe->depth_stencil->depth.enabled) {
 610             if (interp_depth)
 611                interpolate_quad_depth(quads[i]);
 612
 613             convert_quad_depth(&data, quads[i]);
 614          }
 615
 616          if (qs->llvmpipe->depth_stencil->stencil[0].enabled) {
 617             if (!depth_stencil_test_quad(qs, &data, quads[i]))
 618                continue;
 619          }
 620          else {
 621             if (!depth_test_quad(qs, &data, quads[i]))
 622                continue;
 623          }
 624
 625          if (qs->llvmpipe->depth_stencil->stencil[0].enabled ||
 626              qs->llvmpipe->depth_stencil->depth.writemask)
 627             write_depth_stencil_values(&data, quads[i]);
 628
 629          qs->llvmpipe->occlusion_count += mask_count[quads[i]->inout.mask];
 630          ++pass;
 631       }
 632    }
 633
 634    if (pass)
 635       qs->next->run(qs->next, quads, nr);
 636 }
 637
 638 /* XXX: this function assumes setup function actually emits linear
 639  * spans of quads.  It seems a lot more natural to do (early)
 640  * depth-testing on spans rather than quads.
 641  */
 642 static void
 643 depth_interp_z16_less_write(struct quad_stage *qs,
 644                             struct quad_header *quads[],
 645                             unsigned nr)
 646 {
 647    unsigned i, pass = 0;
 648    const unsigned ix = quads[0]->input.x0;
 649    const unsigned iy = quads[0]->input.y0;
 650    const float fx = (float) ix;
 651    const float fy = (float) iy;
 652    const float dzdx = quads[0]->coef->dadx[0][2];
 653    const float dzdy = quads[0]->coef->dady[0][2];
 654    const float z0 = quads[0]->coef->a0[0][2] + dzdx * fx + dzdy * fy;
 655    struct llvmpipe_cached_tile *tile;
 656    ushort (*depth16)[TILE_SIZE];
 657    ushort idepth[4], depth_step;
 658    const float scale = 65535.0;
 659
 660    idepth[0] = (ushort)((z0) * scale);
 661    idepth[1] = (ushort)((z0 + dzdx) * scale);
 662    idepth[2] = (ushort)((z0 + dzdy) * scale);
 663    idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale);
 664
 665    depth_step = (ushort)(dzdx * 2 * scale);
 666
 667    tile = lp_get_cached_tile(qs->llvmpipe->zsbuf_cache, ix, iy);
 668
 669    depth16 = (ushort (*)[TILE_SIZE])
 670       &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE];
 671
 672    for (i = 0; i < nr; i++) {
 673       unsigned outmask = quads[i]->inout.mask;
 674       unsigned mask = 0;
 675
 676       if ((outmask & 1) && idepth[0] < depth16[0][0]) {
 677          depth16[0][0] = idepth[0];
 678          mask |= (1 << 0);
 679       }
 680
 681       if ((outmask & 2) && idepth[1] < depth16[0][1]) {
 682          depth16[0][1] = idepth[1];
 683          mask |= (1 << 1);
 684       }
 685
 686       if ((outmask & 4) && idepth[2] < depth16[1][0]) {
 687          depth16[1][0] = idepth[2];
 688          mask |= (1 << 2);
 689       }
 690
 691       if ((outmask & 8) && idepth[3] < depth16[1][1]) {
 692          depth16[1][1] = idepth[3];
 693          mask |= (1 << 3);
 694       }
 695
 696       idepth[0] += depth_step;
 697       idepth[1] += depth_step;
 698       idepth[2] += depth_step;
 699       idepth[3] += depth_step;
 700
 701       depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2];
 702
 703       quads[i]->inout.mask = mask;
 704       if (quads[i]->inout.mask)
 705          ++pass;
 706    }
 707
 708    if (pass)
 709       qs->next->run(qs->next, quads, nr);
 710
 711 }
 712
 713
 714 static void
 715 depth_noop(struct quad_stage *qs,
 716            struct quad_header *quads[],
 717            unsigned nr)
 718 {
 719    qs->next->run(qs->next, quads, nr);
 720 }
 721
 722
 723
 724 static void
 725 choose_depth_test(struct quad_stage *qs,
 726                   struct quad_header *quads[],
 727                   unsigned nr)
 728 {
 729    boolean interp_depth = !qs->llvmpipe->fs->info.writes_z;
 730
 731    boolean depth = (qs->llvmpipe->framebuffer.zsbuf &&
 732                     qs->llvmpipe->depth_stencil->depth.enabled);
 733
 734    unsigned depthfunc = qs->llvmpipe->depth_stencil->depth.func;
 735
 736    boolean stencil = qs->llvmpipe->depth_stencil->stencil[0].enabled;
 737
 738    boolean depthwrite = qs->llvmpipe->depth_stencil->depth.writemask;
 739
 740
 741    qs->run = depth_test_quads_fallback;
 742
 743    if (!depth &&
 744        !stencil) {
 745       qs->run = depth_noop;
 746    }
 747    else if (interp_depth &&
 748             depth &&
 749             depthfunc == PIPE_FUNC_LESS &&
 750             depthwrite &&
 751             !stencil)
 752    {
 753       switch (qs->llvmpipe->framebuffer.zsbuf->format) {
 754       case PIPE_FORMAT_Z16_UNORM:
 755          qs->run = depth_interp_z16_less_write;
 756          break;
 757       default:
 758          break;
 759       }
 760    }
 761
 762    qs->run( qs, quads, nr );
 763 }
 764
 765
 766
 767
 768
 769 static void depth_test_begin(struct quad_stage *qs)
 770 {
 771    qs->run = choose_depth_test;
 772    qs->next->begin(qs->next);
 773 }
 774
 775
 776 static void depth_test_destroy(struct quad_stage *qs)
 777 {
 778    FREE( qs );
 779 }
 780
 781
 782 struct quad_stage *lp_quad_depth_test_stage( struct llvmpipe_context *llvmpipe )
 783 {
 784    struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
 785
 786    stage->llvmpipe = llvmpipe;
 787    stage->begin = depth_test_begin;
 788    stage->run = choose_depth_test;
 789    stage->destroy = depth_test_destroy;
 790
 791    return stage;
 792 }