X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fsoftpipe%2Fsp_quad_depth_test.c;h=4cce9e9bc127e5b957951f1ef898ee8e89021f97;hb=9e9d69979c7aff7dac2d670af950f4a50273bcde;hp=a981775cbd3c91f7c7d7147e51c6699ccbfbf109;hpb=43867acb6afc7fad26cdc2f22b2a3bb6eeefb2da;p=mesa.git diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index a981775cbd3..4cce9e9bc12 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -1,6 +1,7 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007 VMware, Inc. + * Copyright 2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,7 +19,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -26,11 +27,12 @@ **************************************************************************/ /** - * \brief Quad depth testing + * \brief Quad depth / stencil testing */ #include "pipe/p_defines.h" #include "util/u_format.h" +#include "util/u_math.h" #include "util/u_memory.h" #include "tgsi/tgsi_scan.h" #include "sp_context.h" @@ -43,10 +45,14 @@ struct depth_data { struct pipe_surface *ps; enum pipe_format format; - unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */ - unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */ - ubyte stencilVals[QUAD_SIZE]; + unsigned bzzzz[TGSI_QUAD_SIZE]; /**< Z values fetched from depth buffer */ + unsigned qzzzz[TGSI_QUAD_SIZE]; /**< Z values from the quad */ + ubyte stencilVals[TGSI_QUAD_SIZE]; + boolean use_shader_stencil_refs; + ubyte shader_stencil_refs[TGSI_QUAD_SIZE]; struct softpipe_cached_tile *tile; + float minval, maxval; + bool clamp; }; @@ -60,43 +66,68 @@ get_depth_stencil_values( struct depth_data *data, switch (data->format) { case PIPE_FORMAT_Z16_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { int x = quad->input.x0 % TILE_SIZE + (j & 1); int y = quad->input.y0 % TILE_SIZE + (j >> 1); data->bzzzz[j] = tile->data.depth16[y][x]; } break; case PIPE_FORMAT_Z32_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { int x = quad->input.x0 % TILE_SIZE + (j & 1); int y = quad->input.y0 % TILE_SIZE + (j >> 1); data->bzzzz[j] = tile->data.depth32[y][x]; } break; - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + for (j = 0; j < TGSI_QUAD_SIZE; j++) { int x = quad->input.x0 % TILE_SIZE + (j & 1); int y = quad->input.y0 % TILE_SIZE + (j >> 1); data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff; data->stencilVals[j] = tile->data.depth32[y][x] >> 24; } - break; - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { + break; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + for (j = 0; j < TGSI_QUAD_SIZE; j++) { int x = quad->input.x0 % TILE_SIZE + (j & 1); int y = quad->input.y0 % TILE_SIZE + (j >> 1); data->bzzzz[j] = tile->data.depth32[y][x] >> 8; data->stencilVals[j] = tile->data.depth32[y][x] & 0xff; } break; + case PIPE_FORMAT_S8_UINT: + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + data->bzzzz[j] = 0; + data->stencilVals[j] = tile->data.stencil8[y][x]; + } + break; + case PIPE_FORMAT_Z32_FLOAT: + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + data->bzzzz[j] = tile->data.depth32[y][x]; + } + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + data->bzzzz[j] = tile->data.depth64[y][x] & 0xffffffff; + data->stencilVals[j] = (tile->data.depth64[y][x] >> 32) & 0xff; + } + break; default: assert(0); } } -/* If the shader has not been run, interpolate the depth values + +/** + * If the shader has not been run, interpolate the depth values * ourselves. */ static void @@ -115,11 +146,15 @@ interpolate_quad_depth( struct quad_header *quad ) } +/** + * Compute the depth_data::qzzzz[] values from the float fragment Z values. + */ static void convert_quad_depth( struct depth_data *data, const struct quad_header *quad ) { unsigned j; + float dvals[TGSI_QUAD_SIZE]; /* Convert quad's float depth values to int depth values (qzzzz). * If the Z buffer stores integer values, we _have_ to do the depth @@ -127,13 +162,23 @@ convert_quad_depth( struct depth_data *data, * conversion of Z values (which isn't an identity function) will cause * Z-fighting errors. */ + if (data->clamp) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + dvals[j] = CLAMP(quad->output.depth[j], data->minval, data->maxval); + } + } else { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + dvals[j] = quad->output.depth[j]; + } + } + switch (data->format) { case PIPE_FORMAT_Z16_UNORM: { float scale = 65535.0; - for (j = 0; j < QUAD_SIZE; j++) { - data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + data->qzzzz[j] = (unsigned) (dvals[j] * scale); } } break; @@ -141,28 +186,39 @@ convert_quad_depth( struct depth_data *data, { double scale = (double) (uint) ~0UL; - for (j = 0; j < QUAD_SIZE; j++) { - data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + data->qzzzz[j] = (unsigned) (dvals[j] * scale); } } break; - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: { float scale = (float) ((1 << 24) - 1); - for (j = 0; j < QUAD_SIZE; j++) { - data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + data->qzzzz[j] = (unsigned) (dvals[j] * scale); } } break; - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_UINT_Z24_UNORM: { float scale = (float) ((1 << 24) - 1); - for (j = 0; j < QUAD_SIZE; j++) { - data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + data->qzzzz[j] = (unsigned) (dvals[j] * scale); + } + } + break; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + { + union fi fui; + + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + fui.f = dvals[j]; + data->qzzzz[j] = fui.ui; } } break; @@ -172,7 +228,40 @@ convert_quad_depth( struct depth_data *data, } +/** + * Compute the depth_data::shader_stencil_refs[] values from the float + * fragment stencil values. + */ +static void +convert_quad_stencil( struct depth_data *data, + const struct quad_header *quad ) +{ + unsigned j; + data->use_shader_stencil_refs = TRUE; + /* Copy quads stencil values + */ + switch (data->format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + case PIPE_FORMAT_S8_UINT: + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j])); + } + break; + default: + assert(0); + } +} + + +/** + * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer. + */ static void write_depth_stencil_values( struct depth_data *data, struct quad_header *quad ) @@ -183,41 +272,62 @@ write_depth_stencil_values( struct depth_data *data, /* put updated Z values back into cached tile */ switch (data->format) { case PIPE_FORMAT_Z16_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { int x = quad->input.x0 % TILE_SIZE + (j & 1); int y = quad->input.y0 % TILE_SIZE + (j >> 1); tile->data.depth16[y][x] = (ushort) data->bzzzz[j]; } break; - case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z32_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { int x = quad->input.x0 % TILE_SIZE + (j & 1); int y = quad->input.y0 % TILE_SIZE + (j >> 1); tile->data.depth32[y][x] = data->bzzzz[j]; } break; - case PIPE_FORMAT_S8Z24_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + for (j = 0; j < TGSI_QUAD_SIZE; j++) { int x = quad->input.x0 % TILE_SIZE + (j & 1); int y = quad->input.y0 % TILE_SIZE + (j >> 1); tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j]; } break; - case PIPE_FORMAT_Z24S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + for (j = 0; j < TGSI_QUAD_SIZE; j++) { int x = quad->input.x0 % TILE_SIZE + (j & 1); int y = quad->input.y0 % TILE_SIZE + (j >> 1); tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j]; } break; - case PIPE_FORMAT_Z24X8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { + case PIPE_FORMAT_X8Z24_UNORM: + for (j = 0; j < TGSI_QUAD_SIZE; j++) { int x = quad->input.x0 % TILE_SIZE + (j & 1); int y = quad->input.y0 % TILE_SIZE + (j >> 1); tile->data.depth32[y][x] = data->bzzzz[j] << 8; } break; + case PIPE_FORMAT_S8_UINT: + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.stencil8[y][x] = data->stencilVals[j]; + } + break; + case PIPE_FORMAT_Z32_FLOAT: + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = data->bzzzz[j]; + } + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth64[y][x] = (uint64_t)data->bzzzz[j] | ((uint64_t)data->stencilVals[j] << 32); + } + break; default: assert(0); } @@ -225,7 +335,6 @@ write_depth_stencil_values( struct depth_data *data, - /** Only 8-bit stencil supported */ #define STENCIL_MAX 0xff @@ -248,51 +357,57 @@ do_stencil_test(struct depth_data *data, { unsigned passMask = 0x0; unsigned j; + ubyte refs[TGSI_QUAD_SIZE]; - ref &= valMask; + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + if (data->use_shader_stencil_refs) + refs[j] = data->shader_stencil_refs[j] & valMask; + else + refs[j] = ref & valMask; + } switch (func) { case PIPE_FUNC_NEVER: /* passMask = 0x0 */ break; case PIPE_FUNC_LESS: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref < (data->stencilVals[j] & valMask)) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + if (refs[j] < (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_EQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref == (data->stencilVals[j] & valMask)) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + if (refs[j] == (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_LEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref <= (data->stencilVals[j] & valMask)) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + if (refs[j] <= (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_GREATER: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref > (data->stencilVals[j] & valMask)) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + if (refs[j] > (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_NOTEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref != (data->stencilVals[j] & valMask)) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + if (refs[j] != (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_GEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref >= (data->stencilVals[j] & valMask)) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + if (refs[j] >= (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } @@ -323,10 +438,15 @@ apply_stencil_op(struct depth_data *data, unsigned mask, unsigned op, ubyte ref, ubyte wrtMask) { unsigned j; - ubyte newstencil[QUAD_SIZE]; + ubyte newstencil[TGSI_QUAD_SIZE]; + ubyte refs[TGSI_QUAD_SIZE]; - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { newstencil[j] = data->stencilVals[j]; + if (data->use_shader_stencil_refs) + refs[j] = data->shader_stencil_refs[j]; + else + refs[j] = ref; } switch (op) { @@ -334,21 +454,21 @@ apply_stencil_op(struct depth_data *data, /* no-op */ break; case PIPE_STENCIL_OP_ZERO: - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { if (mask & (1 << j)) { newstencil[j] = 0; } } break; case PIPE_STENCIL_OP_REPLACE: - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { if (mask & (1 << j)) { - newstencil[j] = ref; + newstencil[j] = refs[j]; } } break; case PIPE_STENCIL_OP_INCR: - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { if (mask & (1 << j)) { if (data->stencilVals[j] < STENCIL_MAX) { newstencil[j] = data->stencilVals[j] + 1; @@ -357,7 +477,7 @@ apply_stencil_op(struct depth_data *data, } break; case PIPE_STENCIL_OP_DECR: - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { if (mask & (1 << j)) { if (data->stencilVals[j] > 0) { newstencil[j] = data->stencilVals[j] - 1; @@ -366,21 +486,21 @@ apply_stencil_op(struct depth_data *data, } break; case PIPE_STENCIL_OP_INCR_WRAP: - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { if (mask & (1 << j)) { newstencil[j] = data->stencilVals[j] + 1; } } break; case PIPE_STENCIL_OP_DECR_WRAP: - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { if (mask & (1 << j)) { newstencil[j] = data->stencilVals[j] - 1; } } break; case PIPE_STENCIL_OP_INVERT: - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { if (mask & (1 << j)) { newstencil[j] = ~data->stencilVals[j]; } @@ -395,12 +515,12 @@ apply_stencil_op(struct depth_data *data, */ if (wrtMask != STENCIL_MAX) { /* apply bit-wise stencil buffer writemask */ - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]); } } else { - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { data->stencilVals[j] = newstencil[j]; } } @@ -408,12 +528,11 @@ apply_stencil_op(struct depth_data *data, -/* +/** * To increase efficiency, we should probably have multiple versions * of this function that are specifically for Z16, Z32 and FP Z buffers. * Try to effectively do that with codegen... */ - static boolean depth_test_quad(struct quad_stage *qs, struct depth_data *data, @@ -431,37 +550,37 @@ depth_test_quad(struct quad_stage *qs, /* Note this is pretty much a single sse or cell instruction. * Like this: quad->mask &= (quad->outputs.depth < zzzz); */ - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { if (data->qzzzz[j] < data->bzzzz[j]) zmask |= 1 << j; } break; case PIPE_FUNC_EQUAL: - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { if (data->qzzzz[j] == data->bzzzz[j]) zmask |= 1 << j; } break; case PIPE_FUNC_LEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { if (data->qzzzz[j] <= data->bzzzz[j]) zmask |= (1 << j); } break; case PIPE_FUNC_GREATER: - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { if (data->qzzzz[j] > data->bzzzz[j]) zmask |= (1 << j); } break; case PIPE_FUNC_NOTEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { if (data->qzzzz[j] != data->bzzzz[j]) zmask |= (1 << j); } break; case PIPE_FUNC_GEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { if (data->qzzzz[j] >= data->bzzzz[j]) zmask |= (1 << j); } @@ -482,7 +601,7 @@ depth_test_quad(struct quad_stage *qs, * data due to stencil changes. */ if (softpipe->depth_stencil->depth.writemask) { - for (j = 0; j < QUAD_SIZE; j++) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { if (quad->inout.mask & (1 << j)) { data->bzzzz[j] = data->qzzzz[j]; } @@ -513,17 +632,19 @@ depth_stencil_test_quad(struct quad_stage *qs, face = 0; } + /* 0 = front-face, 1 = back-face */ + assert(face == 0 || face == 1); + /* choose front or back face function, operator, etc */ /* XXX we could do these initializations once per primitive */ func = softpipe->depth_stencil->stencil[face].func; failOp = softpipe->depth_stencil->stencil[face].fail_op; zFailOp = softpipe->depth_stencil->stencil[face].zfail_op; zPassOp = softpipe->depth_stencil->stencil[face].zpass_op; - ref = softpipe->depth_stencil->stencil[face].ref_value; + ref = softpipe->stencil_ref.ref_value[face]; wrtMask = softpipe->depth_stencil->stencil[face].writemask; valMask = softpipe->depth_stencil->stencil[face].valuemask; - /* do the stencil test first */ { unsigned passMask, failMask; @@ -563,7 +684,7 @@ depth_stencil_test_quad(struct quad_stage *qs, #define ALPHATEST( FUNC, COMP ) \ - static int \ + static unsigned \ alpha_test_quads_##FUNC( struct quad_stage *qs, \ struct quad_header *quads[], \ unsigned nr ) \ @@ -600,9 +721,9 @@ ALPHATEST( NOTEQUAL, != ) ALPHATEST( GEQUAL, >= ) -/* XXX: Incorporate into shader using KILP. +/* XXX: Incorporate into shader using KILL_IF. */ -static int +static unsigned alpha_test_quads(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) @@ -612,7 +733,6 @@ alpha_test_quads(struct quad_stage *qs, return alpha_test_quads_LESS( qs, quads, nr ); case PIPE_FUNC_EQUAL: return alpha_test_quads_EQUAL( qs, quads, nr ); - break; case PIPE_FUNC_LEQUAL: return alpha_test_quads_LEQUAL( qs, quads, nr ); case PIPE_FUNC_GREATER: @@ -629,6 +749,7 @@ alpha_test_quads(struct quad_stage *qs, } } + static unsigned mask_count[16] = { 0, /* 0x0 */ @@ -651,44 +772,43 @@ static unsigned mask_count[16] = -/** helper to get number of Z buffer bits */ -static unsigned -get_depth_bits(struct quad_stage *qs) -{ - struct pipe_surface *zsurf = qs->softpipe->framebuffer.zsbuf; - if (zsurf) - return util_format_get_component_bits(zsurf->format, - UTIL_FORMAT_COLORSPACE_ZS, 0); - else - return 0; -} - - - +/** + * General depth/stencil test function. Used when there's no fast-path. + */ static void depth_test_quads_fallback(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { unsigned i, pass = 0; - const struct sp_fragment_shader *fs = qs->softpipe->fs; - boolean interp_depth = !fs->info.writes_z; + const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info; + boolean interp_depth = !fsInfo->writes_z; + boolean shader_stencil_ref = fsInfo->writes_stencil; struct depth_data data; + unsigned vp_idx = quads[0]->input.viewport_index; + data.use_shader_stencil_refs = FALSE; if (qs->softpipe->depth_stencil->alpha.enabled) { nr = alpha_test_quads(qs, quads, nr); } - if (get_depth_bits(qs) > 0 && - (qs->softpipe->depth_stencil->depth.enabled || - qs->softpipe->depth_stencil->stencil[0].enabled)) { + if (qs->softpipe->framebuffer.zsbuf && + (qs->softpipe->depth_stencil->depth.enabled || + qs->softpipe->depth_stencil->stencil[0].enabled)) { + float near_val, far_val; data.ps = qs->softpipe->framebuffer.zsbuf; data.format = data.ps->format; data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, quads[0]->input.x0, - quads[0]->input.y0); + quads[0]->input.y0, quads[0]->input.layer); + data.clamp = !qs->softpipe->rasterizer->depth_clip; + + near_val = qs->softpipe->viewports[vp_idx].translate[2] - qs->softpipe->viewports[vp_idx].scale[2]; + far_val = near_val + (qs->softpipe->viewports[vp_idx].scale[2] * 2.0); + data.minval = MIN2(near_val, far_val); + data.maxval = MAX2(near_val, far_val); for (i = 0; i < nr; i++) { get_depth_stencil_values(&data, quads[i]); @@ -701,6 +821,9 @@ depth_test_quads_fallback(struct quad_stage *qs, } if (qs->softpipe->depth_stencil->stencil[0].enabled) { + if (shader_stencil_ref) + convert_quad_stencil(&data, quads[i]); + depth_stencil_test_quad(qs, &data, quads[i]); write_depth_stencil_values(&data, quads[i]); } @@ -712,7 +835,6 @@ depth_test_quads_fallback(struct quad_stage *qs, write_depth_stencil_values(&data, quads[i]); } - quads[pass++] = quads[i]; } @@ -728,154 +850,38 @@ depth_test_quads_fallback(struct quad_stage *qs, qs->next->run(qs->next, quads, nr); } -/* XXX: this function assumes setup function actually emits linear - * spans of quads. It seems a lot more natural to do (early) - * depth-testing on spans rather than quads. - */ -static void -depth_interp_z16_less_write(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) -{ - unsigned i, pass = 0; - const unsigned ix = quads[0]->input.x0; - const unsigned iy = quads[0]->input.y0; - const float fx = (float) ix; - const float fy = (float) iy; - const float dzdx = quads[0]->posCoef->dadx[2]; - const float dzdy = quads[0]->posCoef->dady[2]; - const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy; - struct softpipe_cached_tile *tile; - ushort (*depth16)[TILE_SIZE]; - ushort idepth[4], depth_step; - const float scale = 65535.0; - - idepth[0] = (ushort)((z0) * scale); - idepth[1] = (ushort)((z0 + dzdx) * scale); - idepth[2] = (ushort)((z0 + dzdy) * scale); - idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale); - - depth_step = (ushort)(dzdx * 2 * scale); - - tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy); - - depth16 = (ushort (*)[TILE_SIZE]) - &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE]; - - for (i = 0; i < nr; i++) { - unsigned outmask = quads[i]->inout.mask; - unsigned mask = 0; - - if ((outmask & 1) && idepth[0] < depth16[0][0]) { - depth16[0][0] = idepth[0]; - mask |= (1 << 0); - } - - if ((outmask & 2) && idepth[1] < depth16[0][1]) { - depth16[0][1] = idepth[1]; - mask |= (1 << 1); - } - - if ((outmask & 4) && idepth[2] < depth16[1][0]) { - depth16[1][0] = idepth[2]; - mask |= (1 << 2); - } - - if ((outmask & 8) && idepth[3] < depth16[1][1]) { - depth16[1][1] = idepth[3]; - mask |= (1 << 3); - } - - idepth[0] += depth_step; - idepth[1] += depth_step; - idepth[2] += depth_step; - idepth[3] += depth_step; - - depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2]; - - quads[i]->inout.mask = mask; - if (quads[i]->inout.mask) - quads[pass++] = quads[i]; - } - - if (pass) - qs->next->run(qs->next, quads, pass); - -} - -static void -depth_interp_z16_lequal_write(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) -{ - unsigned i, pass = 0; - const unsigned ix = quads[0]->input.x0; - const unsigned iy = quads[0]->input.y0; - const float fx = (float) ix; - const float fy = (float) iy; - const float dzdx = quads[0]->posCoef->dadx[2]; - const float dzdy = quads[0]->posCoef->dady[2]; - const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy; - struct softpipe_cached_tile *tile; - ushort (*depth16)[TILE_SIZE]; - ushort idepth[4], depth_step; - const float scale = 65535.0; - - idepth[0] = (ushort)((z0) * scale); - idepth[1] = (ushort)((z0 + dzdx) * scale); - idepth[2] = (ushort)((z0 + dzdy) * scale); - idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale); - - depth_step = (ushort)(dzdx * 2 * scale); - - tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy); - - depth16 = (ushort (*)[TILE_SIZE]) - &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE]; - - for (i = 0; i < nr; i++) { - unsigned outmask = quads[i]->inout.mask; - unsigned mask = 0; - - if ((outmask & 1) && idepth[0] <= depth16[0][0]) { - depth16[0][0] = idepth[0]; - mask |= (1 << 0); - } - - if ((outmask & 2) && idepth[1] <= depth16[0][1]) { - depth16[0][1] = idepth[1]; - mask |= (1 << 1); - } - - if ((outmask & 4) && idepth[2] <= depth16[1][0]) { - depth16[1][0] = idepth[2]; - mask |= (1 << 2); - } - - if ((outmask & 8) && idepth[3] <= depth16[1][1]) { - depth16[1][1] = idepth[3]; - mask |= (1 << 3); - } +/** + * Special-case Z testing for 16-bit Zbuffer and Z buffer writes enabled. + */ - idepth[0] += depth_step; - idepth[1] += depth_step; - idepth[2] += depth_step; - idepth[3] += depth_step; +#define NAME depth_interp_z16_less_write +#define OPERATOR < +#include "sp_quad_depth_test_tmp.h" - depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2]; +#define NAME depth_interp_z16_equal_write +#define OPERATOR == +#include "sp_quad_depth_test_tmp.h" - quads[i]->inout.mask = mask; - if (quads[i]->inout.mask) - quads[pass++] = quads[i]; - } +#define NAME depth_interp_z16_lequal_write +#define OPERATOR <= +#include "sp_quad_depth_test_tmp.h" - if (pass) - qs->next->run(qs->next, quads, pass); +#define NAME depth_interp_z16_greater_write +#define OPERATOR > +#include "sp_quad_depth_test_tmp.h" -} +#define NAME depth_interp_z16_notequal_write +#define OPERATOR != +#include "sp_quad_depth_test_tmp.h" +#define NAME depth_interp_z16_gequal_write +#define OPERATOR >= +#include "sp_quad_depth_test_tmp.h" +#define NAME depth_interp_z16_always_write +#define ALWAYS 1 +#include "sp_quad_depth_test_tmp.h" @@ -894,12 +900,13 @@ choose_depth_test(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { - boolean interp_depth = !qs->softpipe->fs->info.writes_z; + const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info; + + boolean interp_depth = !fsInfo->writes_z; boolean alpha = qs->softpipe->depth_stencil->alpha.enabled; - boolean depth = (get_depth_bits(qs) > 0 && - qs->softpipe->depth_stencil->depth.enabled); + boolean depth = qs->softpipe->depth_stencil->depth.enabled; unsigned depthfunc = qs->softpipe->depth_stencil->depth.func; @@ -909,8 +916,19 @@ choose_depth_test(struct quad_stage *qs, boolean occlusion = qs->softpipe->active_query_count; + boolean clipped = !qs->softpipe->rasterizer->depth_clip; + + if(!qs->softpipe->framebuffer.zsbuf) + depth = depthwrite = stencil = FALSE; + + /* default */ + qs->run = depth_test_quads_fallback; + + /* look for special cases */ if (!alpha && !depth && + !occlusion && + !clipped && !stencil) { qs->run = depth_noop; } @@ -919,59 +937,65 @@ choose_depth_test(struct quad_stage *qs, depth && depthwrite && !occlusion && + !clipped && !stencil) { - switch (depthfunc) { - case PIPE_FUNC_LESS: - switch (qs->softpipe->framebuffer.zsbuf->format) { - case PIPE_FORMAT_Z16_UNORM: + if (qs->softpipe->framebuffer.zsbuf->format == PIPE_FORMAT_Z16_UNORM) { + switch (depthfunc) { + case PIPE_FUNC_NEVER: + qs->run = depth_test_quads_fallback; + break; + case PIPE_FUNC_LESS: qs->run = depth_interp_z16_less_write; break; - default: - qs->run = depth_test_quads_fallback; + case PIPE_FUNC_EQUAL: + qs->run = depth_interp_z16_equal_write; break; - } - break; - case PIPE_FUNC_LEQUAL: - switch (qs->softpipe->framebuffer.zsbuf->format) { - case PIPE_FORMAT_Z16_UNORM: + case PIPE_FUNC_LEQUAL: qs->run = depth_interp_z16_lequal_write; break; + case PIPE_FUNC_GREATER: + qs->run = depth_interp_z16_greater_write; + break; + case PIPE_FUNC_NOTEQUAL: + qs->run = depth_interp_z16_notequal_write; + break; + case PIPE_FUNC_GEQUAL: + qs->run = depth_interp_z16_gequal_write; + break; + case PIPE_FUNC_ALWAYS: + qs->run = depth_interp_z16_always_write; + break; default: qs->run = depth_test_quads_fallback; break; } - break; - default: - qs->run = depth_test_quads_fallback; } } - else { - qs->run = depth_test_quads_fallback; - } - + /* next quad/fragment stage */ qs->run( qs, quads, nr ); } - - -static void depth_test_begin(struct quad_stage *qs) +static void +depth_test_begin(struct quad_stage *qs) { qs->run = choose_depth_test; qs->next->begin(qs->next); } -static void depth_test_destroy(struct quad_stage *qs) +static void +depth_test_destroy(struct quad_stage *qs) { FREE( qs ); } -struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ) +struct quad_stage * +sp_quad_depth_test_stage(struct softpipe_context *softpipe) { struct quad_stage *stage = CALLOC_STRUCT(quad_stage);