From: Brian Paul Date: Tue, 23 Mar 2010 15:00:19 +0000 (-0600) Subject: softpipe: add special cases for all Z compare modes for 16-bit Z buffer X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=fc1ba0423ac31cff14346dbc888255fb5fd7d1b2;p=mesa.git softpipe: add special cases for all Z compare modes for 16-bit Z buffer We had fast paths for PIPE_FUNC_LESS and LEQUAL before. To satisfy OpenGL invariance rules, all depth compare modes should produce the same fragment Z values. Fixes progs/demos/singlebuffer.c --- diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 4815a0d49f1..5854cee1be0 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -730,169 +730,36 @@ depth_test_quads_fallback(struct quad_stage *qs, /** - * Special-case Z testing for 16-bit Zbuffer, PIPE_FUNC_LESS and - * Z buffer writes enabled. - * - * NOTE: there's no guarantee that the quads are sequentially side by - * side. The fragment shader may have culled some quads, etc. Sliver - * triangles may generate non-sequential quads. + * Special-case Z testing for 16-bit Zbuffer and Z buffer writes enabled. 
*/ -static void -depth_interp_z16_less_write(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) -{ - unsigned i, pass = 0; - const unsigned ix = quads[0]->input.x0; - const unsigned iy = quads[0]->input.y0; - const float fx = (float) ix; - const float fy = (float) iy; - const float dzdx = quads[0]->posCoef->dadx[2]; - const float dzdy = quads[0]->posCoef->dady[2]; - const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy; - struct softpipe_cached_tile *tile; - ushort (*depth16)[TILE_SIZE]; - ushort init_idepth[4], idepth[4], depth_step; - const float scale = 65535.0; - - /* compute scaled depth of the four pixels in first quad */ - init_idepth[0] = (ushort)((z0) * scale); - init_idepth[1] = (ushort)((z0 + dzdx) * scale); - init_idepth[2] = (ushort)((z0 + dzdy) * scale); - init_idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale); - - depth_step = (ushort)(dzdx * scale); - - tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy); - - for (i = 0; i < nr; i++) { - const unsigned outmask = quads[i]->inout.mask; - const int dx = quads[i]->input.x0 - ix; - unsigned mask = 0; - - /* compute depth for this quad */ - idepth[0] = init_idepth[0] + dx * depth_step; - idepth[1] = init_idepth[1] + dx * depth_step; - idepth[2] = init_idepth[2] + dx * depth_step; - idepth[3] = init_idepth[3] + dx * depth_step; - - depth16 = (ushort (*)[TILE_SIZE]) - &tile->data.depth16[iy % TILE_SIZE][(ix + dx)% TILE_SIZE]; - - if ((outmask & 1) && idepth[0] < depth16[0][0]) { - depth16[0][0] = idepth[0]; - mask |= (1 << 0); - } - - if ((outmask & 2) && idepth[1] < depth16[0][1]) { - depth16[0][1] = idepth[1]; - mask |= (1 << 1); - } - - if ((outmask & 4) && idepth[2] < depth16[1][0]) { - depth16[1][0] = idepth[2]; - mask |= (1 << 2); - } - - if ((outmask & 8) && idepth[3] < depth16[1][1]) { - depth16[1][1] = idepth[3]; - mask |= (1 << 3); - } - - quads[i]->inout.mask = mask; - if (quads[i]->inout.mask) - quads[pass++] = quads[i]; - } - - if (pass) - 
qs->next->run(qs->next, quads, pass); - -} - - -/** - * Special-case Z testing for 16-bit Zbuffer, PIPE_FUNC_LEQUAL and - * Z buffer writes enabled. - * - * NOTE: there's no guarantee that the quads are sequentially side by - * side. The fragment shader may have culled some quads, etc. Sliver - * triangles may generate non-sequential quads. - */ -static void -depth_interp_z16_lequal_write(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) -{ - unsigned i, pass = 0; - const unsigned ix = quads[0]->input.x0; - const unsigned iy = quads[0]->input.y0; - const float fx = (float) ix; - const float fy = (float) iy; - const float dzdx = quads[0]->posCoef->dadx[2]; - const float dzdy = quads[0]->posCoef->dady[2]; - const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy; - struct softpipe_cached_tile *tile; - ushort (*depth16)[TILE_SIZE]; - ushort init_idepth[4], idepth[4], depth_step; - const float scale = 65535.0; - - /* compute scaled depth of the four pixels in first quad */ - init_idepth[0] = (ushort)((z0) * scale); - init_idepth[1] = (ushort)((z0 + dzdx) * scale); - init_idepth[2] = (ushort)((z0 + dzdy) * scale); - init_idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale); - - depth_step = (ushort)(dzdx * scale); - - tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy); - - for (i = 0; i < nr; i++) { - const unsigned outmask = quads[i]->inout.mask; - const int dx = quads[i]->input.x0 - ix; - unsigned mask = 0; - - /* compute depth for this quad */ - idepth[0] = init_idepth[0] + dx * depth_step; - idepth[1] = init_idepth[1] + dx * depth_step; - idepth[2] = init_idepth[2] + dx * depth_step; - idepth[3] = init_idepth[3] + dx * depth_step; - - depth16 = (ushort (*)[TILE_SIZE]) - &tile->data.depth16[iy % TILE_SIZE][(ix + dx)% TILE_SIZE]; - if ((outmask & 1) && idepth[0] <= depth16[0][0]) { - depth16[0][0] = idepth[0]; - mask |= (1 << 0); - } - - if ((outmask & 2) && idepth[1] <= depth16[0][1]) { - depth16[0][1] = idepth[1]; - mask |= (1 << 
1); - } +#define NAME depth_interp_z16_less_write +#define OPERATOR < +#include "sp_quad_depth_test_tmp.h" - if ((outmask & 4) && idepth[2] <= depth16[1][0]) { - depth16[1][0] = idepth[2]; - mask |= (1 << 2); - } +#define NAME depth_interp_z16_equal_write +#define OPERATOR == +#include "sp_quad_depth_test_tmp.h" - if ((outmask & 8) && idepth[3] <= depth16[1][1]) { - depth16[1][1] = idepth[3]; - mask |= (1 << 3); - } +#define NAME depth_interp_z16_lequal_write +#define OPERATOR <= +#include "sp_quad_depth_test_tmp.h" - depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2]; +#define NAME depth_interp_z16_greater_write +#define OPERATOR > +#include "sp_quad_depth_test_tmp.h" - quads[i]->inout.mask = mask; - if (quads[i]->inout.mask) - quads[pass++] = quads[i]; - } - - if (pass) - qs->next->run(qs->next, quads, pass); - -} +#define NAME depth_interp_z16_notequal_write +#define OPERATOR != +#include "sp_quad_depth_test_tmp.h" +#define NAME depth_interp_z16_gequal_write +#define OPERATOR >= +#include "sp_quad_depth_test_tmp.h" +#define NAME depth_interp_z16_always_write +#define ALWAYS 1 +#include "sp_quad_depth_test_tmp.h" @@ -926,6 +793,10 @@ choose_depth_test(struct quad_stage *qs, boolean occlusion = qs->softpipe->active_query_count; + /* default */ + qs->run = depth_test_quads_fallback; + + /* look for special cases */ if (!alpha && !depth && !stencil) { @@ -938,36 +809,40 @@ choose_depth_test(struct quad_stage *qs, !occlusion && !stencil) { - switch (depthfunc) { - case PIPE_FUNC_LESS: - switch (qs->softpipe->framebuffer.zsbuf->format) { - case PIPE_FORMAT_Z16_UNORM: + if (qs->softpipe->framebuffer.zsbuf->format == PIPE_FORMAT_Z16_UNORM) { + switch (depthfunc) { + case PIPE_FUNC_NEVER: + qs->run = depth_test_quads_fallback; + break; + case PIPE_FUNC_LESS: qs->run = depth_interp_z16_less_write; break; - default: - qs->run = depth_test_quads_fallback; + case PIPE_FUNC_EQUAL: + qs->run = depth_interp_z16_equal_write; break; - } - break; - case PIPE_FUNC_LEQUAL: - switch 
(qs->softpipe->framebuffer.zsbuf->format) { - case PIPE_FORMAT_Z16_UNORM: + case PIPE_FUNC_LEQUAL: qs->run = depth_interp_z16_lequal_write; break; + case PIPE_FUNC_GREATER: + qs->run = depth_interp_z16_greater_write; + break; + case PIPE_FUNC_NOTEQUAL: + qs->run = depth_interp_z16_notequal_write; + break; + case PIPE_FUNC_GEQUAL: + qs->run = depth_interp_z16_gequal_write; + break; + case PIPE_FUNC_ALWAYS: + qs->run = depth_interp_z16_always_write; + break; default: qs->run = depth_test_quads_fallback; break; } - break; - default: - qs->run = depth_test_quads_fallback; } } - else { - qs->run = depth_test_quads_fallback; - } - + /* next quad/fragment stage */ qs->run( qs, quads, nr ); } diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test_tmp.h b/src/gallium/drivers/softpipe/sp_quad_depth_test_tmp.h new file mode 100644 index 00000000000..25af415c256 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test_tmp.h @@ -0,0 +1,147 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/*
+ * Template for generating Z test functions
+ * Only PIPE_FORMAT_Z16_UNORM supported at this time.
+ *
+ * Before including this file the includer must define:
+ *   NAME     - the name given to the generated static function
+ * and at least one of:
+ *   OPERATOR - the comparison token placed between the fragment's 16-bit
+ *              depth and the depth stored in the tile (e.g. <, <=, ==)
+ *   ALWAYS   - no comparison at all; every fragment enabled in the quad's
+ *              mask is written
+ * A missing definition is rejected with #error below.  NAME, OPERATOR and
+ * ALWAYS are all #undef'd at the end of this file, so it can be included
+ * again with different definitions.
+ */
+
+
+#ifndef NAME
+#error "NAME is not defined!"
+#endif
+
+#if !defined(OPERATOR) && !defined(ALWAYS)
+#error "neither OPERATOR nor ALWAYS is defined!"
+#endif
+
+
+/*
+ * Depth-test a batch of quads against the 16-bit depth values of a cached
+ * tile and store the new depth for every fragment that passes.
+ *
+ * The depth plane coefficients (posCoef a0/dadx/dady, component 2), the
+ * y position and the cached tile are all taken from quads[0]; per quad,
+ * only input.x0 and inout.mask are read.
+ * NOTE(review): this presumably relies on every quad in the batch coming
+ * from the same primitive, the same quad row and the same tile - confirm
+ * against the caller.
+ *
+ * quads[0] is dereferenced before the loop, so nr must be at least 1.
+ *
+ * Each quad's inout.mask is replaced by the mask of the fragments that
+ * passed.  Quads left with a non-zero mask are packed at the front of
+ * quads[] and, if there are any, handed to qs->next->run().
+ *
+ * NOTE: there's no guarantee that the quads are sequentially side by
+ * side. The fragment shader may have culled some quads, etc. Sliver
+ * triangles may generate non-sequential quads.
+ */
+static void
+NAME(struct quad_stage *qs,
+     struct quad_header *quads[],
+     unsigned nr)
+{
+   unsigned i, pass = 0;   /* pass = number of quads kept so far */
+   const unsigned ix = quads[0]->input.x0;
+   const unsigned iy = quads[0]->input.y0;
+   const float fx = (float) ix;
+   const float fy = (float) iy;
+   const float dzdx = quads[0]->posCoef->dadx[2];
+   const float dzdy = quads[0]->posCoef->dady[2];
+   const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy;   /* plane value at (ix, iy) */
+   struct softpipe_cached_tile *tile;
+   ushort (*depth16)[TILE_SIZE];
+   ushort init_idepth[4], idepth[4], depth_step;
+   const float scale = 65535.0;   /* largest value a 16-bit depth can hold */
+
+   /* compute scaled depth of the four pixels in first quad
+    *
+    * Index 0 is the pixel at (ix, iy), 1 is one step in x, 2 is one step
+    * in y and 3 is one step in both.  depth_step (below) is the scaled
+    * per-pixel change in x, later multiplied by each quad's x offset.
+    *
+    * NOTE(review): converting a float to ushort is undefined in C when the
+    * truncated value does not fit.  That would be the case for a negative
+    * dzdx in the depth_step cast, or for a plane value outside [0, 1] in
+    * the init_idepth casts - verify whether such values can reach here.
+    * NOTE(review): depth_step also drops the fractional part of
+    * dzdx * scale, so init_idepth[] + dx * depth_step can drift away from
+    * the directly evaluated plane as dx grows.
+    */
+   init_idepth[0] = (ushort)((z0) * scale);
+   init_idepth[1] = (ushort)((z0 + dzdx) * scale);
+   init_idepth[2] = (ushort)((z0 + dzdy) * scale);
+   init_idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale);
+
+   depth_step = (ushort)(dzdx * scale);
+
+   tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy);   /* looked up once, from the first quad's position */
+
+   for (i = 0; i < nr; i++) {
+      const unsigned outmask = quads[i]->inout.mask;   /* fragments still enabled on entry */
+      const int dx = quads[i]->input.x0 - ix;          /* x offset from the first quad */
+      unsigned mask = 0;                               /* fragments that pass the test */
+
+      /* compute depth for this quad
+       *
+       * The first quad's four depths are stepped by dx * depth_step; the
+       * results are stored back into ushort.
+       */
+      idepth[0] = init_idepth[0] + dx * depth_step;
+      idepth[1] = init_idepth[1] + dx * depth_step;
+      idepth[2] = init_idepth[2] + dx * depth_step;
+      idepth[3] = init_idepth[3] + dx * depth_step;
+
+      depth16 = (ushort (*)[TILE_SIZE])
+         &tile->data.depth16[iy % TILE_SIZE][(ix + dx)% TILE_SIZE];   /* depth16[r][c] now addresses this quad's 2x2 pixels */
+
+#ifdef ALWAYS   /* no comparison: write every fragment enabled in outmask */
+      if (outmask & 1) {
+         depth16[0][0] = idepth[0];
+         mask |= (1 << 0);
+      }
+
+      if (outmask & 2) {
+         depth16[0][1] = idepth[1];
+         mask |= (1 << 1);
+      }
+
+      if (outmask & 4) {
+         depth16[1][0] = idepth[2];
+         mask |= (1 << 2);
+      }
+
+      if (outmask & 8) {
+         depth16[1][1] = idepth[3];
+         mask |= (1 << 3);
+      }
+#else
+      /* Note: OPERATOR appears here:
+       *
+       * A fragment passes when it is enabled in outmask and
+       * (fragment depth OPERATOR stored depth) holds.  A passing fragment
+       * overwrites the stored depth and sets its bit in mask.
+       */
+      if ((outmask & 1) && (idepth[0] OPERATOR depth16[0][0])) {
+         depth16[0][0] = idepth[0];
+         mask |= (1 << 0);
+      }
+
+      if ((outmask & 2) && (idepth[1] OPERATOR depth16[0][1])) {
+         depth16[0][1] = idepth[1];
+         mask |= (1 << 1);
+      }
+
+      if ((outmask & 4) && (idepth[2] OPERATOR depth16[1][0])) {
+         depth16[1][0] = idepth[2];
+         mask |= (1 << 2);
+      }
+
+      if ((outmask & 8) && (idepth[3] OPERATOR depth16[1][1])) {
+         depth16[1][1] = idepth[3];
+         mask |= (1 << 3);
+      }
+#endif
+
+      depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2];   /* NOTE(review):
+       * this store looks dead - depth16 is reassigned at the top of every
+       * iteration and is not read after the loop. */
+
+      quads[i]->inout.mask = mask;
+      if (quads[i]->inout.mask)
+         quads[pass++] = quads[i];   /* keep surviving quads packed at the front */
+   }
+
+   if (pass)
+      qs->next->run(qs->next, quads, pass);   /* forward only the quads that still have live fragments */
+}
+
+
+#undef NAME
+#undef OPERATOR
+#undef ALWAYS