*/
#include "pipe/p_defines.h"
+#include "util/u_format.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_scan.h"
#include "sp_context.h"
#include "sp_quad.h"
-#include "sp_surface.h"
#include "sp_quad_pipe.h"
#include "sp_tile_cache.h"
#include "sp_state.h" /* for sp_fragment_shader */
* Do stencil (and depth) testing. Stenciling depends on the outcome of
* depth testing.
*/
-static boolean
+static void
depth_stencil_test_quad(struct quad_stage *qs,
struct depth_data *data,
struct quad_header *quad)
failOp = softpipe->depth_stencil->stencil[face].fail_op;
zFailOp = softpipe->depth_stencil->stencil[face].zfail_op;
zPassOp = softpipe->depth_stencil->stencil[face].zpass_op;
- ref = softpipe->depth_stencil->stencil[face].ref_value;
+ ref = softpipe->stencil_ref.ref_value[face];
wrtMask = softpipe->depth_stencil->stencil[face].writemask;
valMask = softpipe->depth_stencil->stencil[face].valuemask;
/* update stencil buffer values according to z pass/fail result */
if (zFailOp != PIPE_STENCIL_OP_KEEP) {
- const unsigned failMask = origMask & ~quad->inout.mask;
- apply_stencil_op(data, failMask, zFailOp, ref, wrtMask);
+ const unsigned zFailMask = origMask & ~quad->inout.mask;
+ apply_stencil_op(data, zFailMask, zFailOp, ref, wrtMask);
}
if (zPassOp != PIPE_STENCIL_OP_KEEP) {
- const unsigned passMask = origMask & quad->inout.mask;
- apply_stencil_op(data, passMask, zPassOp, ref, wrtMask);
+ const unsigned zPassMask = origMask & quad->inout.mask;
+ apply_stencil_op(data, zPassMask, zPassOp, ref, wrtMask);
}
}
else {
apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask);
}
}
-
- return quad->inout.mask != 0;
}
}
}
-static unsigned mask_count[0x8] =
+static unsigned mask_count[16] =
{
0, /* 0x0 */
1, /* 0x1 */
2, /* 0x5 */
2, /* 0x6 */
3, /* 0x7 */
+ 1, /* 0x8 */
+ 2, /* 0x9 */
+ 2, /* 0xa */
+ 3, /* 0xb */
+ 2, /* 0xc */
+ 3, /* 0xd */
+ 3, /* 0xe */
+ 4, /* 0xf */
};
+/**
+ * Return the number of depth (Z) bits provided by the format of the
+ * framebuffer's Z/stencil surface, or 0 when no such surface is bound.
+ *
+ * \param qs  quad stage whose softpipe context's framebuffer is inspected
+ */
+static unsigned
+get_depth_bits(struct quad_stage *qs)
+{
+   const struct pipe_surface *zs_surface = qs->softpipe->framebuffer.zsbuf;
+
+   /* No Z/stencil attachment at all: there are no depth bits to test. */
+   if (!zs_surface)
+      return 0;
+
+   return util_format_get_component_bits(zs_surface->format,
+                                         UTIL_FORMAT_COLORSPACE_ZS, 0);
+}
+
+
+
static void
-depth_test_quads(struct quad_stage *qs,
- struct quad_header *quads[],
- unsigned nr)
+depth_test_quads_fallback(struct quad_stage *qs,
+ struct quad_header *quads[],
+ unsigned nr)
{
unsigned i, pass = 0;
const struct sp_fragment_shader *fs = qs->softpipe->fs;
nr = alpha_test_quads(qs, quads, nr);
}
- if (qs->softpipe->framebuffer.zsbuf &&
+ if (get_depth_bits(qs) > 0 &&
(qs->softpipe->depth_stencil->depth.enabled ||
qs->softpipe->depth_stencil->stencil[0].enabled)) {
}
if (qs->softpipe->depth_stencil->stencil[0].enabled) {
- if (!depth_stencil_test_quad(qs, &data, quads[i]))
- continue;
+ depth_stencil_test_quad(qs, &data, quads[i]);
+ write_depth_stencil_values(&data, quads[i]);
}
else {
if (!depth_test_quad(qs, &data, quads[i]))
continue;
+
+ if (qs->softpipe->depth_stencil->depth.writemask)
+ write_depth_stencil_values(&data, quads[i]);
}
- if (qs->softpipe->depth_stencil->stencil[0].enabled ||
- qs->softpipe->depth_stencil->depth.writemask)
- write_depth_stencil_values(&data, quads[i]);
- qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask];
quads[pass++] = quads[i];
}
nr = pass;
}
+ if (qs->softpipe->active_query_count) {
+ for (i = 0; i < nr; i++)
+ qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask];
+ }
+
if (nr)
qs->next->run(qs->next, quads, nr);
}
+/**
+ * Special-case Z testing for 16-bit Zbuffer, PIPE_FUNC_LESS and
+ * Z buffer writes enabled.
+ *
+ * Depth is interpolated in 16-bit fixed point from the position
+ * coefficients of the first quad; the depth of every other quad is
+ * derived from its horizontal offset (dx) from that first quad.
+ * Fragments that fail the test are cleared from the quad's mask, passing
+ * fragments have their depth written to the tile, quads left with an
+ * empty mask are dropped, and the surviving quads are handed to the next
+ * stage.
+ *
+ * NOTE: there's no guarantee that the quads are sequentially side by
+ * side.  The fragment shader may have culled some quads, etc.  Sliver
+ * triangles may generate non-sequential quads.
+ *
+ * NOTE(review): only the x offset from the first quad is applied and the
+ * tile fetched for the first quad is reused for every quad; this
+ * presumably relies on the caller passing quads from one row within one
+ * tile -- confirm against the caller.
+ *
+ * \param qs     this quad stage
+ * \param quads  quads to test, compacted in place; quads[0] is read
+ *               unconditionally, so nr must be at least 1
+ * \param nr     number of entries in quads
+ */
+static void
+depth_interp_z16_less_write(struct quad_stage *qs,
+                            struct quad_header *quads[],
+                            unsigned nr)
+{
+   unsigned i, pass = 0;
+   const unsigned ix = quads[0]->input.x0;
+   const unsigned iy = quads[0]->input.y0;
+   const float fx = (float) ix;
+   const float fy = (float) iy;
+   const float dzdx = quads[0]->posCoef->dadx[2];
+   const float dzdy = quads[0]->posCoef->dady[2];
+   const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy;
+   struct softpipe_cached_tile *tile;
+   ushort (*depth16)[TILE_SIZE];
+   ushort init_idepth[4], idepth[4], depth_step;
+   const float scale = 65535.0;
+
+   /*
+    * Compute scaled depth of the four pixels in first quad.
+    *
+    * Convert via int: converting a float that is negative or above 65535
+    * directly to an unsigned 16-bit type is undefined behaviour, whereas
+    * float -> int -> ushort wraps modulo 2^16 for any value in int range.
+    */
+   init_idepth[0] = (ushort)(int)((z0) * scale);
+   init_idepth[1] = (ushort)(int)((z0 + dzdx) * scale);
+   init_idepth[2] = (ushort)(int)((z0 + dzdy) * scale);
+   init_idepth[3] = (ushort)(int)((z0 + dzdx + dzdy) * scale);
+
+   /*
+    * The slope is negative whenever depth decreases with x; the wrapped
+    * 16-bit value then acts as a modular (two's complement) step when it
+    * is multiplied by dx and truncated back to ushort below.
+    */
+   depth_step = (ushort)(int)(dzdx * scale);
+
+   tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy);
+
+   for (i = 0; i < nr; i++) {
+      const unsigned outmask = quads[i]->inout.mask;
+      const int dx = quads[i]->input.x0 - ix;
+      unsigned mask = 0;
+
+      /* compute depth for this quad */
+      idepth[0] = init_idepth[0] + dx * depth_step;
+      idepth[1] = init_idepth[1] + dx * depth_step;
+      idepth[2] = init_idepth[2] + dx * depth_step;
+      idepth[3] = init_idepth[3] + dx * depth_step;
+
+      /* view the tile as a 2x2 window whose top-left is this quad */
+      depth16 = (ushort (*)[TILE_SIZE])
+         &tile->data.depth16[iy % TILE_SIZE][(ix + dx) % TILE_SIZE];
+
+      /* bit 0: top-left fragment */
+      if ((outmask & 1) && idepth[0] < depth16[0][0]) {
+         depth16[0][0] = idepth[0];
+         mask |= (1 << 0);
+      }
+
+      /* bit 1: top-right fragment */
+      if ((outmask & 2) && idepth[1] < depth16[0][1]) {
+         depth16[0][1] = idepth[1];
+         mask |= (1 << 1);
+      }
+
+      /* bit 2: bottom-left fragment */
+      if ((outmask & 4) && idepth[2] < depth16[1][0]) {
+         depth16[1][0] = idepth[2];
+         mask |= (1 << 2);
+      }
+
+      /* bit 3: bottom-right fragment */
+      if ((outmask & 8) && idepth[3] < depth16[1][1]) {
+         depth16[1][1] = idepth[3];
+         mask |= (1 << 3);
+      }
+
+      /* keep only quads that still have at least one live fragment */
+      quads[i]->inout.mask = mask;
+      if (mask)
+         quads[pass++] = quads[i];
+   }
+
+   if (pass)
+      qs->next->run(qs->next, quads, pass);
+
+}
+
+
+/**
+ * Special-case Z testing for 16-bit Zbuffer, PIPE_FUNC_LEQUAL and
+ * Z buffer writes enabled.
+ *
+ * Depth is interpolated in 16-bit fixed point from the position
+ * coefficients of the first quad; the depth of every other quad is
+ * derived from its horizontal offset (dx) from that first quad.
+ * Fragments that fail the test are cleared from the quad's mask, passing
+ * fragments have their depth written to the tile, quads left with an
+ * empty mask are dropped, and the surviving quads are handed to the next
+ * stage.
+ *
+ * NOTE: there's no guarantee that the quads are sequentially side by
+ * side.  The fragment shader may have culled some quads, etc.  Sliver
+ * triangles may generate non-sequential quads.
+ *
+ * NOTE(review): only the x offset from the first quad is applied and the
+ * tile fetched for the first quad is reused for every quad; this
+ * presumably relies on the caller passing quads from one row within one
+ * tile -- confirm against the caller.
+ *
+ * \param qs     this quad stage
+ * \param quads  quads to test, compacted in place; quads[0] is read
+ *               unconditionally, so nr must be at least 1
+ * \param nr     number of entries in quads
+ */
+static void
+depth_interp_z16_lequal_write(struct quad_stage *qs,
+                              struct quad_header *quads[],
+                              unsigned nr)
+{
+   unsigned i, pass = 0;
+   const unsigned ix = quads[0]->input.x0;
+   const unsigned iy = quads[0]->input.y0;
+   const float fx = (float) ix;
+   const float fy = (float) iy;
+   const float dzdx = quads[0]->posCoef->dadx[2];
+   const float dzdy = quads[0]->posCoef->dady[2];
+   const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy;
+   struct softpipe_cached_tile *tile;
+   ushort (*depth16)[TILE_SIZE];
+   ushort init_idepth[4], idepth[4], depth_step;
+   const float scale = 65535.0;
+
+   /*
+    * Compute scaled depth of the four pixels in first quad.
+    *
+    * Convert via int: converting a float that is negative or above 65535
+    * directly to an unsigned 16-bit type is undefined behaviour, whereas
+    * float -> int -> ushort wraps modulo 2^16 for any value in int range.
+    */
+   init_idepth[0] = (ushort)(int)((z0) * scale);
+   init_idepth[1] = (ushort)(int)((z0 + dzdx) * scale);
+   init_idepth[2] = (ushort)(int)((z0 + dzdy) * scale);
+   init_idepth[3] = (ushort)(int)((z0 + dzdx + dzdy) * scale);
+
+   /*
+    * The slope is negative whenever depth decreases with x; the wrapped
+    * 16-bit value then acts as a modular (two's complement) step when it
+    * is multiplied by dx and truncated back to ushort below.
+    */
+   depth_step = (ushort)(int)(dzdx * scale);
+
+   tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy);
+
+   for (i = 0; i < nr; i++) {
+      const unsigned outmask = quads[i]->inout.mask;
+      const int dx = quads[i]->input.x0 - ix;
+      unsigned mask = 0;
+
+      /* compute depth for this quad */
+      idepth[0] = init_idepth[0] + dx * depth_step;
+      idepth[1] = init_idepth[1] + dx * depth_step;
+      idepth[2] = init_idepth[2] + dx * depth_step;
+      idepth[3] = init_idepth[3] + dx * depth_step;
+
+      /* view the tile as a 2x2 window whose top-left is this quad */
+      depth16 = (ushort (*)[TILE_SIZE])
+         &tile->data.depth16[iy % TILE_SIZE][(ix + dx) % TILE_SIZE];
+
+      /* bit 0: top-left fragment */
+      if ((outmask & 1) && idepth[0] <= depth16[0][0]) {
+         depth16[0][0] = idepth[0];
+         mask |= (1 << 0);
+      }
+
+      /* bit 1: top-right fragment */
+      if ((outmask & 2) && idepth[1] <= depth16[0][1]) {
+         depth16[0][1] = idepth[1];
+         mask |= (1 << 1);
+      }
+
+      /* bit 2: bottom-left fragment */
+      if ((outmask & 4) && idepth[2] <= depth16[1][0]) {
+         depth16[1][0] = idepth[2];
+         mask |= (1 << 2);
+      }
+
+      /* bit 3: bottom-right fragment */
+      if ((outmask & 8) && idepth[3] <= depth16[1][1]) {
+         depth16[1][1] = idepth[3];
+         mask |= (1 << 3);
+      }
+
+      /* keep only quads that still have at least one live fragment */
+      quads[i]->inout.mask = mask;
+      if (mask)
+         quads[pass++] = quads[i];
+   }
+
+   if (pass)
+      qs->next->run(qs->next, quads, pass);
+
+}
+
+
+
+
+
+/**
+ * Pass-through run() callback: hands every quad, untouched, to the next
+ * stage.
+ *
+ * \param qs     this quad stage
+ * \param quads  quads to forward
+ * \param nr     number of entries in quads
+ */
+static void
+depth_noop(struct quad_stage *qs,
+           struct quad_header *quads[],
+           unsigned nr)
+{
+   struct quad_stage *downstream = qs->next;
+
+   downstream->run(downstream, quads, nr);
+}
+
+
+
+/**
+ * Select the depth-test implementation that matches the current state,
+ * install it as this stage's run() callback, then run it on the given
+ * quads.
+ *
+ * Selection:
+ *  - alpha test, depth test and stencil test all off: depth_noop
+ *  - Z16_UNORM surface, depth test with writes on, interpolated
+ *    (non-shader-written) Z, no alpha test, no stencil, no active
+ *    occlusion query, func LESS or LEQUAL: the matching
+ *    depth_interp_z16_*_write fast path
+ *  - anything else: depth_test_quads_fallback
+ *
+ * \param qs     this quad stage; its run pointer is overwritten
+ * \param quads  quads to process
+ * \param nr     number of entries in quads
+ */
+static void
+choose_depth_test(struct quad_stage *qs,
+                  struct quad_header *quads[],
+                  unsigned nr)
+{
+   boolean interp_depth = !qs->softpipe->fs->info.writes_z;
+
+   boolean alpha = qs->softpipe->depth_stencil->alpha.enabled;
+
+   boolean depth = (get_depth_bits(qs) > 0 &&
+                    qs->softpipe->depth_stencil->depth.enabled);
+
+   unsigned depthfunc = qs->softpipe->depth_stencil->depth.func;
+
+   boolean stencil = qs->softpipe->depth_stencil->stencil[0].enabled;
+
+   boolean depthwrite = qs->softpipe->depth_stencil->depth.writemask;
+
+   /*
+    * Compare against zero explicitly rather than assigning the count to a
+    * boolean: if boolean is a narrow integer type, a count that is a
+    * multiple of its range would otherwise be truncated to "false".
+    */
+   boolean occlusion = qs->softpipe->active_query_count != 0;
+
+   /* Generic path unless one of the special cases below applies. */
+   void (*run)(struct quad_stage *, struct quad_header *[], unsigned) =
+      depth_test_quads_fallback;
+
+   if (!alpha &&
+       !depth &&
+       !stencil) {
+      run = depth_noop;
+   }
+   else if (!alpha &&
+            interp_depth &&
+            depth &&
+            depthwrite &&
+            !occlusion &&
+            !stencil &&
+            /* depth != 0 implies get_depth_bits() > 0, i.e. zsbuf is set */
+            qs->softpipe->framebuffer.zsbuf->format == PIPE_FORMAT_Z16_UNORM)
+   {
+      switch (depthfunc) {
+      case PIPE_FUNC_LESS:
+         run = depth_interp_z16_less_write;
+         break;
+      case PIPE_FUNC_LEQUAL:
+         run = depth_interp_z16_lequal_write;
+         break;
+      default:
+         /* no fast path for the other compare functions */
+         break;
+      }
+   }
+
+   qs->run = run;
+
+   qs->run( qs, quads, nr );
+}
+
+
+
+
+
static void depth_test_begin(struct quad_stage *qs)
{
+ qs->run = choose_depth_test; /* reset run() to the chooser so the test variant is picked again on the next run() call */
qs->next->begin(qs->next); /* forward begin() to the next stage */
}
stage->softpipe = softpipe;
stage->begin = depth_test_begin;
- stage->run = depth_test_quads;
+ stage->run = choose_depth_test;
stage->destroy = depth_test_destroy;
return stage;