/**************************************************************************
*
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007 VMware, Inc.
+ * Copyright 2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**************************************************************************/
/**
- * \brief Quad depth testing
+ * \brief Quad depth / stencil testing
*/
#include "pipe/p_defines.h"
+#include "util/format/u_format.h"
+#include "util/u_math.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_scan.h"
#include "sp_context.h"
#include "sp_quad.h"
-#include "sp_surface.h"
#include "sp_quad_pipe.h"
#include "sp_tile_cache.h"
#include "sp_state.h" /* for sp_fragment_shader */
struct depth_data {
struct pipe_surface *ps;
enum pipe_format format;
- unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */
- unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */
- ubyte stencilVals[QUAD_SIZE];
+ unsigned bzzzz[TGSI_QUAD_SIZE]; /**< Z values fetched from depth buffer */
+ unsigned qzzzz[TGSI_QUAD_SIZE]; /**< Z values from the quad */
+ ubyte stencilVals[TGSI_QUAD_SIZE];
+ boolean use_shader_stencil_refs;
+ ubyte shader_stencil_refs[TGSI_QUAD_SIZE];
struct softpipe_cached_tile *tile;
+ float minval, maxval;
+ bool clamp;
};
switch (data->format) {
case PIPE_FORMAT_Z16_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
data->bzzzz[j] = tile->data.depth16[y][x];
}
break;
case PIPE_FORMAT_Z32_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
data->bzzzz[j] = tile->data.depth32[y][x];
}
break;
- case PIPE_FORMAT_X8Z24_UNORM:
- case PIPE_FORMAT_S8Z24_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
data->stencilVals[j] = tile->data.depth32[y][x] >> 24;
}
- break;
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24S8_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
+ break;
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
data->bzzzz[j] = tile->data.depth32[y][x] >> 8;
data->stencilVals[j] = tile->data.depth32[y][x] & 0xff;
}
break;
+ case PIPE_FORMAT_S8_UINT:
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ data->bzzzz[j] = 0;
+ data->stencilVals[j] = tile->data.stencil8[y][x];
+ }
+ break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ data->bzzzz[j] = tile->data.depth32[y][x];
+ }
+ break;
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ data->bzzzz[j] = tile->data.depth64[y][x] & 0xffffffff;
+ data->stencilVals[j] = (tile->data.depth64[y][x] >> 32) & 0xff;
+ }
+ break;
default:
assert(0);
}
}
-/* If the shader has not been run, interpolate the depth values
+
+/**
+ * If the shader has not been run, interpolate the depth values
* ourselves.
*/
static void
}
+/**
+ * Compute the depth_data::qzzzz[] values from the float fragment Z values.
+ */
static void
convert_quad_depth( struct depth_data *data,
const struct quad_header *quad )
{
unsigned j;
+ float dvals[TGSI_QUAD_SIZE];
/* Convert quad's float depth values to int depth values (qzzzz).
* If the Z buffer stores integer values, we _have_ to do the depth
* conversion of Z values (which isn't an identity function) will cause
* Z-fighting errors.
*/
+ if (data->clamp) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ dvals[j] = CLAMP(quad->output.depth[j], data->minval, data->maxval);
+ }
+ } else {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ dvals[j] = quad->output.depth[j];
+ }
+ }
+
switch (data->format) {
case PIPE_FORMAT_Z16_UNORM:
{
float scale = 65535.0;
- for (j = 0; j < QUAD_SIZE; j++) {
- data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ data->qzzzz[j] = (unsigned) (dvals[j] * scale);
}
}
break;
{
double scale = (double) (uint) ~0UL;
- for (j = 0; j < QUAD_SIZE; j++) {
- data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ data->qzzzz[j] = (unsigned) (dvals[j] * scale);
}
}
break;
- case PIPE_FORMAT_X8Z24_UNORM:
- case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
{
float scale = (float) ((1 << 24) - 1);
- for (j = 0; j < QUAD_SIZE; j++) {
- data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ data->qzzzz[j] = (unsigned) (dvals[j] * scale);
}
}
break;
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
{
float scale = (float) ((1 << 24) - 1);
- for (j = 0; j < QUAD_SIZE; j++) {
- data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ data->qzzzz[j] = (unsigned) (dvals[j] * scale);
+ }
+ }
+ break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ {
+ union fi fui;
+
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ fui.f = dvals[j];
+ data->qzzzz[j] = fui.ui;
}
}
break;
}
+/**
+ * Compute the depth_data::shader_stencil_refs[] values from the float
+ * fragment stencil values.
+ */
+static void
+convert_quad_stencil( struct depth_data *data,
+ const struct quad_header *quad )
+{
+ unsigned j;
+ data->use_shader_stencil_refs = TRUE;
+ /* Copy quads stencil values
+ */
+ switch (data->format) {
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ case PIPE_FORMAT_S8_UINT:
+ case PIPE_FORMAT_Z32_FLOAT:
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j]));
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
+/**
+ * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer.
+ */
static void
write_depth_stencil_values( struct depth_data *data,
struct quad_header *quad )
/* put updated Z values back into cached tile */
switch (data->format) {
case PIPE_FORMAT_Z16_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
tile->data.depth16[y][x] = (ushort) data->bzzzz[j];
}
break;
- case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z32_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
tile->data.depth32[y][x] = data->bzzzz[j];
}
break;
- case PIPE_FORMAT_S8Z24_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j];
}
break;
- case PIPE_FORMAT_Z24S8_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j];
}
break;
- case PIPE_FORMAT_Z24X8_UNORM:
- for (j = 0; j < QUAD_SIZE; j++) {
+ case PIPE_FORMAT_X8Z24_UNORM:
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
int x = quad->input.x0 % TILE_SIZE + (j & 1);
int y = quad->input.y0 % TILE_SIZE + (j >> 1);
tile->data.depth32[y][x] = data->bzzzz[j] << 8;
}
break;
+ case PIPE_FORMAT_S8_UINT:
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ tile->data.stencil8[y][x] = data->stencilVals[j];
+ }
+ break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ tile->data.depth32[y][x] = data->bzzzz[j];
+ }
+ break;
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ tile->data.depth64[y][x] = (uint64_t)data->bzzzz[j] | ((uint64_t)data->stencilVals[j] << 32);
+ }
+ break;
default:
assert(0);
}
-
/** Only 8-bit stencil supported */
#define STENCIL_MAX 0xff
{
unsigned passMask = 0x0;
unsigned j;
+ ubyte refs[TGSI_QUAD_SIZE];
- ref &= valMask;
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ if (data->use_shader_stencil_refs)
+ refs[j] = data->shader_stencil_refs[j] & valMask;
+ else
+ refs[j] = ref & valMask;
+ }
switch (func) {
case PIPE_FUNC_NEVER:
/* passMask = 0x0 */
break;
case PIPE_FUNC_LESS:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (ref < (data->stencilVals[j] & valMask)) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ if (refs[j] < (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_EQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (ref == (data->stencilVals[j] & valMask)) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ if (refs[j] == (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_LEQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (ref <= (data->stencilVals[j] & valMask)) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ if (refs[j] <= (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_GREATER:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (ref > (data->stencilVals[j] & valMask)) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ if (refs[j] > (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_NOTEQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (ref != (data->stencilVals[j] & valMask)) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ if (refs[j] != (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_GEQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
- if (ref >= (data->stencilVals[j] & valMask)) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ if (refs[j] >= (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
unsigned mask, unsigned op, ubyte ref, ubyte wrtMask)
{
unsigned j;
- ubyte newstencil[QUAD_SIZE];
+ ubyte newstencil[TGSI_QUAD_SIZE];
+ ubyte refs[TGSI_QUAD_SIZE];
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
newstencil[j] = data->stencilVals[j];
+ if (data->use_shader_stencil_refs)
+ refs[j] = data->shader_stencil_refs[j];
+ else
+ refs[j] = ref;
}
switch (op) {
/* no-op */
break;
case PIPE_STENCIL_OP_ZERO:
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
if (mask & (1 << j)) {
newstencil[j] = 0;
}
}
break;
case PIPE_STENCIL_OP_REPLACE:
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
if (mask & (1 << j)) {
- newstencil[j] = ref;
+ newstencil[j] = refs[j];
}
}
break;
case PIPE_STENCIL_OP_INCR:
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
if (mask & (1 << j)) {
if (data->stencilVals[j] < STENCIL_MAX) {
newstencil[j] = data->stencilVals[j] + 1;
}
break;
case PIPE_STENCIL_OP_DECR:
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
if (mask & (1 << j)) {
if (data->stencilVals[j] > 0) {
newstencil[j] = data->stencilVals[j] - 1;
}
break;
case PIPE_STENCIL_OP_INCR_WRAP:
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
if (mask & (1 << j)) {
newstencil[j] = data->stencilVals[j] + 1;
}
}
break;
case PIPE_STENCIL_OP_DECR_WRAP:
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
if (mask & (1 << j)) {
newstencil[j] = data->stencilVals[j] - 1;
}
}
break;
case PIPE_STENCIL_OP_INVERT:
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
if (mask & (1 << j)) {
newstencil[j] = ~data->stencilVals[j];
}
*/
if (wrtMask != STENCIL_MAX) {
/* apply bit-wise stencil buffer writemask */
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]);
}
}
else {
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
data->stencilVals[j] = newstencil[j];
}
}
-/*
+/**
* To increase efficiency, we should probably have multiple versions
* of this function that are specifically for Z16, Z32 and FP Z buffers.
* Try to effectively do that with codegen...
*/
-
static boolean
depth_test_quad(struct quad_stage *qs,
struct depth_data *data,
/* Note this is pretty much a single sse or cell instruction.
* Like this: quad->mask &= (quad->outputs.depth < zzzz);
*/
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
if (data->qzzzz[j] < data->bzzzz[j])
zmask |= 1 << j;
}
break;
case PIPE_FUNC_EQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
if (data->qzzzz[j] == data->bzzzz[j])
zmask |= 1 << j;
}
break;
case PIPE_FUNC_LEQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
if (data->qzzzz[j] <= data->bzzzz[j])
zmask |= (1 << j);
}
break;
case PIPE_FUNC_GREATER:
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
if (data->qzzzz[j] > data->bzzzz[j])
zmask |= (1 << j);
}
break;
case PIPE_FUNC_NOTEQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
if (data->qzzzz[j] != data->bzzzz[j])
zmask |= (1 << j);
}
break;
case PIPE_FUNC_GEQUAL:
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
if (data->qzzzz[j] >= data->bzzzz[j])
zmask |= (1 << j);
}
* data due to stencil changes.
*/
if (softpipe->depth_stencil->depth.writemask) {
- for (j = 0; j < QUAD_SIZE; j++) {
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
if (quad->inout.mask & (1 << j)) {
data->bzzzz[j] = data->qzzzz[j];
}
face = 0;
}
+ /* 0 = front-face, 1 = back-face */
+ assert(face == 0 || face == 1);
+
/* choose front or back face function, operator, etc */
/* XXX we could do these initializations once per primitive */
func = softpipe->depth_stencil->stencil[face].func;
failOp = softpipe->depth_stencil->stencil[face].fail_op;
zFailOp = softpipe->depth_stencil->stencil[face].zfail_op;
zPassOp = softpipe->depth_stencil->stencil[face].zpass_op;
- ref = softpipe->depth_stencil->stencil[face].ref_value;
+ ref = softpipe->stencil_ref.ref_value[face];
wrtMask = softpipe->depth_stencil->stencil[face].writemask;
valMask = softpipe->depth_stencil->stencil[face].valuemask;
-
/* do the stencil test first */
{
unsigned passMask, failMask;
#define ALPHATEST( FUNC, COMP ) \
- static int \
+ static unsigned \
alpha_test_quads_##FUNC( struct quad_stage *qs, \
struct quad_header *quads[], \
unsigned nr ) \
ALPHATEST( GEQUAL, >= )
-/* XXX: Incorporate into shader using KILP.
+/* XXX: Incorporate into shader using KILL_IF.
*/
-static int
+static unsigned
alpha_test_quads(struct quad_stage *qs,
struct quad_header *quads[],
unsigned nr)
return alpha_test_quads_LESS( qs, quads, nr );
case PIPE_FUNC_EQUAL:
return alpha_test_quads_EQUAL( qs, quads, nr );
- break;
case PIPE_FUNC_LEQUAL:
return alpha_test_quads_LEQUAL( qs, quads, nr );
case PIPE_FUNC_GREATER:
}
}
+
static unsigned mask_count[16] =
{
0, /* 0x0 */
+/**
+ * General depth/stencil test function. Used when there's no fast-path.
+ */
static void
depth_test_quads_fallback(struct quad_stage *qs,
struct quad_header *quads[],
unsigned nr)
{
unsigned i, pass = 0;
- const struct sp_fragment_shader *fs = qs->softpipe->fs;
- boolean interp_depth = !fs->info.writes_z;
+ const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
+ boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;
+ boolean shader_stencil_ref = fsInfo->writes_stencil;
struct depth_data data;
+ unsigned vp_idx = quads[0]->input.viewport_index;
+ data.use_shader_stencil_refs = FALSE;
if (qs->softpipe->depth_stencil->alpha.enabled) {
nr = alpha_test_quads(qs, quads, nr);
}
- if (qs->softpipe->framebuffer.zsbuf &&
- (qs->softpipe->depth_stencil->depth.enabled ||
- qs->softpipe->depth_stencil->stencil[0].enabled)) {
+ if (qs->softpipe->framebuffer.zsbuf &&
+ (qs->softpipe->depth_stencil->depth.enabled ||
+ qs->softpipe->depth_stencil->stencil[0].enabled)) {
+ float near_val, far_val;
data.ps = qs->softpipe->framebuffer.zsbuf;
data.format = data.ps->format;
data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache,
quads[0]->input.x0,
- quads[0]->input.y0);
+ quads[0]->input.y0, quads[0]->input.layer);
+ data.clamp = !qs->softpipe->rasterizer->depth_clip_near;
+
+ near_val = qs->softpipe->viewports[vp_idx].translate[2] - qs->softpipe->viewports[vp_idx].scale[2];
+ far_val = near_val + (qs->softpipe->viewports[vp_idx].scale[2] * 2.0);
+ data.minval = MIN2(near_val, far_val);
+ data.maxval = MAX2(near_val, far_val);
for (i = 0; i < nr; i++) {
get_depth_stencil_values(&data, quads[i]);
}
if (qs->softpipe->depth_stencil->stencil[0].enabled) {
+ if (shader_stencil_ref)
+ convert_quad_stencil(&data, quads[i]);
+
depth_stencil_test_quad(qs, &data, quads[i]);
write_depth_stencil_values(&data, quads[i]);
}
write_depth_stencil_values(&data, quads[i]);
}
-
quads[pass++] = quads[i];
}
qs->next->run(qs->next, quads, nr);
}
-/* XXX: this function assumes setup function actually emits linear
- * spans of quads. It seems a lot more natural to do (early)
- * depth-testing on spans rather than quads.
- */
-static void
-depth_interp_z16_less_write(struct quad_stage *qs,
- struct quad_header *quads[],
- unsigned nr)
-{
- unsigned i, pass = 0;
- const unsigned ix = quads[0]->input.x0;
- const unsigned iy = quads[0]->input.y0;
- const float fx = (float) ix;
- const float fy = (float) iy;
- const float dzdx = quads[0]->posCoef->dadx[2];
- const float dzdy = quads[0]->posCoef->dady[2];
- const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy;
- struct softpipe_cached_tile *tile;
- ushort (*depth16)[TILE_SIZE];
- ushort idepth[4], depth_step;
- const float scale = 65535.0;
-
- idepth[0] = (ushort)((z0) * scale);
- idepth[1] = (ushort)((z0 + dzdx) * scale);
- idepth[2] = (ushort)((z0 + dzdy) * scale);
- idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale);
-
- depth_step = (ushort)(dzdx * 2 * scale);
-
- tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy);
-
- depth16 = (ushort (*)[TILE_SIZE])
- &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE];
-
- for (i = 0; i < nr; i++) {
- unsigned outmask = quads[i]->inout.mask;
- unsigned mask = 0;
-
- if ((outmask & 1) && idepth[0] < depth16[0][0]) {
- depth16[0][0] = idepth[0];
- mask |= (1 << 0);
- }
-
- if ((outmask & 2) && idepth[1] < depth16[0][1]) {
- depth16[0][1] = idepth[1];
- mask |= (1 << 1);
- }
-
- if ((outmask & 4) && idepth[2] < depth16[1][0]) {
- depth16[1][0] = idepth[2];
- mask |= (1 << 2);
- }
-
- if ((outmask & 8) && idepth[3] < depth16[1][1]) {
- depth16[1][1] = idepth[3];
- mask |= (1 << 3);
- }
-
- idepth[0] += depth_step;
- idepth[1] += depth_step;
- idepth[2] += depth_step;
- idepth[3] += depth_step;
-
- depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2];
-
- quads[i]->inout.mask = mask;
- if (quads[i]->inout.mask)
- quads[pass++] = quads[i];
- }
-
- if (pass)
- qs->next->run(qs->next, quads, pass);
-
-}
-
-static void
-depth_interp_z16_lequal_write(struct quad_stage *qs,
- struct quad_header *quads[],
- unsigned nr)
-{
- unsigned i, pass = 0;
- const unsigned ix = quads[0]->input.x0;
- const unsigned iy = quads[0]->input.y0;
- const float fx = (float) ix;
- const float fy = (float) iy;
- const float dzdx = quads[0]->posCoef->dadx[2];
- const float dzdy = quads[0]->posCoef->dady[2];
- const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy;
- struct softpipe_cached_tile *tile;
- ushort (*depth16)[TILE_SIZE];
- ushort idepth[4], depth_step;
- const float scale = 65535.0;
-
- idepth[0] = (ushort)((z0) * scale);
- idepth[1] = (ushort)((z0 + dzdx) * scale);
- idepth[2] = (ushort)((z0 + dzdy) * scale);
- idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale);
-
- depth_step = (ushort)(dzdx * 2 * scale);
-
- tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy);
-
- depth16 = (ushort (*)[TILE_SIZE])
- &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE];
-
- for (i = 0; i < nr; i++) {
- unsigned outmask = quads[i]->inout.mask;
- unsigned mask = 0;
-
- if ((outmask & 1) && idepth[0] <= depth16[0][0]) {
- depth16[0][0] = idepth[0];
- mask |= (1 << 0);
- }
-
- if ((outmask & 2) && idepth[1] <= depth16[0][1]) {
- depth16[0][1] = idepth[1];
- mask |= (1 << 1);
- }
-
- if ((outmask & 4) && idepth[2] <= depth16[1][0]) {
- depth16[1][0] = idepth[2];
- mask |= (1 << 2);
- }
+/**
+ * Special-case Z testing for 16-bit Zbuffer and Z buffer writes enabled.
+ */
- if ((outmask & 8) && idepth[3] <= depth16[1][1]) {
- depth16[1][1] = idepth[3];
- mask |= (1 << 3);
- }
+#define NAME depth_interp_z16_less_write
+#define OPERATOR <
+#include "sp_quad_depth_test_tmp.h"
- idepth[0] += depth_step;
- idepth[1] += depth_step;
- idepth[2] += depth_step;
- idepth[3] += depth_step;
+#define NAME depth_interp_z16_equal_write
+#define OPERATOR ==
+#include "sp_quad_depth_test_tmp.h"
- depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2];
+#define NAME depth_interp_z16_lequal_write
+#define OPERATOR <=
+#include "sp_quad_depth_test_tmp.h"
- quads[i]->inout.mask = mask;
- if (quads[i]->inout.mask)
- quads[pass++] = quads[i];
- }
-
- if (pass)
- qs->next->run(qs->next, quads, pass);
+#define NAME depth_interp_z16_greater_write
+#define OPERATOR >
+#include "sp_quad_depth_test_tmp.h"
-}
+#define NAME depth_interp_z16_notequal_write
+#define OPERATOR !=
+#include "sp_quad_depth_test_tmp.h"
+#define NAME depth_interp_z16_gequal_write
+#define OPERATOR >=
+#include "sp_quad_depth_test_tmp.h"
+#define NAME depth_interp_z16_always_write
+#define ALWAYS 1
+#include "sp_quad_depth_test_tmp.h"
struct quad_header *quads[],
unsigned nr)
{
- boolean interp_depth = !qs->softpipe->fs->info.writes_z;
+ const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
+
+ boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;
boolean alpha = qs->softpipe->depth_stencil->alpha.enabled;
- boolean depth = (qs->softpipe->framebuffer.zsbuf &&
- qs->softpipe->depth_stencil->depth.enabled);
+ boolean depth = qs->softpipe->depth_stencil->depth.enabled;
unsigned depthfunc = qs->softpipe->depth_stencil->depth.func;
boolean occlusion = qs->softpipe->active_query_count;
+ boolean clipped = !qs->softpipe->rasterizer->depth_clip_near;
+ if(!qs->softpipe->framebuffer.zsbuf)
+ depth = depthwrite = stencil = FALSE;
+
+ /* default */
+ qs->run = depth_test_quads_fallback;
+
+ /* look for special cases */
if (!alpha &&
!depth &&
+ !occlusion &&
+ !clipped &&
!stencil) {
qs->run = depth_noop;
}
depth &&
depthwrite &&
!occlusion &&
+ !clipped &&
!stencil)
{
- switch (depthfunc) {
- case PIPE_FUNC_LESS:
- switch (qs->softpipe->framebuffer.zsbuf->format) {
- case PIPE_FORMAT_Z16_UNORM:
+ if (qs->softpipe->framebuffer.zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
+ switch (depthfunc) {
+ case PIPE_FUNC_NEVER:
+ qs->run = depth_test_quads_fallback;
+ break;
+ case PIPE_FUNC_LESS:
qs->run = depth_interp_z16_less_write;
break;
- default:
- qs->run = depth_test_quads_fallback;
+ case PIPE_FUNC_EQUAL:
+ qs->run = depth_interp_z16_equal_write;
break;
- }
- break;
- case PIPE_FUNC_LEQUAL:
- switch (qs->softpipe->framebuffer.zsbuf->format) {
- case PIPE_FORMAT_Z16_UNORM:
+ case PIPE_FUNC_LEQUAL:
qs->run = depth_interp_z16_lequal_write;
break;
+ case PIPE_FUNC_GREATER:
+ qs->run = depth_interp_z16_greater_write;
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ qs->run = depth_interp_z16_notequal_write;
+ break;
+ case PIPE_FUNC_GEQUAL:
+ qs->run = depth_interp_z16_gequal_write;
+ break;
+ case PIPE_FUNC_ALWAYS:
+ qs->run = depth_interp_z16_always_write;
+ break;
default:
qs->run = depth_test_quads_fallback;
break;
}
- break;
- default:
- qs->run = depth_test_quads_fallback;
}
}
- else {
- qs->run = depth_test_quads_fallback;
- }
-
+ /* next quad/fragment stage */
qs->run( qs, quads, nr );
}
-
-
-static void depth_test_begin(struct quad_stage *qs)
+static void
+depth_test_begin(struct quad_stage *qs)
{
qs->run = choose_depth_test;
qs->next->begin(qs->next);
}
-static void depth_test_destroy(struct quad_stage *qs)
+static void
+depth_test_destroy(struct quad_stage *qs)
{
FREE( qs );
}
-struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe )
+struct quad_stage *
+sp_quad_depth_test_stage(struct softpipe_context *softpipe)
{
struct quad_stage *stage = CALLOC_STRUCT(quad_stage);