From e2acc7be2683fd3c295480724b02f5a497309cfd Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 10 Oct 2010 01:39:13 +0200 Subject: [PATCH] dri/nv10: Fake fast Z clears for pre-nv17 cards. --- src/mesa/drivers/dri/nouveau/nv10_context.c | 130 +++++++++++++++--- src/mesa/drivers/dri/nouveau/nv10_driver.h | 6 + src/mesa/drivers/dri/nouveau/nv10_state_fb.c | 8 ++ src/mesa/drivers/dri/nouveau/nv10_state_tnl.c | 3 + 4 files changed, 127 insertions(+), 20 deletions(-) diff --git a/src/mesa/drivers/dri/nouveau/nv10_context.c b/src/mesa/drivers/dri/nouveau/nv10_context.c index f0e27441947..3d898fd94d9 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_context.c +++ b/src/mesa/drivers/dri/nouveau/nv10_context.c @@ -61,39 +61,129 @@ use_fast_zclear(GLcontext *ctx, GLbitfield buffers) fb->_Ymax == fb->Height && fb->_Ymin == 0; } +GLboolean +nv10_use_viewport_zclear(GLcontext *ctx) +{ + struct nouveau_context *nctx = to_nouveau_context(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; + + return context_chipset(ctx) < 0x17 && + !nctx->hierz.clear_blocked && fb->_DepthBuffer && + (_mesa_get_format_bits(fb->_DepthBuffer->Format, + GL_DEPTH_BITS) >= 24); +} + +float +nv10_transform_depth(GLcontext *ctx, float z) +{ + struct nouveau_context *nctx = to_nouveau_context(ctx); + + if (nv10_use_viewport_zclear(ctx)) + return 2097152.0 * (z + (nctx->hierz.clear_seq & 7)); + else + return ctx->DrawBuffer->_DepthMaxF * z; +} + static void -nv10_clear(GLcontext *ctx, GLbitfield buffers) +nv10_zclear(GLcontext *ctx, GLbitfield *buffers) +{ + /* + * Pre-nv17 cards don't have native support for fast Z clears, + * but in some cases we can still "clear" the Z buffer without + * actually blitting to it if we're willing to sacrifice a few + * bits of depth precision. + * + * Each time a clear is requested we modify the viewport + * transform in such a way that the old contents of the depth + * buffer are clamped to the requested clear value when + * they're read by the GPU. + */ + struct nouveau_context *nctx = to_nouveau_context(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; + struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(fb); + struct nouveau_surface *s = &to_nouveau_renderbuffer( + fb->_DepthBuffer->Wrapped)->surface; + + if (nv10_use_viewport_zclear(ctx)) { + int x, y, w, h; + float z = ctx->Depth.Clear; + uint32_t value = pack_zs_f(s->format, z, 0); + + get_scissors(fb, &x, &y, &w, &h); + *buffers &= ~BUFFER_BIT_DEPTH; + + if (use_fast_zclear(ctx, *buffers)) { + if (nfb->hierz.clear_value != value) { + /* Don't fast clear if we're changing + * the depth value. */ + nfb->hierz.clear_value = value; + + } else if (z == 0.0) { + nctx->hierz.clear_seq++; + context_dirty(ctx, ZCLEAR); + + if ((nctx->hierz.clear_seq & 7) != 0 && + nctx->hierz.clear_seq != 1) + /* We didn't wrap around -- no need to + * clear the depth buffer for real. */ + return; + + } else if (z == 1.0) { + nctx->hierz.clear_seq--; + context_dirty(ctx, ZCLEAR); + + if ((nctx->hierz.clear_seq & 7) != 7) + /* No wrap around */ + return; + } + } + + value = pack_zs_f(s->format, + (z + (nctx->hierz.clear_seq & 7)) / 8, 0); + context_drv(ctx)->surface_fill(ctx, s, ~0, value, x, y, w, h); + } +} + +static void +nv17_zclear(GLcontext *ctx, GLbitfield *buffers) { struct nouveau_context *nctx = to_nouveau_context(ctx); struct nouveau_channel *chan = context_chan(ctx); struct nouveau_grobj *celsius = context_eng3d(ctx); struct nouveau_framebuffer *nfb = to_nouveau_framebuffer( ctx->DrawBuffer); + struct nouveau_surface *s = &to_nouveau_renderbuffer( + nfb->base._DepthBuffer->Wrapped)->surface; - nouveau_validate_framebuffer(ctx); + /* Clear the hierarchical depth buffer */ + BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_FILL_VALUE, 1); + OUT_RING(chan, pack_zs_f(s->format, ctx->Depth.Clear, 0)); + BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_BUFFER_CLEAR, 1); + OUT_RING(chan, 1); - if ((buffers & BUFFER_BIT_DEPTH) && - ctx->Depth.Mask && nfb->hierz.bo) { - struct nouveau_surface *s = &to_nouveau_renderbuffer( - nfb->base._DepthBuffer->Wrapped)->surface; + /* Mark the depth buffer as cleared */ + if (use_fast_zclear(ctx, *buffers)) { + if (nctx->hierz.clear_seq) + *buffers &= ~BUFFER_BIT_DEPTH; - /* Clear the hierarchical depth buffer */ - BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_FILL_VALUE, 1); - OUT_RING(chan, pack_zs_f(s->format, ctx->Depth.Clear, 0)); - BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_BUFFER_CLEAR, 1); - OUT_RING(chan, 1); + nfb->hierz.clear_value = + pack_zs_f(s->format, ctx->Depth.Clear, 0); + nctx->hierz.clear_seq++; - /* Mark the depth buffer as cleared */ - if (use_fast_zclear(ctx, buffers)) { - if (nctx->hierz.clear_seq) - buffers &= ~BUFFER_BIT_DEPTH; + context_dirty(ctx, ZCLEAR); + } +} - nfb->hierz.clear_value = - pack_zs_f(s->format, ctx->Depth.Clear, 0); - nctx->hierz.clear_seq++; +static void +nv10_clear(GLcontext *ctx, GLbitfield buffers) +{ + nouveau_validate_framebuffer(ctx); - context_dirty(ctx, ZCLEAR); - } + if ((buffers & BUFFER_BIT_DEPTH) && ctx->Depth.Mask) { + if (context_chipset(ctx) >= 0x17) + nv17_zclear(ctx, &buffers); + else + nv10_zclear(ctx, &buffers); } nouveau_clear(ctx, buffers); diff --git a/src/mesa/drivers/dri/nouveau/nv10_driver.h b/src/mesa/drivers/dri/nouveau/nv10_driver.h index 340ba05adee..61dceab7b61 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_driver.h +++ b/src/mesa/drivers/dri/nouveau/nv10_driver.h @@ -37,6 +37,12 @@ enum { /* nv10_context.c */ extern const struct nouveau_driver nv10_driver; +GLboolean +nv10_use_viewport_zclear(GLcontext *ctx); + +float +nv10_transform_depth(GLcontext *ctx, float z); + /* nv10_render.c */ void nv10_render_init(GLcontext *ctx); diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_fb.c b/src/mesa/drivers/dri/nouveau/nv10_state_fb.c index 98eb0e8eceb..f9f3ebaa8d0 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_state_fb.c +++ b/src/mesa/drivers/dri/nouveau/nv10_state_fb.c @@ -172,12 +172,15 @@ nv10_emit_viewport(GLcontext *ctx, int emit) { struct nouveau_channel *chan = context_chan(ctx); struct nouveau_grobj *celsius = context_eng3d(ctx); + struct gl_viewport_attrib *vp = &ctx->Viewport; struct gl_framebuffer *fb = ctx->DrawBuffer; float a[4] = {}; get_viewport_translate(ctx, a); a[0] -= 2048; a[1] -= 2048; + if (nv10_use_viewport_zclear(ctx)) + a[2] = nv10_transform_depth(ctx, (vp->Far + vp->Near) / 2); BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4); OUT_RINGp(chan, a, 4); @@ -204,5 +207,10 @@ nv10_emit_zclear(GLcontext *ctx, int emit) OUT_RING(chan, nctx->hierz.clear_blocked ? 0 : 1); OUT_RING(chan, nfb->hierz.clear_value | (nctx->hierz.clear_seq & 0xff)); + } else { + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_RANGE_NEAR, 2); + OUT_RINGf(chan, nv10_transform_depth(ctx, 0)); + OUT_RINGf(chan, nv10_transform_depth(ctx, 1)); + context_dirty(ctx, VIEWPORT); } } diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c b/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c index 0e592a16292..6b2ede88e67 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c +++ b/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c @@ -479,6 +479,9 @@ nv10_emit_projection(GLcontext *ctx, int emit) _math_matrix_ctr(&m); get_viewport_scale(ctx, m.m); + if (nv10_use_viewport_zclear(ctx)) + m.m[MAT_SZ] /= 8; + if (nctx->fallback == HWTNL) _math_matrix_mul_matrix(&m, &m, &ctx->_ModelProjectMatrix); -- 2.30.2