dri/nv10: Fake fast Z clears for pre-nv17 cards.
author Francisco Jerez <currojerez@riseup.net>
Sat, 9 Oct 2010 23:39:13 +0000 (01:39 +0200)
committer Francisco Jerez <currojerez@riseup.net>
Sun, 10 Oct 2010 02:14:34 +0000 (04:14 +0200)
src/mesa/drivers/dri/nouveau/nv10_context.c
src/mesa/drivers/dri/nouveau/nv10_driver.h
src/mesa/drivers/dri/nouveau/nv10_state_fb.c
src/mesa/drivers/dri/nouveau/nv10_state_tnl.c

index f0e274419470b4142c7b96b543eb73041631447b..3d898fd94d966d668af9e19de5484574fd218fa1 100644 (file)
@@ -61,39 +61,129 @@ use_fast_zclear(GLcontext *ctx, GLbitfield buffers)
                fb->_Ymax == fb->Height && fb->_Ymin == 0;
 }
 
+GLboolean
+nv10_use_viewport_zclear(GLcontext *ctx) /* Tells whether the viewport-transform based Z clear can be used for ctx. */
+{
+       struct nouveau_context *nctx = to_nouveau_context(ctx);
+       struct gl_framebuffer *fb = ctx->DrawBuffer;
+
+       return context_chipset(ctx) < 0x17 && /* pre-nv17 chipsets only */
+               !nctx->hierz.clear_blocked && fb->_DepthBuffer && /* clears not blocked, and the draw buffer has a depth buffer */
+               (_mesa_get_format_bits(fb->_DepthBuffer->Format,
+                                      GL_DEPTH_BITS) >= 24); /* depth format reports at least 24 depth bits */
+}
+
+float
+nv10_transform_depth(GLcontext *ctx, float z) /* Scales depth z for ctx; the scaling depends on whether the viewport Z clear is in use. */
+{
+       struct nouveau_context *nctx = to_nouveau_context(ctx);
+
+       if (nv10_use_viewport_zclear(ctx))
+               return 2097152.0 * (z + (nctx->hierz.clear_seq & 7)); /* 2097152 is 2^21; the low 3 bits of clear_seq offset z by a whole number of such ranges (8 of them span 2^24) */
+       else
+               return ctx->DrawBuffer->_DepthMaxF * z; /* plain scaling by the draw buffer's _DepthMaxF */
+}
+
 static void
-nv10_clear(GLcontext *ctx, GLbitfield buffers)
+nv10_zclear(GLcontext *ctx, GLbitfield *buffers) /* Depth clear for pre-nv17 chipsets; may remove BUFFER_BIT_DEPTH from *buffers. */
+{
+       /*
+        * Pre-nv17 cards don't have native support for fast Z clears,
+        * but in some cases we can still "clear" the Z buffer without
+        * actually blitting to it if we're willing to sacrifice a few
+        * bits of depth precision.
+        *
+        * Each time a clear is requested we modify the viewport
+        * transform in such a way that the old contents of the depth
+        * buffer are clamped to the requested clear value when
+        * they're read by the GPU.
+        */
+       struct nouveau_context *nctx = to_nouveau_context(ctx);
+       struct gl_framebuffer *fb = ctx->DrawBuffer;
+       struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(fb);
+       struct nouveau_surface *s = &to_nouveau_renderbuffer(
+               fb->_DepthBuffer->Wrapped)->surface; /* NOTE(review): fb->_DepthBuffer is dereferenced here before nv10_use_viewport_zclear() tests it for NULL -- confirm callers guarantee a depth buffer */
+
+       if (nv10_use_viewport_zclear(ctx)) { /* no else branch: otherwise *buffers is left untouched */
+               int x, y, w, h;
+               float z = ctx->Depth.Clear;
+               uint32_t value = pack_zs_f(s->format, z, 0); /* presumably the requested depth packed with stencil 0 -- confirm against pack_zs_f */
+
+               get_scissors(fb, &x, &y, &w, &h); /* region passed to surface_fill below */
+               *buffers &= ~BUFFER_BIT_DEPTH; /* depth is dealt with in this function from here on */
+
+               if (use_fast_zclear(ctx, *buffers)) {
+                       if (nfb->hierz.clear_value != value) {
+                               /* Don't fast clear if we're changing
+                                * the depth value. */
+                               nfb->hierz.clear_value = value; /* recorded; the real fill below still runs */
+
+                       } else if (z == 0.0) {
+                               nctx->hierz.clear_seq++; /* clearing to 0.0 advances the sequence counter */
+                               context_dirty(ctx, ZCLEAR);
+
+                               if ((nctx->hierz.clear_seq & 7) != 0 &&
+                                   nctx->hierz.clear_seq != 1)
+                                       /* We didn't wrap around -- no need to
+                                        * clear the depth buffer for real. */
+                                       return;
+
+                       } else if (z == 1.0) {
+                               nctx->hierz.clear_seq--; /* clearing to 1.0 steps the sequence counter back */
+                               context_dirty(ctx, ZCLEAR);
+
+                               if ((nctx->hierz.clear_seq & 7) != 7)
+                                       /* No wrap around */
+                                       return;
+                       } /* any other clear depth goes straight to the real fill */
+               }
+
+               value = pack_zs_f(s->format,
+                                 (z + (nctx->hierz.clear_seq & 7)) / 8, 0); /* depth placed inside the 1/8 range selected by the low 3 bits of clear_seq */
+               context_drv(ctx)->surface_fill(ctx, s, ~0, value, x, y, w, h); /* real clear of the depth surface over the scissor region */
+       }
+}
+
+static void
+nv17_zclear(GLcontext *ctx, GLbitfield *buffers) /* Depth clear for chipsets >= 0x17; may remove BUFFER_BIT_DEPTH from *buffers. */
 {
        struct nouveau_context *nctx = to_nouveau_context(ctx);
        struct nouveau_channel *chan = context_chan(ctx);
        struct nouveau_grobj *celsius = context_eng3d(ctx);
        struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(
                ctx->DrawBuffer);
+       struct nouveau_surface *s = &to_nouveau_renderbuffer(
+               nfb->base._DepthBuffer->Wrapped)->surface; /* NOTE(review): _DepthBuffer is dereferenced without a NULL test -- confirm callers guarantee a depth buffer */
 
-       nouveau_validate_framebuffer(ctx);
+       /* Clear the hierarchical depth buffer */
+       BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_FILL_VALUE, 1); /* NOTE(review): the removed code only reached this when nfb->hierz.bo was set -- confirm that guard is no longer needed */
+       OUT_RING(chan, pack_zs_f(s->format, ctx->Depth.Clear, 0));
+       BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_BUFFER_CLEAR, 1);
+       OUT_RING(chan, 1);
 
-       if ((buffers & BUFFER_BIT_DEPTH) &&
-           ctx->Depth.Mask && nfb->hierz.bo) {
-               struct nouveau_surface *s = &to_nouveau_renderbuffer(
-                       nfb->base._DepthBuffer->Wrapped)->surface;
+       /* Mark the depth buffer as cleared */
+       if (use_fast_zclear(ctx, *buffers)) {
+               if (nctx->hierz.clear_seq) /* while clear_seq is still 0 the depth bit stays in *buffers */
+                       *buffers &= ~BUFFER_BIT_DEPTH;
 
-               /* Clear the hierarchical depth buffer */
-               BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_FILL_VALUE, 1);
-               OUT_RING(chan, pack_zs_f(s->format, ctx->Depth.Clear, 0));
-               BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_BUFFER_CLEAR, 1);
-               OUT_RING(chan, 1);
+               nfb->hierz.clear_value =
+                       pack_zs_f(s->format, ctx->Depth.Clear, 0); /* same packed value that was sent as the fill value above */
+               nctx->hierz.clear_seq++;
 
-               /* Mark the depth buffer as cleared */
-               if (use_fast_zclear(ctx, buffers)) {
-                       if (nctx->hierz.clear_seq)
-                               buffers &= ~BUFFER_BIT_DEPTH;
+               context_dirty(ctx, ZCLEAR); /* presumably schedules the ZCLEAR state for re-emission -- confirm against context_dirty */
+       }
+}
 
-                       nfb->hierz.clear_value =
-                               pack_zs_f(s->format, ctx->Depth.Clear, 0);
-                       nctx->hierz.clear_seq++;
+static void
+nv10_clear(GLcontext *ctx, GLbitfield buffers)
+{
+       nouveau_validate_framebuffer(ctx);
 
-                       context_dirty(ctx, ZCLEAR);
-               }
+       if ((buffers & BUFFER_BIT_DEPTH) && ctx->Depth.Mask) {
+               if (context_chipset(ctx) >= 0x17)
+                       nv17_zclear(ctx, &buffers);
+               else
+                       nv10_zclear(ctx, &buffers);
        }
 
        nouveau_clear(ctx, buffers);
index 340ba05adee05aed18c6b67f084d8c2bd6d7e0de..61dceab7b61253411044f72372ecca93af0cbfb1 100644 (file)
@@ -37,6 +37,12 @@ enum {
 /* nv10_context.c */
 extern const struct nouveau_driver nv10_driver;
 
+GLboolean
+nv10_use_viewport_zclear(GLcontext *ctx); /* true when the viewport-transform based Z clear applies to ctx */
+
+float
+nv10_transform_depth(GLcontext *ctx, float z); /* scales depth z for ctx, taking the viewport Z clear state into account */
+
 /* nv10_render.c */
 void
 nv10_render_init(GLcontext *ctx);
index 98eb0e8ecebb13c921cbfafdfa80315859535a1b..f9f3ebaa8d0a1c42e15259ee24bbc0e394855884 100644 (file)
@@ -172,12 +172,15 @@ nv10_emit_viewport(GLcontext *ctx, int emit)
 {
        struct nouveau_channel *chan = context_chan(ctx);
        struct nouveau_grobj *celsius = context_eng3d(ctx);
+       struct gl_viewport_attrib *vp = &ctx->Viewport;
        struct gl_framebuffer *fb = ctx->DrawBuffer;
        float a[4] = {};
 
        get_viewport_translate(ctx, a);
        a[0] -= 2048;
        a[1] -= 2048;
+       if (nv10_use_viewport_zclear(ctx))
+               a[2] = nv10_transform_depth(ctx, (vp->Far + vp->Near) / 2);
 
        BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4);
        OUT_RINGp(chan, a, 4);
@@ -204,5 +207,10 @@ nv10_emit_zclear(GLcontext *ctx, int emit)
                OUT_RING(chan, nctx->hierz.clear_blocked ? 0 : 1);
                OUT_RING(chan, nfb->hierz.clear_value |
                         (nctx->hierz.clear_seq & 0xff));
+       } else {
+               BEGIN_RING(chan, celsius, NV10TCL_DEPTH_RANGE_NEAR, 2);
+               OUT_RINGf(chan, nv10_transform_depth(ctx, 0));
+               OUT_RINGf(chan, nv10_transform_depth(ctx, 1));
+               context_dirty(ctx, VIEWPORT);
        }
 }
index 0e592a16292e9aa8c675104e7e2a26415db031c1..6b2ede88e67d9292eb9e3a55ea31a7fad1869398 100644 (file)
@@ -479,6 +479,9 @@ nv10_emit_projection(GLcontext *ctx, int emit)
        _math_matrix_ctr(&m);
        get_viewport_scale(ctx, m.m);
 
+       if (nv10_use_viewport_zclear(ctx))
+               m.m[MAT_SZ] /= 8;
+
        if (nctx->fallback == HWTNL)
                _math_matrix_mul_matrix(&m, &m, &ctx->_ModelProjectMatrix);