dri/nv10: Use fast Z clears.
author Francisco Jerez <currojerez@riseup.net>
Sat, 25 Sep 2010 13:29:02 +0000 (15:29 +0200)
committer Francisco Jerez <currojerez@riseup.net>
Thu, 30 Sep 2010 14:48:28 +0000 (16:48 +0200)
src/mesa/drivers/dri/nouveau/nouveau_class.h
src/mesa/drivers/dri/nouveau/nouveau_context.h
src/mesa/drivers/dri/nouveau/nouveau_fbo.h
src/mesa/drivers/dri/nouveau/nv10_context.c
src/mesa/drivers/dri/nouveau/nv10_driver.h
src/mesa/drivers/dri/nouveau/nv10_state_fb.c

index 5cb13acf7bcc397afac52b3e2ee6e5112f708a2e..d41d431f796f1b48cc563ce35b0b285f07bc2771 100644 (file)
@@ -3191,6 +3191,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define  NV17TCL_DMA_IN_MEMORY4                                                                0x000001ac
 #define  NV17TCL_DMA_IN_MEMORY5                                                                0x000001b0
 #define  NV17TCL_COLOR_MASK_ENABLE                                                     0x000002bc
+#define  NV17TCL_ZCLEAR_ENABLE                                                         0x000003f8
+#define  NV17TCL_ZCLEAR_VALUE                                                          0x000003fc
+#define   NV17TCL_ZCLEAR_VALUE_DEPTH_SHIFT                                             8
+#define   NV17TCL_ZCLEAR_VALUE_DEPTH_MASK                                              0xffffff00
+#define   NV17TCL_ZCLEAR_VALUE_SEQUENCE_SHIFT                                          0
+#define   NV17TCL_ZCLEAR_VALUE_SEQUENCE_MASK                                           0x000000ff
 #define  NV17TCL_LMA_DEPTH_BUFFER_PITCH                                                        0x00000d5c
 #define  NV17TCL_LMA_DEPTH_BUFFER_OFFSET                                               0x00000d60
 #define  NV17TCL_LMA_DEPTH_FILL_VALUE                                                  0x00000d68
index 3dbe72900a6b3cefad4fb88a294bdbfcb41012fe..5f00327119bb7f7aa807cd7e8d8c5f511ef68665 100644 (file)
@@ -67,6 +67,11 @@ struct nouveau_context {
        struct nouveau_hw_state hw;
        struct nouveau_bo_state bo;
        struct nouveau_render_state render;
+
+       struct {
+               GLboolean clear_blocked;
+               int clear_seq;
+       } hierz;
 };
 
 #define to_nouveau_context(ctx)        ((struct nouveau_context *)(ctx))
index 0fe6c08be552ea689ed8ba01c30bd178966b408d..05ea03a075f492518bdf8beed8c2de6cd9e2968e 100644 (file)
 
 struct nouveau_framebuffer {
        struct gl_framebuffer base;
-       struct nouveau_bo *lma_bo;
        GLboolean need_front;
+
+       struct {
+               struct nouveau_bo *bo;
+               uint32_t clear_value;
+       } hierz;
 };
 #define to_nouveau_framebuffer(x) ((struct nouveau_framebuffer *)(x))
 
index 08be2a25a0deed6502625df125002a773e5d8983..41723ff190f4a977ba3a97dc8361b2af8d8464f8 100644 (file)
@@ -40,9 +40,31 @@ static const struct dri_extension nv10_extensions[] = {
        { NULL,                         NULL }
 };
 
+static GLboolean
+use_fast_zclear(GLcontext *ctx, GLbitfield buffers)
+{
+       struct nouveau_context *nctx = to_nouveau_context(ctx);
+       struct gl_framebuffer *fb = ctx->DrawBuffer;
+
+       if (buffers & BUFFER_BIT_STENCIL) {
+               /*
+                * The stencil test is bypassed when fast Z clears are
+                * enabled.
+                */
+               nctx->hierz.clear_blocked = GL_TRUE;
+               context_dirty(ctx, ZCLEAR);
+               return GL_FALSE;
+       }
+
+       return !nctx->hierz.clear_blocked &&
+               fb->_Xmax == fb->Width && fb->_Xmin == 0 &&
+               fb->_Ymax == fb->Height && fb->_Ymin == 0;
+}
+
 static void
 nv10_clear(GLcontext *ctx, GLbitfield buffers)
 {
+       struct nouveau_context *nctx = to_nouveau_context(ctx);
        struct nouveau_channel *chan = context_chan(ctx);
        struct nouveau_grobj *celsius = context_eng3d(ctx);
        struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(
@@ -50,16 +72,28 @@ nv10_clear(GLcontext *ctx, GLbitfield buffers)
 
        nouveau_validate_framebuffer(ctx);
 
-       /* Clear the LMA depth buffer, if present. */
-       if ((buffers & BUFFER_BIT_DEPTH) && ctx->Depth.Mask &&
-           nfb->lma_bo) {
+       if ((buffers & BUFFER_BIT_DEPTH) &&
+           ctx->Depth.Mask && nfb->hierz.bo) {
                struct nouveau_surface *s = &to_nouveau_renderbuffer(
                        nfb->base._DepthBuffer->Wrapped)->surface;
 
+               /* Clear the hierarchical depth buffer */
                BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_FILL_VALUE, 1);
                OUT_RING(chan, pack_zs_f(s->format, ctx->Depth.Clear, 0));
                BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_BUFFER_CLEAR, 1);
                OUT_RING(chan, 1);
+
+               /* Mark the depth buffer as cleared */
+               if (use_fast_zclear(ctx, buffers)) {
+                       if (nctx->hierz.clear_seq)
+                               buffers &= ~BUFFER_BIT_DEPTH;
+
+                       nfb->hierz.clear_value =
+                               pack_zs_f(s->format, ctx->Depth.Clear, 0);
+                       nctx->hierz.clear_seq++;
+
+                       context_dirty(ctx, ZCLEAR);
+               }
        }
 
        nouveau_clear(ctx, buffers);
@@ -423,7 +457,8 @@ const struct nouveau_driver nv10_driver = {
                nv10_emit_tex_obj,
                nouveau_emit_nothing,
                nouveau_emit_nothing,
-               nv10_emit_viewport
+               nv10_emit_viewport,
+               nv10_emit_zclear
        },
-       .num_emit = NUM_NOUVEAU_STATE,
+       .num_emit = NUM_NV10_STATE,
 };
index cefd6c6fba8494ec4d2ce481baaf9f1e385fc12d..340ba05adee05aed18c6b67f084d8c2bd6d7e0de 100644 (file)
 #ifndef __NV10_DRIVER_H__
 #define __NV10_DRIVER_H__
 
+enum {
+       NOUVEAU_STATE_ZCLEAR = NUM_NOUVEAU_STATE,
+       NUM_NV10_STATE
+};
+
 #define NV10_TEXTURE_UNITS 2
 
 /* nv10_context.c */
@@ -52,6 +57,9 @@ nv10_emit_scissor(GLcontext *ctx, int emit);
 void
 nv10_emit_viewport(GLcontext *ctx, int emit);
 
+void
+nv10_emit_zclear(GLcontext *ctx, int emit);
+
 /* nv10_state_polygon.c */
 void
 nv10_emit_cull_face(GLcontext *ctx, int emit);
index a2fcb6b695983db4376d6ac7bcf21dc176ef0c69..98eb0e8ecebb13c921cbfafdfa80315859535a1b 100644 (file)
@@ -62,14 +62,14 @@ setup_lma_buffer(GLcontext *ctx)
                height = align(fb->Height, 2),
                size = pitch * height;
 
-       if (!nfb->lma_bo || nfb->lma_bo->size != size) {
-               nouveau_bo_ref(NULL, &nfb->lma_bo);
+       if (!nfb->hierz.bo || nfb->hierz.bo->size != size) {
+               nouveau_bo_ref(NULL, &nfb->hierz.bo);
                nouveau_bo_new(context_dev(ctx), NOUVEAU_BO_VRAM, 0, size,
-                              &nfb->lma_bo);
+                              &nfb->hierz.bo);
        }
 
        nouveau_bo_markl(bctx, celsius, NV17TCL_LMA_DEPTH_BUFFER_OFFSET,
-                        nfb->lma_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+                        nfb->hierz.bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
 
        WAIT_RING(chan, 9);
        BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_WINDOW_X, 4);
@@ -134,8 +134,10 @@ nv10_emit_framebuffer(GLcontext *ctx, int emit)
                nouveau_bo_markl(bctx, celsius, NV10TCL_ZETA_OFFSET,
                                 s->bo, 0, bo_flags);
 
-               if (context_chipset(ctx) >= 0x17)
+               if (context_chipset(ctx) >= 0x17) {
                        setup_lma_buffer(ctx);
+                       context_dirty(ctx, ZCLEAR);
+               }
        }
 
        BEGIN_RING(chan, celsius, NV10TCL_RT_FORMAT, 2);
@@ -187,3 +189,20 @@ nv10_emit_viewport(GLcontext *ctx, int emit)
 
        context_dirty(ctx, PROJECTION);
 }
+
+void
+nv10_emit_zclear(GLcontext *ctx, int emit)
+{
+       struct nouveau_context *nctx = to_nouveau_context(ctx);
+       struct nouveau_channel *chan = context_chan(ctx);
+       struct nouveau_grobj *celsius = context_eng3d(ctx);
+       struct nouveau_framebuffer *nfb =
+               to_nouveau_framebuffer(ctx->DrawBuffer);
+
+       if (nfb->hierz.bo) {
+               BEGIN_RING(chan, celsius, NV17TCL_ZCLEAR_ENABLE, 2);
+               OUT_RING(chan, nctx->hierz.clear_blocked ? 0 : 1);
+               OUT_RING(chan, nfb->hierz.clear_value |
+                        (nctx->hierz.clear_seq & 0xff));
+       }
+}