From 065163bcd2df12494ca523538736282fc847fa6b Mon Sep 17 00:00:00 2001
From: Francisco Jerez
Date: Sat, 25 Sep 2010 15:29:02 +0200
Subject: [PATCH] dri/nv10: Use fast Z clears.

---
 src/mesa/drivers/dri/nouveau/nouveau_class.h | 6 +++
 .../drivers/dri/nouveau/nouveau_context.h | 5 +++
 src/mesa/drivers/dri/nouveau/nouveau_fbo.h | 6 ++-
 src/mesa/drivers/dri/nouveau/nv10_context.c | 45 ++++++++++++++++---
 src/mesa/drivers/dri/nouveau/nv10_driver.h | 8 ++++
 src/mesa/drivers/dri/nouveau/nv10_state_fb.c | 29 +++++++++---
 6 files changed, 88 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/nouveau/nouveau_class.h b/src/mesa/drivers/dri/nouveau/nouveau_class.h
index 5cb13acf7bc..d41d431f796 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_class.h
+++ b/src/mesa/drivers/dri/nouveau/nouveau_class.h
@@ -3191,6 +3191,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define NV17TCL_DMA_IN_MEMORY4 0x000001ac
 #define NV17TCL_DMA_IN_MEMORY5 0x000001b0
 #define NV17TCL_COLOR_MASK_ENABLE 0x000002bc
+#define NV17TCL_ZCLEAR_ENABLE 0x000003f8
+#define NV17TCL_ZCLEAR_VALUE 0x000003fc
+#define NV17TCL_ZCLEAR_VALUE_DEPTH_SHIFT 8
+#define NV17TCL_ZCLEAR_VALUE_DEPTH_MASK 0xffffff00
+#define NV17TCL_ZCLEAR_VALUE_SEQUENCE_SHIFT 0
+#define NV17TCL_ZCLEAR_VALUE_SEQUENCE_MASK 0x000000ff
 #define NV17TCL_LMA_DEPTH_BUFFER_PITCH 0x00000d5c
 #define NV17TCL_LMA_DEPTH_BUFFER_OFFSET 0x00000d60
 #define NV17TCL_LMA_DEPTH_FILL_VALUE 0x00000d68
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.h b/src/mesa/drivers/dri/nouveau/nouveau_context.h
index 3dbe72900a6..5f00327119b 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_context.h
+++ b/src/mesa/drivers/dri/nouveau/nouveau_context.h
@@ -67,6 +67,11 @@ struct nouveau_context {
 	struct nouveau_hw_state hw;
 	struct nouveau_bo_state bo;
 	struct nouveau_render_state render;
+
+	struct {
+		GLboolean clear_blocked;
+		int clear_seq;
+	} hierz;
 };
 
 #define to_nouveau_context(ctx) ((struct nouveau_context *)(ctx))
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.h b/src/mesa/drivers/dri/nouveau/nouveau_fbo.h
index 0fe6c08be55..05ea03a075f 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.h
+++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.h
@@ -29,8 +29,12 @@
 
 struct nouveau_framebuffer {
 	struct gl_framebuffer base;
-	struct nouveau_bo *lma_bo;
 	GLboolean need_front;
+
+	struct {
+		struct nouveau_bo *bo;
+		uint32_t clear_value;
+	} hierz;
 };
 
 #define to_nouveau_framebuffer(x) ((struct nouveau_framebuffer *)(x))
diff --git a/src/mesa/drivers/dri/nouveau/nv10_context.c b/src/mesa/drivers/dri/nouveau/nv10_context.c
index 08be2a25a0d..41723ff190f 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_context.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_context.c
@@ -40,9 +40,31 @@ static const struct dri_extension nv10_extensions[] = {
 	{ NULL, NULL }
 };
 
+static GLboolean
+use_fast_zclear(GLcontext *ctx, GLbitfield buffers)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct gl_framebuffer *fb = ctx->DrawBuffer;
+
+	if (buffers & BUFFER_BIT_STENCIL) {
+		/*
+		 * The stencil test is bypassed when fast Z clears are
+		 * enabled.
+		 */
+		nctx->hierz.clear_blocked = GL_TRUE;
+		context_dirty(ctx, ZCLEAR);
+		return GL_FALSE;
+	}
+
+	return !nctx->hierz.clear_blocked &&
+		fb->_Xmax == fb->Width && fb->_Xmin == 0 &&
+		fb->_Ymax == fb->Height && fb->_Ymin == 0;
+}
+
 static void
 nv10_clear(GLcontext *ctx, GLbitfield buffers)
 {
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
 	struct nouveau_channel *chan = context_chan(ctx);
 	struct nouveau_grobj *celsius = context_eng3d(ctx);
 	struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(
@@ -50,16 +72,28 @@ nv10_clear(GLcontext *ctx, GLbitfield buffers)
 
 	nouveau_validate_framebuffer(ctx);
 
-	/* Clear the LMA depth buffer, if present. */
-	if ((buffers & BUFFER_BIT_DEPTH) && ctx->Depth.Mask &&
-	    nfb->lma_bo) {
+	if ((buffers & BUFFER_BIT_DEPTH) &&
+	    ctx->Depth.Mask && nfb->hierz.bo) {
 		struct nouveau_surface *s = &to_nouveau_renderbuffer(
 			nfb->base._DepthBuffer->Wrapped)->surface;
 
+		/* Clear the hierarchical depth buffer */
 		BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_FILL_VALUE, 1);
 		OUT_RING(chan, pack_zs_f(s->format, ctx->Depth.Clear, 0));
 		BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_BUFFER_CLEAR, 1);
 		OUT_RING(chan, 1);
+
+		/* Mark the depth buffer as cleared */
+		if (use_fast_zclear(ctx, buffers)) {
+			if (nctx->hierz.clear_seq)
+				buffers &= ~BUFFER_BIT_DEPTH;
+
+			nfb->hierz.clear_value =
+				pack_zs_f(s->format, ctx->Depth.Clear, 0);
+			nctx->hierz.clear_seq++;
+
+			context_dirty(ctx, ZCLEAR);
+		}
 	}
 
 	nouveau_clear(ctx, buffers);
@@ -423,7 +457,8 @@ const struct nouveau_driver nv10_driver = {
 		nv10_emit_tex_obj,
 		nouveau_emit_nothing,
 		nouveau_emit_nothing,
-		nv10_emit_viewport
+		nv10_emit_viewport,
+		nv10_emit_zclear
 	},
-	.num_emit = NUM_NOUVEAU_STATE,
+	.num_emit = NUM_NV10_STATE,
 };
diff --git a/src/mesa/drivers/dri/nouveau/nv10_driver.h b/src/mesa/drivers/dri/nouveau/nv10_driver.h
index cefd6c6fba8..340ba05adee 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_driver.h
+++ b/src/mesa/drivers/dri/nouveau/nv10_driver.h
@@ -27,6 +27,11 @@
 #ifndef __NV10_DRIVER_H__
 #define __NV10_DRIVER_H__
 
+enum {
+	NOUVEAU_STATE_ZCLEAR = NUM_NOUVEAU_STATE,
+	NUM_NV10_STATE
+};
+
 #define NV10_TEXTURE_UNITS 2
 
 /* nv10_context.c */
@@ -52,6 +57,9 @@ nv10_emit_scissor(GLcontext *ctx, int emit);
 void
 nv10_emit_viewport(GLcontext *ctx, int emit);
 
+void
+nv10_emit_zclear(GLcontext *ctx, int emit);
+
 /* nv10_state_polygon.c */
 void
 nv10_emit_cull_face(GLcontext *ctx, int emit);
diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_fb.c b/src/mesa/drivers/dri/nouveau/nv10_state_fb.c
index a2fcb6b6959..98eb0e8eceb 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_state_fb.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_fb.c
@@ -62,14 +62,14 @@ setup_lma_buffer(GLcontext *ctx)
 		height = align(fb->Height, 2),
 		size = pitch * height;
 
-	if (!nfb->lma_bo || nfb->lma_bo->size != size) {
-		nouveau_bo_ref(NULL, &nfb->lma_bo);
+	if (!nfb->hierz.bo || nfb->hierz.bo->size != size) {
+		nouveau_bo_ref(NULL, &nfb->hierz.bo);
 		nouveau_bo_new(context_dev(ctx), NOUVEAU_BO_VRAM, 0, size,
-			       &nfb->lma_bo);
+			       &nfb->hierz.bo);
 	}
 
 	nouveau_bo_markl(bctx, celsius, NV17TCL_LMA_DEPTH_BUFFER_OFFSET,
-			 nfb->lma_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+			 nfb->hierz.bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
 
 	WAIT_RING(chan, 9);
 	BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_WINDOW_X, 4);
@@ -134,8 +134,10 @@ nv10_emit_framebuffer(GLcontext *ctx, int emit)
 		nouveau_bo_markl(bctx, celsius, NV10TCL_ZETA_OFFSET,
 				 s->bo, 0, bo_flags);
 
-		if (context_chipset(ctx) >= 0x17)
+		if (context_chipset(ctx) >= 0x17) {
 			setup_lma_buffer(ctx);
+			context_dirty(ctx, ZCLEAR);
+		}
 	}
 
 	BEGIN_RING(chan, celsius, NV10TCL_RT_FORMAT, 2);
@@ -187,3 +189,20 @@ nv10_emit_viewport(GLcontext *ctx, int emit)
 
 	context_dirty(ctx, PROJECTION);
 }
+
+void
+nv10_emit_zclear(GLcontext *ctx, int emit)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *celsius = context_eng3d(ctx);
+	struct nouveau_framebuffer *nfb =
+		to_nouveau_framebuffer(ctx->DrawBuffer);
+
+	if (nfb->hierz.bo) {
+		BEGIN_RING(chan, celsius, NV17TCL_ZCLEAR_ENABLE, 2);
+		OUT_RING(chan, nctx->hierz.clear_blocked ? 0 : 1);
+		OUT_RING(chan, nfb->hierz.clear_value |
+			 (nctx->hierz.clear_seq & 0xff));
+	}
+}
-- 
2.30.2