From 8c26890ac251526bf74777faf62b0c8b84b0c19f Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 12 Mar 2019 23:16:37 +0000 Subject: [PATCH] panfrost/mfbd: Respect per-job depth write flag While a depth buffer may be supplied, it only needs to be written to if the depth writemask is set for any draw AND if the depth buffer is not immediately invalidated (as is the case for scanout). This refactors panfrost_job to provide a depth write requirement, which is now implemented for MFBD depth buffers. Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_context.c | 30 ++++++++++++---------- src/gallium/drivers/panfrost/pan_job.h | 9 +++++-- src/gallium/drivers/panfrost/pan_mfbd.c | 21 ++++++++++++--- src/gallium/drivers/panfrost/pan_sfbd.c | 2 +- 4 files changed, 42 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index cb226cc2220..a038ea122f7 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -50,19 +50,6 @@ extern const char *pan_counters_base; /* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */ //#define DRY_RUN -/* TODO: Sample size, etc */ - -static void -panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled) -{ - struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); - - job->msaa |= enabled; - - SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, enabled); - SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !enabled); -} - /* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically * indepdent between color buffers and depth/stencil). To enable, we allocate * the AFBC metadata buffer and mark that it is enabled. We do -not- actually @@ -789,15 +776,30 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx) void panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) { + struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); + if (with_vertex_data) { panfrost_emit_vertex_data(ctx); } + bool msaa = ctx->rasterizer->base.multisample; + if (ctx->dirty & PAN_DIRTY_RASTERIZER) { ctx->payload_tiler.gl_enables = ctx->rasterizer->tiler_gl_enables; - panfrost_set_framebuffer_msaa(ctx, ctx->rasterizer->base.multisample); + + /* TODO: Sample size */ + SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa); + SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa); } + /* Enable job requirements at draw-time */ + + if (msaa) + job->requirements |= PAN_REQ_MSAA; + + if (ctx->depth_stencil->depth.writemask) + job->requirements |= PAN_REQ_DEPTH_WRITE; + if (ctx->occlusion_query) { ctx->payload_tiler.gl_enables |= MALI_OCCLUSION_QUERY | MALI_OCCLUSION_PRECISE; ctx->payload_tiler.postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu; diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h index 10503d944ac..30f1cf4bd5c 100644 --- a/src/gallium/drivers/panfrost/pan_job.h +++ b/src/gallium/drivers/panfrost/pan_job.h @@ -33,6 +33,9 @@ struct panfrost_job_key { struct pipe_surface *zsbuf; }; +#define PAN_REQ_MSAA (1 << 0) +#define PAN_REQ_DEPTH_WRITE (1 << 1) + /* A panfrost_job corresponds to a bound FBO we're rendering to, * collecting over multiple draws. */ @@ -48,8 +51,10 @@ struct panfrost_job { float clear_depth; unsigned clear_stencil; - /* Whether this job uses MSAA */ - bool msaa; + /* Whether this job uses the corresponding requirement (PAN_REQ_* + * bitmask) */ + unsigned requirements; + }; /* Functions for managing the above */ diff --git a/src/gallium/drivers/panfrost/pan_mfbd.c b/src/gallium/drivers/panfrost/pan_mfbd.c index b9c7cb221e7..68c842981f3 100644 --- a/src/gallium/drivers/panfrost/pan_mfbd.c +++ b/src/gallium/drivers/panfrost/pan_mfbd.c @@ -143,8 +143,6 @@ panfrost_mfbd_set_zsbuf( fbx->ds_afbc.zero1 = 0x10009; fbx->ds_afbc.padding = 0x1000; - - fb->unk3 |= MALI_MFBD_DEPTH_WRITE; } else if (rsrc->bo->layout == PAN_LINEAR) { fb->unk3 |= MALI_MFBD_EXTRA; fbx->flags |= MALI_EXTRA_PRESENT | MALI_EXTRA_ZS | 0x1; @@ -246,7 +244,21 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y) rts[0].framebuffer_stride = 0; } - if (job->msaa) { + /* When scanning out, the depth buffer is immediately invalidated, so + * we don't need to waste bandwidth writing it out. This can improve + * performance substantially (Z32_UNORM 1080p @ 60fps is 475 MB/s of + * memory bandwidth!). + * + * The exception is ReadPixels, but this is not supported on GLES so we + * can safely ignore it. */ + + if (panfrost_is_scanout(ctx)) { + job->requirements &= ~PAN_REQ_DEPTH_WRITE; + } + + /* Actualize the requirements */ + + if (job->requirements & PAN_REQ_MSAA) { rts[0].format.flags |= MALI_MFBD_FORMAT_MSAA; /* XXX */ @@ -254,6 +266,9 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y) fb.rt_count_2 = 4; } + if (job->requirements & PAN_REQ_DEPTH_WRITE) + fb.unk3 |= MALI_MFBD_DEPTH_WRITE; + if (ctx->pipe_framebuffer.nr_cbufs == 1) { struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture; diff --git a/src/gallium/drivers/panfrost/pan_sfbd.c b/src/gallium/drivers/panfrost/pan_sfbd.c index 0e283bbb082..1c08f97fd1d 100644 --- a/src/gallium/drivers/panfrost/pan_sfbd.c +++ b/src/gallium/drivers/panfrost/pan_sfbd.c @@ -132,7 +132,7 @@ panfrost_sfbd_fragment(struct panfrost_context *ctx, bool flip_y) /* TODO */ } - if (job->msaa) + if (job->requirements & PAN_REQ_MSAA) fb.format |= MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B; return panfrost_upload_transient(ctx, &fb, sizeof(fb)) | MALI_SFBD; -- 2.30.2