From 464f1fb73301709cdb33600a7230db59427d1870 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 24 Oct 2017 13:28:53 -0700 Subject: [PATCH] broadcom/vc5: Use raw stores for MSAA buffers. We were storing the resolved pixels in all cases, but nr_samples > 1 means we should be keeping the per-sample values. We will probably want to change the job structure at some point, as we'll want to recognize full-buffer resolves and do the resolved store in the same job as the original rendering, meaning we'll need to track both the MSAA and single-sample resources in the job. However, this will be enough to build the rest of the MSAA support. --- src/gallium/drivers/vc5/vc5_rcl.c | 112 ++++++++++++++++++++++++++---- 1 file changed, 97 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/vc5/vc5_rcl.c b/src/gallium/drivers/vc5/vc5_rcl.c index 4ef2d83798b..e5763ac0ea8 100644 --- a/src/gallium/drivers/vc5/vc5_rcl.c +++ b/src/gallium/drivers/vc5/vc5_rcl.c @@ -27,7 +27,45 @@ #include "broadcom/cle/v3d_packet_v33_pack.h" static void -vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job) +store_raw(struct vc5_cl *cl, struct pipe_surface *psurf, int buffer, + bool color_clear, bool z_clear, bool s_clear) +{ + struct vc5_surface *surf = vc5_surface(psurf); + struct vc5_resource *rsc = vc5_resource(psurf->texture); + + cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { + store.raw_mode = true; + store.buffer_to_store = buffer; + store.address = cl_address(rsc->bo, surf->offset); + store.disable_colour_buffers_clear_on_write = !color_clear; + store.disable_z_buffer_clear_on_write = !z_clear; + store.disable_stencil_buffer_clear_on_write = !s_clear; + + struct vc5_resource_slice *slice = + &rsc->slices[psurf->u.tex.level]; + store.padded_height_of_output_image_in_uif_blocks = + (slice->size / slice->stride) / + (2 * vc5_utile_height(rsc->cpp)); + } +} + +static int +zs_buffer_from_pipe_bits(int pipe_clear_bits) +{ + switch (pipe_clear_bits & PIPE_CLEAR_DEPTHSTENCIL) { + case 
PIPE_CLEAR_DEPTHSTENCIL: + return ZSTENCIL; + case PIPE_CLEAR_DEPTH: + return Z; + case PIPE_CLEAR_STENCIL: + return STENCIL; + default: + return NONE; + } +} + +static void +vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job, int last_cbuf) { /* Emit the generic list in our indirect state -- the rcl will just * have pointers into it. @@ -66,21 +104,65 @@ vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job) cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); - cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) { - uint32_t color_write_enables = - job->resolve >> first_color_buffer_bit; + bool needs_color_clear = job->cleared & pipe_clear_color_buffers; + bool needs_z_clear = job->cleared & PIPE_CLEAR_DEPTH; + bool needs_s_clear = job->cleared & PIPE_CLEAR_STENCIL; - store.disable_color_buffer_write = (~color_write_enables) & 0xf; - store.enable_z_write = job->resolve & PIPE_CLEAR_DEPTH; - store.enable_stencil_write = job->resolve & PIPE_CLEAR_STENCIL; + uint32_t stores_pending = job->resolve; - store.disable_colour_buffers_clear_on_write = - (job->cleared & pipe_clear_color_buffers) == 0; - store.disable_z_buffer_clear_on_write = - !(job->cleared & PIPE_CLEAR_DEPTH); - store.disable_stencil_buffer_clear_on_write = - !(job->cleared & PIPE_CLEAR_STENCIL); - }; + /* Use raw stores for any MSAA surfaces. These output UIF tiled + * images where each 4x MSAA pixel is a 2x2 quad, and the format will + * be that of the internal_type/internal_bpp, rather than the format + * from GL's perspective. 
+ */ + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + uint32_t bit = PIPE_CLEAR_COLOR0 << i; + if (!(job->resolve & bit)) + continue; + + struct pipe_surface *psurf = job->cbufs[i]; + if (!psurf || psurf->texture->nr_samples <= 1) + continue; + + stores_pending &= ~bit; + store_raw(cl, psurf, RENDER_TARGET_0 + i, + !stores_pending && needs_color_clear, + !stores_pending && needs_z_clear, + !stores_pending && needs_s_clear); + + if (stores_pending) + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + } + + if (job->resolve & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf && + job->zsbuf->texture->nr_samples > 1) { + stores_pending &= ~PIPE_CLEAR_DEPTHSTENCIL; + store_raw(cl, job->zsbuf, + zs_buffer_from_pipe_bits(job->resolve), + !stores_pending && needs_color_clear, + !stores_pending && needs_z_clear, + !stores_pending && needs_s_clear); + + if (stores_pending) + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + } + + if (stores_pending) { + cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) { + + store.disable_color_buffer_write = + (~stores_pending >> first_color_buffer_bit) & 0xf; + store.enable_z_write = stores_pending & PIPE_CLEAR_DEPTH; + store.enable_stencil_write = stores_pending & PIPE_CLEAR_STENCIL; + + store.disable_colour_buffers_clear_on_write = + !needs_color_clear; + store.disable_z_buffer_clear_on_write = + !needs_z_clear; + store.disable_stencil_buffer_clear_on_write = + !needs_s_clear; + }; + } cl_emit(cl, RETURN_FROM_SUB_LIST, ret); @@ -272,7 +354,7 @@ vc5_emit_rcl(struct vc5_job *job) cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush); - vc5_rcl_emit_generic_per_tile_list(job); + vc5_rcl_emit_generic_per_tile_list(job, nr_cbufs - 1); cl_emit(&job->rcl, WAIT_ON_SEMAPHORE, sem); -- 2.30.2