broadcom/vc5: Use raw stores for MSAA buffers.
authorEric Anholt <eric@anholt.net>
Tue, 24 Oct 2017 20:28:53 +0000 (13:28 -0700)
committerEric Anholt <eric@anholt.net>
Mon, 30 Oct 2017 20:31:27 +0000 (13:31 -0700)
We were storing the resolved pixels in all cases, but nr_samples > 0 means
we should be keeping the per-sample values.

We will probably want to change the job structure at some point, as we'll
want to recognize full-buffer resolves and do the resolved store in the
same job as the original rendering, meaning we'll need to track both the
MSAA and single-sample resources in the job.  However, this will be enough
to build the rest of the MSAA support.

src/gallium/drivers/vc5/vc5_rcl.c

index 4ef2d83798b9dd386525d97116612e2065b9a981..e5763ac0ea83c895c7d5ec518487c30288d5b83b 100644 (file)
 #include "broadcom/cle/v3d_packet_v33_pack.h"
 
 static void
-vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job)
+store_raw(struct vc5_cl *cl, struct pipe_surface *psurf, int buffer,
+          bool color_clear, bool z_clear, bool s_clear)
+{
+        struct vc5_surface *surf = vc5_surface(psurf);
+        struct vc5_resource *rsc = vc5_resource(psurf->texture);
+
+        cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
+                store.raw_mode = true;
+                store.buffer_to_store = buffer;
+                store.address = cl_address(rsc->bo, surf->offset);
+                store.disable_colour_buffers_clear_on_write = !color_clear;
+                store.disable_z_buffer_clear_on_write = !z_clear;
+                store.disable_stencil_buffer_clear_on_write = !s_clear;
+
+                struct vc5_resource_slice *slice =
+                        &rsc->slices[psurf->u.tex.level];
+                store.padded_height_of_output_image_in_uif_blocks =
+                        (slice->size / slice->stride) /
+                        (2 * vc5_utile_height(rsc->cpp));
+        }
+}
+
+static int
+zs_buffer_from_pipe_bits(int pipe_clear_bits)
+{
+        switch (pipe_clear_bits & PIPE_CLEAR_DEPTHSTENCIL) {
+        case PIPE_CLEAR_DEPTHSTENCIL:
+                return ZSTENCIL;
+        case PIPE_CLEAR_DEPTH:
+                return Z;
+        case PIPE_CLEAR_STENCIL:
+                return STENCIL;
+        default:
+                return NONE;
+        }
+}
+
+static void
+vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job, int last_cbuf)
 {
         /* Emit the generic list in our indirect state -- the rcl will just
          * have pointers into it.
@@ -66,21 +104,65 @@ vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job)
 
         cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
 
-        cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
-                uint32_t color_write_enables =
-                        job->resolve >> first_color_buffer_bit;
+        bool needs_color_clear = job->cleared & pipe_clear_color_buffers;
+        bool needs_z_clear = job->cleared & PIPE_CLEAR_DEPTH;
+        bool needs_s_clear = job->cleared & PIPE_CLEAR_STENCIL;
 
-                store.disable_color_buffer_write = (~color_write_enables) & 0xf;
-                store.enable_z_write = job->resolve & PIPE_CLEAR_DEPTH;
-                store.enable_stencil_write = job->resolve & PIPE_CLEAR_STENCIL;
+        uint32_t stores_pending = job->resolve;
 
-                store.disable_colour_buffers_clear_on_write =
-                        (job->cleared & pipe_clear_color_buffers) == 0;
-                store.disable_z_buffer_clear_on_write =
-                        !(job->cleared & PIPE_CLEAR_DEPTH);
-                store.disable_stencil_buffer_clear_on_write =
-                        !(job->cleared & PIPE_CLEAR_STENCIL);
-        };
+        /* Use raw stores for any MSAA surfaces.  These output UIF tiled
+         * images where each 4x MSAA pixel is a 2x2 quad, and the format will
+         * be that of the internal_type/internal_bpp, rather than the format
+         * from GL's perspective.
+         */
+        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+                uint32_t bit = PIPE_CLEAR_COLOR0 << i;
+                if (!(job->resolve & bit))
+                        continue;
+
+                struct pipe_surface *psurf = job->cbufs[i];
+                if (!psurf || psurf->texture->nr_samples <= 1)
+                        continue;
+
+                stores_pending &= ~bit;
+                store_raw(cl, psurf, RENDER_TARGET_0 + i,
+                          !stores_pending && needs_color_clear,
+                          !stores_pending && needs_z_clear,
+                          !stores_pending && needs_s_clear);
+
+                if (stores_pending)
+                        cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
+        }
+
+        if (job->resolve & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf &&
+            job->zsbuf->texture->nr_samples > 1) {
+                stores_pending &= ~PIPE_CLEAR_DEPTHSTENCIL;
+                store_raw(cl, job->zsbuf,
+                          zs_buffer_from_pipe_bits(job->resolve),
+                          !stores_pending && needs_color_clear,
+                          !stores_pending && needs_z_clear,
+                          !stores_pending && needs_s_clear);
+
+                if (stores_pending)
+                        cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
+        }
+
+        if (stores_pending) {
+                cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
+
+                        store.disable_color_buffer_write =
+                                (~stores_pending >> first_color_buffer_bit) & 0xf;
+                        store.enable_z_write = stores_pending & PIPE_CLEAR_DEPTH;
+                        store.enable_stencil_write = stores_pending & PIPE_CLEAR_STENCIL;
+
+                        store.disable_colour_buffers_clear_on_write =
+                                !needs_color_clear;
+                        store.disable_z_buffer_clear_on_write =
+                                !needs_z_clear;
+                        store.disable_stencil_buffer_clear_on_write =
+                                !needs_s_clear;
+                };
+        }
 
         cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
 
@@ -272,7 +354,7 @@ vc5_emit_rcl(struct vc5_job *job)
 
         cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);
 
-        vc5_rcl_emit_generic_per_tile_list(job);
+        vc5_rcl_emit_generic_per_tile_list(job, nr_cbufs - 1);
 
         cl_emit(&job->rcl, WAIT_ON_SEMAPHORE, sem);