v3d: Use the TLB R/B swapping instead of recompiles when available.
author: Eric Anholt <eric@anholt.net>
Mon, 10 Sep 2018 15:19:48 +0000 (08:19 -0700)
committer: Eric Anholt <eric@anholt.net>
Thu, 1 Nov 2018 20:56:30 +0000 (13:56 -0700)
The recompile reduction is nice, but this also makes it so that a straight
texture copy could get optimized some day to not unpack/repack the f16
values.

src/gallium/drivers/v3d/v3d_resource.c
src/gallium/drivers/v3d/v3d_resource.h
src/gallium/drivers/v3d/v3dx_rcl.c
src/gallium/drivers/v3d/v3dx_state.c

index b9c578b9cf54e223ab94968eb78e965b6c41587b..45e2edf5ab65569a8d3bb2915767a8ff5e3998ea 100644 (file)
@@ -811,6 +811,12 @@ v3d_create_surface(struct pipe_context *pctx,
 
         surface->format = v3d_get_rt_format(&screen->devinfo, psurf->format);
 
+        const struct util_format_description *desc =
+                util_format_description(psurf->format);
+
+        surface->swap_rb = (desc->swizzle[0] == PIPE_SWIZZLE_Z &&
+                            psurf->format != PIPE_FORMAT_B5G6R5_UNORM);
+
         if (util_format_is_depth_or_stencil(psurf->format)) {
                 switch (psurf->format) {
                 case PIPE_FORMAT_Z16_UNORM:
index 141c4ca1f61863eb8d4de03b3bbc536471d21632..95ee0eb7d9cdda1b1b091c7273f58cbfe0ebc29f 100644 (file)
@@ -104,6 +104,13 @@ struct v3d_surface {
          */
         uint8_t internal_bpp;
 
+        /**
+         * If the R and B channels should be swapped.  On V3D 3.x, we do it in
+         * the shader and the blend equation.  On V3D 4.1+, we can use the new
+         * TLB load/store flags instead of recompiling.
+         */
+        bool swap_rb;
+
         uint32_t padded_height_of_output_image_in_uif_blocks;
 
         /* If the resource being referenced is separate stencil, then this is
index 3a76b0f3b24ff2b972859009f57eae01132939d3..01a907b0a86042bc481fa4eeb42eebc13c82971d 100644 (file)
@@ -74,6 +74,7 @@ load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer,
                         load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
                 else
                         load.input_image_format = surf->format;
+                load.r_b_swap = surf->swap_rb;
 
                 if (surf->tiling == VC5_TILING_UIF_NO_XOR ||
                     surf->tiling == VC5_TILING_UIF_XOR) {
@@ -137,6 +138,7 @@ store_general(struct v3d_job *job,
                 else
                         store.output_image_format = surf->format;
 
+                store.r_b_swap = surf->swap_rb;
                 store.memory_format = surf->tiling;
 
                 if (surf->tiling == VC5_TILING_UIF_NO_XOR ||
index 4bba8992c0072dd8fd975f8be30230ba5030d4b9..f40febb0ab2d57ef4c02cb99292346a8effd89e3 100644 (file)
@@ -481,6 +481,7 @@ v3d_set_framebuffer_state(struct pipe_context *pctx,
                 struct pipe_surface *cbuf = v3d->framebuffer.cbufs[i];
                 if (!cbuf)
                         continue;
+                struct v3d_surface *v3d_cbuf = v3d_surface(cbuf);
 
                 const struct util_format_description *desc =
                         util_format_description(cbuf->format);
@@ -488,10 +489,8 @@ v3d_set_framebuffer_state(struct pipe_context *pctx,
                 /* For BGRA8 formats (DRI window system default format), we
                  * need to swap R and B, since the HW's format is RGBA8.
                  */
-                if (desc->swizzle[0] == PIPE_SWIZZLE_Z &&
-                    cbuf->format != PIPE_FORMAT_B5G6R5_UNORM) {
+                if (v3d->screen->devinfo.ver < 42 && v3d_cbuf->swap_rb)
                         v3d->swap_color_rb |= 1 << i;
-                }
 
                 if (desc->swizzle[3] == PIPE_SWIZZLE_1)
                         v3d->blend_dst_alpha_one |= 1 << i;