freedreno/a6xx: Don't take pipe_blit_info in emit_blit_dst
[mesa.git] / src / gallium / drivers / freedreno / a6xx / fd6_blitter.c
index 122a63323b4aaec410dad43afb5076f96ecffe8b..91ee6811c8989899c7015f7efc8e36e056c03e29 100644 (file)
@@ -30,6 +30,7 @@
 
 #include "freedreno_blitter.h"
 #include "freedreno_fence.h"
+#include "freedreno_log.h"
 #include "freedreno_resource.h"
 
 #include "fd6_blitter.h"
 #include "fd6_resource.h"
 #include "fd6_pack.h"
 
+/**
+ * Select the enum a6xx_2d_ifmt value (R2D_*) that goes with a color format.
+ *
+ * emit_blit_setup() places the result in the IFMT field of
+ * RB_2D_BLIT_CNTL.  The cases are grouped by the value they return; a
+ * format that is not listed reaches unreachable("bad format").
+ *
+ * @param fmt  a6xx color format to classify
+ * @return     the matching R2D_* value
+ */
+static inline enum a6xx_2d_ifmt
+fd6_ifmt(enum a6xx_format fmt)
+{
+       switch (fmt) {
+       /* 8-bit (and packed 4/5/6-bit) UNORM/SNORM formats */
+       case FMT6_A8_UNORM:
+       case FMT6_8_UNORM:
+       case FMT6_8_SNORM:
+       case FMT6_8_8_UNORM:
+       case FMT6_8_8_SNORM:
+       case FMT6_8_8_8_8_UNORM:
+       case FMT6_8_8_8_X8_UNORM:
+       case FMT6_8_8_8_8_SNORM:
+       case FMT6_4_4_4_4_UNORM:
+       case FMT6_5_5_5_1_UNORM:
+       case FMT6_5_6_5_UNORM:
+               return R2D_UNORM8;
+
+       /* 32-bit pure integer formats */
+       case FMT6_32_UINT:
+       case FMT6_32_SINT:
+       case FMT6_32_32_UINT:
+       case FMT6_32_32_SINT:
+       case FMT6_32_32_32_32_UINT:
+       case FMT6_32_32_32_32_SINT:
+               return R2D_INT32;
+
+       /* 16-bit pure integer formats, plus 10_10_10_2 UINT */
+       case FMT6_16_UINT:
+       case FMT6_16_SINT:
+       case FMT6_16_16_UINT:
+       case FMT6_16_16_SINT:
+       case FMT6_16_16_16_16_UINT:
+       case FMT6_16_16_16_16_SINT:
+       case FMT6_10_10_10_2_UINT:
+               return R2D_INT16;
+
+       /* 8-bit pure integer formats; both Z24S8 variants are grouped here too */
+       case FMT6_8_UINT:
+       case FMT6_8_SINT:
+       case FMT6_8_8_UINT:
+       case FMT6_8_8_SINT:
+       case FMT6_8_8_8_8_UINT:
+       case FMT6_8_8_8_8_SINT:
+       case FMT6_Z24_UNORM_S8_UINT:
+       case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
+               return R2D_INT8;
+
+       /* 16-bit UNORM/SNORM formats share R2D_FLOAT32 with the 32-bit floats */
+       case FMT6_16_UNORM:
+       case FMT6_16_SNORM:
+       case FMT6_16_16_UNORM:
+       case FMT6_16_16_SNORM:
+       case FMT6_16_16_16_16_UNORM:
+       case FMT6_16_16_16_16_SNORM:
+       case FMT6_32_FLOAT:
+       case FMT6_32_32_FLOAT:
+       case FMT6_32_32_32_32_FLOAT:
+               return R2D_FLOAT32;
+
+       /* half-float formats; FMT6_10_10_10_2_UNORM_DEST is grouped with them */
+       case FMT6_16_FLOAT:
+       case FMT6_16_16_FLOAT:
+       case FMT6_16_16_16_16_FLOAT:
+       case FMT6_11_11_10_FLOAT:
+       case FMT6_10_10_10_2_UNORM_DEST:
+               return R2D_FLOAT16;
+
+       default:
+               unreachable("bad format");
+               return 0;
+       }
+}
+
 /* Make sure none of the requested dimensions extend beyond the size of the
  * resource.  Not entirely sure why this happens, but sometimes it does, and
  * w/ 2d blt doesn't have wrap modes like a sampler, so force those cases
@@ -58,7 +127,10 @@ ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl)
 static bool
 ok_format(enum pipe_format pfmt)
 {
-       enum a6xx_color_fmt fmt = fd6_pipe2color(pfmt);
+       enum a6xx_format fmt = fd6_pipe2color(pfmt);
+
+       if (util_format_is_compressed(pfmt))
+               return true;
 
        switch (pfmt) {
        case PIPE_FORMAT_Z24_UNORM_S8_UINT:
@@ -73,10 +145,7 @@ ok_format(enum pipe_format pfmt)
                break;
        }
 
-       if (fmt == ~0)
-               return false;
-
-       if (fd6_ifmt(fmt) == 0)
+       if (fmt == FMT6_NONE)
                return false;
 
        return true;
@@ -113,13 +182,8 @@ can_do_blit(const struct pipe_blit_info *info)
        fail_if(!ok_format(info->src.format));
        fail_if(!ok_format(info->dst.format));
 
-       /* We can blit if both or neither formats are compressed formats... */
-       fail_if(util_format_is_compressed(info->src.format) !=
-                       util_format_is_compressed(info->src.format));
-
-       /* ... but only if they're the same compression format. */
-       fail_if(util_format_is_compressed(info->src.format) &&
-                       info->src.format != info->dst.format);
+       debug_assert(!util_format_is_compressed(info->src.format));
+       debug_assert(!util_format_is_compressed(info->dst.format));
 
        fail_if(!ok_dims(info->src.resource, &info->src.box, info->src.level));
 
@@ -133,9 +197,6 @@ can_do_blit(const struct pipe_blit_info *info)
 
        fail_if(info->window_rectangle_include);
 
-       fail_if(util_format_is_srgb(info->src.format));
-       fail_if(util_format_is_srgb(info->dst.format));
-
        const struct util_format_description *src_desc =
                util_format_description(info->src.format);
        const struct util_format_description *dst_desc =
@@ -160,19 +221,70 @@ emit_setup(struct fd_batch *batch)
 {
        struct fd_ringbuffer *ring = batch->draw;
 
-       fd6_event_write(batch, ring, 0x1d, true);
-       fd6_event_write(batch, ring, FACENESS_FLUSH, true);
+       fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
+       fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
        fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
        fd6_event_write(batch, ring, PC_CCU_INVALIDATE_DEPTH, false);
+
+       /* A normal BLIT_OP_SCALE operation needs RB_CCU_CNTL set to its bypass value */
+       OUT_WFI5(ring);
+       OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
+       OUT_RING(ring, fd6_context(batch->ctx)->magic.RB_CCU_CNTL_bypass);
 }
 
-static uint32_t
-blit_control(enum a6xx_color_fmt fmt)
+/**
+ * Emit the 2D-blit state that depends only on the format and on the
+ * scissor/solid-color flags.  Called from emit_blit_buffer() and
+ * emit_blit_or_clear_texture().
+ *
+ * Writes, in order: a CP_SET_MARKER selecting RM6_BLIT2DSCALE,
+ * RB_2D_BLIT_CNTL and GRAS_2D_BLIT_CNTL (both receive the same value),
+ * SP_2D_SRC_FORMAT, and RB_UNKNOWN_8C01 (set to 0).
+ *
+ * @param ring            ringbuffer the packets are appended to
+ * @param pfmt            pipe format the blit control words are derived from
+ * @param scissor_enable  when true, the SCISSOR bit is set in the blit control
+ * @param color           when non-NULL, the SOLID_COLOR bit is set; the pointer
+ *                        is only tested here, never dereferenced
+ */
+static void
+emit_blit_setup(struct fd_ringbuffer *ring,
+               enum pipe_format pfmt, bool scissor_enable, union pipe_color_union *color)
 {
-       unsigned blit_cntl = 0xf00000;
-       blit_cntl |= A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt);
-       blit_cntl |= A6XX_RB_2D_BLIT_CNTL_IFMT(fd6_ifmt(fmt));
-       return blit_cntl;
+       enum a6xx_format fmt = fd6_pipe2color(pfmt);
+       bool is_srgb = util_format_is_srgb(pfmt);
+       enum a6xx_2d_ifmt ifmt = fd6_ifmt(fmt);
+
+       OUT_PKT7(ring, CP_SET_MARKER, 1);
+       OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
+
+       /* sRGB formats are expected to map to R2D_UNORM8 and are switched
+        * to the sRGB variant of that ifmt.
+        */
+       if (is_srgb) {
+               assert(ifmt == R2D_UNORM8);
+               ifmt = R2D_UNORM8_SRGB;
+       }
+
+       uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
+               A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt) |
+               A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt) |
+               COND(color, A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR) |
+               COND(scissor_enable, A6XX_RB_2D_BLIT_CNTL_SCISSOR);
+
+       OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
+       OUT_RING(ring, blit_cntl);
+
+       OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
+       OUT_RING(ring, blit_cntl);
+
+       /* Only SP_2D_SRC_FORMAT below sees this substitution; blit_cntl
+        * above was already built from the unmodified fmt.
+        */
+       if (fmt == FMT6_10_10_10_2_UNORM_DEST)
+               fmt = FMT6_16_16_16_16_FLOAT;
+
+       /* This register is probably badly named... it seems that it's
+        * controlling the internal/accumulator format or something like
+        * that. It's certainly not tied to only the src format.
+        */
+       OUT_PKT4(ring, REG_A6XX_SP_2D_SRC_FORMAT, 1);
+       OUT_RING(ring, A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(fmt) |
+                       COND(util_format_is_pure_sint(pfmt),
+                                       A6XX_SP_2D_SRC_FORMAT_SINT) |
+                       COND(util_format_is_pure_uint(pfmt),
+                                       A6XX_SP_2D_SRC_FORMAT_UINT) |
+                       COND(util_format_is_snorm(pfmt),
+                                       A6XX_SP_2D_SRC_FORMAT_SINT |
+                                               A6XX_SP_2D_SRC_FORMAT_NORM) |
+                       COND(util_format_is_unorm(pfmt),
+// TODO sometimes blob uses UINT+NORM but dEQP seems unhappy about that
+//                                             A6XX_SP_2D_SRC_FORMAT_UINT |
+                                       A6XX_SP_2D_SRC_FORMAT_NORM) |
+                       COND(is_srgb, A6XX_SP_2D_SRC_FORMAT_SRGB) |
+                       A6XX_SP_2D_SRC_FORMAT_MASK(0xf));
+
+       OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8C01, 1);
+       OUT_RING(ring, 0);
 }
 
 /* buffers need to be handled specially since x/width can exceed the bounds
@@ -232,15 +344,7 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
        sshift = sbox->x & 0x3f;
        dshift = dbox->x & 0x3f;
 
-       OUT_PKT7(ring, CP_SET_MARKER, 1);
-       OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
-
-       uint32_t blit_cntl = blit_control(RB6_R8_UNORM) | 0x20000000;
-       OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
-       OUT_RING(ring, blit_cntl);
-
-       OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
-       OUT_RING(ring, blit_cntl);
+       emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, NULL);
 
        for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {
                unsigned soff, doff, w, p;
@@ -258,7 +362,7 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
                 * Emit source:
                 */
                OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
-               OUT_RING(ring, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
+               OUT_RING(ring, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(FMT6_8_UNORM) |
                                A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
                                 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) |
                                 0x500000);
@@ -277,10 +381,10 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
                 * Emit destination:
                 */
                OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
-               OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
+               OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(FMT6_8_UNORM) |
                                 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                                 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
-               OUT_RELOCW(ring, dst->bo, doff, 0, 0);    /* RB_2D_DST_LO/HI */
+               OUT_RELOC(ring, dst->bo, doff, 0, 0);    /* RB_2D_DST_LO/HI */
                OUT_RING(ring, A6XX_RB_2D_DST_SIZE_PITCH(p));
                OUT_RING(ring, 0x00000000);
                OUT_RING(ring, 0x00000000);
@@ -305,12 +409,6 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
                OUT_RING(ring, 0x3f);
                OUT_WFI5(ring);
 
-               OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8C01, 1);
-               OUT_RING(ring, 0);
-
-               OUT_PKT4(ring, REG_A6XX_SP_2D_SRC_FORMAT, 1);
-               OUT_RING(ring, 0xf180);
-
                OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
                OUT_RING(ring, fd6_context(ctx)->magic.RB_UNKNOWN_8E04_blit);
 
@@ -324,18 +422,103 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
        }
 }
 
+/**
+ * Emit the 2D-blit destination description for one level/layer of a
+ * resource: RB_2D_DST_INFO and the registers that follow it (9 dwords in
+ * total), plus the RB_2D_DST_FLAGS_* registers when UBWC is enabled for
+ * that level.
+ *
+ * The destination is passed as resource/format/level/layer rather than as
+ * a pipe_blit_info.
+ *
+ * @param ring   ringbuffer the packets are appended to
+ * @param prsc   destination resource
+ * @param pfmt   pipe format to program for the destination
+ * @param level  mip level used for tile mode, pitch, UBWC state and offset
+ * @param layer  layer used for the offset and the UBWC flag reference
+ */
+static void
+emit_blit_dst(struct fd_ringbuffer *ring, struct pipe_resource *prsc, enum pipe_format pfmt, unsigned level, unsigned layer)
+{
+       struct fd_resource *dst = fd_resource(prsc);
+       enum a6xx_format fmt = fd6_pipe2color(pfmt);
+       enum a6xx_tile_mode tile = fd_resource_tile_mode(prsc, level);
+       enum a3xx_color_swap swap = fd6_resource_swap(dst, pfmt);
+       uint32_t pitch = fd_resource_pitch(dst, level);
+       bool ubwc_enabled = fd_resource_ubwc_enabled(dst, level);
+       unsigned off = fd_resource_offset(dst, level, layer);
+
+       /* A Z24S8 destination is programmed using the AS_R8G8B8A8 variant */
+       if (fmt == FMT6_Z24_UNORM_S8_UINT)
+               fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
+
+       OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
+       OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(fmt) |
+                       A6XX_RB_2D_DST_INFO_TILE_MODE(tile) |
+                       A6XX_RB_2D_DST_INFO_COLOR_SWAP(swap) |
+                       COND(util_format_is_srgb(pfmt), A6XX_RB_2D_DST_INFO_SRGB) |
+                       COND(ubwc_enabled, A6XX_RB_2D_DST_INFO_FLAGS));
+       OUT_RELOC(ring, dst->bo, off, 0, 0);    /* RB_2D_DST_LO/HI */
+       OUT_RING(ring, A6XX_RB_2D_DST_SIZE_PITCH(pitch));
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+
+       /* The flag-buffer registers are only written for UBWC destinations */
+       if (ubwc_enabled) {
+               OUT_PKT4(ring, REG_A6XX_RB_2D_DST_FLAGS_LO, 6);
+               fd6_emit_flag_reference(ring, dst, level, layer);
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x00000000);
+       }
+}
+
+/**
+ * Emit the 2D-blit source description for one layer of info->src:
+ * SP_PS_2D_SRC_INFO and the registers that follow it (10 dwords in total),
+ * plus the SP_PS_2D_SRC_FLAGS_* registers when UBWC is enabled for the
+ * source level.
+ *
+ * @param ring        ringbuffer the packets are appended to
+ * @param info        blit description; only the src.*, filter and mask
+ *                    fields are read here
+ * @param layer       source layer used for the offset and flag reference
+ * @param nr_samples  multiplier applied to the source width written to
+ *                    SP_PS_2D_SRC_SIZE; the caller in this file passes
+ *                    fd_resource_nr_samples() of the destination
+ */
+static void
+emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info, unsigned layer, unsigned nr_samples)
+{
+       struct fd_resource *src = fd_resource(info->src.resource);
+       enum a6xx_format sfmt = fd6_pipe2color(info->src.format);
+       enum a6xx_tile_mode     stile = fd_resource_tile_mode(info->src.resource, info->src.level);
+       enum a3xx_color_swap sswap = fd6_resource_swap(src, info->src.format);
+       uint32_t pitch = fd_resource_pitch(src, info->src.level);
+       bool subwc_enabled = fd_resource_ubwc_enabled(src, info->src.level);
+       unsigned soff = fd_resource_offset(src, info->src.level, layer);
+       uint32_t width = u_minify(src->base.width0, info->src.level) * nr_samples;
+       uint32_t height = u_minify(src->base.height0, info->src.level);
+       uint32_t filter = 0;
+
+       /* The FILTER bit is only set for linear filtering */
+       if (info->filter == PIPE_TEX_FILTER_LINEAR)
+               filter = A6XX_SP_PS_2D_SRC_INFO_FILTER;
+
+       enum a3xx_msaa_samples samples = fd_msaa_samples(src->base.nr_samples);
+
+       /* On the source side the _DEST variant is replaced by the plain format */
+       if (sfmt == FMT6_10_10_10_2_UNORM_DEST)
+               sfmt = FMT6_10_10_10_2_UNORM;
+
+       /* SAMPLES_AVERAGE is set only when the source is multisampled and
+        * the blit mask includes at least one RGBA channel.
+        */
+       OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
+       OUT_RING(ring, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
+                       A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(stile) |
+                       A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(sswap) |
+                       A6XX_SP_PS_2D_SRC_INFO_SAMPLES(samples) |
+                       COND(samples > MSAA_ONE && (info->mask & PIPE_MASK_RGBA),
+                                       A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
+                       COND(subwc_enabled, A6XX_SP_PS_2D_SRC_INFO_FLAGS) |
+                       COND(util_format_is_srgb(info->src.format), A6XX_SP_PS_2D_SRC_INFO_SRGB) |
+                       0x500000 | filter);
+       OUT_RING(ring, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(width) |
+                       A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(height)); /* SP_PS_2D_SRC_SIZE */
+       OUT_RELOC(ring, src->bo, soff, 0, 0);    /* SP_PS_2D_SRC_LO/HI */
+       OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(pitch));
+
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+
+       /* The flag-buffer registers are only written for UBWC sources */
+       if (subwc_enabled) {
+               OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO, 6);
+               fd6_emit_flag_reference(ring, src, info->src.level, layer);
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x00000000);
+       }
+}
+
 static void
 emit_blit_or_clear_texture(struct fd_context *ctx, struct fd_ringbuffer *ring,
                const struct pipe_blit_info *info, union pipe_color_union *color)
 {
        const struct pipe_box *sbox = &info->src.box;
        const struct pipe_box *dbox = &info->dst.box;
-       struct fd_resource *src, *dst;
-       struct fdl_slice *sslice, *dslice;
-       enum a6xx_color_fmt sfmt, dfmt;
-       enum a6xx_tile_mode stile, dtile;
-       enum a3xx_color_swap sswap, dswap;
-       unsigned spitch, dpitch;
+       struct fd_resource *dst;
+       enum a6xx_format dfmt;
        int sx1, sy1, sx2, sy2;
        int dx1, dy1, dx2, dy2;
 
@@ -349,62 +532,35 @@ emit_blit_or_clear_texture(struct fd_context *ctx, struct fd_ringbuffer *ring,
                fprintf(stderr, "\n");
        }
 
-       src = fd_resource(info->src.resource);
        dst = fd_resource(info->dst.resource);
 
-       sslice = fd_resource_slice(src, info->src.level);
-       dslice = fd_resource_slice(dst, info->dst.level);
-
-       sfmt = fd6_pipe2color(info->src.format);
        dfmt = fd6_pipe2color(info->dst.format);
 
-       int blocksize = util_format_get_blocksize(info->src.format);
-       int blockwidth = util_format_get_blockwidth(info->src.format);
-       int blockheight = util_format_get_blockheight(info->src.format);
-       int nelements;
+       uint32_t nr_samples = fd_resource_nr_samples(&dst->base);
 
-       stile = fd_resource_tile_mode(info->src.resource, info->src.level);
-       dtile = fd_resource_tile_mode(info->dst.resource, info->dst.level);
+       if (!color) {
+               sx1 = sbox->x * nr_samples;
+               sy1 = sbox->y;
+               sx2 = (sbox->x + sbox->width) * nr_samples - 1;
+               sy2 = sbox->y + sbox->height - 1;
 
-       /* Linear levels of a tiled resource are always WZYX, so look at
-        * rsc->tile_mode to determine the swap.
-        */
-       sswap = fd6_resource_swap(src, info->src.format);
-       dswap = fd6_resource_swap(dst, info->dst.format);
-
-       if (util_format_is_compressed(info->src.format)) {
-               debug_assert(info->src.format == info->dst.format);
-               sfmt = dfmt = RB6_R8_UNORM;
-               nelements = blocksize;
-       } else {
-               debug_assert(!util_format_is_compressed(info->dst.format));
-               nelements = (dst->base.nr_samples ? dst->base.nr_samples : 1);
+               OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
+               OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X_X(sx1));
+               OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X_X(sx2));
+               OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y_Y(sy1));
+               OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y_Y(sy2));
        }
 
-       spitch = DIV_ROUND_UP(sslice->pitch, blockwidth) * src->layout.cpp;
-       dpitch = DIV_ROUND_UP(dslice->pitch, blockwidth) * dst->layout.cpp;
-
-       sx1 = sbox->x / blockwidth * nelements;
-       sy1 = sbox->y / blockheight;
-       sx2 = DIV_ROUND_UP(sbox->x + sbox->width, blockwidth) * nelements - 1;
-       sy2 = DIV_ROUND_UP(sbox->y + sbox->height, blockheight) - 1;
-
-       dx1 = dbox->x / blockwidth * nelements;
-       dy1 = dbox->y / blockheight;
-       dx2 = DIV_ROUND_UP(dbox->x + dbox->width, blockwidth) * nelements - 1;
-       dy2 = DIV_ROUND_UP(dbox->y + dbox->height, blockheight) - 1;
-
-       uint32_t width = DIV_ROUND_UP(u_minify(src->base.width0, info->src.level), blockwidth) * nelements;
-       uint32_t height = DIV_ROUND_UP(u_minify(src->base.height0, info->src.level), blockheight);
-
-       OUT_PKT7(ring, CP_SET_MARKER, 1);
-       OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
+       dx1 = dbox->x * nr_samples;
+       dy1 = dbox->y;
+       dx2 = (dbox->x + dbox->width) * nr_samples - 1;
+       dy2 = dbox->y + dbox->height - 1;
 
-       uint32_t blit_cntl = blit_control(dfmt);
+       OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
+       OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dx1) | A6XX_GRAS_2D_DST_TL_Y(dy1));
+       OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dx2) | A6XX_GRAS_2D_DST_BR_Y(dy2));
 
        if (color) {
-               blit_cntl |= A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR;
-
                switch (info->dst.format) {
                case PIPE_FORMAT_Z24X8_UNORM:
                case PIPE_FORMAT_Z24_UNORM_S8_UINT:
@@ -415,19 +571,8 @@ emit_blit_or_clear_texture(struct fd_context *ctx, struct fd_ringbuffer *ring,
                        color->ui[1] = (depth_unorm24 >> 8) & 0xff;
                        color->ui[2] = (depth_unorm24 >> 16) & 0xff;
                        color->ui[3] = stencil;
-
-                       dfmt = RB6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
                        break;
                }
-               case PIPE_FORMAT_B5G6R5_UNORM:
-               case PIPE_FORMAT_B5G5R5A1_UNORM:
-               case PIPE_FORMAT_B5G5R5X1_UNORM:
-               case PIPE_FORMAT_B4G4R4A4_UNORM:
-                       color->ui[0] = float_to_ubyte(color->f[0]);
-                       color->ui[1] = float_to_ubyte(color->f[1]);
-                       color->ui[2] = float_to_ubyte(color->f[2]);
-                       color->ui[3] = float_to_ubyte(color->f[3]);
-                       break;
                default:
                        break;
                }
@@ -447,14 +592,11 @@ emit_blit_or_clear_texture(struct fd_context *ctx, struct fd_ringbuffer *ring,
                        OUT_RING(ring, _mesa_float_to_half(color->f[1]));
                        OUT_RING(ring, _mesa_float_to_half(color->f[2]));
                        OUT_RING(ring, _mesa_float_to_half(color->f[3]));
-                       sfmt = RB6_R16G16B16A16_FLOAT;
                        break;
-
                case R2D_FLOAT32:
                case R2D_INT32:
                case R2D_INT16:
                case R2D_INT8:
-               case R2D_RAW:
                default:
                        OUT_RING(ring, color->ui[0]);
                        OUT_RING(ring, color->ui[1]);
@@ -464,132 +606,30 @@ emit_blit_or_clear_texture(struct fd_context *ctx, struct fd_ringbuffer *ring,
                }
        }
 
-       if (dtile != stile)
-               blit_cntl |= 0x20000000;
-
        if (info->scissor_enable) {
                OUT_PKT4(ring, REG_A6XX_GRAS_RESOLVE_CNTL_1, 2);
                OUT_RING(ring, A6XX_GRAS_RESOLVE_CNTL_1_X(info->scissor.minx) |
                                 A6XX_GRAS_RESOLVE_CNTL_1_Y(info->scissor.miny));
                OUT_RING(ring, A6XX_GRAS_RESOLVE_CNTL_1_X(info->scissor.maxx - 1) |
                                 A6XX_GRAS_RESOLVE_CNTL_1_Y(info->scissor.maxy - 1));
-               blit_cntl |= A6XX_RB_2D_BLIT_CNTL_SCISSOR;
        }
 
-       OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
-       OUT_RING(ring, blit_cntl);
-
-       OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
-       OUT_RING(ring, blit_cntl);
+       emit_blit_setup(ring, info->dst.format, info->scissor_enable, color);
 
        for (unsigned i = 0; i < info->dst.box.depth; i++) {
-               unsigned soff = fd_resource_offset(src, info->src.level, sbox->z + i);
-               unsigned doff = fd_resource_offset(dst, info->dst.level, dbox->z + i);
-               bool subwc_enabled = fd_resource_ubwc_enabled(src, info->src.level);
-               bool dubwc_enabled = fd_resource_ubwc_enabled(dst, info->dst.level);
-
-               /*
-                * Emit source:
-                */
-               uint32_t filter = 0;
-               if (info->filter == PIPE_TEX_FILTER_LINEAR)
-                       filter = A6XX_SP_PS_2D_SRC_INFO_FILTER;
 
-               enum a3xx_msaa_samples samples = fd_msaa_samples(src->base.nr_samples);
+               if (!color)
+                       emit_blit_src(ring, info, sbox->z + i, nr_samples);
 
-               if (sfmt == RB6_R10G10B10A2_UNORM)
-                       sfmt = RB6_R10G10B10A2_FLOAT16;
-
-               OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
-               OUT_RING(ring, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
-                               A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(stile) |
-                               A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(sswap) |
-                               A6XX_SP_PS_2D_SRC_INFO_SAMPLES(samples) |
-                               COND(samples > MSAA_ONE && (info->mask & PIPE_MASK_RGBA),
-                                               A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
-                               COND(subwc_enabled, A6XX_SP_PS_2D_SRC_INFO_FLAGS) |
-                               0x500000 | filter);
-               OUT_RING(ring, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(width) |
-                                A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(height)); /* SP_PS_2D_SRC_SIZE */
-               OUT_RELOC(ring, src->bo, soff, 0, 0);    /* SP_PS_2D_SRC_LO/HI */
-               OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(spitch));
-
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-
-               if (subwc_enabled) {
-                       OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO, 6);
-                       fd6_emit_flag_reference(ring, src, info->src.level, sbox->z + i);
-                       OUT_RING(ring, 0x00000000);
-                       OUT_RING(ring, 0x00000000);
-                       OUT_RING(ring, 0x00000000);
-               }
-
-               /*
-                * Emit destination:
-                */
-               OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
-               OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(dfmt) |
-                                A6XX_RB_2D_DST_INFO_TILE_MODE(dtile) |
-                                A6XX_RB_2D_DST_INFO_COLOR_SWAP(dswap) |
-                                COND(dubwc_enabled, A6XX_RB_2D_DST_INFO_FLAGS));
-               OUT_RELOCW(ring, dst->bo, doff, 0, 0);    /* RB_2D_DST_LO/HI */
-               OUT_RING(ring, A6XX_RB_2D_DST_SIZE_PITCH(dpitch));
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-
-               if (dubwc_enabled) {
-                       OUT_PKT4(ring, REG_A6XX_RB_2D_DST_FLAGS_LO, 6);
-                       fd6_emit_flag_reference(ring, dst, info->dst.level, dbox->z + i);
-                       OUT_RING(ring, 0x00000000);
-                       OUT_RING(ring, 0x00000000);
-                       OUT_RING(ring, 0x00000000);
-               }
+               emit_blit_dst(ring, info->dst.resource, info->dst.format, info->dst.level, dbox->z + i);
 
                /*
                 * Blit command:
                 */
-               OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
-               OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X_X(sx1));
-               OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X_X(sx2));
-               OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y_Y(sy1));
-               OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y_Y(sy2));
-
-               OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
-               OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dx1) | A6XX_GRAS_2D_DST_TL_Y(dy1));
-               OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dx2) | A6XX_GRAS_2D_DST_BR_Y(dy2));
-
                OUT_PKT7(ring, CP_EVENT_WRITE, 1);
                OUT_RING(ring, 0x3f);
                OUT_WFI5(ring);
 
-               OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8C01, 1);
-               OUT_RING(ring, 0);
-
-               if (dfmt == RB6_R10G10B10A2_UNORM)
-                       sfmt = RB6_R16G16B16A16_FLOAT;
-
-               OUT_PKT4(ring, REG_A6XX_SP_2D_SRC_FORMAT, 1);
-               OUT_RING(ring, A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(sfmt) |
-                               COND(util_format_is_pure_sint(info->src.format),
-                                               A6XX_SP_2D_SRC_FORMAT_SINT) |
-                               COND(util_format_is_pure_uint(info->src.format),
-                                               A6XX_SP_2D_SRC_FORMAT_UINT) |
-                               COND(util_format_is_snorm(info->src.format),
-                                               A6XX_SP_2D_SRC_FORMAT_SINT |
-                                               A6XX_SP_2D_SRC_FORMAT_NORM) |
-                               COND(util_format_is_unorm(info->src.format),
-// TODO sometimes blob uses UINT+NORM but dEQP seems unhappy about that
-//                                             A6XX_SP_2D_SRC_FORMAT_UINT |
-                                               A6XX_SP_2D_SRC_FORMAT_NORM) |
-                               0xf000);
-
                OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
                OUT_RING(ring, fd6_context(ctx)->magic.RB_UNKNOWN_8E04_blit);
 
@@ -627,7 +667,69 @@ fd6_clear_surface(struct fd_context *ctx,
        emit_blit_or_clear_texture(ctx, ring, &info, color);
 }
 
-static bool handle_rgba_blit(struct fd_context *ctx, const struct pipe_blit_info *info);
+/**
+ * Perform a color (non depth/stencil) blit with the 2D engine on a freshly
+ * allocated batch.
+ *
+ * The batch is set up, the source is recorded as read and the destination
+ * as written, and then either the buffer path (both resources are
+ * PIPE_BUFFER) or the texture path emits the blit.  Afterwards the CCU
+ * color/depth flush events, CACHE_FLUSH_TS and a cache invalidate are
+ * emitted, the destination is marked valid, and the batch is flushed and
+ * released.
+ *
+ * @param ctx   context the blit is issued on
+ * @param info  blit description; its mask must not contain PIPE_MASK_ZS bits
+ * @return      false if can_do_blit() rejects the blit (nothing is emitted),
+ *              true once the batch has been flushed
+ */
+static bool
+handle_rgba_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
+{
+       struct fd_batch *batch;
+
+       debug_assert(!(info->mask & PIPE_MASK_ZS));
+
+       if (!can_do_blit(info))
+               return false;
+
+       batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true);
+
+       fd6_emit_restore(batch, batch->draw);
+       fd6_emit_lrz_flush(batch->draw);
+
+       /* Resource read/write tracking is done with the screen lock held */
+       fd_screen_lock(ctx->screen);
+
+       fd_batch_resource_read(batch, fd_resource(info->src.resource));
+       fd_batch_resource_write(batch, fd_resource(info->dst.resource));
+
+       fd_screen_unlock(ctx->screen);
+
+       /* Clearing last_fence must come after the batch dependency tracking
+        * (resource_read()/resource_write()), as that can trigger a flush,
+        * re-populating last_fence
+        */
+       fd_fence_ref(&ctx->last_fence, NULL);
+
+       fd_batch_set_stage(batch, FD_STAGE_BLIT);
+
+       fd_log_stream(batch, stream, util_dump_blit_info(stream, info));
+
+       emit_setup(batch);
+
+       if ((info->src.resource->target == PIPE_BUFFER) &&
+                       (info->dst.resource->target == PIPE_BUFFER)) {
+               assert(fd_resource(info->src.resource)->layout.tile_mode == TILE6_LINEAR);
+               assert(fd_resource(info->dst.resource)->layout.tile_mode == TILE6_LINEAR);
+               fd_log(batch, "START BLIT (BUFFER)");
+               emit_blit_buffer(ctx, batch->draw, info);
+               fd_log(batch, "END BLIT (BUFFER)");
+       } else {
+               /* I don't *think* we need to handle blits between buffer <-> !buffer */
+               debug_assert(info->src.resource->target != PIPE_BUFFER);
+               debug_assert(info->dst.resource->target != PIPE_BUFFER);
+               fd_log(batch, "START BLIT (TEXTURE)");
+               /* A NULL color selects a blit rather than a solid-color clear */
+               emit_blit_or_clear_texture(ctx, batch->draw, info, NULL);
+               fd_log(batch, "END BLIT (TEXTURE)");
+       }
+
+       fd6_event_write(batch, batch->draw, PC_CCU_FLUSH_COLOR_TS, true);
+       fd6_event_write(batch, batch->draw, PC_CCU_FLUSH_DEPTH_TS, true);
+       fd6_event_write(batch, batch->draw, CACHE_FLUSH_TS, true);
+       fd6_cache_inv(batch, batch->draw);
+
+       fd_resource(info->dst.resource)->valid = true;
+       batch->needs_flush = true;
+
+       fd_batch_flush(batch);
+       fd_batch_reference(&batch, NULL);
+
+       return true;
+}
 
 /**
  * Re-written z/s blits can still fail for various reasons (for example MSAA).
@@ -722,55 +824,55 @@ handle_zs_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
 }
 
+/**
+ * Handle a blit in which the source or destination format is compressed
+ * (fd6_blit() routes those here).
+ *
+ * When the two formats differ the blit is handed to fd_blitter_blit().
+ * Otherwise a copy of the blit info is rewritten so that each compressed
+ * block is treated as one texel of an uncompressed UINT format with the
+ * same byte size (8 bytes -> R16G16B16A16_UINT, 16 bytes ->
+ * R32G32B32A32_UINT), the source and destination boxes are converted from
+ * texels to blocks, and the result is passed to do_rewritten_blit().
+ *
+ * @param ctx   context the blit is issued on
+ * @param info  original blit description; it is not modified
+ * @return      the result of fd_blitter_blit() or do_rewritten_blit()
+ */
 static bool
-handle_rgba_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
+handle_compressed_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
 {
-       struct fd_batch *batch;
-
-       debug_assert(!(info->mask & PIPE_MASK_ZS));
-
-       if (!can_do_blit(info))
-               return false;
-
-       fd_fence_ref(&ctx->last_fence, NULL);
-
-       batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true);
+       struct pipe_blit_info blit = *info;
 
-       fd6_emit_restore(batch, batch->draw);
-       fd6_emit_lrz_flush(batch->draw);
+       if (DEBUG_BLIT) {
+               fprintf(stderr, "---- handle_compressed_blit: ");
+               util_dump_blit_info(stderr, info);
+               fprintf(stderr, "\ndst resource: ");
+               util_dump_resource(stderr, info->dst.resource);
+               fprintf(stderr, "\nsrc resource: ");
+               util_dump_resource(stderr, info->src.resource);
+               fprintf(stderr, "\n");
+       }
 
-       mtx_lock(&ctx->screen->lock);
+       if (info->src.format != info->dst.format)
+               return fd_blitter_blit(ctx, info);
 
-       fd_batch_resource_used(batch, fd_resource(info->src.resource), false);
-       fd_batch_resource_used(batch, fd_resource(info->dst.resource), true);
+       if (util_format_get_blocksize(info->src.format) == 8) {
+               blit.src.format = blit.dst.format = PIPE_FORMAT_R16G16B16A16_UINT;
+       } else {
+               debug_assert(util_format_get_blocksize(info->src.format) == 16);
+               blit.src.format = blit.dst.format = PIPE_FORMAT_R32G32B32A32_UINT;
+       }
 
-       mtx_unlock(&ctx->screen->lock);
+       int bw = util_format_get_blockwidth(info->src.format);
+       int bh = util_format_get_blockheight(info->src.format);
 
-       emit_setup(batch);
+       /* NOTE: x/y *must* be aligned to block boundary (ie. in
+        * glCompressedTexSubImage2D()) but width/height may not
+        * be:
+        */
 
-       if ((info->src.resource->target == PIPE_BUFFER) &&
-                       (info->dst.resource->target == PIPE_BUFFER)) {
-               assert(fd_resource(info->src.resource)->layout.tile_mode == TILE6_LINEAR);
-               assert(fd_resource(info->dst.resource)->layout.tile_mode == TILE6_LINEAR);
-               emit_blit_buffer(ctx, batch->draw, info);
-       } else {
-               /* I don't *think* we need to handle blits between buffer <-> !buffer */
-               debug_assert(info->src.resource->target != PIPE_BUFFER);
-               debug_assert(info->dst.resource->target != PIPE_BUFFER);
-               emit_blit_or_clear_texture(ctx, batch->draw, info, NULL);
-       }
+       debug_assert((blit.src.box.x % bw) == 0);
+       debug_assert((blit.src.box.y % bh) == 0);
 
-       fd6_event_write(batch, batch->draw, 0x1d, true);
-       fd6_event_write(batch, batch->draw, FACENESS_FLUSH, true);
-       fd6_event_write(batch, batch->draw, CACHE_FLUSH_TS, true);
-       fd6_cache_inv(batch, batch->draw);
+       blit.src.box.x /= bw;
+       blit.src.box.y /= bh;
+       blit.src.box.width  = DIV_ROUND_UP(blit.src.box.width, bw);
+       blit.src.box.height = DIV_ROUND_UP(blit.src.box.height, bh);
 
-       fd_resource(info->dst.resource)->valid = true;
-       batch->needs_flush = true;
+       debug_assert((blit.dst.box.x % bw) == 0);
+       debug_assert((blit.dst.box.y % bh) == 0);
 
-       fd_batch_flush(batch, false);
-       fd_batch_reference(&batch, NULL);
+       blit.dst.box.x /= bw;
+       blit.dst.box.y /= bh;
+       blit.dst.box.width  = DIV_ROUND_UP(blit.dst.box.width, bw);
+       blit.dst.box.height = DIV_ROUND_UP(blit.dst.box.height, bh);
 
-       return true;
+       return do_rewritten_blit(ctx, &blit);
 }
 
 static bool
@@ -778,6 +880,10 @@ fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
 {
        if (info->mask & PIPE_MASK_ZS)
                return handle_zs_blit(ctx, info);
+       if (util_format_is_compressed(info->src.format) ||
+                       util_format_is_compressed(info->dst.format))
+               return handle_compressed_blit(ctx, info);
+
        return handle_rgba_blit(ctx, info);
 }