freedreno: Move fs functions after geometry pipeline stages
[mesa.git] / src / gallium / drivers / freedreno / a6xx / fd6_blitter.c
index 2ae59ce62f4035bba30485a1e87349ce65d13c65..9e145e55e5b4db5befa8aefe665457e2707cad8f 100644 (file)
@@ -124,9 +124,28 @@ can_do_blit(const struct pipe_blit_info *info)
        debug_assert(info->dst.box.height >= 0);
        debug_assert(info->dst.box.depth >= 0);
 
-       /* non-multisampled could either have nr_samples == 0 or == 1 */
+       /* We could probably blit between resources with equal sample count.. */
        fail_if(info->dst.resource->nr_samples > 1);
-       fail_if(info->src.resource->nr_samples > 1);
+
+       /* CP_BLIT supports resolving, but seems to pick only one of the samples
+        * (no blending). This doesn't work for RGBA resolves, so we fall back in
+        * that case.  However, GL/GLES spec says:
+        *
+        *   "If the source formats are integer types or stencil values, a single
+        *    sample’s value is selected for each pixel. If the source formats are
+        *    floating-point or normalized types, the sample values for each pixel
+        *    are resolved in an implementation-dependent manner. If the source
+        *    formats are depth values, sample values are resolved in an
+        *    implementation-dependent manner where the result will be between the
+        *    minimum and maximum depth values in the pixel."
+        *
+        * so do those with CP_BLIT.
+        *
+        * TODO since we re-write z/s blits to RGBA, we'll fail this check in some
+        * cases where we don't need to.
+        */
+       fail_if((info->mask & PIPE_MASK_RGBA) &&
+                       info->src.resource->nr_samples > 1);
 
        fail_if(info->window_rectangle_include);
 
@@ -153,21 +172,14 @@ can_do_blit(const struct pipe_blit_info *info)
 }
 
 static void
-emit_setup(struct fd_ringbuffer *ring)
+emit_setup(struct fd_batch *batch)
 {
-       OUT_PKT7(ring, CP_EVENT_WRITE, 1);
-       OUT_RING(ring, PC_CCU_INVALIDATE_COLOR);
-
-       OUT_PKT7(ring, CP_EVENT_WRITE, 1);
-       OUT_RING(ring, LRZ_FLUSH);
+       struct fd_ringbuffer *ring = batch->draw;
 
-       OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
-       OUT_RING(ring, 0x0);
-
-       OUT_WFI5(ring);
-
-       OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
-       OUT_RING(ring, 0x10000000);
+       fd6_event_write(batch, ring, 0x1d, true);
+       fd6_event_write(batch, ring, FACENESS_FLUSH, true);
+       fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
+       fd6_event_write(batch, ring, PC_CCU_INVALIDATE_DEPTH, false);
 }
 
 static uint32_t
@@ -236,7 +248,7 @@ emit_blit_buffer(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
        dshift = dbox->x & 0x3f;
 
        OUT_PKT7(ring, CP_SET_MARKER, 1);
-       OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
+       OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
 
        uint32_t blit_cntl = blit_control(RB6_R8_UNORM) | 0x20000000;
        OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
@@ -260,10 +272,11 @@ emit_blit_buffer(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
                /*
                 * Emit source:
                 */
-               OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
+               OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
                OUT_RING(ring, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
                                A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
-                                A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
+                                A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) |
+                                0x500000);
                OUT_RING(ring, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(sshift + w) |
                                 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(1)); /* SP_PS_2D_SRC_SIZE */
                OUT_RELOC(ring, src->bo, soff, 0, 0);    /* SP_PS_2D_SRC_LO/HI */
@@ -275,10 +288,6 @@ emit_blit_buffer(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
                OUT_RING(ring, 0x00000000);
                OUT_RING(ring, 0x00000000);
 
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-
                /*
                 * Emit destination:
                 */
@@ -286,7 +295,7 @@ emit_blit_buffer(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
                OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
                                 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                                 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
-               OUT_RELOC(ring, dst->bo, doff, 0, 0);    /* RB_2D_DST_LO/HI */
+               OUT_RELOCW(ring, dst->bo, doff, 0, 0);    /* RB_2D_DST_LO/HI */
                OUT_RING(ring, A6XX_RB_2D_DST_SIZE_PITCH(p));
                OUT_RING(ring, 0x00000000);
                OUT_RING(ring, 0x00000000);
@@ -402,7 +411,7 @@ emit_blit_texture(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
        uint32_t height = DIV_ROUND_UP(u_minify(src->base.height0, info->src.level), blockheight);
 
        OUT_PKT7(ring, CP_SET_MARKER, 1);
-       OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
+       OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
 
        uint32_t blit_cntl = blit_control(dfmt);
 
@@ -427,6 +436,10 @@ emit_blit_texture(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
        for (unsigned i = 0; i < info->dst.box.depth; i++) {
                unsigned soff = fd_resource_offset(src, info->src.level, sbox->z + i);
                unsigned doff = fd_resource_offset(dst, info->dst.level, dbox->z + i);
+               unsigned subwcoff = fd_resource_ubwc_offset(src, info->src.level, sbox->z + i);
+               unsigned dubwcoff = fd_resource_ubwc_offset(dst, info->dst.level, dbox->z + i);
+               bool subwc_enabled = fd_resource_ubwc_enabled(src, info->src.level);
+               bool dubwc_enabled = fd_resource_ubwc_enabled(dst, info->dst.level);
 
                /*
                 * Emit source:
@@ -435,23 +448,35 @@ emit_blit_texture(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
                if (info->filter == PIPE_TEX_FILTER_LINEAR)
                        filter = A6XX_SP_PS_2D_SRC_INFO_FILTER;
 
-               OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
+               enum a3xx_msaa_samples samples = fd_msaa_samples(src->base.nr_samples);
+
+               OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
                OUT_RING(ring, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
                                A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(stile) |
-                               A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(sswap) | 0x500000 | filter);
+                               A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(sswap) |
+                                A6XX_SP_PS_2D_SRC_INFO_SAMPLES(samples) |
+                                COND(subwc_enabled, A6XX_SP_PS_2D_SRC_INFO_FLAGS) |
+                                0x500000 | filter);
                OUT_RING(ring, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(width) |
                                 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(height)); /* SP_PS_2D_SRC_SIZE */
                OUT_RELOC(ring, src->bo, soff, 0, 0);    /* SP_PS_2D_SRC_LO/HI */
                OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(spitch));
+
                OUT_RING(ring, 0x00000000);
                OUT_RING(ring, 0x00000000);
                OUT_RING(ring, 0x00000000);
                OUT_RING(ring, 0x00000000);
                OUT_RING(ring, 0x00000000);
 
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
+               if (subwc_enabled) {
+                       OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO, 6);
+                       OUT_RELOC(ring, src->bo, subwcoff, 0, 0);
+                       OUT_RING(ring, A6XX_SP_PS_2D_SRC_FLAGS_PITCH_PITCH(src->ubwc_pitch) |
+                                        A6XX_SP_PS_2D_SRC_FLAGS_PITCH_ARRAY_PITCH(src->ubwc_size));
+                       OUT_RING(ring, 0x00000000);
+                       OUT_RING(ring, 0x00000000);
+                       OUT_RING(ring, 0x00000000);
+               }
 
                /*
                 * Emit destination:
@@ -459,7 +484,8 @@ emit_blit_texture(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
                OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
                OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(dfmt) |
                                 A6XX_RB_2D_DST_INFO_TILE_MODE(dtile) |
-                                A6XX_RB_2D_DST_INFO_COLOR_SWAP(dswap));
+                                A6XX_RB_2D_DST_INFO_COLOR_SWAP(dswap) |
+                                COND(dubwc_enabled, A6XX_RB_2D_DST_INFO_FLAGS));
                OUT_RELOCW(ring, dst->bo, doff, 0, 0);    /* RB_2D_DST_LO/HI */
                OUT_RING(ring, A6XX_RB_2D_DST_SIZE_PITCH(dpitch));
                OUT_RING(ring, 0x00000000);
@@ -468,6 +494,16 @@ emit_blit_texture(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
                OUT_RING(ring, 0x00000000);
                OUT_RING(ring, 0x00000000);
 
+               if (dubwc_enabled) {
+                       OUT_PKT4(ring, REG_A6XX_RB_2D_DST_FLAGS_LO, 6);
+                       OUT_RELOCW(ring, dst->bo, dubwcoff, 0, 0);
+                       OUT_RING(ring, A6XX_RB_2D_DST_FLAGS_PITCH_PITCH(dst->ubwc_pitch) |
+                                        A6XX_RB_2D_DST_FLAGS_PITCH_ARRAY_PITCH(dst->ubwc_size));
+                       OUT_RING(ring, 0x00000000);
+                       OUT_RING(ring, 0x00000000);
+                       OUT_RING(ring, 0x00000000);
+               }
+
                /*
                 * Blit command:
                 */
@@ -516,120 +552,111 @@ emit_blit_texture(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
        }
 }
 
-static void
-rewrite_zs_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
+static bool handle_rgba_blit(struct fd_context *ctx, const struct pipe_blit_info *info);
+
+/**
+ * Re-written z/s blits can still fail for various reasons (for example MSAA).
+ * But we want to do the fallback blit with the re-written pipe_blit_info,
+ * in particular as u_blitter cannot blit stencil.  So handle the fallback
+ * ourselves and never "fail".
+ */
+static bool
+do_rewritten_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
 {
-       struct pipe_blit_info separate = *info;
+       bool success = handle_rgba_blit(ctx, info);
+       if (!success)
+               success = fd_blitter_blit(ctx, info);
+       debug_assert(success);  /* fallback should never fail! */
+       return success;
+}
 
-       switch (info->src.format) {
+/**
+ * Handle depth/stencil blits either via u_blitter and/or re-writing the
+ * blit into an equivalent format that we can handle
+ */
+static bool
+handle_zs_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
+{
+       struct pipe_blit_info blit = *info;
+
+       if (DEBUG_BLIT_FALLBACK) {
+               fprintf(stderr, "---- handle_zs_blit: ");
+               util_dump_blit_info(stderr, info);
+               fprintf(stderr, "\ndst resource: ");
+               util_dump_resource(stderr, info->dst.resource);
+               fprintf(stderr, "\nsrc resource: ");
+               util_dump_resource(stderr, info->src.resource);
+               fprintf(stderr, "\n");
+       }
+
+       switch (info->dst.format) {
        case PIPE_FORMAT_S8_UINT:
                debug_assert(info->mask == PIPE_MASK_S);
-               separate.mask = PIPE_MASK_R;
-               separate.src.format = PIPE_FORMAT_R8_UNORM;
-               separate.dst.format = PIPE_FORMAT_R8_UNORM;
-               emit_blit_texture(ring, &separate);
-               break;
+               blit.mask = PIPE_MASK_R;
+               blit.src.format = PIPE_FORMAT_R8_UINT;
+               blit.dst.format = PIPE_FORMAT_R8_UINT;
+               return do_rewritten_blit(ctx, &blit);
 
        case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
                if (info->mask & PIPE_MASK_Z) {
-                       separate.mask = PIPE_MASK_R;
-                       separate.src.format = PIPE_FORMAT_R32_FLOAT;
-                       separate.dst.format = PIPE_FORMAT_R32_FLOAT;
-                       emit_blit_texture(ring, &separate);
+                       blit.mask = PIPE_MASK_R;
+                       blit.src.format = PIPE_FORMAT_R32_FLOAT;
+                       blit.dst.format = PIPE_FORMAT_R32_FLOAT;
+                       do_rewritten_blit(ctx, &blit);
                }
+
                if (info->mask & PIPE_MASK_S) {
-                       separate.mask = PIPE_MASK_R;
-                       separate.src.format = PIPE_FORMAT_R8_UNORM;
-                       separate.dst.format = PIPE_FORMAT_R8_UNORM;
-                       separate.src.resource = &fd_resource(info->src.resource)->stencil->base;
-                       separate.dst.resource = &fd_resource(info->dst.resource)->stencil->base;
-                       emit_blit_texture(ring, &separate);
+                       blit.mask = PIPE_MASK_R;
+                       blit.src.format = PIPE_FORMAT_R8_UINT;
+                       blit.dst.format = PIPE_FORMAT_R8_UINT;
+                       blit.src.resource = &fd_resource(info->src.resource)->stencil->base;
+                       blit.dst.resource = &fd_resource(info->dst.resource)->stencil->base;
+                       do_rewritten_blit(ctx, &blit);
                }
-               break;
+
+               return true;
 
        case PIPE_FORMAT_Z16_UNORM:
-               separate.mask = PIPE_MASK_R;
-               separate.src.format = PIPE_FORMAT_R16_UNORM;
-               separate.dst.format = PIPE_FORMAT_R16_UNORM;
-               emit_blit_texture(ring, &separate);
-               break;
+               blit.mask = PIPE_MASK_R;
+               blit.src.format = PIPE_FORMAT_R16_UNORM;
+               blit.dst.format = PIPE_FORMAT_R16_UNORM;
+               return do_rewritten_blit(ctx, &blit);
 
        case PIPE_FORMAT_Z32_UNORM:
        case PIPE_FORMAT_Z32_FLOAT:
                debug_assert(info->mask == PIPE_MASK_Z);
-               separate.mask = PIPE_MASK_R;
-               separate.src.format = PIPE_FORMAT_R32_UINT;
-               separate.dst.format = PIPE_FORMAT_R32_UINT;
-               emit_blit_texture(ring, &separate);
-               break;
+               blit.mask = PIPE_MASK_R;
+               blit.src.format = PIPE_FORMAT_R32_UINT;
+               blit.dst.format = PIPE_FORMAT_R32_UINT;
+               return do_rewritten_blit(ctx, &blit);
 
-       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-               debug_assert(info->mask == PIPE_MASK_ZS);
        case PIPE_FORMAT_Z24X8_UNORM:
-       case PIPE_FORMAT_X8Z24_UNORM:
-               separate.mask = PIPE_MASK_R;
-               separate.src.format = PIPE_FORMAT_R32_UINT;
-               separate.dst.format = PIPE_FORMAT_R32_UINT;
-               emit_blit_texture(ring, &separate);
-               break;
-
-       default:
-               unreachable("");
-       }
-}
-
-static void
-rewrite_combined_zs_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
-{
-       struct pipe_blit_info separate = *info;
-
-       if (DEBUG_BLIT_FALLBACK) {
-               fprintf(stderr, "---- rewrite_combined_zs_blit: ");
-               util_dump_blit_info(stderr, info);
-               fprintf(stderr, "\ndst resource: ");
-               util_dump_resource(stderr, info->dst.resource);
-               fprintf(stderr, "\nsrc resource: ");
-               util_dump_resource(stderr, info->src.resource);
-               fprintf(stderr, "\n");
-       }
-
-       switch (info->mask) {
-       case PIPE_MASK_Z:
-               separate.mask = PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B;
-               separate.src.format = PIPE_FORMAT_R8G8B8A8_UNORM;
-               separate.dst.format = PIPE_FORMAT_R8G8B8A8_UNORM;
-
-               fd_blitter_blit(ctx, &separate);
-               break;
-
-       case PIPE_MASK_S:
-               separate.mask = PIPE_MASK_A;
-               separate.src.format = PIPE_FORMAT_R8G8B8A8_UNORM;
-               separate.dst.format = PIPE_FORMAT_R8G8B8A8_UNORM;
-
-               fd_blitter_blit(ctx, &separate);
-               break;
+       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+               blit.mask = 0;
+               if (info->mask & PIPE_MASK_Z)
+                       blit.mask |= PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B;
+               if (info->mask & PIPE_MASK_S)
+                       blit.mask |= PIPE_MASK_A;
+               blit.src.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
+               blit.dst.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
+               return fd_blitter_blit(ctx, &blit);
 
        default:
-               unreachable("");
+               return false;
        }
 }
 
 static bool
-fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
+handle_rgba_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
 {
        struct fd_batch *batch;
 
-       if (info->dst.format == PIPE_FORMAT_Z24_UNORM_S8_UINT &&
-               info->mask != PIPE_MASK_ZS)  {
-               rewrite_combined_zs_blit(ctx, info);
-               return true;
-       }
+       debug_assert(!(info->mask & PIPE_MASK_ZS));
 
        if (!can_do_blit(info))
                return false;
 
-       fd_fence_ref(ctx->base.screen, &ctx->last_fence, NULL);
+       fd_fence_ref(&ctx->last_fence, NULL);
 
        batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true);
 
@@ -643,7 +670,7 @@ fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
 
        mtx_unlock(&ctx->screen->lock);
 
-       emit_setup(batch->draw);
+       emit_setup(batch);
 
        if ((info->src.resource->target == PIPE_BUFFER) &&
                        (info->dst.resource->target == PIPE_BUFFER)) {
@@ -654,27 +681,31 @@ fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
                /* I don't *think* we need to handle blits between buffer <-> !buffer */
                debug_assert(info->src.resource->target != PIPE_BUFFER);
                debug_assert(info->dst.resource->target != PIPE_BUFFER);
-
-               if (info->mask & (PIPE_MASK_ZS)) {
-                       rewrite_zs_blit(batch->draw, info);
-               } else {
-                       emit_blit_texture(batch->draw, info);
-               }
+               emit_blit_texture(batch->draw, info);
        }
 
        fd6_event_write(batch, batch->draw, 0x1d, true);
        fd6_event_write(batch, batch->draw, FACENESS_FLUSH, true);
        fd6_event_write(batch, batch->draw, CACHE_FLUSH_TS, true);
+       fd6_cache_inv(batch, batch->draw);
 
        fd_resource(info->dst.resource)->valid = true;
        batch->needs_flush = true;
 
-       fd_batch_flush(batch, false, false);
+       fd_batch_flush(batch, false);
        fd_batch_reference(&batch, NULL);
 
        return true;
 }
 
+static bool
+fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
+{
+       if (info->mask & PIPE_MASK_ZS)
+               return handle_zs_blit(ctx, info);
+       return handle_rgba_blit(ctx, info);
+}
+
 void
 fd6_blitter_init(struct pipe_context *pctx)
 {
@@ -687,6 +718,12 @@ fd6_blitter_init(struct pipe_context *pctx)
 unsigned
 fd6_tile_mode(const struct pipe_resource *tmpl)
 {
+       /* if the mipmap level 0 is still too small to be tiled, then don't
+        * bother pretending:
+        */
+       if (fd_resource_level_linear(tmpl, 0))
+               return TILE6_LINEAR;
+
        /* basically just has to be a format we can blit, so uploads/downloads
         * via linear staging buffer works:
         */