freedreno: Make the slice pitch be bytes, not pixels.
[mesa.git] / src / gallium / drivers / freedreno / a5xx / fd5_draw.c
index bc5232a4c17b34138315d7c6993d4d24b9f8f504..579d1cdaf886798700310284a8459a7fb14199eb 100644 (file)
@@ -54,15 +54,15 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
        OUT_PKT4(ring, REG_A5XX_VFD_INDEX_OFFSET, 2);
        OUT_RING(ring, info->index_size ? info->index_bias : info->start); /* VFD_INDEX_OFFSET */
-       OUT_RING(ring, info->start_instance);   /* ??? UNKNOWN_2209 */
+       OUT_RING(ring, info->start_instance);   /* VFD_INSTANCE_START_OFFSET */
 
        OUT_PKT4(ring, REG_A5XX_PC_RESTART_INDEX, 1);
        OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
                        info->restart_index : 0xffffffff);
 
-       fd5_emit_render_cntl(ctx, false, emit->key.binning_pass);
+       fd5_emit_render_cntl(ctx, false, emit->binning_pass);
        fd5_draw_emit(ctx->batch, ring, primtype,
-                       emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
+                       emit->binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
                        info, index_offset);
 }
 
@@ -106,8 +106,6 @@ fd5_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
                        .vclamp_color = ctx->rasterizer->clamp_vertex_color,
                        .fclamp_color = ctx->rasterizer->clamp_fragment_color,
                        .rasterflat = ctx->rasterizer->flatshade,
-                       .half_precision = ctx->in_blit &&
-                                       fd_half_precision(&ctx->batch->framebuffer),
                        .ucp_enables = ctx->rasterizer->clip_plane_enable,
                        .has_per_samp = (fd5_ctx->fsaturate || fd5_ctx->vsaturate ||
                                        fd5_ctx->fastc_srgb || fd5_ctx->vastc_srgb),
@@ -119,6 +117,8 @@ fd5_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
                        .fsaturate_r = fd5_ctx->fsaturate_r,
                        .vastc_srgb = fd5_ctx->vastc_srgb,
                        .fastc_srgb = fd5_ctx->fastc_srgb,
+                       .vsamples = ctx->tex[PIPE_SHADER_VERTEX].samples,
+                       .fsamples = ctx->tex[PIPE_SHADER_FRAGMENT].samples,
                },
                .rasterflat = ctx->rasterizer->flatshade,
                .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
@@ -128,22 +128,32 @@ fd5_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
        fixup_shader_state(ctx, &emit.key);
 
        unsigned dirty = ctx->dirty;
+       const struct ir3_shader_variant *vp = fd5_emit_get_vp(&emit);
+       const struct ir3_shader_variant *fp = fd5_emit_get_fp(&emit);
 
        /* do regular pass first, since that is more likely to fail compiling: */
 
-       if (!(fd5_emit_get_vp(&emit) && fd5_emit_get_fp(&emit)))
+       if (!vp || !fp)
                return false;
 
-       emit.key.binning_pass = false;
+       ctx->stats.vs_regs += ir3_shader_halfregs(vp);
+       ctx->stats.fs_regs += ir3_shader_halfregs(fp);
+
+       /* figure out whether we need to disable LRZ write for binning
+        * pass using draw pass's fp:
+        */
+       emit.no_lrz_write = fp->writes_pos || fp->no_earlyz;
+
+       emit.binning_pass = false;
        emit.dirty = dirty;
 
        draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
 
        /* and now binning pass: */
-       emit.key.binning_pass = true;
+       emit.binning_pass = true;
        emit.dirty = dirty & ~(FD_DIRTY_BLEND);
-       emit.vp = NULL;   /* we changed key so need to refetch vp */
-       emit.fp = NULL;
+       emit.vs = NULL;   /* we changed key so need to refetch vp */
+       emit.fs = NULL;
        draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
 
        if (emit.streamout_mask) {
@@ -174,25 +184,97 @@ static bool is_z32(enum pipe_format format)
        }
 }
 
+static void
+fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
+{   /* Emits, into batch->lrz_clear, the packets that clear zsbuf->lrz to 'depth'. */
+       struct fd_ringbuffer *ring;
+       uint32_t clear = util_pack_z(PIPE_FORMAT_Z16_UNORM, depth);   /* clear value packed as Z16_UNORM */
+
+       // TODO mid-frame clears (ie. app doing crazy stuff)??  Maybe worth
+       // splitting both clear and lrz clear out into their own rb's.  And
+       // just throw away any draws prior to clear.  (Anything not fullscreen
+       // clear, just fallback to generic path that treats it as a normal
+       // draw)
+
+       if (!batch->lrz_clear) {   /* create the lrz_clear ringbuffer on first use */
+               batch->lrz_clear = fd_submit_new_ringbuffer(batch->submit, 0x1000, 0);   /* NOTE(review): 0x1000 is presumably the size in bytes -- confirm */
+       }
+
+       ring = batch->lrz_clear;   /* every packet below is written to this ring */
+
+       OUT_WFI5(ring);
+
+       OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
+       OUT_RING(ring, 0x10000000);   /* magic value; meaning not visible here */
+
+       OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
+       OUT_RING(ring, 0x20fffff);   /* magic value; meaning not visible here */
+
+       OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1);
+       OUT_RING(ring, A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(0.0) |
+                       COND(zsbuf->base.nr_samples > 1, A5XX_GRAS_SU_CNTL_MSAA_ENABLE));   /* MSAA bit only when zsbuf is multisampled */
+
+       OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
+       OUT_RING(ring, 0x00000181);   /* magic value; meaning not visible here */
+
+       OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);   /* describe the LRZ buffer as MRT0: linear R16_UNORM */
+       OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(RB5_R16_UNORM) |
+                       A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE5_LINEAR) |
+                       A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
+       OUT_RING(ring, A5XX_RB_MRT_PITCH(zsbuf->lrz_pitch * 2));   /* NOTE(review): presumably lrz_pitch is in pixels and *2 converts to bytes (16bpp) -- confirm */
+       OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(fd_bo_size(zsbuf->lrz)));   /* array pitch is the full size of the lrz bo */
+       OUT_RELOCW(ring, zsbuf->lrz, 0x1000, 0, 0);   /* NOTE(review): 0x1000 looks like an offset into the lrz bo -- confirm against OUT_RELOCW */
+
+       OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A5XX_RB_DEST_MSAA_CNTL, 1);
+       OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE));
+
+       OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
+       OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0));   /* target the MRT0 slot configured above */
+
+       OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
+       OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR |
+                       A5XX_RB_CLEAR_CNTL_MASK(0xf));
+
+       OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1);
+       OUT_RING(ring, clear);  /* RB_CLEAR_COLOR_DW0 */
+
+       OUT_PKT4(ring, REG_A5XX_VSC_RESOLVE_CNTL, 2);
+       OUT_RING(ring, A5XX_VSC_RESOLVE_CNTL_X(zsbuf->lrz_width) |
+                        A5XX_VSC_RESOLVE_CNTL_Y(zsbuf->lrz_height));
+       OUT_RING(ring, 0x00000000);   // XXX UNKNOWN_0CDE
+
+       OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
+       OUT_RING(ring, A5XX_RB_CNTL_BYPASS);
+
+       OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);   /* two dwords: CNTL_1 = (0,0), CNTL_2 = (lrz_width-1, lrz_height-1) */
+       OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) |
+                       A5XX_RB_RESOLVE_CNTL_1_Y(0));
+       OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(zsbuf->lrz_width - 1) |
+                       A5XX_RB_RESOLVE_CNTL_2_Y(zsbuf->lrz_height - 1));
+
+       fd5_emit_blit(batch->ctx, ring);   /* emit the blit on this ring using the state programmed above */
+}
+
 static bool
 fd5_clear(struct fd_context *ctx, unsigned buffers,
                const union pipe_color_union *color, double depth, unsigned stencil)
 {
        struct fd_ringbuffer *ring = ctx->batch->draw;
        struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
-       struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
 
        if ((buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
                        is_z32(pfb->zsbuf->format))
                return false;
 
-       /* TODO handle scissor.. or fallback to slow-clear? */
-
-       ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx);
-       ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny);
-       ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
-       ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
-
        fd5_emit_render_cntl(ctx, true, false);
 
        if (buffers & PIPE_CLEAR_COLOR) {
@@ -236,13 +318,7 @@ fd5_clear(struct fd_context *ctx, unsigned buffers,
                                break;
                        }
 
-                       if (util_format_is_pure_uint(pfmt)) {
-                               util_format_write_4ui(pfmt, swapped.ui, 0, &uc, 0, 0, 0, 1, 1);
-                       } else if (util_format_is_pure_sint(pfmt)) {
-                               util_format_write_4i(pfmt, swapped.i, 0, &uc, 0, 0, 0, 1, 1);
-                       } else {
-                               util_pack_color(swapped.f, pfmt, &uc);
-                       }
+                       util_pack_color_union(pfmt, &uc, &swapped);
 
                        OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
                        OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0 + i));
@@ -283,6 +359,14 @@ fd5_clear(struct fd_context *ctx, unsigned buffers,
                OUT_RING(ring, clear);    /* RB_CLEAR_COLOR_DW0 */
 
                fd5_emit_blit(ctx, ring);
+
+               if (pfb->zsbuf && (buffers & PIPE_CLEAR_DEPTH)) {
+                       struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
+                       if (zsbuf->lrz) {
+                               zsbuf->lrz_valid = true;
+                               fd5_clear_lrz(ctx->batch, zsbuf, depth);
+                       }
+               }
        }
 
        /* disable fast clear to not interfere w/ gmem->mem, etc.. */