From: Kristian H. Kristensen Date: Fri, 19 Oct 2018 21:29:49 +0000 (-0700) Subject: freedreno/a6xx: Clear gmem buffers at flush time X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9595be67a946c673273a4c9ea6b3c189d151e5f1;p=mesa.git freedreno/a6xx: Clear gmem buffers at flush time We generate an IB to clear the gmem at flush time and jump to it before rendering each tile. This lets us get rid of the command stream patching for gmem offsets. Signed-off-by: Kristian H. Kristensen Signed-off-by: Rob Clark --- diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c index d921a33f5f4..adb045adbea 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c @@ -376,159 +376,25 @@ fd6_clear(struct fd_context *ctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil) { struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; - struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); - struct fd_ringbuffer *ring = ctx->batch->draw; - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); - OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_TL_X(scissor->minx) | - A6XX_RB_BLIT_SCISSOR_TL_Y(scissor->miny)); - OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_BR_X(scissor->maxx - 1) | - A6XX_RB_BLIT_SCISSOR_BR_Y(scissor->maxy - 1)); - - if (buffers & PIPE_CLEAR_COLOR) { - for (int i = 0; i < pfb->nr_cbufs; i++) { - union util_color uc = {0}; - - if (!pfb->cbufs[i]) - continue; - - if (!(buffers & (PIPE_CLEAR_COLOR0 << i))) - continue; - - enum pipe_format pfmt = pfb->cbufs[i]->format; - - // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP?? - union pipe_color_union swapped; - switch (fd6_pipe2swap(pfmt)) { - case WZYX: - swapped.ui[0] = color->ui[0]; - swapped.ui[1] = color->ui[1]; - swapped.ui[2] = color->ui[2]; - swapped.ui[3] = color->ui[3]; - break; - case WXYZ: - swapped.ui[2] = color->ui[0]; - swapped.ui[1] = color->ui[1]; - swapped.ui[0] = color->ui[2]; - swapped.ui[3] = color->ui[3]; - break; - case ZYXW: - swapped.ui[3] = color->ui[0]; - swapped.ui[0] = color->ui[1]; - swapped.ui[1] = color->ui[2]; - swapped.ui[2] = color->ui[3]; - break; - case XYZW: - swapped.ui[3] = color->ui[0]; - swapped.ui[2] = color->ui[1]; - swapped.ui[1] = color->ui[2]; - swapped.ui[0] = color->ui[3]; - break; - } - - if (util_format_is_pure_uint(pfmt)) { - util_format_write_4ui(pfmt, swapped.ui, 0, &uc, 0, 0, 0, 1, 1); - } else if (util_format_is_pure_sint(pfmt)) { - util_format_write_4i(pfmt, swapped.i, 0, &uc, 0, 0, 0, 1, 1); - } else { - util_pack_color(swapped.f, pfmt, &uc); - } - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); - OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | - A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt))); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); - OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | - A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf)); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); - OUT_RINGP(ring, i, &ctx->batch->gmem_patches); - - OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); - OUT_RING(ring, 0); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4); - OUT_RING(ring, uc.ui[0]); - OUT_RING(ring, uc.ui[1]); - OUT_RING(ring, uc.ui[2]); - OUT_RING(ring, uc.ui[3]); - - fd6_emit_blit(ctx->batch, ring); - } - } - const bool has_depth = pfb->zsbuf; - const bool has_separate_stencil = - has_depth && fd_resource(pfb->zsbuf->texture)->stencil; - - /* First clear depth or combined depth/stencil. */ - if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) || - (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) { - enum pipe_format pfmt = pfb->zsbuf->format; - uint32_t clear_value; - uint32_t mask = 0; - - if (has_separate_stencil) { - pfmt = util_format_get_depth_only(pfb->zsbuf->format); - clear_value = util_pack_z(pfmt, depth); - } else { - pfmt = pfb->zsbuf->format; - clear_value = util_pack_z_stencil(pfmt, depth, stencil); - } - - if (buffers & PIPE_CLEAR_DEPTH) - mask |= 0x1; - - if (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) - mask |= 0x2; - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); - OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | - A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt))); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); - OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | - // XXX UNK0 for separate stencil ?? - A6XX_RB_BLIT_INFO_DEPTH | - A6XX_RB_BLIT_INFO_CLEAR_MASK(mask)); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); - OUT_RINGP(ring, MAX_RENDER_TARGETS, &ctx->batch->gmem_patches); - - OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); - OUT_RING(ring, 0); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1); - OUT_RING(ring, clear_value); - - fd6_emit_blit(ctx->batch, ring); - } - - /* Then clear the separate stencil buffer in case of 32 bit depth - * formats with separate stencil. */ - if (has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) { - OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); - OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | - A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(RB6_R8_UINT)); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); - OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | - //A6XX_RB_BLIT_INFO_UNK0 | - A6XX_RB_BLIT_INFO_DEPTH | - A6XX_RB_BLIT_INFO_CLEAR_MASK(0x1)); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); - OUT_RINGP(ring, MAX_RENDER_TARGETS + 1, &ctx->batch->gmem_patches); - - OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); - OUT_RING(ring, 0); + unsigned color_buffers = buffers >> 2; + unsigned i; + + /* If we're clearing after draws, fallback to 3D pipe clears. We could + * use blitter clears in the draw batch but then we'd have to patch up the + * gmem offsets. This doesn't seem like a useful thing to optimize for + * however.*/ + if (ctx->batch->num_draws > 0) + return false; - OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1); - OUT_RING(ring, stencil & 0xff); + foreach_bit(i, color_buffers) + ctx->batch->clear_color[i] = *color; + if (buffers & PIPE_CLEAR_DEPTH) + ctx->batch->clear_depth = depth; + if (buffers & PIPE_CLEAR_STENCIL) + ctx->batch->clear_stencil = stencil; - fd6_emit_blit(ctx->batch, ring); - } + ctx->batch->fast_cleared |= buffers; if (has_depth && (buffers & PIPE_CLEAR_DEPTH)) { struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c index e8b6d051cb6..94ad6641718 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c @@ -253,22 +253,6 @@ patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode) util_dynarray_resize(&batch->draw_patches, 0); } -static void -patch_gmem_bases(struct fd_batch *batch) -{ - struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; - unsigned i; - - for (i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) { - struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i); - if (patch->val < MAX_RENDER_TARGETS) - *patch->cs = gmem->cbuf_base[patch->val]; - else - *patch->cs = gmem->zsbuf_base[patch->val - MAX_RENDER_TARGETS]; - } - util_dynarray_resize(&batch->gmem_patches, 0); -} - static void update_render_cntl(struct fd_batch *batch, bool binning) { @@ -484,8 +468,6 @@ fd6_emit_tile_init(struct fd_batch *batch) emit_zs(ring, pfb->zsbuf, &ctx->gmem); emit_mrt(ring, pfb, &ctx->gmem); - patch_gmem_bases(batch); - disable_msaa(ring); if (use_hw_binning(batch)) { @@ -678,6 +660,163 @@ emit_restore_blit(struct fd_batch *batch, emit_blit(batch, ring, base, psurf, rsc); } +static void +emit_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) +{ + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + + uint32_t buffers = batch->fast_cleared; + + if (buffers & PIPE_CLEAR_COLOR) { + + for (int i = 0; i < pfb->nr_cbufs; i++) { + union pipe_color_union *color = &batch->clear_color[i]; + union util_color uc = {0}; + + if (!pfb->cbufs[i]) + continue; + + if (!(buffers & (PIPE_CLEAR_COLOR0 << i))) + continue; + + enum pipe_format pfmt = pfb->cbufs[i]->format; + + // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP?? + union pipe_color_union swapped; + switch (fd6_pipe2swap(pfmt)) { + case WZYX: + swapped.ui[0] = color->ui[0]; + swapped.ui[1] = color->ui[1]; + swapped.ui[2] = color->ui[2]; + swapped.ui[3] = color->ui[3]; + break; + case WXYZ: + swapped.ui[2] = color->ui[0]; + swapped.ui[1] = color->ui[1]; + swapped.ui[0] = color->ui[2]; + swapped.ui[3] = color->ui[3]; + break; + case ZYXW: + swapped.ui[3] = color->ui[0]; + swapped.ui[0] = color->ui[1]; + swapped.ui[1] = color->ui[2]; + swapped.ui[2] = color->ui[3]; + break; + case XYZW: + swapped.ui[3] = color->ui[0]; + swapped.ui[2] = color->ui[1]; + swapped.ui[1] = color->ui[2]; + swapped.ui[0] = color->ui[3]; + break; + } + + if (util_format_is_pure_uint(pfmt)) { + util_format_write_4ui(pfmt, swapped.ui, 0, &uc, 0, 0, 0, 1, 1); + } else if (util_format_is_pure_sint(pfmt)) { + util_format_write_4i(pfmt, swapped.i, 0, &uc, 0, 0, 0, 1, 1); + } else { + util_pack_color(swapped.f, pfmt, &uc); + } + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt))); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | + A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf)); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); + OUT_RING(ring, gmem->cbuf_base[i]); + + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4); + OUT_RING(ring, uc.ui[0]); + OUT_RING(ring, uc.ui[1]); + OUT_RING(ring, uc.ui[2]); + OUT_RING(ring, uc.ui[3]); + + fd6_emit_blit(batch, ring); + } + } + + const bool has_depth = pfb->zsbuf; + const bool has_separate_stencil = + has_depth && fd_resource(pfb->zsbuf->texture)->stencil; + + /* First clear depth or combined depth/stencil. */ + if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) || + (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) { + enum pipe_format pfmt = pfb->zsbuf->format; + uint32_t clear_value; + uint32_t mask = 0; + + if (has_separate_stencil) { + pfmt = util_format_get_depth_only(pfb->zsbuf->format); + clear_value = util_pack_z(pfmt, batch->clear_depth); + } else { + pfmt = pfb->zsbuf->format; + clear_value = util_pack_z_stencil(pfmt, batch->clear_depth, + batch->clear_stencil); + } + + if (buffers & PIPE_CLEAR_DEPTH) + mask |= 0x1; + + if (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) + mask |= 0x2; + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt))); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | + // XXX UNK0 for separate stencil ?? + A6XX_RB_BLIT_INFO_DEPTH | + A6XX_RB_BLIT_INFO_CLEAR_MASK(mask)); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); + OUT_RING(ring, gmem->zsbuf_base[0]); + + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1); + OUT_RING(ring, clear_value); + + fd6_emit_blit(batch, ring); + } + + /* Then clear the separate stencil buffer in case of 32 bit depth + * formats with separate stencil. */ + if (has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) { + OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(RB6_R8_UINT)); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | + //A6XX_RB_BLIT_INFO_UNK0 | + A6XX_RB_BLIT_INFO_DEPTH | + A6XX_RB_BLIT_INFO_CLEAR_MASK(0x1)); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); + OUT_RING(ring, gmem->zsbuf_base[1]); + + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1); + OUT_RING(ring, batch->clear_stencil & 0xff); + + fd6_emit_blit(batch, ring); + } +} + /* * transfer from system memory to gmem */ @@ -724,6 +863,7 @@ prepare_tile_setup_ib(struct fd_batch *batch) set_blit_scissor(batch, batch->tile_setup); emit_restore_blits(batch, batch->tile_setup); + emit_clears(batch, batch->tile_setup); } /* diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c index de4c8198c20..28b4942f9f8 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.c +++ b/src/gallium/drivers/freedreno/freedreno_batch.c @@ -76,6 +76,7 @@ batch_init(struct fd_batch *batch) batch->fence = fd_fence_create(batch); batch->cleared = 0; + batch->fast_cleared = 0; batch->invalidated = 0; batch->restore = batch->resolve = 0; batch->needs_flush = false; @@ -91,8 +92,6 @@ batch_init(struct fd_batch *batch) if (is_a3xx(ctx->screen)) util_dynarray_init(&batch->rbrc_patches, NULL); - util_dynarray_init(&batch->gmem_patches, NULL); - assert(batch->resources->entries == 0); util_dynarray_init(&batch->samples, NULL); @@ -167,8 +166,6 @@ batch_fini(struct fd_batch *batch) if (is_a3xx(batch->ctx->screen)) util_dynarray_fini(&batch->rbrc_patches); - util_dynarray_fini(&batch->gmem_patches); - while (batch->samples.size > 0) { struct fd_hw_sample *samp = util_dynarray_pop(&batch->samples, struct fd_hw_sample *); diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h index a9a7ecdaf24..d4feadd5590 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.h +++ b/src/gallium/drivers/freedreno/freedreno_batch.h @@ -95,7 +95,7 @@ struct fd_batch { FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH, FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL, FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL, - } invalidated, cleared, restore, resolve; + } invalidated, cleared, fast_cleared, restore, resolve; /* is this a non-draw batch (ie compute/blit which has no pfb state)? */ bool nondraw : 1; @@ -136,11 +136,6 @@ struct fd_batch { */ struct util_dynarray draw_patches; - /* Keep track of blitter GMEM offsets that need to be patched up once we - * know the gmem layout: - */ - struct util_dynarray gmem_patches; - /* Keep track of writes to RB_RENDER_CONTROL which need to be patched * once we know whether or not to use GMEM, and GMEM tile pitch. * @@ -165,6 +160,10 @@ struct fd_batch { struct fd_ringbuffer *tile_setup; struct fd_ringbuffer *tile_fini; + union pipe_color_union clear_color[MAX_RENDER_TARGETS]; + double clear_depth; + unsigned clear_stencil; + /** * hw query related state: */