From 7bfe8cf4a487aec4870df23f6f72c828f1caaa49 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 5 Aug 2015 18:14:49 -0400 Subject: [PATCH] freedreno/a4xx: add s8/z32/z32_s8x24 support Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 41 ++++-- .../drivers/freedreno/a4xx/fd4_format.c | 19 ++- src/gallium/drivers/freedreno/a4xx/fd4_gmem.c | 126 ++++++++++++++---- .../drivers/freedreno/freedreno_util.h | 2 +- 4 files changed, 151 insertions(+), 37 deletions(-) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index e7c210d3437..c3226d5121f 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -200,23 +200,13 @@ void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs, struct pipe_surface **bufs) { - unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0}; + unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS]; int i; for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) { mrt_comp[i] = (i < nr_bufs) ? 
0xf : 0; } - OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1); - OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) | - A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) | - A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) | - A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) | - A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) | - A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) | - A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) | - A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7])); - /* output sampler state: */ OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * nr_bufs)); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | @@ -250,6 +240,25 @@ fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs, uint32_t offset = fd_resource_offset(rsc, lvl, bufs[i]->u.tex.first_layer); enum pipe_format format = fd4_gmem_restore_format(bufs[i]->format); + /* The restore blit_zs shader expects stencil in sampler 0, + * and depth in sampler 1 + */ + if (rsc->stencil && (i == 0)) { + rsc = rsc->stencil; + format = fd4_gmem_restore_format(rsc->base.b.format); + } + + /* z32 restore is accomplished using depth write. If there is + * no stencil component (ie. 
PIPE_FORMAT_Z32_FLOAT) + * then no render target: + * + * (The same applies for z32_s8x24, since for stencil sampler + * state the above 'if' will replace 'format' with s8) + */ + if ((format == PIPE_FORMAT_Z32_FLOAT) || + (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) + mrt_comp[i] = 0; + debug_assert(bufs[i]->u.tex.first_layer == bufs[i]->u.tex.last_layer); OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) | @@ -281,6 +290,16 @@ fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs, OUT_RING(ring, 0x00000000); } } + + OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1); + OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) | + A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) | + A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) | + A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) | + A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) | + A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) | + A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) | + A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7])); } void diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c index 29abe0b0cc3..f906a6781b5 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c @@ -89,6 +89,14 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { _T(L8_UNORM, 8_UNORM, R8_UNORM, WZYX), _T(I8_UNORM, 8_UNORM, NONE, WZYX), + /* NOTE: should be TFMT_8_UINT (which then gets remapped to + * TFMT_8_UNORM for mem2gmem in _gmem_restore_format()), but + * we don't know TFMT_8_UINT yet.. so just use TFMT_8_UNORM + * for now.. sampling from stencil as a texture might not + * work right, but at least should be fine for zsbuf.. 
+ */ + _T(S8_UINT, 8_UNORM, R8_UNORM, WZYX), + /* 16-bit */ V_(R16_UNORM, 16_UNORM, NONE, WZYX), V_(R16_SNORM, 16_SNORM, NONE, WZYX), @@ -191,7 +199,8 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { _T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), _T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), - /*_T(Z32_FLOAT, Z32_FLOAT, R8G8B8A8_UNORM, WZYX),*/ + _T(Z32_FLOAT, 32_FLOAT, R8G8B8A8_UNORM, WZYX), + _T(Z32_FLOAT_S8X24_UINT, 32_FLOAT,R8G8B8A8_UNORM, WZYX), /* 48-bit */ V_(R16G16B16_UNORM, 16_16_16_UNORM, NONE, WZYX), @@ -282,6 +291,9 @@ fd4_pipe2swap(enum pipe_format format) enum a4xx_tex_fetchsize fd4_pipe2fetchsize(enum pipe_format format) { + if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + format = PIPE_FORMAT_Z32_FLOAT; + switch (util_format_get_blocksizebits(format)) { case 8: return TFETCH4_1_BYTE; case 16: return TFETCH4_2_BYTE; @@ -312,6 +324,8 @@ fd4_gmem_restore_format(enum pipe_format format) return PIPE_FORMAT_R8G8B8A8_UNORM; case PIPE_FORMAT_Z16_UNORM: return PIPE_FORMAT_R8G8_UNORM; + case PIPE_FORMAT_S8_UINT: + return PIPE_FORMAT_R8_UNORM; default: return format; } @@ -328,6 +342,9 @@ fd4_pipe2depth(enum pipe_format format) case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_UINT_Z24_UNORM: return DEPTH4_24_8; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return DEPTH4_32; default: return ~0; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c index 890e71ead2c..6541da54161 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c @@ -68,11 +68,23 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, if ((i < nr_bufs) && bufs[i]) { struct pipe_surface *psurf = bufs[i]; + enum pipe_format pformat = 0; rsc = fd_resource(psurf->texture); + pformat = psurf->format; + + /* In case we're drawing to Z32F_S8, the "color" actually goes to + * the stencil + */ + if (rsc->stencil) { + rsc = 
rsc->stencil; + pformat = rsc->base.b.format; + bases++; + } + slice = fd_resource_slice(rsc, psurf->u.tex.level); - format = fd4_pipe2color(psurf->format); - swap = fd4_pipe2swap(psurf->format); + format = fd4_pipe2color(pformat); + swap = fd4_pipe2swap(pformat); debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); @@ -114,13 +126,23 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, /* transfer from gmem to system memory (ie. normal RAM) */ static void -emit_gmem2mem_surf(struct fd_context *ctx, +emit_gmem2mem_surf(struct fd_context *ctx, bool stencil, uint32_t base, struct pipe_surface *psurf) { struct fd_ringbuffer *ring = ctx->ring; struct fd_resource *rsc = fd_resource(psurf->texture); - struct fd_resource_slice *slice = &rsc->slices[psurf->u.tex.level]; - uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level, + enum pipe_format pformat = psurf->format; + struct fd_resource_slice *slice; + uint32_t offset; + + if (stencil) { + debug_assert(rsc->stencil); + rsc = rsc->stencil; + pformat = rsc->base.b.format; + } + + slice = &rsc->slices[psurf->u.tex.level]; + offset = fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); @@ -132,10 +154,10 @@ emit_gmem2mem_surf(struct fd_context *ctx, OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */ OUT_RING(ring, A4XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp)); OUT_RING(ring, A4XX_RB_COPY_DEST_INFO_TILE(TILE4_LINEAR) | - A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(psurf->format)) | + A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(pformat)) | A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) | A4XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) | - A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(psurf->format))); + A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(pformat))); fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL); @@ -226,7 +248,11 @@ 
fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) fd4_emit_vertex_bufs(ring, &emit); if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { - emit_gmem2mem_surf(ctx, gmem->zsbuf_base[0], pfb->zsbuf); + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + if (!rsc->stencil || (ctx->resolve & FD_BUFFER_DEPTH)) + emit_gmem2mem_surf(ctx, false, ctx->gmem.zsbuf_base[0], pfb->zsbuf); + if (rsc->stencil && (ctx->resolve & FD_BUFFER_STENCIL)) + emit_gmem2mem_surf(ctx, true, ctx->gmem.zsbuf_base[1], pfb->zsbuf); } if (ctx->resolve & FD_BUFFER_COLOR) { @@ -236,7 +262,7 @@ fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) continue; if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i))) continue; - emit_gmem2mem_surf(ctx, gmem->cbuf_base[i], pfb->cbufs[i]); + emit_gmem2mem_surf(ctx, false, gmem->cbuf_base[i], pfb->cbufs[i]); } } @@ -254,9 +280,20 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t *bases, struct pipe_surface **bufs, uint32_t nr_bufs, uint32_t bin_w) { struct fd_ringbuffer *ring = ctx->ring; + struct pipe_surface *zsbufs[2]; emit_mrt(ring, nr_bufs, bufs, bases, bin_w); + if (bufs[0] && (bufs[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) { + /* The gmem_restore_tex logic will put the first buffer's stencil + * as color. Supply it with the proper information to make that + * happen. 
+ */ + zsbufs[0] = zsbufs[1] = bufs[0]; + bufs = zsbufs; + nr_bufs = 2; + } + fd4_emit_gmem_restore_tex(ring, nr_bufs, bufs); fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, @@ -394,13 +431,6 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) bin_w = gmem->bin_w; bin_h = gmem->bin_h; - if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { - emit.prog = &ctx->blit_prog[0]; - emit.fp = NULL; /* frag shader changed so clear cache */ - fd4_program_emit(ring, &emit, 1, &pfb->zsbuf); - emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); - } - if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) { emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1]; emit.fp = NULL; /* frag shader changed so clear cache */ @@ -408,6 +438,40 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w); } + if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + switch (pfb->zsbuf->format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + case PIPE_FORMAT_Z32_FLOAT: + emit.prog = (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT) ? + &ctx->blit_z : &ctx->blit_zs; + emit.key.half_precision = false; + + OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1); + OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_ENABLE | + A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE | + A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS) | + A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE); + + OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1); + OUT_RING(ring, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE); + + OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1); + OUT_RING(ring, 0x80000); /* GRAS_CL_CLIP_CNTL */ + + break; + default: + /* Non-float can use a regular color write. It's split over 8-bit + * components, so half precision is always sufficient. 
+ */ + emit.prog = &ctx->blit_prog[0]; + emit.key.half_precision = true; + break; + } + emit.fp = NULL; /* frag shader changed so clear cache */ + fd4_program_emit(ring, &emit, 1, &pfb->zsbuf); + emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); + } + OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1); OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | @@ -546,21 +610,35 @@ fd4_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) struct fd_ringbuffer *ring = ctx->ring; struct pipe_framebuffer_state *pfb = &ctx->framebuffer; struct fd_gmem_stateobj *gmem = &ctx->gmem; - uint32_t reg; - OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3); - reg = A4XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]); if (pfb->zsbuf) { - reg |= A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd4_pipe2depth(pfb->zsbuf->format)); - } - OUT_RING(ring, reg); - if (pfb->zsbuf) { - uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format); + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + uint32_t cpp = rsc->cpp; + + OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3); + OUT_RING(ring, A4XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]) | + A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd4_pipe2depth(pfb->zsbuf->format))); OUT_RING(ring, A4XX_RB_DEPTH_PITCH(cpp * gmem->bin_w)); OUT_RING(ring, A4XX_RB_DEPTH_PITCH2(cpp * gmem->bin_w)); + + OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2); + if (rsc->stencil) { + OUT_RING(ring, A4XX_RB_STENCIL_INFO_SEPARATE_STENCIL | + A4XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1])); + OUT_RING(ring, A4XX_RB_STENCIL_PITCH(rsc->stencil->cpp * gmem->bin_w)); + } else { + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } } else { + OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3); + OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); + + OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2); + OUT_RING(ring, 0); /* RB_STENCIL_INFO */ + OUT_RING(ring, 0); /* RB_STENCIL_PITCH */ } 
OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1); diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index b5c5db91788..d6a08b5fa13 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -57,7 +57,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op); #define A2XX_MAX_RENDER_TARGETS 1 #define A3XX_MAX_RENDER_TARGETS 4 #define A4XX_MAX_RENDER_TARGETS 8 -/* for now until a4xx MRT support: */ + #define MAX_RENDER_TARGETS A4XX_MAX_RENDER_TARGETS #define FD_DBG_MSGS 0x0001 -- 2.30.2