freedreno/a3xx: add Z32F support
author Ilia Mirkin <imirkin@alum.mit.edu>
Sat, 25 Apr 2015 05:21:26 +0000 (01:21 -0400)
committer Ilia Mirkin <imirkin@alum.mit.edu>
Tue, 28 Apr 2015 00:17:07 +0000 (20:17 -0400)
32-bit float depth buffers are stored as unorm in gmem, and thus need
special handling when moving to and from gmem. They are copied into gmem
by a special blit shader which writes depth, and resolved from gmem using
a special resolve bit which apparently float-ifies the data.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
src/gallium/drivers/freedreno/a3xx/fd3_format.c
src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
src/gallium/drivers/freedreno/freedreno_context.h
src/gallium/drivers/freedreno/freedreno_program.c
src/gallium/drivers/freedreno/freedreno_util.c

index 939693d53f13aff2f1ee0bb344242716e075f31b..76cb3182169b458133b102abb42dc7b6b8f8bf4a 100644 (file)
@@ -195,7 +195,7 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
 
        _T(Z24X8_UNORM,       X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
        _T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
-       /*_T(Z32_FLOAT,         Z32_FLOAT,   R8G8B8A8_UNORM, WZYX),*/
+       _T(Z32_FLOAT,         Z32_FLOAT,   R8G8B8A8_UNORM, WZYX),
 
        /* 48-bit */
        V_(R16G16B16_UNORM,   16_16_16_UNORM, NONE, WZYX),
index 4e2eefab1482eb6d662adbed616671ec7c795b80..d76acb2b100b1f2f44ed08baca7332b852146140 100644 (file)
@@ -304,6 +304,7 @@ emit_gmem2mem_surf(struct fd_context *ctx,
 {
        struct fd_ringbuffer *ring = ctx->ring;
        struct fd_resource *rsc = fd_resource(psurf->texture);
+       enum pipe_format format = psurf->format;
        struct fd_resource_slice *slice = fd_resource_slice(rsc, psurf->u.tex.level);
        uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level,
                        psurf->u.tex.first_layer);
@@ -313,7 +314,10 @@ emit_gmem2mem_surf(struct fd_context *ctx,
        OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
        OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
                        A3XX_RB_COPY_CONTROL_MODE(mode) |
-                       A3XX_RB_COPY_CONTROL_GMEM_BASE(base));
+                       A3XX_RB_COPY_CONTROL_GMEM_BASE(base) |
+                       COND(format == PIPE_FORMAT_Z32_FLOAT ||
+                                format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,
+                                A3XX_RB_COPY_CONTROL_UNK12));
 
        OUT_RELOCW(ring, rsc->bo, offset, 0, -1);    /* RB_COPY_DEST_BASE */
        OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp));
@@ -453,15 +457,35 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[],
 
        assert(bufs > 0);
 
-       emit_mrt(ring, bufs, psurf, bases, bin_w, false);
-
        OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
        OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
                                   A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
                                   A3XX_RB_MODE_CONTROL_MRT(bufs - 1));
 
-       OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
-       OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1));
+       emit_mrt(ring, bufs, psurf, bases, bin_w, false);
+
+       if (psurf[0] && psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) {
+               /* Depth is stored as unorm in gmem, so we have to write it in using a
+                * special blit shader which writes depth.
+                */
+               OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+               OUT_RING(ring, (A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z |
+                                               A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
+                                               A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
+                                               A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE |
+                                               A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS)));
+
+               OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
+               OUT_RING(ring, A3XX_RB_DEPTH_INFO_DEPTH_BASE(bases[0]) |
+                                A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_32));
+               OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 * ctx->gmem.bin_w));
+
+               OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1);
+               OUT_RING(ring, 0);
+       } else {
+               OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
+               OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1));
+       }
 
        fd3_emit_gmem_restore_tex(ring, psurf, bufs);
 
@@ -600,7 +624,21 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
        }
 
        if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
-               emit.prog = &ctx->blit_prog[0];
+               if (pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
+                       pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT) {
+                       /* Non-float can use a regular color write. It's split over 8-bit
+                        * components, so half precision is always sufficient.
+                        */
+                       emit.prog = &ctx->blit_prog[0];
+                       emit.key.half_precision = true;
+               } else {
+                       /* Float depth needs special blit shader that writes depth */
+                       if (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT)
+                               emit.prog = &ctx->blit_z;
+                       else
+                               emit.prog = &ctx->blit_zs;
+                       emit.key.half_precision = false;
+               }
                fd3_program_emit(ring, &emit, 1, &pfb->zsbuf);
                emit_mem2gmem_surf(ctx, &gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
        }
index a648689cefd9af93e8d82dc85ebb9442136c901b..e6a5f01d4129c0867f4758ef9b8b55aaa5747a3b 100644 (file)
@@ -180,6 +180,7 @@ struct fd_context {
 
        /* shaders used by mem->gmem blits: */
        struct fd_program_stateobj blit_prog[8]; // TODO move to screen?
+       struct fd_program_stateobj blit_z, blit_zs;
 
        /* do we need to mem2gmem before rendering.  We don't, if for example,
         * there was a glClear() that invalidated the entire previous buffer
index 52a165b64af6d0f4b1a35a3d68f90e1ed3222e62..5e344e691467fe3e6b504b7919621f8d24d4454d 100644 (file)
@@ -92,7 +92,7 @@ static void * assemble_tgsi(struct pipe_context *pctx,
 }
 
 static void *
-fd_prog_blit(struct pipe_context *pctx, int rts)
+fd_prog_blit(struct pipe_context *pctx, int rts, bool depth)
 {
        int i;
        struct ureg_src tc;
@@ -105,6 +105,12 @@ fd_prog_blit(struct pipe_context *pctx, int rts)
        for (i = 0; i < rts; i++)
                ureg_TEX(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, i),
                                 TGSI_TEXTURE_2D, tc, ureg_DECL_sampler(ureg, i));
+       if (depth)
+               ureg_TEX(ureg,
+                                ureg_writemask(
+                                                ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0),
+                                                TGSI_WRITEMASK_Z),
+                                TGSI_TEXTURE_2D, tc, ureg_DECL_sampler(ureg, rts));
 
        ureg_END(ureg);
 
@@ -128,11 +134,16 @@ void fd_prog_init(struct pipe_context *pctx)
        ctx->solid_prog.fp = assemble_tgsi(pctx, solid_fp, true);
        ctx->solid_prog.vp = assemble_tgsi(pctx, solid_vp, false);
        ctx->blit_prog[0].vp = assemble_tgsi(pctx, blit_vp, false);
-       ctx->blit_prog[0].fp = fd_prog_blit(pctx, 1);
+       ctx->blit_prog[0].fp = fd_prog_blit(pctx, 1, false);
        for (i = 1; i < ctx->screen->max_rts; i++) {
                ctx->blit_prog[i].vp = ctx->blit_prog[0].vp;
-               ctx->blit_prog[i].fp = fd_prog_blit(pctx, i + 1);
+               ctx->blit_prog[i].fp = fd_prog_blit(pctx, i + 1, false);
        }
+
+       ctx->blit_z.vp = ctx->blit_prog[0].vp;
+       ctx->blit_z.fp = fd_prog_blit(pctx, 0, true);
+       ctx->blit_zs.vp = ctx->blit_prog[0].vp;
+       ctx->blit_zs.fp = fd_prog_blit(pctx, 1, true);
 }
 
 void fd_prog_fini(struct pipe_context *pctx)
@@ -145,4 +156,6 @@ void fd_prog_fini(struct pipe_context *pctx)
        pctx->delete_vs_state(pctx, ctx->blit_prog[0].vp);
        for (i = 0; i < ctx->screen->max_rts; i++)
                pctx->delete_fs_state(pctx, ctx->blit_prog[i].fp);
+       pctx->delete_fs_state(pctx, ctx->blit_z.fp);
+       pctx->delete_fs_state(pctx, ctx->blit_zs.fp);
 }
index 9892b05c37eb208f22b9ab1bfb1d43a4bfd80fda..2acce06d1482ce1d9ba19d1b9333f3a474f46fca 100644 (file)
@@ -44,6 +44,9 @@ fd_pipe2depth(enum pipe_format format)
        case PIPE_FORMAT_X8Z24_UNORM:
        case PIPE_FORMAT_S8_UINT_Z24_UNORM:
                return DEPTHX_24_8;
+       case PIPE_FORMAT_Z32_FLOAT:
+       case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+               return DEPTHX_32;
        default:
                return ~0;
        }