freedreno/a3xx: add MRT support
author Ilia Mirkin <imirkin@alum.mit.edu>
Sun, 15 Feb 2015 08:39:43 +0000 (03:39 -0500)
committer Ilia Mirkin <imirkin@alum.mit.edu>
Thu, 2 Apr 2015 04:09:14 +0000 (00:09 -0400)
The hardware only supports 4 MRTs. It should be possible to emulate
support for 8, but doesn't seem worth the trouble.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
docs/relnotes/10.6.0.html
src/gallium/drivers/freedreno/a3xx/fd3_draw.c
src/gallium/drivers/freedreno/a3xx/fd3_emit.c
src/gallium/drivers/freedreno/a3xx/fd3_emit.h
src/gallium/drivers/freedreno/a3xx/fd3_format.h
src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
src/gallium/drivers/freedreno/a3xx/fd3_program.c
src/gallium/drivers/freedreno/a3xx/fd3_program.h
src/gallium/drivers/freedreno/a3xx/fd3_screen.c

diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html
index 323363737b60df51c730cf4b6a2baa77b905d860..22201e174e107aac9cbd15e6747a52db690b8bca 100644 (file)
@@ -45,11 +45,12 @@ Note: some of the new features are only available with certain drivers.
 
 <ul>
 <li>GL_AMD_pinned_memory on r600, radeonsi</li>
+<li>GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600</li>
 <li>GL_ARB_draw_instanced on freedreno</li>
 <li>GL_ARB_gpu_shader_fp64 on nvc0, softpipe</li>
 <li>GL_ARB_instanced_arrays on freedreno</li>
 <li>GL_ARB_pipeline_statistics_query on i965, nv50, nvc0, r600, radeonsi, softpipe</li>
-<li>GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600</li>
+<li>GL_EXT_draw_buffers2 on freedreno</li>
 </ul>
 
 <h2>Bug fixes</h2>
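diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index 6ff762e2ae45ef3800df1dedf484a94df12883e1..044355c2b6838bdcd4107df8741c7b4fa669a89c 100644 (file)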
@@ -129,7 +129,6 @@ static void
 fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
 {
        struct fd3_context *fd3_ctx = fd3_context(ctx);
-       struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
        struct fd3_emit emit = {
                .vtx  = &ctx->vtx,
                .prog = &ctx->prog,
@@ -152,7 +151,6 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
                        .vinteger_s = fd3_ctx->vinteger_s,
                        .finteger_s = fd3_ctx->finteger_s,
                },
-               .format = pipe_surface_format(pfb->cbufs[0]),
                .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
                .sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : 0,
        };
@@ -239,17 +237,18 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
 {
        struct fd3_context *fd3_ctx = fd3_context(ctx);
        struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
-       enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
        struct fd_ringbuffer *ring = ctx->ring;
        unsigned dirty = ctx->dirty;
-       unsigned ce, i;
+       unsigned i;
        struct fd3_emit emit = {
                .vtx  = &fd3_ctx->solid_vbuf_state,
                .prog = &ctx->solid_prog,
                .key = {
-                       .half_precision = fd3_half_precision(format),
+                       .half_precision = (fd3_half_precision(pfb->cbufs[0]) &&
+                                                          fd3_half_precision(pfb->cbufs[1]) &&
+                                                          fd3_half_precision(pfb->cbufs[2]) &&
+                                                          fd3_half_precision(pfb->cbufs[3])),
                },
-               .format = format,
        };
 
        dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
@@ -326,17 +325,12 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
                                A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
        }
 
-       if (buffers & PIPE_CLEAR_COLOR) {
-               ce = 0xf;
-       } else {
-               ce = 0x0;
-       }
-
        for (i = 0; i < 4; i++) {
                OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
                OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
                                A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS) |
-                               A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));
+                               COND(buffers & (PIPE_CLEAR_COLOR0 << i),
+                                        A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf)));
 
                OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
                OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
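diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index a5874e44ba86f5a8e87f35d2a0e64fb50fed469c..1b656b77464e4b8f7549ec1d5ddb0350f0a03917 100644 (file)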
@@ -293,59 +293,92 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
  * case format (fd3_gmem_restore_format()) stuff for restoring depth/stencil.
  */
 void
-fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf)
+fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
+                                                 struct pipe_surface **psurf,
+                                                 int bufs)
 {
-       struct fd_resource *rsc = fd_resource(psurf->texture);
-       unsigned lvl = psurf->u.tex.level;
-       struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
-       uint32_t offset = fd_resource_offset(rsc, lvl, psurf->u.tex.first_layer);
-       enum pipe_format format = fd3_gmem_restore_format(psurf->format);
-
-       debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+       int i, j;
 
        /* output sampler state: */
-       OUT_PKT3(ring, CP_LOAD_STATE, 4);
+       OUT_PKT3(ring, CP_LOAD_STATE, 2 + 2 * bufs);
        OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
                        CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
                        CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
-                       CP_LOAD_STATE_0_NUM_UNIT(1));
+                       CP_LOAD_STATE_0_NUM_UNIT(bufs));
        OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
                        CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) |
-                       A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) |
-                       A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) |
-                       A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) |
-                       A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT));
-       OUT_RING(ring, 0x00000000);
+       for (i = 0; i < bufs; i++) {
+               OUT_RING(ring, A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) |
+                                A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) |
+                                A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) |
+                                A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) |
+                                A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT));
+               OUT_RING(ring, 0x00000000);
+       }
 
        /* emit texture state: */
-       OUT_PKT3(ring, CP_LOAD_STATE, 6);
+       OUT_PKT3(ring, CP_LOAD_STATE, 2 + 4 * bufs);
        OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
                        CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
                        CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
-                       CP_LOAD_STATE_0_NUM_UNIT(1));
+                       CP_LOAD_STATE_0_NUM_UNIT(bufs));
        OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
                        CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(format)) |
-                       A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
-                       fd3_tex_swiz(format,  PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
-                                       PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
-       OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) |
-                       A3XX_TEX_CONST_1_WIDTH(psurf->width) |
-                       A3XX_TEX_CONST_1_HEIGHT(psurf->height));
-       OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) |
-                       A3XX_TEX_CONST_2_INDX(0));
-       OUT_RING(ring, 0x00000000);
+       for (i = 0; i < bufs; i++) {
+               if (!psurf[i]) {
+                       OUT_RING(ring, A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
+                               A3XX_TEX_CONST_0_SWIZ_X(A3XX_TEX_ONE) |
+                               A3XX_TEX_CONST_0_SWIZ_Y(A3XX_TEX_ONE) |
+                               A3XX_TEX_CONST_0_SWIZ_Z(A3XX_TEX_ONE) |
+                               A3XX_TEX_CONST_0_SWIZ_W(A3XX_TEX_ONE));
+                       OUT_RING(ring, 0x00000000);
+                       OUT_RING(ring, A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
+                       OUT_RING(ring, 0x00000000);
+                       continue;
+               }
+
+               struct fd_resource *rsc = fd_resource(psurf[i]->texture);
+               unsigned lvl = psurf[i]->u.tex.level;
+               struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
+               enum pipe_format format = fd3_gmem_restore_format(psurf[i]->format);
+
+               debug_assert(psurf[i]->u.tex.first_layer == psurf[i]->u.tex.last_layer);
+
+               OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(format)) |
+                                A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
+                                fd3_tex_swiz(format,  PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
+                                                         PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
+               OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) |
+                                A3XX_TEX_CONST_1_WIDTH(psurf[i]->width) |
+                                A3XX_TEX_CONST_1_HEIGHT(psurf[i]->height));
+               OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) |
+                                A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
+               OUT_RING(ring, 0x00000000);
+       }
 
        /* emit mipaddrs: */
-       OUT_PKT3(ring, CP_LOAD_STATE, 3);
+       OUT_PKT3(ring, CP_LOAD_STATE, 2 + BASETABLE_SZ * bufs);
        OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * FRAG_TEX_OFF) |
                        CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
                        CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_MIPADDR) |
-                       CP_LOAD_STATE_0_NUM_UNIT(1));
+                       CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * bufs));
        OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
                        CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
-       OUT_RELOC(ring, rsc->bo, offset, 0, 0);
+       for (i = 0; i < bufs; i++) {
+               if (psurf[i]) {
+                       struct fd_resource *rsc = fd_resource(psurf[i]->texture);
+                       unsigned lvl = psurf[i]->u.tex.level;
+                       uint32_t offset = fd_resource_offset(rsc, lvl, psurf[i]->u.tex.first_layer);
+                       OUT_RELOC(ring, rsc->bo, offset, 0, 0);
+               } else {
+                       OUT_RING(ring, 0x00000000);
+               }
+
+               /* pad the remaining entries w/ null: */
+               for (j = 1; j < BASETABLE_SZ; j++) {
+                       OUT_RING(ring, 0x00000000);
+               }
+       }
 }
 
 void
@@ -570,8 +603,10 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
        }
 
-       if (dirty & FD_DIRTY_PROG)
-               fd3_program_emit(ring, emit);
+       if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
+               struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+               fd3_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs);
+       }
 
        /* TODO we should not need this or fd_wfi() before emit_constants():
         */
@@ -624,6 +659,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                                control |= A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
                        }
 
+                       if (format == PIPE_FORMAT_NONE)
+                               control &= ~A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
+
                        if (has_alpha) {
                                blend_control |= blend->rb_mrt[i].blend_control_rgb;
                        } else {
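diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
index ce51c0c4968a5a643df75aa9895acd82a5ea961a..a438ddaee8523308387951a68a84d03f30a5ccc5 100644 (file)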
@@ -45,7 +45,7 @@ void fd3_emit_constant(struct fd_ringbuffer *ring,
                const uint32_t *dwords, struct pipe_resource *prsc);
 
 void fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
-               struct pipe_surface *psurf);
+               struct pipe_surface **psurf, int bufs);
 
 /* grouped together emit-state for prog/vertex/state emit: */
 struct fd3_emit {
@@ -53,7 +53,6 @@ struct fd3_emit {
        const struct fd_program_stateobj *prog;
        const struct pipe_draw_info *info;
        struct ir3_shader_key key;
-       enum pipe_format format;
        uint32_t dirty;
 
        uint32_t sprite_coord_enable;
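diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.h b/src/gallium/drivers/freedreno/a3xx/fd3_format.h
index 6a47fda102978c50880de0254cc321e1228c808d..6afc3015901862ee2f723806011397aa3e037a6e 100644 (file)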
@@ -42,8 +42,14 @@ uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
                unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
 
 static INLINE bool
-fd3_half_precision(enum pipe_format format)
+fd3_half_precision(const struct pipe_surface *surface)
 {
+       enum pipe_format format;
+       if (!surface)
+               return true;
+
+       format = surface->format;
+
        /* colors are provided in consts, which go through cov.f32f16, which will
         * break these values
         */
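diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index 304fc846af898ec0b3cf388151bb3d495e2cbb34..8589dd6faa18fd806af5efb4e7e39adec4f9fe1c 100644 (file)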
@@ -89,6 +89,8 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
                        } else {
                                stride = slice->pitch * rsc->cpp;
                        }
+               } else if (i < nr_bufs && bases) {
+                       base = bases[i];
                }
 
                OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2);
@@ -97,7 +99,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
                                A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
                                A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
                                COND(srgb, A3XX_RB_MRT_BUF_INFO_COLOR_SRGB));
-               if (bin_w || (i >= nr_bufs)) {
+               if (bin_w || (i >= nr_bufs) || !bufs[i]) {
                        OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));
                } else {
                        OUT_RELOCW(ring, rsc->bo, offset, 0, -1);
@@ -110,20 +112,6 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
        }
 }
 
-static uint32_t
-depth_base(struct fd_context *ctx)
-{
-       struct fd_gmem_stateobj *gmem = &ctx->gmem;
-       struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
-       uint32_t cpp = 4;
-       if (pfb->cbufs[0]) {
-               struct fd_resource *rsc =
-                               fd_resource(pfb->cbufs[0]->texture);
-               cpp = rsc->cpp;
-       }
-       return align(gmem->bin_w * gmem->bin_h * cpp, 0x4000);
-}
-
 static bool
 use_hw_binning(struct fd_context *ctx)
 {
@@ -167,7 +155,8 @@ emit_binning_workaround(struct fd_context *ctx)
 
        OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
        OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
-                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+                       A3XX_RB_MODE_CONTROL_MRT(0));
        OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) |
                        A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
                        A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
@@ -189,7 +178,7 @@ emit_binning_workaround(struct fd_context *ctx)
                        A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
                        A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
 
-       fd3_program_emit(ring, &emit);
+       fd3_program_emit(ring, &emit, 0, NULL);
        fd3_emit_vertex_bufs(ring, &emit);
 
        OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);
@@ -338,15 +327,14 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
        struct fd3_context *fd3_ctx = fd3_context(ctx);
        struct fd_ringbuffer *ring = ctx->ring;
        struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
-       enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
        struct fd3_emit emit = {
                        .vtx = &fd3_ctx->solid_vbuf_state,
                        .prog = &ctx->solid_prog,
                        .key = {
-                               .half_precision = fd3_half_precision(format),
+                               .half_precision = true,
                        },
-                       .format = format,
        };
+       int i;
 
        OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
        OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
@@ -388,7 +376,8 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
 
        OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
        OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
-                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+                       A3XX_RB_MODE_CONTROL_MRT(0));
 
        OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
        OUT_RING(ring, A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
@@ -419,21 +408,28 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
        OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
        OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
 
-       fd3_program_emit(ring, &emit);
+       fd3_program_emit(ring, &emit, 0, NULL);
        fd3_emit_vertex_bufs(ring, &emit);
 
-       if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
-               uint32_t base = depth_base(ctx);
-               emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, base, pfb->zsbuf);
-       }
+       if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
+               emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL,
+                                                  ctx->gmem.zsbuf_base, pfb->zsbuf);
 
        if (ctx->resolve & FD_BUFFER_COLOR) {
-               emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE, 0, pfb->cbufs[0]);
+               for (i = 0; i < pfb->nr_cbufs; i++) {
+                       if (!pfb->cbufs[i])
+                               continue;
+                       if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i)))
+                               continue;
+                       emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE,
+                                                          ctx->gmem.cbuf_base[i], pfb->cbufs[i]);
+               }
        }
 
        OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
        OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
-                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+                       A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
 
        OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
        OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
@@ -444,14 +440,24 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
 /* transfer from system memory to gmem */
 
 static void
-emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
-               struct pipe_surface *psurf, uint32_t bin_w)
+emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[],
+               struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w)
 {
        struct fd_ringbuffer *ring = ctx->ring;
 
-       emit_mrt(ring, 1, &psurf, &base, bin_w);
+       assert(bufs > 0);
+
+       emit_mrt(ring, bufs, psurf, bases, bin_w);
+
+       OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+       OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+                                  A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+                                  A3XX_RB_MODE_CONTROL_MRT(bufs - 1));
+
+       OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
+       OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1));
 
-       fd3_emit_gmem_restore_tex(ring, psurf);
+       fd3_emit_gmem_restore_tex(ring, psurf, bufs);
 
        fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
                        DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
@@ -464,15 +470,17 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
        struct fd_gmem_stateobj *gmem = &ctx->gmem;
        struct fd_ringbuffer *ring = ctx->ring;
        struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
-       enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
        struct fd3_emit emit = {
                        .vtx = &fd3_ctx->blit_vbuf_state,
-                       .prog = &ctx->blit_prog[0],
                        .sprite_coord_enable = 1,
+                       /* NOTE: They all use the same VP, this is for vtx bufs. */
+                       .prog = &ctx->blit_prog[0],
                        .key = {
-                               .half_precision = fd3_half_precision(format),
+                               .half_precision = (fd3_half_precision(pfb->cbufs[0]) &&
+                                                                  fd3_half_precision(pfb->cbufs[1]) &&
+                                                                  fd3_half_precision(pfb->cbufs[2]) &&
+                                                                  fd3_half_precision(pfb->cbufs[3])),
                        },
-                       .format = format,
        };
        float x0, y0, x1, y1;
        unsigned bin_w = tile->bin_w;
@@ -515,6 +523,10 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
        OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
        OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));
 
+       OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
+       OUT_RING(ring, 0);
+       OUT_RING(ring, 0);
+
        OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
        OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER);   /* GRAS_CL_CLIP_CNTL */
 
@@ -567,7 +579,6 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
        OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
        OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
 
-       fd3_program_emit(ring, &emit);
        fd3_emit_vertex_bufs(ring, &emit);
 
        /* for gmem pitch/base calculations, we need to use the non-
@@ -576,16 +587,27 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
        bin_w = gmem->bin_w;
        bin_h = gmem->bin_h;
 
-       if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
-               emit_mem2gmem_surf(ctx, depth_base(ctx), pfb->zsbuf, bin_w);
+       if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) {
+               emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1];
+               fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
+               emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
+       }
 
-       if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR))
-               emit_mem2gmem_surf(ctx, 0, pfb->cbufs[0], bin_w);
+       if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+               emit.prog = &ctx->blit_prog[0];
+               fd3_program_emit(ring, &emit, 1, &pfb->zsbuf);
+               emit_mem2gmem_surf(ctx, &gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
+       }
 
        OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
        OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
                        A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
                        A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+       OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+       OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+                                  A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+                                  A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
 }
 
 static void
@@ -617,12 +639,13 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
 {
        struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
        struct fd_ringbuffer *ring = ctx->ring;
-       uint32_t pitch = 0;
+       uint32_t i, pitch = 0;
 
-       if (pfb->cbufs[0]) {
-               struct pipe_surface *psurf = pfb->cbufs[0];
-               unsigned lvl = psurf->u.tex.level;
-               pitch = fd_resource(psurf->texture)->slices[lvl].pitch;
+       for (i = 0; i < pfb->nr_cbufs; i++) {
+               struct pipe_surface *psurf = pfb->cbufs[i];
+               if (!psurf)
+                       continue;
+               pitch = fd_resource(psurf->texture)->slices[psurf->u.tex.level].pitch;
        }
 
        fd3_emit_restore(ctx);
@@ -647,7 +670,8 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
        OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
        OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
                        A3XX_RB_MODE_CONTROL_GMEM_BYPASS |
-                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+                       A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
 
        patch_draws(ctx, IGNORE_VISIBILITY);
        patch_rbrc(ctx, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
@@ -734,7 +758,8 @@ emit_binning_pass(struct fd_context *ctx)
 
        OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
        OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) |
-                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+                       A3XX_RB_MODE_CONTROL_MRT(0));
 
        for (i = 0; i < 4; i++) {
                OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
@@ -774,7 +799,8 @@ emit_binning_pass(struct fd_context *ctx)
 
        OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
        OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
-                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+                       A3XX_RB_MODE_CONTROL_MRT(pfb->nr_cbufs - 1));
        OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
                        A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
                        A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
@@ -848,21 +874,6 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
 {
        struct fd_ringbuffer *ring = ctx->ring;
        struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
-       struct fd_gmem_stateobj *gmem = &ctx->gmem;
-       uint32_t reg;
-
-       OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
-       reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx));
-       if (pfb->zsbuf) {
-               reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
-       }
-       OUT_RING(ring, reg);
-       if (pfb->zsbuf) {
-               uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format);
-               OUT_RING(ring, A3XX_RB_DEPTH_PITCH(cpp * gmem->bin_w));
-       } else {
-               OUT_RING(ring, 0x00000000);
-       }
 
        if (ctx->needs_rb_fbd) {
                fd_wfi(ctx, ring);
@@ -874,7 +885,8 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
 
        OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
        OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
-                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+                       A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
 }
 
 /* before IB to rendering cmds: */
@@ -891,6 +903,21 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
        uint32_t x2 = tile->xoff + tile->bin_w - 1;
        uint32_t y2 = tile->yoff + tile->bin_h - 1;
 
+       uint32_t reg;
+
+       OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
+       reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base);
+       if (pfb->zsbuf) {
+               reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
+       }
+       OUT_RING(ring, reg);
+       if (pfb->zsbuf) {
+               uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format);
+               OUT_RING(ring, A3XX_RB_DEPTH_PITCH(cpp * gmem->bin_w));
+       } else {
+               OUT_RING(ring, 0x00000000);
+       }
+
        if (use_hw_binning(ctx)) {
                struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p];
 
@@ -918,7 +945,7 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
        OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
        OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
 
-       emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w);
+       emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w);
 
        /* setup scissor/offset for current tile: */
        OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
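diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 442b47dea9fe741b72480d44371f6bd58c9fa542..4581a6b7af919911d8e3eb0e5115fe474920a048 100644 (file)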
@@ -31,8 +31,6 @@
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
-#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_parse.h"
 
 #include "freedreno_program.h"
 
@@ -127,13 +125,14 @@ emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
 }
 
 void
-fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
+fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
+                                int nr, struct pipe_surface **bufs)
 {
        const struct ir3_shader_variant *vp, *fp;
        const struct ir3_info *vsi, *fsi;
        enum a3xx_instrbuffermode fpbuffer, vpbuffer;
        uint32_t fpbuffersz, vpbuffersz, fsoff;
-       uint32_t pos_regid, posz_regid, psize_regid, color_regid;
+       uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0};
        int constmode;
        int i, j, k;
 
@@ -199,11 +198,26 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
                ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
        psize_regid = ir3_find_output_regid(vp,
                ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
-       color_regid = ir3_find_output_regid(fp,
-               ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+       if (fp->color0_mrt) {
+               color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
+                       ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+       } else {
+               for (int i = 0; i < fp->outputs_count; i++) {
+                       ir3_semantic sem = fp->outputs[i].semantic;
+                       unsigned idx = sem2idx(sem);
+                       if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
+                               continue;
+                       assert(idx < 4);
+                       color_regid[idx] = fp->outputs[i].regid;
+               }
+       }
 
-       if (util_format_is_alpha(emit->format))
-               color_regid += 3;
+       /* adjust regids for alpha output formats. there is no alpha render
+        * format, so it's just treated like red
+        */
+       for (i = 0; i < nr; i++)
+               if (util_format_is_alpha(pipe_surface_format(bufs[i])))
+                       color_regid[i] += 3;
 
        /* we could probably divide this up into things that need to be
         * emitted if frag-prog is dirty vs if vert-prog is dirty..
@@ -345,21 +359,23 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
        }
 
        OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
-       if (fp->writes_pos) {
-               OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE |
-                               A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));
-       } else {
-               OUT_RING(ring, 0x00000000);
-       }
+       OUT_RING(ring,
+                        COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
+                        A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) |
+                        A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1));
 
        OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
-       OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(color_regid) |
-                       COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION) |
-                       COND(util_format_is_pure_uint(emit->format), A3XX_SP_FS_MRT_REG_UINT) |
-                       COND(util_format_is_pure_sint(emit->format), A3XX_SP_FS_MRT_REG_SINT));
-       OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
-       OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
-       OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
+       for (i = 0; i < 4; i++) {
+               uint32_t mrt_reg = A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
+                       COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION);
+
+               if (i < nr) {
+                       enum pipe_format fmt = pipe_surface_format(bufs[i]);
+                       mrt_reg |= COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) |
+                               COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT);
+               }
+               OUT_RING(ring, mrt_reg);
+       }
 
        if (emit->key.binning_pass) {
                OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
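diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
index 0313b774a08602743eadb4189ee4448ee0f423b0..52c808071a4e816e78eabcddd8ba8ae50fe9dbe8 100644 (file)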
@@ -39,7 +39,8 @@ struct fd3_shader_stateobj {
 
 struct fd3_emit;
 
-void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit);
+void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
+                                         int nr, struct pipe_surface **bufs);
 
 void fd3_prog_init(struct pipe_context *pctx);
 
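diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
index 182db849902041c6c40fe1772fcf42262a5bf27c..3497921257cda98dddf20c5ee18b2757001c520d 100644 (file)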
@@ -103,7 +103,7 @@ fd3_screen_is_format_supported(struct pipe_screen *pscreen,
 void
 fd3_screen_init(struct pipe_screen *pscreen)
 {
-       fd_screen(pscreen)->max_rts = 1;
+       fd_screen(pscreen)->max_rts = 4;
        pscreen->context_create = fd3_context_create;
        pscreen->is_format_supported = fd3_screen_is_format_supported;
 }