From 4a3c0e995063320693782b934962969e11dab29d Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 15 Feb 2015 03:39:43 -0500 Subject: [PATCH] freedreno/a3xx: add MRT support The hardware only supports 4 MRTs. It should be possible to emulate support for 8, but doesn't seem worth the trouble. Signed-off-by: Ilia Mirkin --- docs/relnotes/10.6.0.html | 3 +- src/gallium/drivers/freedreno/a3xx/fd3_draw.c | 20 +-- src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 104 ++++++++---- src/gallium/drivers/freedreno/a3xx/fd3_emit.h | 3 +- .../drivers/freedreno/a3xx/fd3_format.h | 8 +- src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 159 ++++++++++-------- .../drivers/freedreno/a3xx/fd3_program.c | 58 ++++--- .../drivers/freedreno/a3xx/fd3_program.h | 3 +- .../drivers/freedreno/a3xx/fd3_screen.c | 2 +- 9 files changed, 221 insertions(+), 139 deletions(-) diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index 323363737b6..22201e174e1 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -45,11 +45,12 @@ Note: some of the new features are only available with certain drivers.

Bug fixes

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index 6ff762e2ae4..044355c2b68 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -129,7 +129,6 @@ static void fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) { struct fd3_context *fd3_ctx = fd3_context(ctx); - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; struct fd3_emit emit = { .vtx = &ctx->vtx, .prog = &ctx->prog, @@ -152,7 +151,6 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .vinteger_s = fd3_ctx->vinteger_s, .finteger_s = fd3_ctx->finteger_s, }, - .format = pipe_surface_format(pfb->cbufs[0]), .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade, .sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : 0, }; @@ -239,17 +237,18 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, { struct fd3_context *fd3_ctx = fd3_context(ctx); struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); struct fd_ringbuffer *ring = ctx->ring; unsigned dirty = ctx->dirty; - unsigned ce, i; + unsigned i; struct fd3_emit emit = { .vtx = &fd3_ctx->solid_vbuf_state, .prog = &ctx->solid_prog, .key = { - .half_precision = fd3_half_precision(format), + .half_precision = (fd3_half_precision(pfb->cbufs[0]) && + fd3_half_precision(pfb->cbufs[1]) && + fd3_half_precision(pfb->cbufs[2]) && + fd3_half_precision(pfb->cbufs[3])), }, - .format = format, }; dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR; @@ -326,17 +325,12 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); } - if (buffers & PIPE_CLEAR_COLOR) { - ce = 0xf; - } else { - ce = 0x0; - } - for (i = 0; i < 4; i++) { OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) | A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS) 
| - A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce)); + COND(buffers & (PIPE_CLEAR_COLOR0 << i), + A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf))); OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1); OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index a5874e44ba8..1b656b77464 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -293,59 +293,92 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, * case format (fd3_gmem_restore_format()) stuff for restoring depth/stencil. */ void -fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf) +fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, + struct pipe_surface **psurf, + int bufs) { - struct fd_resource *rsc = fd_resource(psurf->texture); - unsigned lvl = psurf->u.tex.level; - struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl); - uint32_t offset = fd_resource_offset(rsc, lvl, psurf->u.tex.first_layer); - enum pipe_format format = fd3_gmem_restore_format(psurf->format); - - debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); + int i, j; /* output sampler state: */ - OUT_PKT3(ring, CP_LOAD_STATE, 4); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + 2 * bufs); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) | - CP_LOAD_STATE_0_NUM_UNIT(1)); + CP_LOAD_STATE_0_NUM_UNIT(bufs)); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) | - A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) | - A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) | - A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) | - A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT)); - OUT_RING(ring, 0x00000000); + for (i = 0; i < bufs; i++) { + OUT_RING(ring, 
A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) | + A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) | + A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) | + A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) | + A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT)); + OUT_RING(ring, 0x00000000); + } /* emit texture state: */ - OUT_PKT3(ring, CP_LOAD_STATE, 6); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + 4 * bufs); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) | - CP_LOAD_STATE_0_NUM_UNIT(1)); + CP_LOAD_STATE_0_NUM_UNIT(bufs)); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(format)) | - A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) | - fd3_tex_swiz(format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN, - PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA)); - OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) | - A3XX_TEX_CONST_1_WIDTH(psurf->width) | - A3XX_TEX_CONST_1_HEIGHT(psurf->height)); - OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) | - A3XX_TEX_CONST_2_INDX(0)); - OUT_RING(ring, 0x00000000); + for (i = 0; i < bufs; i++) { + if (!psurf[i]) { + OUT_RING(ring, A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) | + A3XX_TEX_CONST_0_SWIZ_X(A3XX_TEX_ONE) | + A3XX_TEX_CONST_0_SWIZ_Y(A3XX_TEX_ONE) | + A3XX_TEX_CONST_0_SWIZ_Z(A3XX_TEX_ONE) | + A3XX_TEX_CONST_0_SWIZ_W(A3XX_TEX_ONE)); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i)); + OUT_RING(ring, 0x00000000); + continue; + } + + struct fd_resource *rsc = fd_resource(psurf[i]->texture); + unsigned lvl = psurf[i]->u.tex.level; + struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl); + enum pipe_format format = fd3_gmem_restore_format(psurf[i]->format); + + debug_assert(psurf[i]->u.tex.first_layer == psurf[i]->u.tex.last_layer); + + OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(format)) | + A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) | + fd3_tex_swiz(format, 
PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN, + PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA)); + OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) | + A3XX_TEX_CONST_1_WIDTH(psurf[i]->width) | + A3XX_TEX_CONST_1_HEIGHT(psurf[i]->height)); + OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) | + A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i)); + OUT_RING(ring, 0x00000000); + } /* emit mipaddrs: */ - OUT_PKT3(ring, CP_LOAD_STATE, 3); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + BASETABLE_SZ * bufs); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * FRAG_TEX_OFF) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_MIPADDR) | - CP_LOAD_STATE_0_NUM_UNIT(1)); + CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * bufs)); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); - OUT_RELOC(ring, rsc->bo, offset, 0, 0); + for (i = 0; i < bufs; i++) { + if (psurf[i]) { + struct fd_resource *rsc = fd_resource(psurf[i]->texture); + unsigned lvl = psurf[i]->u.tex.level; + uint32_t offset = fd_resource_offset(rsc, lvl, psurf[i]->u.tex.first_layer); + OUT_RELOC(ring, rsc->bo, offset, 0, 0); + } else { + OUT_RING(ring, 0x00000000); + } + + /* pad the remaining entries w/ null: */ + for (j = 1; j < BASETABLE_SZ; j++) { + OUT_RING(ring, 0x00000000); + } + } } void @@ -570,8 +603,10 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2])); } - if (dirty & FD_DIRTY_PROG) - fd3_program_emit(ring, emit); + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) { + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + fd3_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs); + } /* TODO we should not need this or fd_wfi() before emit_constants(): */ @@ -624,6 +659,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, control |= A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY); } + if (format == PIPE_FORMAT_NONE) + control &= 
~A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; + if (has_alpha) { blend_control |= blend->rb_mrt[i].blend_control_rgb; } else { diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h index ce51c0c4968..a438ddaee85 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h @@ -45,7 +45,7 @@ void fd3_emit_constant(struct fd_ringbuffer *ring, const uint32_t *dwords, struct pipe_resource *prsc); void fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, - struct pipe_surface *psurf); + struct pipe_surface **psurf, int bufs); /* grouped together emit-state for prog/vertex/state emit: */ struct fd3_emit { @@ -53,7 +53,6 @@ struct fd3_emit { const struct fd_program_stateobj *prog; const struct pipe_draw_info *info; struct ir3_shader_key key; - enum pipe_format format; uint32_t dirty; uint32_t sprite_coord_enable; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.h b/src/gallium/drivers/freedreno/a3xx/fd3_format.h index 6a47fda1029..6afc3015901 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_format.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.h @@ -42,8 +42,14 @@ uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); static INLINE bool -fd3_half_precision(enum pipe_format format) +fd3_half_precision(const struct pipe_surface *surface) { + enum pipe_format format; + if (!surface) + return true; + + format = surface->format; + /* colors are provided in consts, which go through cov.f32f16, which will * break these values */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index 304fc846af8..8589dd6faa1 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -89,6 +89,8 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, } else { stride = slice->pitch * rsc->cpp; 
} + } else if (i < nr_bufs && bases) { + base = bases[i]; } OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2); @@ -97,7 +99,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) | A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) | COND(srgb, A3XX_RB_MRT_BUF_INFO_COLOR_SRGB)); - if (bin_w || (i >= nr_bufs)) { + if (bin_w || (i >= nr_bufs) || !bufs[i]) { OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base)); } else { OUT_RELOCW(ring, rsc->bo, offset, 0, -1); @@ -110,20 +112,6 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, } } -static uint32_t -depth_base(struct fd_context *ctx) -{ - struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - uint32_t cpp = 4; - if (pfb->cbufs[0]) { - struct fd_resource *rsc = - fd_resource(pfb->cbufs[0]->texture); - cpp = rsc->cpp; - } - return align(gmem->bin_w * gmem->bin_h * cpp, 0x4000); -} - static bool use_hw_binning(struct fd_context *ctx) { @@ -167,7 +155,8 @@ emit_binning_workaround(struct fd_context *ctx) OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(0)); OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) | A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER)); @@ -189,7 +178,7 @@ emit_binning_workaround(struct fd_context *ctx) A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | A3XX_GRAS_SC_CONTROL_RASTER_MODE(1)); - fd3_program_emit(ring, &emit); + fd3_program_emit(ring, &emit, 0, NULL); fd3_emit_vertex_bufs(ring, &emit); OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4); @@ -338,15 +327,14 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) struct fd3_context *fd3_ctx = fd3_context(ctx); struct fd_ringbuffer *ring = ctx->ring; struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - enum 
pipe_format format = pipe_surface_format(pfb->cbufs[0]); struct fd3_emit emit = { .vtx = &fd3_ctx->solid_vbuf_state, .prog = &ctx->solid_prog, .key = { - .half_precision = fd3_half_precision(format), + .half_precision = true, }, - .format = format, }; + int i; OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER)); @@ -388,7 +376,8 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(0)); OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); OUT_RING(ring, A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | @@ -419,21 +408,28 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ - fd3_program_emit(ring, &emit); + fd3_program_emit(ring, &emit, 0, NULL); fd3_emit_vertex_bufs(ring, &emit); - if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { - uint32_t base = depth_base(ctx); - emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, base, pfb->zsbuf); - } + if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) + emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, + ctx->gmem.zsbuf_base, pfb->zsbuf); if (ctx->resolve & FD_BUFFER_COLOR) { - emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE, 0, pfb->cbufs[0]); + for (i = 0; i < pfb->nr_cbufs; i++) { + if (!pfb->cbufs[i]) + continue; + if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i))) + continue; + emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE, + ctx->gmem.cbuf_base[i], pfb->cbufs[i]); + } } OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | @@ -444,14 +440,24 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) /* transfer from system memory to gmem */ static void -emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base, - struct pipe_surface *psurf, uint32_t bin_w) +emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[], + struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w) { struct fd_ringbuffer *ring = ctx->ring; - emit_mrt(ring, 1, &psurf, &base, bin_w); + assert(bufs > 0); + + emit_mrt(ring, bufs, psurf, bases, bin_w); + + OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(bufs - 1)); + + OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); + OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1)); - fd3_emit_gmem_restore_tex(ring, psurf); + fd3_emit_gmem_restore_tex(ring, psurf, bufs); fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL); @@ -464,15 +470,17 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) struct fd_gmem_stateobj *gmem = &ctx->gmem; struct fd_ringbuffer *ring = ctx->ring; struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); struct fd3_emit emit = { .vtx = &fd3_ctx->blit_vbuf_state, - .prog = &ctx->blit_prog[0], .sprite_coord_enable = 1, + /* NOTE: They all use the same VP, this is for vtx bufs. 
*/ + .prog = &ctx->blit_prog[0], .key = { - .half_precision = fd3_half_precision(format), + .half_precision = (fd3_half_precision(pfb->cbufs[0]) && + fd3_half_precision(pfb->cbufs[1]) && + fd3_half_precision(pfb->cbufs[2]) && + fd3_half_precision(pfb->cbufs[3])), }, - .format = format, }; float x0, y0, x1, y1; unsigned bin_w = tile->bin_w; @@ -515,6 +523,10 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS)); + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER); /* GRAS_CL_CLIP_CNTL */ @@ -567,7 +579,6 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ - fd3_program_emit(ring, &emit); fd3_emit_vertex_bufs(ring, &emit); /* for gmem pitch/base calculations, we need to use the non- @@ -576,16 +587,27 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) bin_w = gmem->bin_w; bin_h = gmem->bin_h; - if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) - emit_mem2gmem_surf(ctx, depth_base(ctx), pfb->zsbuf, bin_w); + if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) { + emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1]; + fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs); + emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w); + } - if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) - emit_mem2gmem_surf(ctx, 0, pfb->cbufs[0], bin_w); + if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + emit.prog = &ctx->blit_prog[0]; + fd3_program_emit(ring, &emit, 1, &pfb->zsbuf); + emit_mem2gmem_surf(ctx, &gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); + } OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); OUT_RING(ring, 
A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); + + OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); } static void @@ -617,12 +639,13 @@ fd3_emit_sysmem_prep(struct fd_context *ctx) { struct pipe_framebuffer_state *pfb = &ctx->framebuffer; struct fd_ringbuffer *ring = ctx->ring; - uint32_t pitch = 0; + uint32_t i, pitch = 0; - if (pfb->cbufs[0]) { - struct pipe_surface *psurf = pfb->cbufs[0]; - unsigned lvl = psurf->u.tex.level; - pitch = fd_resource(psurf->texture)->slices[lvl].pitch; + for (i = 0; i < pfb->nr_cbufs; i++) { + struct pipe_surface *psurf = pfb->cbufs[i]; + if (!psurf) + continue; + pitch = fd_resource(psurf->texture)->slices[psurf->u.tex.level].pitch; } fd3_emit_restore(ctx); @@ -647,7 +670,8 @@ fd3_emit_sysmem_prep(struct fd_context *ctx) OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | A3XX_RB_MODE_CONTROL_GMEM_BYPASS | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); patch_draws(ctx, IGNORE_VISIBILITY); patch_rbrc(ctx, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); @@ -734,7 +758,8 @@ emit_binning_pass(struct fd_context *ctx) OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(0)); for (i = 0; i < 4; i++) { OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); @@ -774,7 +799,8 @@ emit_binning_pass(struct fd_context *ctx) OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | - 
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); @@ -848,21 +874,6 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) { struct fd_ringbuffer *ring = ctx->ring; struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - struct fd_gmem_stateobj *gmem = &ctx->gmem; - uint32_t reg; - - OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); - reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx)); - if (pfb->zsbuf) { - reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format)); - } - OUT_RING(ring, reg); - if (pfb->zsbuf) { - uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format); - OUT_RING(ring, A3XX_RB_DEPTH_PITCH(cpp * gmem->bin_w)); - } else { - OUT_RING(ring, 0x00000000); - } if (ctx->needs_rb_fbd) { fd_wfi(ctx, ring); @@ -874,7 +885,8 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); } /* before IB to rendering cmds: */ @@ -891,6 +903,21 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) uint32_t x2 = tile->xoff + tile->bin_w - 1; uint32_t y2 = tile->yoff + tile->bin_h - 1; + uint32_t reg; + + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); + reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base); + if (pfb->zsbuf) { + reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format)); + } + OUT_RING(ring, reg); + if (pfb->zsbuf) { + uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format); + OUT_RING(ring, A3XX_RB_DEPTH_PITCH(cpp * gmem->bin_w)); + } else { + OUT_RING(ring, 0x00000000); + } + if 
(use_hw_binning(ctx)) { struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p]; @@ -918,7 +945,7 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1)); OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2)); - emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w); + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w); /* setup scissor/offset for current tile: */ OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 442b47dea9f..4581a6b7af9 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -31,8 +31,6 @@ #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/u_format.h" -#include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_parse.h" #include "freedreno_program.h" @@ -127,13 +125,14 @@ emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) } void -fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit) +fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, + int nr, struct pipe_surface **bufs) { const struct ir3_shader_variant *vp, *fp; const struct ir3_info *vsi, *fsi; enum a3xx_instrbuffermode fpbuffer, vpbuffer; uint32_t fpbuffersz, vpbuffersz, fsoff; - uint32_t pos_regid, posz_regid, psize_regid, color_regid; + uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0}; int constmode; int i, j, k; @@ -199,11 +198,26 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit) ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); psize_regid = ir3_find_output_regid(vp, ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0)); - color_regid = ir3_find_output_regid(fp, - ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); + if (fp->color0_mrt) { + color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] = + ir3_find_output_regid(fp, 
ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); + } else { + for (int i = 0; i < fp->outputs_count; i++) { + ir3_semantic sem = fp->outputs[i].semantic; + unsigned idx = sem2idx(sem); + if (sem2name(sem) != TGSI_SEMANTIC_COLOR) + continue; + assert(idx < 4); + color_regid[idx] = fp->outputs[i].regid; + } + } - if (util_format_is_alpha(emit->format)) - color_regid += 3; + /* adjust regids for alpha output formats. there is no alpha render + * format, so it's just treated like red + */ + for (i = 0; i < nr; i++) + if (util_format_is_alpha(pipe_surface_format(bufs[i]))) + color_regid[i] += 3; /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. @@ -345,21 +359,23 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit) } OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); - if (fp->writes_pos) { - OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE | - A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid)); - } else { - OUT_RING(ring, 0x00000000); - } + OUT_RING(ring, + COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) | + A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) | + A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1)); OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4); - OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(color_regid) | - COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION) | - COND(util_format_is_pure_uint(emit->format), A3XX_SP_FS_MRT_REG_UINT) | - COND(util_format_is_pure_sint(emit->format), A3XX_SP_FS_MRT_REG_SINT)); - OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0)); - OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0)); - OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0)); + for (i = 0; i < 4; i++) { + uint32_t mrt_reg = A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) | + COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION); + + if (i < nr) { + enum pipe_format fmt = pipe_surface_format(bufs[i]); + mrt_reg |= COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) | + 
COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT); + } + OUT_RING(ring, mrt_reg); + } if (emit->key.binning_pass) { OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h index 0313b774a08..52c808071a4 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h @@ -39,7 +39,8 @@ struct fd3_shader_stateobj { struct fd3_emit; -void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit); +void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, + int nr, struct pipe_surface **bufs); void fd3_prog_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c index 182db849902..3497921257c 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c @@ -103,7 +103,7 @@ fd3_screen_is_format_supported(struct pipe_screen *pscreen, void fd3_screen_init(struct pipe_screen *pscreen) { - fd_screen(pscreen)->max_rts = 1; + fd_screen(pscreen)->max_rts = 4; pscreen->context_create = fd3_context_create; pscreen->is_format_supported = fd3_screen_is_format_supported; } -- 2.30.2