From 054526e49abb5e7fd49fed6f589cff6f1ab4c9f6 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 31 Jul 2015 15:32:58 -0400 Subject: [PATCH] freedreno/a4xx: MRT support Signed-off-by: Rob Clark --- .../drivers/freedreno/a3xx/fd3_program.c | 2 +- .../drivers/freedreno/a3xx/fd3_screen.c | 2 +- .../drivers/freedreno/a4xx/fd4_blend.c | 6 +- src/gallium/drivers/freedreno/a4xx/fd4_draw.c | 36 +++-- src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 125 +++++++++++++----- src/gallium/drivers/freedreno/a4xx/fd4_emit.h | 4 +- src/gallium/drivers/freedreno/a4xx/fd4_gmem.c | 92 +++++++------ .../drivers/freedreno/a4xx/fd4_program.c | 67 ++++++---- .../drivers/freedreno/a4xx/fd4_program.h | 3 +- .../drivers/freedreno/a4xx/fd4_screen.c | 2 +- .../drivers/freedreno/freedreno_draw.c | 3 +- .../drivers/freedreno/freedreno_util.h | 2 +- 12 files changed, 212 insertions(+), 132 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index e98c6b5cff4..b5360797745 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -204,7 +204,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] = ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); } else { - for (int i = 0; i < fp->outputs_count; i++) { + for (i = 0; i < fp->outputs_count; i++) { ir3_semantic sem = fp->outputs[i].semantic; unsigned idx = sem2idx(sem); if (sem2name(sem) != TGSI_SEMANTIC_COLOR) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c index 094dcf376e5..722fe360202 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c @@ -105,7 +105,7 @@ void fd3_screen_init(struct pipe_screen *pscreen) { struct fd_screen *screen = fd_screen(pscreen); - screen->max_rts = 4; + screen->max_rts = A3XX_MAX_RENDER_TARGETS; screen->compiler = ir3_compiler_create(screen->gpu_id); pscreen->context_create = fd3_context_create; pscreen->is_format_supported = fd3_screen_is_format_supported; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c index 396caa532fc..9d5ae4242f9 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c @@ -61,7 +61,7 @@ fd4_blend_state_create(struct pipe_context *pctx, struct fd4_blend_stateobj *so; // enum a3xx_rop_code rop = ROP_COPY; bool reads_dest = false; - int i; + unsigned i, mrt_blend = 0; if (cso->logicop_enable) { // rop = cso->logicop_func; /* maps 1:1 */ @@ -115,7 +115,7 @@ fd4_blend_state_create(struct pipe_context *pctx, A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE | A4XX_RB_MRT_CONTROL_BLEND | A4XX_RB_MRT_CONTROL_BLEND2; - so->rb_fs_output |= A4XX_RB_FS_OUTPUT_ENABLE_BLEND(1); + mrt_blend |= (1 << i); } if (reads_dest) @@ -125,5 +125,7 @@ fd4_blend_state_create(struct pipe_context *pctx, so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS); } + so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend); + return so; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index 154154190db..0927b0d7682 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -111,7 +111,6 @@ static void fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) { struct fd4_context *fd4_ctx = fd4_context(ctx); - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; struct fd4_emit emit = { .vtx = &ctx->vtx, .prog = &ctx->prog, @@ -132,8 +131,6 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .fsaturate_t = fd4_ctx->fsaturate_t, .fsaturate_r = fd4_ctx->fsaturate_r, }, - .format = fd4_emit_format(pfb->cbufs[0]), - .pformat = pipe_surface_format(pfb->cbufs[0]), }; unsigned dirty; @@ -173,20 +170,16 @@ fd4_clear(struct fd_context *ctx, unsigned buffers, struct fd4_context *fd4_ctx = fd4_context(ctx); struct fd_ringbuffer *ring = ctx->ring; struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0}; unsigned dirty = ctx->dirty; - unsigned ce, i; + unsigned i; struct fd4_emit emit = { .vtx = &fd4_ctx->solid_vbuf_state, .prog = &ctx->solid_prog, .key = { - .half_precision = true, + .half_precision = fd_half_precision(pfb), }, - .format = fd4_emit_format(pfb->cbufs[0]), }; - uint32_t colr = 0; - - if ((buffers & PIPE_CLEAR_COLOR) && pfb->nr_cbufs) - colr = pack_rgba(pfb->cbufs[0]->format, color->f); dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR; dirty |= FD_DIRTY_PROG; @@ -260,16 +253,15 @@ fd4_clear(struct fd_context *ctx, unsigned buffers, if (buffers & PIPE_CLEAR_COLOR) { OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1); OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER)); - ce = 0xf; - } else { - ce = 0x0; } for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) { + mrt_comp[i] = (buffers & (PIPE_CLEAR_COLOR0 << i)) ? 0xf : 0x0; + OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1); OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR | A4XX_RB_MRT_CONTROL_B11 | - A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce)); + A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf)); OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1); OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) | @@ -280,6 +272,16 @@ fd4_clear(struct fd_context *ctx, unsigned buffers, A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO)); } + OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1); + OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) | + A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) | + A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) | + A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) | + A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) | + A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) | + A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) | + A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7])); + fd4_emit_vertex_bufs(ring, &emit); OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1); @@ -288,12 +290,6 @@ fd4_clear(struct fd_context *ctx, unsigned buffers, OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1); OUT_RING(ring, 0x00000000); - OUT_PKT0(ring, REG_A4XX_RB_CLEAR_COLOR_DW0, 4); - OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW0 */ - OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW1 */ - OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW2 */ - OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW3 */ - /* until fastclear works: */ fd4_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index df96601c747..e7c210d3437 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -197,51 +197,90 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, * special cases.. */ void -fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf) +fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs, + struct pipe_surface **bufs) { - struct fd_resource *rsc = fd_resource(psurf->texture); - unsigned lvl = psurf->u.tex.level; - struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl); - uint32_t offset = fd_resource_offset(rsc, lvl, psurf->u.tex.first_layer); - enum pipe_format format = fd4_gmem_restore_format(psurf->format); + unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0}; + int i; - debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); + for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) { + mrt_comp[i] = (i < nr_bufs) ? 0xf : 0; + } + + OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1); + OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) | + A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) | + A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) | + A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) | + A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) | + A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) | + A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) | + A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7])); /* output sampler state: */ - OUT_PKT3(ring, CP_LOAD_STATE, 4); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * nr_bufs)); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) | - CP_LOAD_STATE_0_NUM_UNIT(1)); + CP_LOAD_STATE_0_NUM_UNIT(nr_bufs)); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) | - A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) | - A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) | - A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) | - A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT)); - OUT_RING(ring, 0x00000000); + for (i = 0; i < nr_bufs; i++) { + OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) | + A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) | + A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) | + A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) | + A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT)); + OUT_RING(ring, 0x00000000); + } /* emit texture state: */ - OUT_PKT3(ring, CP_LOAD_STATE, 10); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * nr_bufs)); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) | - CP_LOAD_STATE_0_NUM_UNIT(1)); + CP_LOAD_STATE_0_NUM_UNIT(nr_bufs)); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) | - A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) | - fd4_tex_swiz(format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN, - PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA)); - OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(psurf->width) | - A4XX_TEX_CONST_1_HEIGHT(psurf->height)); - OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp)); - OUT_RING(ring, 0x00000000); - OUT_RELOC(ring, rsc->bo, offset, 0, 0); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); + for (i = 0; i < nr_bufs; i++) { + if (bufs[i]) { + struct fd_resource *rsc = fd_resource(bufs[i]->texture); + unsigned lvl = bufs[i]->u.tex.level; + struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl); + uint32_t offset = fd_resource_offset(rsc, lvl, bufs[i]->u.tex.first_layer); + enum pipe_format format = fd4_gmem_restore_format(bufs[i]->format); + + debug_assert(bufs[i]->u.tex.first_layer == bufs[i]->u.tex.last_layer); + + OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) | + A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) | + fd4_tex_swiz(format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN, + PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA)); + OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) | + A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height)); + OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp)); + OUT_RING(ring, 0x00000000); + OUT_RELOC(ring, rsc->bo, offset, 0, 0); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } else { + OUT_RING(ring, A4XX_TEX_CONST_0_FMT(0) | + A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) | + A4XX_TEX_CONST_0_SWIZ_X(A4XX_TEX_ONE) | + A4XX_TEX_CONST_0_SWIZ_Y(A4XX_TEX_ONE) | + A4XX_TEX_CONST_0_SWIZ_Z(A4XX_TEX_ONE) | + A4XX_TEX_CONST_0_SWIZ_W(A4XX_TEX_ONE)); + OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(0) | + A4XX_TEX_CONST_1_HEIGHT(0)); + OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(0)); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } + } } void @@ -348,6 +387,25 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, emit_marker(ring, 5); + if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) { + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0}; + + for (unsigned i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) { + mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0; + } + + OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1); + OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) | + A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) | + A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) | + A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) | + A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) | + A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) | + A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) | + A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7])); + } + if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) { uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_render_control; @@ -472,8 +530,10 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2])); } - if (dirty & FD_DIRTY_PROG) - fd4_program_emit(ring, emit); + if (dirty & FD_DIRTY_PROG) { + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + fd4_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs); + } if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */ ir3_emit_consts(vp, ring, emit->info, dirty); @@ -690,9 +750,6 @@ fd4_emit_restore(struct fd_context *ctx) OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1); OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff)); - OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1); - OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(0xf)); - OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1); OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h index 7debee59471..99c7596fb82 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h @@ -43,7 +43,7 @@ void fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type, const uint32_t *dwords, struct pipe_resource *prsc); void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, - struct pipe_surface *psurf); + unsigned nr_bufs, struct pipe_surface **bufs); /* grouped together emit-state for prog/vertex/state emit: */ struct fd4_emit { @@ -51,8 +51,6 @@ struct fd4_emit { const struct fd_program_stateobj *prog; const struct pipe_draw_info *info; struct ir3_shader_key key; - enum a4xx_color_fmt format; - enum pipe_format pformat; uint32_t dirty; /* cached to avoid repeated lookups of same variants: */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c index 976255f5879..fce5d7a3930 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c @@ -44,12 +44,6 @@ #include "fd4_format.h" #include "fd4_zsa.h" -static const struct ir3_shader_key key = { - // XXX should set this based on render target format! We don't - // want half_precision if float32 render target!!! - .half_precision = true, -}; - static void emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w) @@ -94,6 +88,8 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, } else { stride = slice->pitch * rsc->cpp; } + } else if ((i < nr_bufs) && bases) { + base = bases[i]; } OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3); @@ -101,7 +97,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) | A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) | A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap)); - if (bin_w || (i >= nr_bufs)) { + if (bin_w || (i >= nr_bufs) || !bufs[i]) { OUT_RING(ring, base); OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride)); } else { @@ -115,20 +111,6 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, } } -static uint32_t -depth_base(struct fd_context *ctx) -{ - struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - uint32_t cpp = 4; - if (pfb->cbufs[0]) { - struct fd_resource *rsc = - fd_resource(pfb->cbufs[0]->texture); - cpp = rsc->cpp; - } - return align(gmem->bin_w * gmem->bin_h * cpp, 0x4000); -} - /* transfer from gmem to system memory (ie. normal RAM) */ static void @@ -163,13 +145,15 @@ static void fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) { struct fd4_context *fd4_ctx = fd4_context(ctx); + struct fd_gmem_stateobj *gmem = &ctx->gmem; struct fd_ringbuffer *ring = ctx->ring; struct pipe_framebuffer_state *pfb = &ctx->framebuffer; struct fd4_emit emit = { .vtx = &fd4_ctx->solid_vbuf_state, .prog = &ctx->solid_prog, - .key = key, - .format = fd4_emit_format(pfb->cbufs[0]), + .key = { + .half_precision = true, + }, }; OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1); @@ -238,16 +222,22 @@ fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */ - fd4_program_emit(ring, &emit); + fd4_program_emit(ring, &emit, 0, NULL); fd4_emit_vertex_bufs(ring, &emit); if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { - uint32_t base = depth_base(ctx); - emit_gmem2mem_surf(ctx, base, pfb->zsbuf); + emit_gmem2mem_surf(ctx, gmem->zsbuf_base[0], pfb->zsbuf); } if (ctx->resolve & FD_BUFFER_COLOR) { - emit_gmem2mem_surf(ctx, 0, pfb->cbufs[0]); + unsigned i; + for (i = 0; i < pfb->nr_cbufs; i++) { + if (!pfb->cbufs[i]) + continue; + if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i))) + continue; + emit_gmem2mem_surf(ctx, gmem->cbuf_base[i], pfb->cbufs[i]); + } } OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1); @@ -260,14 +250,14 @@ fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) /* transfer from system memory to gmem */ static void -emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base, - struct pipe_surface *psurf, uint32_t bin_w) +emit_mem2gmem_surf(struct fd_context *ctx, uint32_t *bases, + struct pipe_surface **bufs, uint32_t nr_bufs, uint32_t bin_w) { struct fd_ringbuffer *ring = ctx->ring; - emit_mrt(ring, 1, &psurf, &base, bin_w); + emit_mrt(ring, nr_bufs, bufs, bases, bin_w); - fd4_emit_gmem_restore_tex(ring, psurf); + fd4_emit_gmem_restore_tex(ring, nr_bufs, bufs); fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL); @@ -282,10 +272,13 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) struct pipe_framebuffer_state *pfb = &ctx->framebuffer; struct fd4_emit emit = { .vtx = &fd4_ctx->blit_vbuf_state, + /* NOTE: They all use the same VP, this is for vtx bufs. */ .prog = &ctx->blit_prog[0], - .key = key, - .format = fd4_emit_format(pfb->cbufs[0]), + .key = { + .half_precision = fd_half_precision(pfb), + }, }; + unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0}; float x0, y0, x1, y1; unsigned bin_w = tile->bin_w; unsigned bin_h = tile->bin_h; @@ -304,7 +297,9 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, fui(x1)); OUT_RING(ring, fui(y1)); - for (i = 0; i < 8; i++) { + for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) { + mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0; + OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1); OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR | A4XX_RB_MRT_CONTROL_B11 | @@ -319,6 +314,16 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO)); } + OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1); + OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) | + A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) | + A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) | + A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) | + A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) | + A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) | + A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) | + A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7])); + OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1); OUT_RING(ring, 0x8); /* XXX RB_RENDER_CONTROL */ @@ -381,7 +386,6 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */ - fd4_program_emit(ring, &emit); fd4_emit_vertex_bufs(ring, &emit); /* for gmem pitch/base calculations, we need to use the non- @@ -390,11 +394,17 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) bin_w = gmem->bin_w; bin_h = gmem->bin_h; - if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) - emit_mem2gmem_surf(ctx, depth_base(ctx), pfb->zsbuf, bin_w); + if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + emit.prog = &ctx->blit_prog[0]; + fd4_program_emit(ring, &emit, 1, &pfb->zsbuf); + emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); + } - if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) - emit_mem2gmem_surf(ctx, 0, pfb->cbufs[0], bin_w); + if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) { + emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1]; + fd4_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs); + emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w); + } OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1); OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | @@ -537,7 +547,7 @@ fd4_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) uint32_t reg; OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3); - reg = A4XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx)); + reg = A4XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]); if (pfb->zsbuf) { reg |= A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd4_pipe2depth(pfb->zsbuf->format)); } @@ -586,7 +596,7 @@ fd4_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1)); OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2)); - emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w); + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w); /* setup scissor/offset for current tile: */ OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index b3c06e3aae1..c7a6dffea7b 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -31,8 +31,6 @@ #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/u_format.h" -#include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_parse.h" #include "freedreno_program.h" @@ -213,14 +211,17 @@ setup_stages(struct fd4_emit *emit, struct stage *s) } void -fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) +fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, + int nr, struct pipe_surface **bufs) { struct stage s[MAX_STAGES]; - uint32_t pos_regid, posz_regid, psize_regid, color_regid; + uint32_t pos_regid, posz_regid, psize_regid, color_regid[8]; uint32_t face_regid, coord_regid, zwcoord_regid; int constmode; int i, j, k; + debug_assert(nr <= ARRAY_SIZE(color_regid)); + setup_stages(emit, s); /* blob seems to always use constmode currently: */ @@ -232,11 +233,30 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); psize_regid = ir3_find_output_regid(s[VS].v, ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0)); - color_regid = ir3_find_output_regid(s[FS].v, - ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); + if (s[FS].v->color0_mrt) { + color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] = + color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] = + ir3_find_output_regid(s[FS].v, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); + } else { + const struct ir3_shader_variant *fp = s[FS].v; + memset(color_regid, 0, sizeof(color_regid)); + for (i = 0; i < fp->outputs_count; i++) { + ir3_semantic sem = fp->outputs[i].semantic; + unsigned idx = sem2idx(sem); + if (sem2name(sem) != TGSI_SEMANTIC_COLOR) + continue; + debug_assert(idx < ARRAY_SIZE(color_regid)); + color_regid[idx] = fp->outputs[i].regid; + } + } + + /* adjust regids for alpha output formats. there is no alpha render + * format, so it's just treated like red + */ + for (i = 0; i < nr; i++) + if (util_format_is_alpha(pipe_surface_format(bufs[i]))) + color_regid[i] += 3; - if (util_format_is_alpha(emit->pformat)) - color_regid += 3; /* TODO get these dynamically: */ face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); @@ -419,29 +439,24 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) A4XX_RB_RENDER_CONTROL2_WCOORD)); OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1); - OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(1) | + OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(MAX2(1, nr)) | COND(s[FS].v->writes_pos, A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z)); OUT_PKT0(ring, REG_A4XX_SP_FS_OUTPUT_REG, 1); - if (s[FS].v->writes_pos) { - OUT_RING(ring, 0x00000001 | - A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE | - A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid)); - } else { - OUT_RING(ring, 0x00000001); - } + OUT_RING(ring, A4XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr)) | + COND(s[FS].v->writes_pos, A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) | + A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid)); OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8); - OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid) | - A4XX_SP_FS_MRT_REG_MRTFORMAT(emit->format) | - COND(emit->key.half_precision, A4XX_SP_FS_MRT_REG_HALF_PRECISION)); - OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0)); - OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0)); - OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0)); - OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0)); - OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0)); - OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0)); - OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0)); + for (i = 0; i < 8; i++) { + enum a4xx_color_fmt format = 0; + if (i < nr) + format = fd4_emit_format(bufs[i]); + OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid[i]) | + A4XX_SP_FS_MRT_REG_MRTFORMAT(format) | + COND(emit->key.half_precision, + A4XX_SP_FS_MRT_REG_HALF_PRECISION)); + } if (emit->key.binning_pass) { OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.h b/src/gallium/drivers/freedreno/a4xx/fd4_program.h index 52306a4c60d..8dfccaf9d74 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.h @@ -39,7 +39,8 @@ struct fd4_shader_stateobj { struct fd4_emit; -void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit); +void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, + int nr, struct pipe_surface **bufs); void fd4_prog_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c index e8cbb2d201a..d8ea414f300 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c @@ -102,7 +102,7 @@ void fd4_screen_init(struct pipe_screen *pscreen) { struct fd_screen *screen = fd_screen(pscreen); - screen->max_rts = 1; + screen->max_rts = A4XX_MAX_RENDER_TARGETS; screen->compiler = ir3_compiler_create(screen->gpu_id); pscreen->context_create = fd4_context_create; pscreen->is_format_supported = fd4_screen_is_format_supported; diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c index a04cc879630..6831a58749c 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/src/gallium/drivers/freedreno/freedreno_draw.c @@ -265,7 +265,8 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, FD_DIRTY_SAMPLE_MASK | FD_DIRTY_PROG | FD_DIRTY_CONSTBUF | - FD_DIRTY_BLEND; + FD_DIRTY_BLEND | + FD_DIRTY_FRAMEBUFFER; if (fd_mesa_debug & FD_DBG_DCLEAR) ctx->dirty = 0xffffffff; diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index 2880e890a1c..b5c5db91788 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -58,7 +58,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op); #define A3XX_MAX_RENDER_TARGETS 4 #define A4XX_MAX_RENDER_TARGETS 8 /* for now until a4xx MRT support: */ -#define MAX_RENDER_TARGETS A3XX_MAX_RENDER_TARGETS +#define MAX_RENDER_TARGETS A4XX_MAX_RENDER_TARGETS #define FD_DBG_MSGS 0x0001 #define FD_DBG_DISASM 0x0002 -- 2.30.2