X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Ffreedreno%2Fa3xx%2Ffd3_gmem.c;h=d01b656b0a2b6674932f95187dbcd6407d37f9a4;hb=ee61790daf46d83d64288b99fb02f17070acb3dc;hp=68264096866a660a89beaa43b945b6670cda8353;hpb=39a7c049d36bf5d18653e0a8b360dc8317f4b8ec;p=mesa.git diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index 68264096866..d01b656b0a2 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -1,5 +1,3 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - /* * Copyright (C) 2013 Rob Clark * @@ -45,7 +43,8 @@ static void emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, - struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w) + struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w, + bool decode_srgb) { enum a3xx_tile_mode tile_mode; unsigned i; @@ -56,26 +55,43 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, tile_mode = LINEAR; } - for (i = 0; i < 4; i++) { + for (i = 0; i < A3XX_MAX_RENDER_TARGETS; i++) { + enum pipe_format pformat = 0; enum a3xx_color_fmt format = 0; enum a3xx_color_swap swap = WZYX; + bool srgb = false; struct fd_resource *rsc = NULL; struct fd_resource_slice *slice = NULL; uint32_t stride = 0; uint32_t base = 0; - uint32_t layer_offset = 0; + uint32_t offset = 0; if ((i < nr_bufs) && bufs[i]) { struct pipe_surface *psurf = bufs[i]; rsc = fd_resource(psurf->texture); - slice = &rsc->slices[psurf->u.tex.level]; - format = fd3_pipe2color(psurf->format); - swap = fd3_pipe2swap(psurf->format); + pformat = psurf->format; + /* In case we're drawing to Z32F_S8, the "color" actually goes to + * the stencil + */ + if (rsc->stencil) { + rsc = rsc->stencil; + pformat = rsc->base.format; + if (bases) + bases++; + } + slice = fd_resource_slice(rsc, psurf->u.tex.level); + format = fd3_pipe2color(pformat); + swap = fd3_pipe2swap(pformat); + if (decode_srgb) + srgb = util_format_is_srgb(pformat); + else + pformat = util_format_linear(pformat); debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); - layer_offset = slice->size0 * psurf->u.tex.first_layer; + offset = fd_resource_offset(rsc, psurf->u.tex.level, + psurf->u.tex.first_layer); if (bin_w) { stride = bin_w * rsc->cpp; @@ -86,45 +102,33 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, } else { stride = slice->pitch * rsc->cpp; } + } else if (i < nr_bufs && bases) { + base = bases[i]; } OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2); OUT_RING(ring, A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) | A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) | A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) | - A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap)); - if (bin_w || (i >= nr_bufs)) { + A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) | + COND(srgb, A3XX_RB_MRT_BUF_INFO_COLOR_SRGB)); + if (bin_w || (i >= nr_bufs) || !bufs[i]) { OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base)); } else { - OUT_RELOCW(ring, rsc->bo, - slice->offset + layer_offset, 0, -1); + OUT_RELOCW(ring, rsc->bo, offset, 0, -1); } OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1); OUT_RING(ring, COND((i < nr_bufs) && bufs[i], A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT( - fd3_fs_output_format(bufs[i]->format)))); - } -} - -static uint32_t -depth_base(struct fd_context *ctx) -{ - struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - uint32_t cpp = 4; - if (pfb->cbufs[0]) { - struct fd_resource *rsc = - fd_resource(pfb->cbufs[0]->texture); - cpp = rsc->cpp; + fd3_fs_output_format(pformat)))); } - return align(gmem->bin_w * gmem->bin_h * cpp, 0x4000); } static bool -use_hw_binning(struct fd_context *ctx) +use_hw_binning(struct fd_batch *batch) { - struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; /* workaround: combining scissor optimization and hw binning * seems problematic. Seems like we end up with a mismatch @@ -143,19 +147,26 @@ use_hw_binning(struct fd_context *ctx) if (gmem->minx || gmem->miny) return false; + if ((gmem->maxpw * gmem->maxph) > 32) + return false; + + if ((gmem->maxpw > 15) || (gmem->maxph > 15)) + return false; + return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2); } /* workaround for (hlsq?) lockup with hw binning on a3xx patchlevel 0 */ -static void update_vsc_pipe(struct fd_context *ctx); +static void update_vsc_pipe(struct fd_batch *batch); static void -emit_binning_workaround(struct fd_context *ctx) +emit_binning_workaround(struct fd_batch *batch) { - struct fd3_context *fd3_ctx = fd3_context(ctx); + struct fd_context *ctx = batch->ctx; struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct fd_ringbuffer *ring = ctx->ring; + struct fd_ringbuffer *ring = batch->gmem; struct fd3_emit emit = { - .vtx = &fd3_ctx->solid_vbuf_state, + .debug = &ctx->debug, + .vtx = &ctx->solid_vbuf_state, .prog = &ctx->solid_prog, .key = { .half_precision = true, @@ -164,7 +175,8 @@ emit_binning_workaround(struct fd_context *ctx) OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(0)); OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) | A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER)); @@ -173,7 +185,7 @@ emit_binning_workaround(struct fd_context *ctx) OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) | A3XX_RB_COPY_CONTROL_MODE(0) | A3XX_RB_COPY_CONTROL_GMEM_BASE(0)); - OUT_RELOCW(ring, fd_resource(fd3_ctx->solid_vbuf)->bo, 0x20, 0, -1); /* RB_COPY_DEST_BASE */ + OUT_RELOCW(ring, fd_resource(ctx->solid_vbuf)->bo, 0x20, 0, -1); /* RB_COPY_DEST_BASE */ OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(128)); OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) | A3XX_RB_COPY_DEST_INFO_FORMAT(RB_R8G8B8A8_UNORM) | @@ -186,7 +198,7 @@ emit_binning_workaround(struct fd_context *ctx) A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | A3XX_GRAS_SC_CONTROL_RASTER_MODE(1)); - fd3_program_emit(ring, &emit); + fd3_program_emit(ring, &emit, 0, NULL); fd3_emit_vertex_bufs(ring, &emit); OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4); @@ -248,7 +260,7 @@ emit_binning_workaround(struct fd_context *ctx) OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(31) | A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(0)); - fd_wfi(ctx, ring); + fd_wfi(batch, ring); OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(0.0)); OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(1.0)); @@ -271,11 +283,11 @@ emit_binning_workaround(struct fd_context *ctx) OUT_PKT3(ring, CP_DRAW_INDX_2, 5); OUT_RING(ring, 0x00000000); /* viz query info. */ OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_IMMEDIATE, - INDEX_SIZE_32_BIT, IGNORE_VISIBILITY)); + INDEX_SIZE_32_BIT, IGNORE_VISIBILITY, 0)); OUT_RING(ring, 2); /* NumIndices */ OUT_RING(ring, 2); OUT_RING(ring, 1); - fd_reset_wfi(ctx); + fd_reset_wfi(batch); OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1); OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS)); @@ -283,7 +295,7 @@ emit_binning_workaround(struct fd_context *ctx) OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); OUT_RING(ring, 0x00000000); - fd_wfi(ctx, ring); + fd_wfi(batch, ring); OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1); OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); @@ -300,49 +312,64 @@ emit_binning_workaround(struct fd_context *ctx) /* transfer from gmem to system memory (ie. normal RAM) */ static void -emit_gmem2mem_surf(struct fd_context *ctx, - enum adreno_rb_copy_control_mode mode, - uint32_t base, struct pipe_surface *psurf) +emit_gmem2mem_surf(struct fd_batch *batch, + enum adreno_rb_copy_control_mode mode, + bool stencil, + uint32_t base, struct pipe_surface *psurf) { - struct fd_ringbuffer *ring = ctx->ring; + struct fd_ringbuffer *ring = batch->gmem; struct fd_resource *rsc = fd_resource(psurf->texture); - struct fd_resource_slice *slice = &rsc->slices[psurf->u.tex.level]; - uint32_t layer_offset = slice->size0 * psurf->u.tex.first_layer; + enum pipe_format format = psurf->format; + + if (!rsc->valid) + return; + + if (stencil) { + rsc = rsc->stencil; + format = rsc->base.format; + } + + struct fd_resource_slice *slice = fd_resource_slice(rsc, psurf->u.tex.level); + uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level, + psurf->u.tex.first_layer); debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4); OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) | A3XX_RB_COPY_CONTROL_MODE(mode) | - A3XX_RB_COPY_CONTROL_GMEM_BASE(base)); + A3XX_RB_COPY_CONTROL_GMEM_BASE(base) | + COND(format == PIPE_FORMAT_Z32_FLOAT || + format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, + A3XX_RB_COPY_CONTROL_DEPTH32_RESOLVE)); - OUT_RELOCW(ring, rsc->bo, slice->offset + layer_offset, 0, -1); /* RB_COPY_DEST_BASE */ + OUT_RELOCW(ring, rsc->bo, offset, 0, -1); /* RB_COPY_DEST_BASE */ OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp)); OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) | - A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(psurf->format)) | + A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(format)) | A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) | A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) | - A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(psurf->format))); + A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(format))); - fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, - DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL); + fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL); } static void -fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) +fd3_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) { - struct fd3_context *fd3_ctx = fd3_context(ctx); - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); + struct fd_context *ctx = batch->ctx; + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; struct fd3_emit emit = { - .vtx = &fd3_ctx->solid_vbuf_state, + .debug = &ctx->debug, + .vtx = &ctx->solid_vbuf_state, .prog = &ctx->solid_prog, .key = { - .half_precision = fd3_half_precision(format), + .half_precision = true, }, - .format = format, }; + int i; OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER)); @@ -373,7 +400,7 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */ - fd_wfi(ctx, ring); + fd_wfi(batch, ring); OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width/2.0 - 0.5)); OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width/2.0)); @@ -384,7 +411,8 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(0)); OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); OUT_RING(ring, A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | @@ -415,21 +443,34 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ - fd3_program_emit(ring, &emit); + fd3_program_emit(ring, &emit, 0, NULL); fd3_emit_vertex_bufs(ring, &emit); - if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { - uint32_t base = depth_base(ctx); - emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, base, pfb->zsbuf); + if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + if (!rsc->stencil || batch->resolve & FD_BUFFER_DEPTH) + emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, false, + ctx->gmem.zsbuf_base[0], pfb->zsbuf); + if (rsc->stencil && batch->resolve & FD_BUFFER_STENCIL) + emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, true, + ctx->gmem.zsbuf_base[1], pfb->zsbuf); } - if (ctx->resolve & FD_BUFFER_COLOR) { - emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE, 0, pfb->cbufs[0]); + if (batch->resolve & FD_BUFFER_COLOR) { + for (i = 0; i < pfb->nr_cbufs; i++) { + if (!pfb->cbufs[i]) + continue; + if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i))) + continue; + emit_gmem2mem_surf(batch, RB_COPY_RESOLVE, false, + ctx->gmem.cbuf_base[i], pfb->cbufs[i]); + } } OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | @@ -440,34 +481,77 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) /* transfer from system memory to gmem */ static void -emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base, - struct pipe_surface *psurf, uint32_t bin_w) +emit_mem2gmem_surf(struct fd_batch *batch, uint32_t bases[], + struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w) { - struct fd_ringbuffer *ring = ctx->ring; + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_surface *zsbufs[2]; - emit_mrt(ring, 1, &psurf, &base, bin_w); + assert(bufs > 0); + + OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(bufs - 1)); + + emit_mrt(ring, bufs, psurf, bases, bin_w, false); + + if (psurf[0] && (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT || + psurf[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) { + /* Depth is stored as unorm in gmem, so we have to write it in using a + * special blit shader which writes depth. + */ + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); + OUT_RING(ring, (A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z | + A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE | + A3XX_RB_DEPTH_CONTROL_Z_ENABLE | + A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE | + A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS))); + + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); + OUT_RING(ring, A3XX_RB_DEPTH_INFO_DEPTH_BASE(bases[0]) | + A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_32)); + OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 * batch->ctx->gmem.bin_w)); + + if (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) { + OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1); + OUT_RING(ring, 0); + } else { + /* The gmem_restore_tex logic will put the first buffer's stencil + * as color. Supply it with the proper information to make that + * happen. + */ + zsbufs[0] = zsbufs[1] = psurf[0]; + psurf = zsbufs; + bufs = 2; + } + } else { + OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); + OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1)); + } - fd3_emit_gmem_restore_tex(ring, psurf); + fd3_emit_gmem_restore_tex(ring, psurf, bufs); - fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, - DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL); + fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL); } static void -fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) +fd3_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile) { - struct fd3_context *fd3_ctx = fd3_context(ctx); + struct fd_context *ctx = batch->ctx; struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; struct fd3_emit emit = { - .vtx = &fd3_ctx->blit_vbuf_state, - .prog = &ctx->blit_prog, + .debug = &ctx->debug, + .vtx = &ctx->blit_vbuf_state, + .sprite_coord_enable = 1, + /* NOTE: They all use the same VP, this is for vtx bufs. */ + .prog = &ctx->blit_prog[0], .key = { - .half_precision = fd3_half_precision(format), + .half_precision = fd_half_precision(pfb), }, - .format = format, }; float x0, y0, x1, y1; unsigned bin_w = tile->bin_w; @@ -481,12 +565,14 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height); OUT_PKT3(ring, CP_MEM_WRITE, 5); - OUT_RELOCW(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0, 0); + OUT_RELOCW(ring, fd_resource(ctx->blit_texcoord_vbuf)->bo, 0, 0, 0); OUT_RING(ring, fui(x0)); OUT_RING(ring, fui(y0)); OUT_RING(ring, fui(x1)); OUT_RING(ring, fui(y1)); + fd3_emit_cache_flush(batch, ring); + for (i = 0; i < 4; i++) { OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) | @@ -506,14 +592,18 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS) | A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); - fd_wfi(ctx, ring); + fd_wfi(batch, ring); OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS)); + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER); /* GRAS_CL_CLIP_CNTL */ - fd_wfi(ctx, ring); + fd_wfi(batch, ring); OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)bin_w/2.0 - 0.5)); OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)bin_w/2.0)); @@ -545,6 +635,10 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); + OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2); + OUT_RING(ring, 0); /* RB_STENCIL_INFO */ + OUT_RING(ring, 0); /* RB_STENCIL_PITCH */ + OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | @@ -562,7 +656,6 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ - fd3_program_emit(ring, &emit); fd3_emit_vertex_bufs(ring, &emit); /* for gmem pitch/base calculations, we need to use the non- @@ -571,62 +664,89 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) bin_w = gmem->bin_w; bin_h = gmem->bin_h; - if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) - emit_mem2gmem_surf(ctx, depth_base(ctx), pfb->zsbuf, bin_w); + if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) { + emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1]; + emit.fp = NULL; /* frag shader changed so clear cache */ + fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs); + emit_mem2gmem_surf(batch, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w); + } - if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) - emit_mem2gmem_surf(ctx, 0, pfb->cbufs[0], bin_w); + if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + if (pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && + pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT) { + /* Non-float can use a regular color write. It's split over 8-bit + * components, so half precision is always sufficient. + */ + emit.prog = &ctx->blit_prog[0]; + emit.key.half_precision = true; + } else { + /* Float depth needs special blit shader that writes depth */ + if (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT) + emit.prog = &ctx->blit_z; + else + emit.prog = &ctx->blit_zs; + emit.key.half_precision = false; + } + emit.fp = NULL; /* frag shader changed so clear cache */ + fd3_program_emit(ring, &emit, 1, &pfb->zsbuf); + emit_mem2gmem_surf(batch, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); + } OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); + + OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); } static void -patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode) +patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode) { unsigned i; - for (i = 0; i < fd_patch_num_elements(&ctx->draw_patches); i++) { - struct fd_cs_patch *patch = fd_patch_element(&ctx->draw_patches, i); - *patch->cs = patch->val | DRAW(0, 0, 0, vismode); + for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) { + struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i); + *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0); } - util_dynarray_resize(&ctx->draw_patches, 0); + util_dynarray_resize(&batch->draw_patches, 0); } static void -patch_rbrc(struct fd_context *ctx, uint32_t val) +patch_rbrc(struct fd_batch *batch, uint32_t val) { - struct fd3_context *fd3_ctx = fd3_context(ctx); unsigned i; - for (i = 0; i < fd_patch_num_elements(&fd3_ctx->rbrc_patches); i++) { - struct fd_cs_patch *patch = fd_patch_element(&fd3_ctx->rbrc_patches, i); + for (i = 0; i < fd_patch_num_elements(&batch->rbrc_patches); i++) { + struct fd_cs_patch *patch = fd_patch_element(&batch->rbrc_patches, i); *patch->cs = patch->val | val; } - util_dynarray_resize(&fd3_ctx->rbrc_patches, 0); + util_dynarray_resize(&batch->rbrc_patches, 0); } /* for rendering directly to system memory: */ static void -fd3_emit_sysmem_prep(struct fd_context *ctx) +fd3_emit_sysmem_prep(struct fd_batch *batch) { - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - struct fd_ringbuffer *ring = ctx->ring; - uint32_t pitch = 0; - - if (pfb->cbufs[0]) { - struct pipe_surface *psurf = pfb->cbufs[0]; - unsigned lvl = psurf->u.tex.level; - pitch = fd_resource(psurf->texture)->slices[lvl].pitch; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + struct fd_ringbuffer *ring = batch->gmem; + uint32_t i, pitch = 0; + + for (i = 0; i < pfb->nr_cbufs; i++) { + struct pipe_surface *psurf = pfb->cbufs[i]; + if (!psurf) + continue; + pitch = fd_resource(psurf->texture)->slices[psurf->u.tex.level].pitch; } - fd3_emit_restore(ctx); + fd3_emit_restore(batch, ring); OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1); OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) | A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height)); - emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0); + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0, true); /* setup scissor/offset for current tile: */ OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1); @@ -642,24 +762,26 @@ fd3_emit_sysmem_prep(struct fd_context *ctx) OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | A3XX_RB_MODE_CONTROL_GMEM_BYPASS | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); - patch_draws(ctx, IGNORE_VISIBILITY); - patch_rbrc(ctx, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); + patch_draws(batch, IGNORE_VISIBILITY); + patch_rbrc(batch, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); } static void -update_vsc_pipe(struct fd_context *ctx) +update_vsc_pipe(struct fd_batch *batch) { + struct fd_context *ctx = batch->ctx; struct fd3_context *fd3_ctx = fd3_context(ctx); - struct fd_ringbuffer *ring = ctx->ring; + struct fd_ringbuffer *ring = batch->gmem; int i; OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1); OUT_RELOCW(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */ for (i = 0; i < 8; i++) { - struct fd_vsc_pipe *pipe = &ctx->pipe[i]; + struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; if (!pipe->bo) { pipe->bo = fd_bo_new(ctx->dev, 0x40000, @@ -677,11 +799,12 @@ update_vsc_pipe(struct fd_context *ctx) } static void -emit_binning_pass(struct fd_context *ctx) +emit_binning_pass(struct fd_batch *batch) { + struct fd_context *ctx = batch->ctx; struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - struct fd_ringbuffer *ring = ctx->ring; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + struct fd_ringbuffer *ring = batch->gmem; int i; uint32_t x1 = gmem->minx; @@ -690,8 +813,8 @@ emit_binning_pass(struct fd_context *ctx) uint32_t y2 = gmem->miny + gmem->height - 1; if (ctx->screen->gpu_id == 320) { - emit_binning_workaround(ctx); - fd_wfi(ctx, ring); + emit_binning_workaround(batch); + fd_wfi(batch, ring); OUT_PKT3(ring, CP_INVALIDATE_STATE, 1); OUT_RING(ring, 0x00007fff); } @@ -729,7 +852,8 @@ emit_binning_pass(struct fd_context *ctx) OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(0)); for (i = 0; i < 4; i++) { OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); @@ -743,10 +867,10 @@ emit_binning_pass(struct fd_context *ctx) A3XX_PC_VSTREAM_CONTROL_N(0)); /* emit IB to binning drawcmds: */ - OUT_IB(ring, ctx->binning_start, ctx->binning_end); - fd_reset_wfi(ctx); + ctx->emit_ib(ring, batch->binning); + fd_reset_wfi(batch); - fd_wfi(ctx, ring); + fd_wfi(batch, ring); /* and then put stuff back the way it was: */ @@ -769,22 +893,23 @@ emit_binning_pass(struct fd_context *ctx) OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(pfb->nr_cbufs - 1)); OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); - fd_event_write(ctx, ring, CACHE_FLUSH); - fd_wfi(ctx, ring); + fd_event_write(batch, ring, CACHE_FLUSH); + fd_wfi(batch, ring); if (ctx->screen->gpu_id == 320) { /* dummy-draw workaround: */ OUT_PKT3(ring, CP_DRAW_INDX, 3); OUT_RING(ring, 0x00000000); OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX, - INDEX_SIZE_IGN, IGNORE_VISIBILITY)); + INDEX_SIZE_IGN, IGNORE_VISIBILITY, 0)); OUT_RING(ring, 0); /* NumIndices */ - fd_reset_wfi(ctx); + fd_reset_wfi(batch); } OUT_PKT3(ring, CP_NOP, 4); @@ -793,22 +918,23 @@ emit_binning_pass(struct fd_context *ctx) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - fd_wfi(ctx, ring); + fd_wfi(batch, ring); if (ctx->screen->gpu_id == 320) { - emit_binning_workaround(ctx); + emit_binning_workaround(batch); } } /* before first tile */ static void -fd3_emit_tile_init(struct fd_context *ctx) +fd3_emit_tile_init(struct fd_batch *batch) { - struct fd_ringbuffer *ring = ctx->ring; - struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; uint32_t rb_render_control; - fd3_emit_restore(ctx); + fd3_emit_restore(batch, ring); /* note: use gmem->bin_w/h, the bin_w/h parameters may be truncated * at the right and bottom edge tiles @@ -817,82 +943,83 @@ fd3_emit_tile_init(struct fd_context *ctx) OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); - update_vsc_pipe(ctx); + update_vsc_pipe(batch); - if (use_hw_binning(ctx)) { - /* mark the end of the binning cmds: */ - fd_ringmarker_mark(ctx->binning_end); + fd_wfi(batch, ring); + OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1); + OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) | + A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height)); + if (use_hw_binning(batch)) { /* emit hw binning pass: */ - emit_binning_pass(ctx); + emit_binning_pass(batch); - patch_draws(ctx, USE_VISIBILITY); + patch_draws(batch, USE_VISIBILITY); } else { - patch_draws(ctx, IGNORE_VISIBILITY); + patch_draws(batch, IGNORE_VISIBILITY); } rb_render_control = A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w); - patch_rbrc(ctx, rb_render_control); + patch_rbrc(batch, rb_render_control); } /* before mem2gmem */ static void -fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) +fd3_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) { - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - struct fd_gmem_stateobj *gmem = &ctx->gmem; - uint32_t reg; - - OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); - reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx)); - if (pfb->zsbuf) { - reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format)); - } - OUT_RING(ring, reg); - if (pfb->zsbuf) { - uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format); - OUT_RING(ring, A3XX_RB_DEPTH_PITCH(cpp * gmem->bin_w)); - } else { - OUT_RING(ring, 0x00000000); - } - - if (ctx->needs_rb_fbd) { - fd_wfi(ctx, ring); - OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1); - OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) | - A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height)); - ctx->needs_rb_fbd = false; - } + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); } /* before IB to rendering cmds: */ static void -fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) +fd3_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile) { + struct fd_context *ctx = batch->ctx; struct fd3_context *fd3_ctx = fd3_context(ctx); - struct fd_ringbuffer *ring = ctx->ring; + struct fd_ringbuffer *ring = batch->gmem; struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; uint32_t x1 = tile->xoff; uint32_t y1 = tile->yoff; uint32_t x2 = tile->xoff + tile->bin_w - 1; uint32_t y2 = tile->yoff + tile->bin_h - 1; - if (use_hw_binning(ctx)) { - struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p]; + uint32_t reg; + + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); + reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]); + if (pfb->zsbuf) { + reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format)); + } + OUT_RING(ring, reg); + if (pfb->zsbuf) { + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + OUT_RING(ring, A3XX_RB_DEPTH_PITCH(rsc->cpp * gmem->bin_w)); + if (rsc->stencil) { + OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2); + OUT_RING(ring, A3XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1])); + OUT_RING(ring, A3XX_RB_STENCIL_PITCH(rsc->stencil->cpp * gmem->bin_w)); + } + } else { + OUT_RING(ring, 0x00000000); + } + + if (use_hw_binning(batch)) { + struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[tile->p]; assert(pipe->w * pipe->h); - fd_event_write(ctx, ring, HLSQ_FLUSH); - fd_wfi(ctx, ring); + fd_event_write(batch, ring, HLSQ_FLUSH); + fd_wfi(batch, ring); OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) | @@ -900,8 +1027,8 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) OUT_PKT3(ring, CP_SET_BIN_DATA, 2); - OUT_RELOC(ring, pipe->bo, 0, 0, 0); /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */ - OUT_RELOC(ring, fd3_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <- VSC_SIZE_ADDRESS + (p * 4) */ + OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */ + OUT_RELOCW(ring, fd3_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <- VSC_SIZE_ADDRESS + (p * 4) */ (tile->p * 4), 0, 0); } else { OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); @@ -913,7 +1040,7 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1)); OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2)); - emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w); + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w, true); /* setup scissor/offset for current tile: */ OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);