OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
- fd5_emit_render_cntl(ctx, false);
+ fd5_emit_render_cntl(ctx, false, emit->key.binning_pass);
fd5_draw_emit(ctx->batch, ring, primtype,
emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
info, index_offset);
draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
-// /* and now binning pass: */
-// emit.key.binning_pass = true;
-// emit.dirty = dirty & ~(FD_DIRTY_BLEND);
-// emit.vp = NULL; /* we changed key so need to refetch vp */
-// emit.fp = NULL;
-// draw_impl(ctx, ctx->batch->binning, &emit);
+ /* and now binning pass: */
+ emit.key.binning_pass = true;
+ emit.dirty = dirty & ~(FD_DIRTY_BLEND);
+ emit.vp = NULL; /* we changed key so need to refetch vp */
+ emit.fp = NULL;
+ draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
if (emit.streamout_mask) {
struct fd_ringbuffer *ring = ctx->batch->draw;
ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
- fd5_emit_render_cntl(ctx, true);
+ fd5_emit_render_cntl(ctx, true, false);
if (buffers & PIPE_CLEAR_COLOR) {
for (int i = 0; i < pfb->nr_cbufs; i++) {
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
-
- // TODO hacks.. these should not be hardcoded:
- OUT_PKT4(ring, REG_A5XX_GRAS_SC_CNTL, 1);
- OUT_RING(ring, 0x00000008); /* GRAS_SC_CNTL */
}
static void
OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(mode));
OUT_RING(ring, 0x00000000); /* ADDR_LO */
OUT_RING(ring, 0x00000000); /* ADDR_HI */
- OUT_RING(ring, COND(mode == GMEM, CP_SET_RENDER_MODE_3_GMEM_ENABLE));
+ OUT_RING(ring, COND(mode == GMEM, CP_SET_RENDER_MODE_3_GMEM_ENABLE) |
+ COND(mode == BINNING, CP_SET_RENDER_MODE_3_VSC_ENABLE));
OUT_RING(ring, 0x00000000);
emit_marker5(ring, 7);
}
}
static inline void
-fd5_emit_render_cntl(struct fd_context *ctx, bool blit)
+fd5_emit_render_cntl(struct fd_context *ctx, bool blit, bool binning)
{
- struct fd_ringbuffer *ring = ctx->batch->draw;
+ struct fd_ringbuffer *ring = binning ? ctx->batch->binning : ctx->batch->draw;
/* TODO eventually this partially depends on the pfb state, ie.
* which of the cbuf(s)/zsbuf has an UBWC flag buffer.. that part
bool samples_passed = (fd5_context(ctx)->samples_passed_queries > 0);
OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
OUT_RING(ring, 0x00000000 | /* RB_RENDER_CNTL */
+ COND(binning, A5XX_RB_RENDER_CNTL_BINNING_PASS) |
+ COND(binning, A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE) |
COND(samples_passed, A5XX_RB_RENDER_CNTL_SAMPLES_PASSED) |
COND(!blit, 0x8));
OUT_PKT4(ring, REG_A5XX_GRAS_SC_CNTL, 1);
OUT_RING(ring, 0x00000008 | /* GRAS_SC_CNTL */
+ COND(binning, A5XX_GRAS_SC_CNTL_BINNING_PASS) |
COND(samples_passed, A5XX_GRAS_SC_CNTL_SAMPLES_PASSED));
}
}
}
+static bool
+use_hw_binning(struct fd_batch *batch)
+{
+ struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
+
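+	/* Binning is only a win when there are enough bins for the
+	 * visibility stream to cull against, and the batch actually
+	 * contains draws:
+	 */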
+ return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) &&
+ (batch->num_draws > 0);
+}
+
static void
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
util_dynarray_resize(&batch->draw_patches, 0);
}
+static void
+update_vsc_pipe(struct fd_batch *batch)
+{
+ struct fd_context *ctx = batch->ctx;
+ struct fd5_context *fd5_ctx = fd5_context(ctx);
+ struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
+ struct fd_ringbuffer *ring = batch->gmem;
+ int i;
+
+ OUT_PKT4(ring, REG_A5XX_VSC_BIN_SIZE, 3);
+ OUT_RING(ring, A5XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
+ A5XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
+ OUT_RELOCW(ring, fd5_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */
+
+ OUT_PKT4(ring, REG_A5XX_UNKNOWN_0BC5, 2);
+ OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC5 */
+ OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC6 */
+
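+	/* Configure which region of the screen each of the 16 visibility
+	 * stream pipes covers (x/y/w/h are in units of bins):
+	 */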
+ OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16);
+ for (i = 0; i < 16; i++) {
+ struct fd_vsc_pipe *pipe = &ctx->pipe[i];
+ OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
+ A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
+ A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
+ A5XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
+ }
+
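+	/* Lazily allocate a buffer per pipe for the hw to write that
+	 * pipe's visibility stream into:
+	 */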
+ OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32);
+ for (i = 0; i < 16; i++) {
+ struct fd_vsc_pipe *pipe = &ctx->pipe[i];
+ if (!pipe->bo) {
+ pipe->bo = fd_bo_new(ctx->dev, 0x20000,
+ DRM_FREEDRENO_GEM_TYPE_KMEM);
+ }
+ OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */
+ }
+
+ OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16);
+ for (i = 0; i < 16; i++) {
+ struct fd_vsc_pipe *pipe = &ctx->pipe[i];
+ OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE_DATA_LENGTH[i] */
+ }
+}
+
+static void
+emit_binning_pass(struct fd_batch *batch)
+{
+ struct fd_context *ctx = batch->ctx;
+ struct fd_ringbuffer *ring = batch->gmem;
+ struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
+
+ uint32_t x1 = gmem->minx;
+ uint32_t y1 = gmem->miny;
+ uint32_t x2 = gmem->minx + gmem->width - 1;
+ uint32_t y2 = gmem->miny + gmem->height - 1;
+
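+	/* Switch the CP into binning mode; the draws that follow build up
+	 * the visibility streams instead of writing color/depth:
+	 */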
+ fd5_set_render_mode(batch->ctx, ring, BINNING);
+
+ OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
+ A5XX_RB_CNTL_HEIGHT(gmem->bin_h));
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
+ OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
+ A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
+ OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
+ A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));
+
+ OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
+ OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) |
+ A5XX_RB_RESOLVE_CNTL_1_Y(y1));
+ OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) |
+ A5XX_RB_RESOLVE_CNTL_2_Y(y2));
+
+ update_vsc_pipe(batch);
+
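+	/* Put VPC into binning mode for the duration of the binning IB
+	 * (cleared again below):
+	 */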
+ OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
+ OUT_RING(ring, A5XX_VPC_MODE_CNTL_BINNING_PASS);
+
+ OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, UNK_2C);
+
+ OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
+ OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) |
+ A5XX_RB_WINDOW_OFFSET_Y(0));
+
+ /* emit IB to binning drawcmds: */
+ ctx->emit_ib(ring, batch->binning);
+
+ fd_reset_wfi(batch);
+
+ OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, UNK_2D);
+
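+	/* Flush the visibility stream data, with a timestamp write so we
+	 * can tell when it has landed:
+	 */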
+ OUT_PKT7(ring, CP_EVENT_WRITE, 4);
+ OUT_RING(ring, CACHE_FLUSH_TS);
+ OUT_RELOCW(ring, fd5_context(ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */
+ OUT_RING(ring, 0x00000000);
+
+ // TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??)
+
+ fd_wfi(batch, ring);
+
+ OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
+ OUT_RING(ring, 0x0);
+}
+
/* before first tile */
static void
fd5_emit_tile_init(struct fd_batch *batch)
OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */
-/*
-opcode: CP_PREEMPT_ENABLE_LOCAL (6a) (2 dwords)
- */
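+	/* With hw binning, the binning pass runs before any tiles are
+	 * rendered, so the per-tile draws can be patched to honor the
+	 * resulting visibility streams:
+	 */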
+ if (use_hw_binning(batch)) {
+ emit_binning_pass(batch);
+ patch_draws(batch, USE_VISIBILITY);
+ } else {
+ patch_draws(batch, IGNORE_VISIBILITY);
+ }
fd5_set_render_mode(batch->ctx, ring, GMEM);
}
static void
fd5_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
{
+ struct fd_context *ctx = batch->ctx;
+ struct fd5_context *fd5_ctx = fd5_context(ctx);
struct fd_ringbuffer *ring = batch->gmem;
uint32_t x1 = tile->xoff;
OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) |
A5XX_RB_RESOLVE_CNTL_2_Y(y2));
+ if (use_hw_binning(batch)) {
+ struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p];
+
+ OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
+
+ OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
+ OUT_RING(ring, 0x0);
+
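+		/* Point the CP at this tile's slot (stream 'n' in pipe 'p')
+		 * of the visibility stream, so draws emitted with
+		 * USE_VISIBILITY can be skipped when not visible:
+		 */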
+ OUT_PKT7(ring, CP_SET_BIN_DATA5, 5);
+ OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
+ CP_SET_BIN_DATA5_0_VSC_N(tile->n));
+ OUT_RELOC(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE[p].DATA_ADDRESS */
+ OUT_RELOC(ring, fd5_ctx->vsc_size_mem, /* VSC_SIZE_ADDRESS + (p * 4) */
+ (tile->p * 4), 0, 0);
+ } else {
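+		/* No visibility stream for this tile, so override and treat
+		 * every draw as visible:
+		 */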
+ OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
+ OUT_RING(ring, 0x1);
+ }
+
OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) |
A5XX_RB_WINDOW_OFFSET_Y(y1));
struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
- OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
- OUT_RING(ring, 0x1);
-
OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
A5XX_RB_CNTL_HEIGHT(gmem->bin_h));
- patch_draws(batch, IGNORE_VISIBILITY);
-
emit_zs(ring, pfb->zsbuf, gmem);
emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);
OUT_PKT4(ring, REG_A5XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, COND(s[VS].v->writes_psize, A5XX_PC_PRIM_VTX_CNTL_PSIZE));
+ OUT_PKT4(ring, REG_A5XX_SP_PRIMITIVE_CNTL, 1);
+ OUT_RING(ring, A5XX_SP_PRIMITIVE_CNTL_VSOUT(l.cnt));
+
+ OUT_PKT4(ring, REG_A5XX_VPC_CNTL_0, 1);
+ OUT_RING(ring, A5XX_VPC_CNTL_0_STRIDE_IN_VPC(l.max_loc) |
+ COND(s[FS].v->total_in > 0, A5XX_VPC_CNTL_0_VARYING) |
+ COND(s[FS].v->frag_coord, A5XX_VPC_CNTL_0_VARYING) |
+ 0x10000); // XXX
+
+ OUT_PKT4(ring, REG_A5XX_PC_PRIMITIVE_CNTL, 1);
+ OUT_RING(ring, A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(l.max_loc) |
+ 0x400); // XXX
+
if (emit->key.binning_pass) {
OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_LO */
OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_HI */
} else {
- // TODO if some of these other bits depend on something other than
- // program state we should probably move these next three regs:
-
- OUT_PKT4(ring, REG_A5XX_SP_PRIMITIVE_CNTL, 1);
- OUT_RING(ring, A5XX_SP_PRIMITIVE_CNTL_VSOUT(l.cnt));
-
- OUT_PKT4(ring, REG_A5XX_VPC_CNTL_0, 1);
- OUT_RING(ring, A5XX_VPC_CNTL_0_STRIDE_IN_VPC(l.max_loc) |
- COND(s[FS].v->total_in > 0, A5XX_VPC_CNTL_0_VARYING) |
- COND(s[FS].v->frag_coord, A5XX_VPC_CNTL_0_VARYING) |
- 0x10000); // XXX
-
- OUT_PKT4(ring, REG_A5XX_PC_PRIMITIVE_CNTL, 1);
- OUT_RING(ring, A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(l.max_loc) |
- 0x400); // XXX
-
OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_LO/HI */
}
A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION));
}
- if (emit->key.binning_pass) {
- OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1);
- OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(0));
- } else {
+
+ OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1);
+ OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(s[FS].v->total_in) |
+ A5XX_VPC_PACK_PSIZELOC(psize_loc));
+
+ if (!emit->key.binning_pass) {
uint32_t vinterp[8], vpsrepl[8];
memset(vinterp, 0, sizeof(vinterp));
}
}
- OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1);
- OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(s[FS].v->total_in) |
- A5XX_VPC_PACK_PSIZELOC(psize_loc));
-
OUT_PKT4(ring, REG_A5XX_VPC_VARYING_INTERP_MODE(0), 8);
for (i = 0; i < 8; i++)
OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */
 * means we'd always have to recalc tiles every batch)
*/
struct fd_gmem_stateobj gmem;
- struct fd_vsc_pipe pipe[8];
+ struct fd_vsc_pipe pipe[16];
struct fd_tile tile[512];
/* which state objects need to be re-emit'd: */
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
const uint32_t gmem_alignw = ctx->screen->gmem_alignw;
const uint32_t gmem_alignh = ctx->screen->gmem_alignh;
+ const unsigned npipes = ctx->screen->num_vsc_pipes;
const uint32_t gmem_size = ctx->screen->gmemsize_bytes;
uint32_t minx, miny, width, height;
uint32_t nbins_x = 1, nbins_y = 1;
uint32_t i, j, t, xoff, yoff;
uint32_t tpp_x, tpp_y;
bool has_zs = !!(batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL));
- int tile_n[ARRAY_SIZE(ctx->pipe)];
+ int tile_n[npipes];
if (has_zs) {
struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
/* configure pipes: */
xoff = yoff = 0;
- for (i = 0; i < ARRAY_SIZE(ctx->pipe); i++) {
+ for (i = 0; i < npipes; i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
if (xoff >= nbins_x) {
xoff += tpp_x;
}
- for (; i < ARRAY_SIZE(ctx->pipe); i++) {
+ for (; i < npipes; i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
pipe->x = pipe->y = pipe->w = pipe->h = 0;
}
if (screen->gpu_id >= 500) {
screen->gmem_alignw = 64;
screen->gmem_alignh = 32;
+ screen->num_vsc_pipes = 16;
} else {
screen->gmem_alignw = 32;
screen->gmem_alignh = 32;
+ screen->num_vsc_pipes = 8;
}
/* NOTE: don't enable reordering on a2xx, since completely untested.
uint32_t max_freq;
uint32_t max_rts; /* max # of render targets */
uint32_t gmem_alignw, gmem_alignh;
+ uint32_t num_vsc_pipes;
bool has_timestamp;
void *compiler; /* currently unused for a2xx */