so->base = *cso;
+ so->lrz_write = true; /* unless blend enabled for any MRT */
+
for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
const struct pipe_rt_blend_state *rt;
A5XX_RB_MRT_CONTROL_BLEND |
A5XX_RB_MRT_CONTROL_BLEND2;
mrt_blend |= (1 << i);
+ so->lrz_write = false;
}
if (reads_dest) {
uint32_t blend_control_alpha;
} rb_mrt[A5XX_MAX_RENDER_TARGETS];
uint32_t rb_blend_cntl;
+ bool lrz_write;
};
static inline struct fd5_blend_stateobj *
fixup_shader_state(ctx, &emit.key);
unsigned dirty = ctx->dirty;
+ const struct ir3_shader_variant *vp = fd5_emit_get_vp(&emit);
+ const struct ir3_shader_variant *fp = fd5_emit_get_fp(&emit);
/* do regular pass first, since that is more likely to fail compiling: */
- if (!(fd5_emit_get_vp(&emit) && fd5_emit_get_fp(&emit)))
+ if (!vp || !fp)
return false;
+ /* figure out whether we need to disable LRZ write for binning
+ * pass using draw pass's fp:
+ */
+ emit.no_lrz_write = fp->writes_pos || fp->has_kill;
+
emit.key.binning_pass = false;
emit.dirty = dirty;
}
}
+static void
+fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
+{
+ struct fd_ringbuffer *ring;
+ uint32_t clear = util_pack_z(PIPE_FORMAT_Z16_UNORM, depth); /* depth packed as Z16_UNORM; written to RB_CLEAR_COLOR_DW0 below */
+ /* Emit commands into batch->lrz_clear that clear zsbuf's LRZ buffer
+  * (zsbuf->lrz) to 'depth': the buffer is bound as a linear R16_UNORM
+  * MRT0 and filled with the packed clear value via a fast-clear blit.
+  * The ring is only recorded here; nothing in this function submits it.
+  */
+ // TODO mid-frame clears (ie. app doing crazy stuff)?? Maybe worth
+ // splitting both clear and lrz clear out into their own rb's. And
+ // just throw away any draws prior to clear. (Anything not fullscreen
+ // clear, just fallback to generic path that treats it as a normal
+ // draw.)
+
+ if (!batch->lrz_clear) { /* create the ring lazily, on the first LRZ clear recorded for this batch */
+ batch->lrz_clear = fd_ringbuffer_new(batch->ctx->screen->pipe, 0x1000);
+ fd_ringbuffer_set_parent(batch->lrz_clear, batch->gmem);
+ }
+
+ ring = batch->lrz_clear;
+
+ OUT_WFI5(ring);
+
+ OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
+ OUT_RING(ring, 0x10000000);
+
+ OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
+ OUT_RING(ring, 0x20fffff);
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1);
+ OUT_RING(ring, A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(0.0));
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
+ OUT_RING(ring, 0x00000181);
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5); /* describe the LRZ buffer as MRT0 */
+ OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(RB5_R16_UNORM) |
+ A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE5_LINEAR) |
+ A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
+ OUT_RING(ring, A5XX_RB_MRT_PITCH(zsbuf->lrz_pitch * 2)); /* NOTE(review): presumably a byte pitch, 2 bytes per R16 element -- confirm */
+ OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(fd_bo_size(zsbuf->lrz)));
+ OUT_RELOCW(ring, zsbuf->lrz, 0x1000, 0, 0); /* NOTE(review): offset 0x1000 presumably skips the fast-clear buffer region at the start of the bo -- confirm */
+
+ OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT4(ring, REG_A5XX_RB_DEST_MSAA_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE));
+
+ OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0));
+
+ OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR |
+ A5XX_RB_CLEAR_CNTL_MASK(0xf));
+
+ OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1);
+ OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */
+
+ OUT_PKT4(ring, REG_A5XX_VSC_RESOLVE_CNTL, 2);
+ OUT_RING(ring, A5XX_VSC_RESOLVE_CNTL_X(zsbuf->lrz_width) |
+ A5XX_VSC_RESOLVE_CNTL_Y(zsbuf->lrz_height));
+ OUT_RING(ring, 0x00000000); // XXX UNKNOWN_0CDE
+
+ OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
+ OUT_RING(ring, A5XX_RB_CNTL_BYPASS);
+
+ OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2); /* two dwords: RESOLVE_CNTL_1 then RESOLVE_CNTL_2 */
+ OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) |
+ A5XX_RB_RESOLVE_CNTL_1_Y(0));
+ OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(zsbuf->lrz_width - 1) |
+ A5XX_RB_RESOLVE_CNTL_2_Y(zsbuf->lrz_height - 1)); /* (0,0)..(lrz_width-1,lrz_height-1): the whole LRZ surface */
+
+ fd5_emit_blit(batch->ctx, ring); /* emit the blit that uses the state programmed above */
+}
+
static bool
fd5_clear(struct fd_context *ctx, unsigned buffers,
const union pipe_color_union *color, double depth, unsigned stencil)
is_z32(pfb->zsbuf->format))
return false;
- /* TODO handle scissor.. or fallback to slow-clear? */
-
ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx);
ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny);
ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */
fd5_emit_blit(ctx, ring);
+
+ if (pfb->zsbuf && (buffers & PIPE_CLEAR_DEPTH)) {
+ struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
+ if (zsbuf->lrz) {
+ zsbuf->lrz_valid = true;
+ fd5_clear_lrz(ctx->batch, zsbuf, depth);
+ }
+ }
}
/* disable fast clear to not interfere w/ gmem->mem, etc.. */
fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd5_emit *emit)
{
+ struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit);
const struct ir3_shader_variant *fp = fd5_emit_get_fp(emit);
const enum fd_dirty_3d_state dirty = emit->dirty;
emit_marker5(ring, 5);
if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) {
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
unsigned char mrt_comp[A5XX_MAX_RENDER_TARGETS] = {0};
for (unsigned i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
uint32_t rb_alpha_control = zsa->rb_alpha_control;
if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
OUT_RING(ring, zsa->rb_stencil_control);
}
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_BLEND | FD_DIRTY_PROG)) {
+ struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend);
+ struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
+
+ if (pfb->zsbuf) {
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+ uint32_t gras_lrz_cntl = zsa->gras_lrz_cntl;
+
+ if (emit->no_lrz_write || !rsc->lrz || !rsc->lrz_valid)
+ gras_lrz_cntl = 0;
+ else if (emit->key.binning_pass && blend->lrz_write && zsa->lrz_write)
+ gras_lrz_cntl |= A5XX_GRAS_LRZ_CNTL_LRZ_WRITE;
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
+ OUT_RING(ring, gras_lrz_cntl);
+ }
+ }
+
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
struct pipe_stencil_ref *sr = &ctx->stencil_ref;
}
if (dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER)) {
- struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
uint32_t posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
unsigned nr = pfb->nr_cbufs;
uint32_t i;
for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
- enum pipe_format format = pipe_surface_format(
- ctx->batch->framebuffer.cbufs[i]);
+ enum pipe_format format = pipe_surface_format(pfb->cbufs[i]);
bool is_int = util_format_is_pure_integer(format);
bool has_alpha = util_format_has_alpha(format);
uint32_t control = blend->rb_mrt[i].control;
OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
- /* other regs not used (yet?) and always seem to have same value: */
- OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
- OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */
-
OUT_PKT4(ring, REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 1);
OUT_RING(ring, 0x00000000); /* GRAS_SU_CONSERVATIVE_RAS_CNTL */
bool rasterflat;
bool no_decode_srgb;
+ /* in binning pass, we don't have real frag shader, so we
+ * don't know if real draw disqualifies lrz write. So just
+ * figure that out up-front and stash it in the emit.
+ */
+ bool no_lrz_write;
+
/* cached to avoid repeated lookups of same variants: */
const struct ir3_shader_variant *vp, *fp;
/* TODO: other shader stages.. */
OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */
+ if (rsc->lrz) {
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
+ OUT_RELOCW(ring, rsc->lrz, 0x1000, 0, 0);
+ OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
+ OUT_RELOCW(ring, rsc->lrz, 0, 0, 0);
+ } else {
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
+
+ OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+
if (rsc->stencil) {
if (gmem) {
stride = 1 * gmem->bin_w;
static void
fd5_emit_tile_init(struct fd_batch *batch)
{
+ struct fd_context *ctx = batch->ctx;
struct fd_ringbuffer *ring = batch->gmem;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
fd5_emit_restore(batch, ring);
+ if (batch->lrz_clear)
+ ctx->emit_ib(ring, batch->lrz_clear);
+
fd5_emit_lrz_flush(ring);
+ OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
+ OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */
+
OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
OUT_RING(ring, 0x0);
OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */
+ emit_zs(ring, pfb->zsbuf, &ctx->gmem);
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, &ctx->gmem);
+
if (use_hw_binning(batch)) {
emit_binning_pass(batch);
+ fd5_emit_lrz_flush(ring);
patch_draws(batch, USE_VISIBILITY);
} else {
patch_draws(batch, IGNORE_VISIBILITY);
so->base = *cso;
+ switch (cso->depth.func) {
+ case PIPE_FUNC_LESS:
+ case PIPE_FUNC_LEQUAL:
+ so->gras_lrz_cntl = A5XX_GRAS_LRZ_CNTL_ENABLE;
+ break;
+
+ case PIPE_FUNC_GREATER:
+ case PIPE_FUNC_GEQUAL:
+ so->gras_lrz_cntl = A5XX_GRAS_LRZ_CNTL_ENABLE | A5XX_GRAS_LRZ_CNTL_GREATER;
+ break;
+
+ default:
+ /* LRZ not enabled */
+ so->gras_lrz_cntl = 0;
+ break;
+ }
+
+ if (!(cso->stencil->enabled || cso->alpha.enabled || !cso->depth.writemask))
+ so->lrz_write = true;
+
so->rb_depth_cntl |=
A5XX_RB_DEPTH_CNTL_ZFUNC(cso->depth.func); /* maps 1:1 */
uint32_t rb_depth_cntl;
uint32_t rb_stencil_control;
uint32_t rb_stencilrefmask;
+ uint32_t gras_lrz_cntl;
+ bool lrz_write;
};
static inline struct fd5_zsa_stateobj *
fd_ringbuffer_del(batch->draw);
fd_ringbuffer_del(batch->binning);
fd_ringbuffer_del(batch->gmem);
+ if (batch->lrz_clear) {
+ fd_ringbuffer_del(batch->lrz_clear);
+ batch->lrz_clear = NULL;
+ }
util_dynarray_fini(&batch->draw_patches);
/** tiling/gmem (IB0) cmdstream: */
struct fd_ringbuffer *gmem;
+ // TODO maybe more generically split out clear and clear_binning rings?
+ struct fd_ringbuffer *lrz_clear;
+
/**
* hw query related state:
*/
realloc_bo(rsc, setup_slices(rsc, 1, prsc->format));
}
+/* Report whether 'format' is one of the depth (or combined depth/stencil)
+ * formats enumerated in the table below; any other format yields false.
+ *
+ * TODO common helper?
+ */
+static bool
+has_depth(enum pipe_format format)
+{
+	static const enum pipe_format depth_formats[] = {
+		PIPE_FORMAT_Z16_UNORM,
+		PIPE_FORMAT_Z32_UNORM,
+		PIPE_FORMAT_Z32_FLOAT,
+		PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,
+		PIPE_FORMAT_Z24_UNORM_S8_UINT,
+		PIPE_FORMAT_S8_UINT_Z24_UNORM,
+		PIPE_FORMAT_Z24X8_UNORM,
+		PIPE_FORMAT_X8Z24_UNORM,
+	};
+	const unsigned count = sizeof(depth_formats) / sizeof(depth_formats[0]);
+	unsigned idx;
+
+	/* linear scan: the table is tiny and this runs once per resource */
+	for (idx = 0; idx < count; idx++) {
+		if (depth_formats[idx] == format)
+			return true;
+	}
+
+	return false;
+}
+
/**
* Create a new texture object, using the given template info.
*/
fd_resource_create(struct pipe_screen *pscreen,
const struct pipe_resource *tmpl)
{
+ struct fd_screen *screen = fd_screen(pscreen);
struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
struct pipe_resource *prsc = &rsc->base.b;
enum pipe_format format = tmpl->format;
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
format = PIPE_FORMAT_Z32_FLOAT;
- else if (fd_screen(pscreen)->gpu_id < 400 &&
+ else if (screen->gpu_id < 400 &&
util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
format = PIPE_FORMAT_R8G8B8A8_UNORM;
rsc->internal_format = format;
assert(rsc->cpp);
+ // XXX probably need some extra work if we hit rsc shadowing path w/ lrz..
+ if (is_a5xx(screen) && (fd_mesa_debug & FD_DBG_LRZ) && has_depth(format)) {
+ const uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
+ DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
+ unsigned lrz_pitch = align(DIV_ROUND_UP(tmpl->width0, 8), 32);
+ unsigned lrz_height = DIV_ROUND_UP(tmpl->height0, 8);
+ unsigned size = lrz_pitch * lrz_height * 2;
+
+ size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */
+
+ rsc->lrz_height = lrz_height;
+ rsc->lrz_width = lrz_pitch;
+ rsc->lrz_pitch = lrz_pitch;
+ rsc->lrz = fd_bo_new(screen->dev, size, flags);
+ }
+
alignment = slice_alignment(pscreen, tmpl);
- if (is_a4xx(fd_screen(pscreen)) || is_a5xx(fd_screen(pscreen))) {
+ if (is_a4xx(screen) || is_a5xx(screen)) {
switch (tmpl->target) {
case PIPE_TEXTURE_3D:
rsc->layer_first = false;
* shadowed.
*/
uint32_t bc_batch_mask;
+
+ /*
+ * LRZ
+ */
+ bool lrz_valid : 1;
+ uint16_t lrz_width; // for lrz clear, does this differ from lrz_pitch?
+ uint16_t lrz_height;
+ uint16_t lrz_pitch;
+ struct fd_bo *lrz;
};
static inline struct fd_resource *
{"inorder", FD_DBG_INORDER,"Disable reordering for draws/blits"},
{"bstat", FD_DBG_BSTAT, "Print batch stats at context destroy"},
{"nogrow", FD_DBG_NOGROW, "Disable \"growable\" cmdstream buffers, even if kernel supports it"},
+ {"lrz", FD_DBG_LRZ, "Enable experimental LRZ support (a5xx+)"},
DEBUG_NAMED_VALUE_END
};
#define FD_DBG_INORDER 0x4000
#define FD_DBG_BSTAT 0x8000
#define FD_DBG_NOGROW 0x10000
+#define FD_DBG_LRZ 0x20000
extern int fd_mesa_debug;
extern bool fd_binning_enabled;