From 11583dc65536f274db68cc5c8cb1a0d7007b0201 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 3 May 2019 13:39:45 -0700 Subject: [PATCH] freedreno/a6xx: UBWC support for images There are still some fallbacks we'll need to handle before we can enable UBWC by default. I think we may need to fall back to uncompressed if image atomic operations are used. And we still need to sort out how to handle image and sampler views of compressed resources if the image/sampler view is using a format that does not support compression. (I think the latter should hopefully be uncommon outside of deqp/piglit.) But at least this gets us to the point where supertuxkart works properly with UBWC enabled ;-) Signed-off-by: Rob Clark --- .../drivers/freedreno/a6xx/fd6_image.c | 63 ++++++++++++++----- .../drivers/freedreno/freedreno_resource.c | 13 +++- 2 files changed, 57 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.c b/src/gallium/drivers/freedreno/a6xx/fd6_image.c index 9c1182777c0..a38b78907d3 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_image.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.c @@ -47,6 +47,7 @@ struct fd6_image { uint32_t pitch; uint32_t array_pitch; struct fd_bo *bo; + uint32_t ubwc_offset; uint32_t offset; bool buffer; }; @@ -77,6 +78,7 @@ static void translate_image(struct fd6_image *img, const struct pipe_image_view if (prsc->target == PIPE_BUFFER) { img->buffer = true; + img->ubwc_offset = 0; /* not valid for buffers */ img->offset = pimg->u.buf.offset; img->pitch = 0; img->array_pitch = 0; @@ -94,7 +96,8 @@ static void translate_image(struct fd6_image *img, const struct pipe_image_view unsigned lvl = pimg->u.tex.level; unsigned layers = pimg->u.tex.last_layer - pimg->u.tex.first_layer + 1; - img->offset = fd_resource_offset(rsc, lvl, pimg->u.tex.first_layer); + img->ubwc_offset = rsc->ubwc_offset; // TODO helper + img->offset = fd_resource_offset(rsc, lvl, pimg->u.tex.first_layer) + rsc->offset; img->pitch 
= rsc->slices[lvl].pitch * rsc->cpp; switch (prsc->target) { @@ -148,6 +151,7 @@ static void translate_buf(struct fd6_image *img, const struct pipe_shader_buffer img->bo = rsc->bo; img->buffer = true; + img->ubwc_offset = 0; /* not valid for buffers */ img->offset = pimg->buffer_offset; img->pitch = 0; img->array_pitch = 0; @@ -163,6 +167,10 @@ static void translate_buf(struct fd6_image *img, const struct pipe_shader_buffer static void emit_image_tex(struct fd_ringbuffer *ring, struct fd6_image *img) { + struct fd_resource *rsc = fd_resource(img->prsc); + bool ubwc_enabled = rsc->ubwc_size && + !fd_resource_level_linear(img->prsc, img->level); + OUT_RING(ring, fd6_tex_const_0(img->prsc, img->level, img->pfmt, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W)); @@ -172,7 +180,8 @@ static void emit_image_tex(struct fd_ringbuffer *ring, struct fd6_image *img) COND(img->buffer, A6XX_TEX_CONST_2_UNK4 | A6XX_TEX_CONST_2_UNK31) | A6XX_TEX_CONST_2_TYPE(img->type) | A6XX_TEX_CONST_2_PITCH(img->pitch)); - OUT_RING(ring, A6XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch)); + OUT_RING(ring, A6XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch) | + COND(ubwc_enabled, A6XX_TEX_CONST_3_FLAG | A6XX_TEX_CONST_3_UNK27)); if (img->bo) { OUT_RELOC(ring, img->bo, img->offset, (uint64_t)A6XX_TEX_CONST_5_DEPTH(img->depth) << 32, 0); @@ -180,16 +189,25 @@ static void emit_image_tex(struct fd_ringbuffer *ring, struct fd6_image *img) OUT_RING(ring, 0x00000000); OUT_RING(ring, A6XX_TEX_CONST_5_DEPTH(img->depth)); } - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); + + OUT_RING(ring, 0x00000000); /* texconst6 */ + + if (ubwc_enabled) { + OUT_RELOC(ring, rsc->bo, img->ubwc_offset, 0, 0); + OUT_RING(ring, 
A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(rsc->ubwc_size)); + OUT_RING(ring, A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(rsc->ubwc_pitch)); + } else { + OUT_RING(ring, 0x00000000); /* texconst7 */ + OUT_RING(ring, 0x00000000); /* texconst8 */ + OUT_RING(ring, 0x00000000); /* texconst9 */ + OUT_RING(ring, 0x00000000); /* texconst10 */ + } + + OUT_RING(ring, 0x00000000); /* texconst11 */ + OUT_RING(ring, 0x00000000); /* texconst12 */ + OUT_RING(ring, 0x00000000); /* texconst13 */ + OUT_RING(ring, 0x00000000); /* texconst14 */ + OUT_RING(ring, 0x00000000); /* texconst15 */ } void @@ -212,6 +230,8 @@ static void emit_image_ssbo(struct fd_ringbuffer *ring, struct fd6_image *img) { struct fd_resource *rsc = fd_resource(img->prsc); enum a6xx_tile_mode tile_mode = TILE6_LINEAR; + bool ubwc_enabled = rsc->ubwc_size && + !fd_resource_level_linear(img->prsc, img->level); if (rsc->tile_mode && !fd_resource_level_linear(img->prsc, img->level)) { tile_mode = rsc->tile_mode; @@ -224,7 +244,8 @@ static void emit_image_ssbo(struct fd_ringbuffer *ring, struct fd6_image *img) OUT_RING(ring, A6XX_IBO_2_PITCH(img->pitch) | COND(img->buffer, A6XX_IBO_2_UNK4 | A6XX_IBO_2_UNK31) | A6XX_IBO_2_TYPE(img->type)); - OUT_RING(ring, A6XX_IBO_3_ARRAY_PITCH(img->array_pitch)); + OUT_RING(ring, A6XX_IBO_3_ARRAY_PITCH(img->array_pitch) | + COND(ubwc_enabled, A6XX_IBO_3_FLAG | A6XX_IBO_3_UNK27)); if (img->bo) { OUT_RELOCW(ring, img->bo, img->offset, (uint64_t)A6XX_IBO_5_DEPTH(img->depth) << 32, 0); @@ -233,10 +254,18 @@ static void emit_image_ssbo(struct fd_ringbuffer *ring, struct fd6_image *img) OUT_RING(ring, A6XX_IBO_5_DEPTH(img->depth)); } OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); + + if (ubwc_enabled) { + OUT_RELOCW(ring, rsc->bo, img->ubwc_offset, 0, 0); + OUT_RING(ring, A6XX_IBO_9_FLAG_BUFFER_ARRAY_PITCH(rsc->ubwc_size)); + OUT_RING(ring, A6XX_IBO_10_FLAG_BUFFER_PITCH(rsc->ubwc_pitch)); + } else { 
+ OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } + OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index c7436d74da0..1f0bb040c24 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -911,8 +911,17 @@ fd_resource_create_with_modifiers(struct pipe_screen *pscreen, allow_ubwc = drm_find_modifier(DRM_FORMAT_MOD_QCOM_COMPRESSED, modifiers, count); /* TODO turn on UBWC for all internal buffers - * Manhattan benchmark shows artifacts when enabled. Once this - * is fixed the following line can be removed. + * + * There are still some regressions in deqp with UBWC enabled. I + * think it is mostly related to sampler/image views using a format + * that doesn't support compression with a resource created with + * a format that does. We need to track the compression state of + * a buffer and do an (in-place, hopefully?) resolve if it is re- + * interpreted with a format that does not support compression. + * + * It is possible (likely?) that we can't do atomic ops on a + * compressed buffer as well, so this would also require transition + * to an uncompressed state. */ allow_ubwc &= !!(fd_mesa_debug & FD_DBG_UBWC); -- 2.30.2