From 22d2cbe6856fea65bf01dc96941b5127f17dacab Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 21 Nov 2019 15:09:02 -0800 Subject: [PATCH] freedreno: Allow UBWC on textures with multiple mipmap levels. This is a backport of Jonathan Marek's UBWC work on turnip to GL. Performance highlights from our trace set (320 frames sampled) traces/glmark2/texture-texture-filter=mipmap.rdc: +9.1% +/- 2.2% traces/android/trex.rdc: +8.7% +/- 0.4% traces/glmark2/desktop-effect=shadow:windows=4.rdc: +4.2% +/- 2.5% Tested-by: Marge Bot Part-of: --- src/freedreno/fdl/fd6_layout.c | 14 ++-- src/freedreno/fdl/freedreno_layout.c | 4 +- src/freedreno/fdl/freedreno_layout.h | 19 +++--- src/freedreno/vulkan/tu_image.c | 5 +- .../drivers/freedreno/a6xx/fd6_image.c | 9 ++- .../drivers/freedreno/a6xx/fd6_resource.c | 68 ++----------------- .../drivers/freedreno/a6xx/fd6_resource.h | 1 - .../drivers/freedreno/a6xx/fd6_screen.c | 1 - .../drivers/freedreno/a6xx/fd6_texture.c | 15 ++-- .../drivers/freedreno/freedreno_resource.c | 8 +-- .../drivers/freedreno/freedreno_resource.h | 4 +- .../drivers/freedreno/freedreno_screen.h | 1 - 12 files changed, 51 insertions(+), 98 deletions(-) diff --git a/src/freedreno/fdl/fd6_layout.c b/src/freedreno/fdl/fd6_layout.c index 30f334968ee..661d7574e8b 100644 --- a/src/freedreno/fdl/fd6_layout.c +++ b/src/freedreno/fdl/fd6_layout.c @@ -69,7 +69,7 @@ void fdl6_layout(struct fdl_layout *layout, enum pipe_format format, uint32_t nr_samples, uint32_t width0, uint32_t height0, uint32_t depth0, - uint32_t mip_levels, uint32_t array_size, bool is_3d, bool ubwc) + uint32_t mip_levels, uint32_t array_size, bool is_3d) { assert(nr_samples > 0); layout->width0 = width0; @@ -81,6 +81,11 @@ fdl6_layout(struct fdl_layout *layout, layout->format = format; layout->nr_samples = nr_samples; + if (depth0 > 1) + layout->ubwc = false; + if (tile_alignment[layout->cpp].ubwc_blockwidth == 0) + layout->ubwc = false; + const struct util_format_description *format_desc = util_format_description(format); uint32_t depth = depth0; @@ -115,8 +120,7 @@ fdl6_layout(struct fdl_layout *layout, for (uint32_t level = 0; level < mip_levels; level++) { struct fdl_slice *slice = &layout->slices[level]; struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level]; - uint32_t tile_mode = (ubwc ? - layout->tile_mode : fdl_tile_mode(layout, level)); + uint32_t tile_mode = fdl_tile_mode(layout, level); uint32_t width, height; /* tiled levels of 3D textures are rounded up to PoT dimensions: */ @@ -176,7 +180,7 @@ fdl6_layout(struct fdl_layout *layout, layout->size += slice->size0 * depth * layers_in_level; - if (ubwc) { + if (layout->ubwc) { /* with UBWC every level is aligned to 4K */ layout->size = align(layout->size, 4096); @@ -219,7 +223,7 @@ fdl6_layout(struct fdl_layout *layout, * get to program the UBWC and non-UBWC offset/strides * independently. */ - if (ubwc) { + if (layout->ubwc) { for (uint32_t level = 0; level < mip_levels; level++) layout->slices[level].offset += layout->ubwc_layer_size * array_size; layout->size += layout->ubwc_layer_size * array_size; diff --git a/src/freedreno/fdl/freedreno_layout.c b/src/freedreno/fdl/freedreno_layout.c index 5336683ba18..57f6388b69a 100644 --- a/src/freedreno/fdl/freedreno_layout.c +++ b/src/freedreno/fdl/freedreno_layout.c @@ -47,8 +47,6 @@ fdl_dump_layout(struct fdl_layout *layout) for (uint32_t level = 0; level < layout->slices[level].size0; level++) { struct fdl_slice *slice = &layout->slices[level]; struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level]; - uint32_t tile_mode = (layout->ubwc_layer_size ? - layout->tile_mode : fdl_tile_mode(layout, level)); fprintf(stderr, "%s: %ux%ux%u@%ux%u:\t%2u: stride=%4u, size=%6u,%6u, aligned_height=%3u, offset=0x%x,0x%x, layersz %5u,%5u tiling=%d\n", util_format_name(layout->format), @@ -62,6 +60,6 @@ fdl_dump_layout(struct fdl_layout *layout) slice->size0 / (slice->pitch * layout->cpp), slice->offset, ubwc_slice->offset, layout->layer_size, layout->ubwc_layer_size, - tile_mode); + fdl_tile_mode(layout, level)); } } diff --git a/src/freedreno/fdl/freedreno_layout.h b/src/freedreno/fdl/freedreno_layout.h index 7230d337a48..f20a3056c2c 100644 --- a/src/freedreno/fdl/freedreno_layout.h +++ b/src/freedreno/fdl/freedreno_layout.h @@ -93,6 +93,7 @@ struct fdl_layout { struct fdl_slice ubwc_slices[FDL_MAX_MIP_LEVELS]; uint32_t layer_size; uint32_t ubwc_layer_size; /* in bytes */ + bool ubwc : 1; bool layer_first : 1; /* see above description */ /* Note that for tiled textures, beyond a certain mipmap level (ie. @@ -134,22 +135,20 @@ fdl_surface_offset(const struct fdl_layout *layout, unsigned level, unsigned lay static inline uint32_t fdl_ubwc_offset(const struct fdl_layout *layout, unsigned level, unsigned layer) { - /* for now this doesn't do anything clever, but when UBWC is enabled - * for multi layer/level images, it will. - */ - if (layout->ubwc_layer_size) { - assert(level == 0); - assert(layer == 0); - } - return layout->ubwc_slices[0].offset; + const struct fdl_slice *slice = &layout->ubwc_slices[level]; + return slice->offset + layer * layout->ubwc_layer_size; } static inline bool fdl_level_linear(const struct fdl_layout *layout, int level) { + if (layout->ubwc) + return false; + unsigned w = u_minify(layout->width0, level); if (w < 16) return true; + return false; } @@ -165,7 +164,7 @@ fdl_tile_mode(const struct fdl_layout *layout, int level) static inline bool fdl_ubwc_enabled(const struct fdl_layout *layout, int level) { - return layout->ubwc_layer_size && fdl_tile_mode(layout, level); + return layout->ubwc; } void @@ -175,7 +174,7 @@ void fdl6_layout(struct fdl_layout *layout, enum pipe_format format, uint32_t nr_samples, uint32_t width0, uint32_t height0, uint32_t depth0, - uint32_t mip_levels, uint32_t array_size, bool is_3d, bool ubwc); + uint32_t mip_levels, uint32_t array_size, bool is_3d); void fdl_dump_layout(struct fdl_layout *layout); diff --git a/src/freedreno/vulkan/tu_image.c b/src/freedreno/vulkan/tu_image.c index fee74a294c5..4b804ba5c24 100644 --- a/src/freedreno/vulkan/tu_image.c +++ b/src/freedreno/vulkan/tu_image.c @@ -150,6 +150,8 @@ tu_image_create(VkDevice _device, /* expect UBWC enabled if we asked for it */ assert(modifier != DRM_FORMAT_MOD_QCOM_COMPRESSED || ubwc_enabled); + image->layout.ubwc = ubwc_enabled; + fdl6_layout(&image->layout, vk_format_to_pipe_format(image->vk_format), image->samples, pCreateInfo->extent.width, @@ -157,8 +159,7 @@ tu_image_create(VkDevice _device, pCreateInfo->extent.depth, pCreateInfo->mipLevels, pCreateInfo->arrayLayers, - pCreateInfo->imageType == VK_IMAGE_TYPE_3D, - ubwc_enabled); + pCreateInfo->imageType == VK_IMAGE_TYPE_3D); *pImage = tu_image_to_handle(image); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.c b/src/gallium/drivers/freedreno/a6xx/fd6_image.c index 57503b5c14e..4e87e2e9c6e 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_image.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.c @@ -197,9 +197,16 @@ static void emit_image_tex(struct fd_ringbuffer *ring, struct fd6_image *img) if (ubwc_enabled) { struct fdl_slice *ubwc_slice = &rsc->layout.ubwc_slices[img->level]; + + uint32_t block_width, block_height; + fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height); + OUT_RELOC(ring, rsc->bo, img->ubwc_offset, 0, 0); OUT_RING(ring, A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(rsc->layout.ubwc_layer_size >> 2)); - OUT_RING(ring, A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(ubwc_slice->pitch)); + OUT_RING(ring, + A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(ubwc_slice->pitch) | + A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(DIV_ROUND_UP(img->width, block_width))) | + A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(DIV_ROUND_UP(img->height, block_height)))); } else { OUT_RING(ring, 0x00000000); /* texconst7 */ OUT_RING(ring, 0x00000000); /* texconst8 */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_resource.c b/src/gallium/drivers/freedreno/a6xx/fd6_resource.c index d35a42156bb..2e86632892a 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_resource.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_resource.c @@ -89,67 +89,6 @@ ok_ubwc_format(struct fd_resource *rsc, enum pipe_format pfmt) } } -uint32_t -fd6_fill_ubwc_buffer_sizes(struct fd_resource *rsc) -{ -#define RBG_TILE_WIDTH_ALIGNMENT 64 -#define RGB_TILE_HEIGHT_ALIGNMENT 16 -#define UBWC_PLANE_SIZE_ALIGNMENT 4096 - - struct pipe_resource *prsc = &rsc->base; - uint32_t width = prsc->width0; - uint32_t height = prsc->height0; - - if (!ok_ubwc_format(rsc, prsc->format)) - return 0; - - /* limit things to simple single level 2d for now: */ - if ((prsc->depth0 != 1) || (prsc->array_size != 1) || (prsc->last_level != 0)) - return 0; - - uint32_t block_width, block_height; - switch (rsc->layout.cpp) { - case 2: - case 4: - block_width = 16; - block_height = 4; - break; - case 8: - block_width = 8; - block_height = 4; - break; - case 16: - block_width = 4; - block_height = 4; - break; - default: - return 0; - } - - uint32_t meta_stride = - ALIGN_POT(DIV_ROUND_UP(width, block_width), RBG_TILE_WIDTH_ALIGNMENT); - uint32_t meta_height = - ALIGN_POT(DIV_ROUND_UP(height, block_height), RGB_TILE_HEIGHT_ALIGNMENT); - uint32_t meta_size = - ALIGN_POT(meta_stride * meta_height, UBWC_PLANE_SIZE_ALIGNMENT); - - /* UBWC goes first, then color data.. this constraint is mainly only - * because it is what the kernel expects for scanout. For non-2D we - * could just use a separate UBWC buffer.. - */ - for (int level = 0; level <= prsc->last_level; level++) { - struct fdl_slice *slice = fd_resource_slice(rsc, level); - slice->offset += meta_size; - } - - rsc->layout.ubwc_slices[0].offset = 0; - rsc->layout.ubwc_slices[0].pitch = meta_stride; - rsc->layout.ubwc_layer_size = meta_size; - rsc->layout.tile_mode = TILE6_3; - - return meta_size; -} - /** * Ensure the rsc is in an ok state to be used with the specified format. * This handles the case of UBWC buffers used with non-UBWC compatible @@ -159,7 +98,7 @@ void fd6_validate_format(struct fd_context *ctx, struct fd_resource *rsc, enum pipe_format format) { - if (!rsc->layout.ubwc_layer_size) + if (!rsc->layout.ubwc) return; if (ok_ubwc_format(rsc, format)) @@ -205,10 +144,13 @@ fd6_setup_slices(struct fd_resource *rsc) if (!(fd_mesa_debug & FD_DBG_NOLRZ) && has_depth(rsc->base.format)) setup_lrz(rsc); + if (rsc->layout.ubwc && !ok_ubwc_format(rsc, rsc->base.format)) + rsc->layout.ubwc = false; + fdl6_layout(&rsc->layout, prsc->format, fd_resource_nr_samples(prsc), prsc->width0, prsc->height0, prsc->depth0, prsc->last_level + 1, prsc->array_size, - prsc->target == PIPE_TEXTURE_3D, false); + prsc->target == PIPE_TEXTURE_3D); return rsc->layout.size; } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_resource.h b/src/gallium/drivers/freedreno/a6xx/fd6_resource.h index f639207c6bc..b988f1c78b6 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_resource.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_resource.h @@ -30,7 +30,6 @@ #include "freedreno_resource.h" -uint32_t fd6_fill_ubwc_buffer_sizes(struct fd_resource *rsc); void fd6_validate_format(struct fd_context *ctx, struct fd_resource *rsc, enum pipe_format format); uint32_t fd6_setup_slices(struct fd_resource *rsc); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_screen.c b/src/gallium/drivers/freedreno/a6xx/fd6_screen.c index 6e046cfa8c6..6e04df65119 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_screen.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_screen.c @@ -138,7 +138,6 @@ fd6_screen_init(struct pipe_screen *pscreen) screen->setup_slices = fd6_setup_slices; screen->tile_mode = fd6_tile_mode; - screen->fill_ubwc_buffer_sizes = fd6_fill_ubwc_buffer_sizes; static const uint64_t supported_modifiers[] = { DRM_FORMAT_MOD_LINEAR, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c index 91b42bd3d7c..e0883815ca9 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c @@ -277,11 +277,6 @@ fd6_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a); - if (so->ubwc_enabled) { - so->texconst9 |= A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(rsc->layout.ubwc_layer_size >> 2); - so->texconst10 |= A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(rsc->layout.ubwc_slices[lvl].pitch); - } - so->texconst2 |= A6XX_TEX_CONST_2_TYPE(fd6_tex_type(cso->target)); switch (cso->target) { @@ -320,7 +315,17 @@ fd6_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, } if (so->ubwc_enabled) { + struct fdl_slice *ubwc_base_slice = &rsc->layout.ubwc_slices[lvl]; + + uint32_t block_width, block_height; + fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height); + so->texconst3 |= A6XX_TEX_CONST_3_FLAG | A6XX_TEX_CONST_3_TILE_ALL; + so->texconst9 |= A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(rsc->layout.ubwc_layer_size >> 2); + so->texconst10 |= + A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(ubwc_base_slice->pitch) | + A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(DIV_ROUND_UP(u_minify(prsc->width0, lvl), block_width))) | + A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(DIV_ROUND_UP(u_minify(prsc->height0, lvl), block_height))); } return &so->base; diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index 748664384eb..59b1d9793fa 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -977,6 +977,8 @@ fd_resource_create_with_modifiers(struct pipe_screen *pscreen, rsc->internal_format = format; + rsc->layout.ubwc = rsc->layout.tile_mode && is_a6xx(screen) && allow_ubwc; + if (prsc->target == PIPE_BUFFER) { assert(prsc->format == PIPE_FORMAT_R8_UNORM); size = prsc->width0; @@ -985,9 +987,6 @@ fd_resource_create_with_modifiers(struct pipe_screen *pscreen, size = screen->setup_slices(rsc); } - if (allow_ubwc && screen->fill_ubwc_buffer_sizes && rsc->layout.tile_mode) - size += screen->fill_ubwc_buffer_sizes(rsc); - /* special case for hw-query buffer, which we need to allocate before we * know the size: */ @@ -1098,8 +1097,7 @@ fd_resource_from_handle(struct pipe_screen *pscreen, DBG("bad modifier: %"PRIx64, handle->modifier); goto fail; } - debug_assert(screen->fill_ubwc_buffer_sizes); - screen->fill_ubwc_buffer_sizes(rsc); + /* XXX UBWC setup */ } else if (handle->modifier && (handle->modifier != DRM_FORMAT_MOD_INVALID)) { goto fail; diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h index 7a4f9fe2930..478417e6ba3 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.h +++ b/src/gallium/drivers/freedreno/freedreno_resource.h @@ -158,7 +158,9 @@ fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer) static inline uint32_t fd_resource_ubwc_offset(struct fd_resource *rsc, unsigned level, unsigned layer) { - return fdl_ubwc_offset(&rsc->layout, level, layer); + uint32_t offset = fdl_ubwc_offset(&rsc->layout, level, layer); + debug_assert(offset < fd_bo_size(rsc->bo)); + return offset; } /* This might be a5xx specific, but higher mipmap levels are always linear: */ diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h index 22513065e5d..b5730da0297 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.h +++ b/src/gallium/drivers/freedreno/freedreno_screen.h @@ -91,7 +91,6 @@ struct fd_screen { */ struct fd_pipe *pipe; - uint32_t (*fill_ubwc_buffer_sizes)(struct fd_resource *rsc); uint32_t (*setup_slices)(struct fd_resource *rsc); unsigned (*tile_mode)(const struct pipe_resource *prsc); -- 2.30.2