This is a backport of Jonathan Marek's UBWC work on turnip to the freedreno GL driver.
Performance highlights from our trace set (320 frames sampled):
traces/glmark2/texture-texture-filter=mipmap.rdc: +9.1% +/- 2.2%
traces/android/trex.rdc: +8.7% +/- 0.4%
traces/glmark2/desktop-effect=shadow:windows=4.rdc: +4.2% +/- 2.5%
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3059>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3059>
fdl6_layout(struct fdl_layout *layout,
enum pipe_format format, uint32_t nr_samples,
uint32_t width0, uint32_t height0, uint32_t depth0,
- uint32_t mip_levels, uint32_t array_size, bool is_3d, bool ubwc)
+ uint32_t mip_levels, uint32_t array_size, bool is_3d)
{
assert(nr_samples > 0);
layout->width0 = width0;
layout->format = format;
layout->nr_samples = nr_samples;
+ if (depth0 > 1)
+ layout->ubwc = false;
+ if (tile_alignment[layout->cpp].ubwc_blockwidth == 0)
+ layout->ubwc = false;
+
const struct util_format_description *format_desc =
util_format_description(format);
uint32_t depth = depth0;
for (uint32_t level = 0; level < mip_levels; level++) {
struct fdl_slice *slice = &layout->slices[level];
struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
- uint32_t tile_mode = (ubwc ?
- layout->tile_mode : fdl_tile_mode(layout, level));
+ uint32_t tile_mode = fdl_tile_mode(layout, level);
uint32_t width, height;
/* tiled levels of 3D textures are rounded up to PoT dimensions: */
layout->size += slice->size0 * depth * layers_in_level;
- if (ubwc) {
+ if (layout->ubwc) {
/* with UBWC every level is aligned to 4K */
layout->size = align(layout->size, 4096);
* get to program the UBWC and non-UBWC offset/strides
* independently.
*/
- if (ubwc) {
+ if (layout->ubwc) {
for (uint32_t level = 0; level < mip_levels; level++)
layout->slices[level].offset += layout->ubwc_layer_size * array_size;
layout->size += layout->ubwc_layer_size * array_size;
for (uint32_t level = 0; level < layout->slices[level].size0; level++) {
struct fdl_slice *slice = &layout->slices[level];
struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
- uint32_t tile_mode = (layout->ubwc_layer_size ?
- layout->tile_mode : fdl_tile_mode(layout, level));
fprintf(stderr, "%s: %ux%ux%u@%ux%u:\t%2u: stride=%4u, size=%6u,%6u, aligned_height=%3u, offset=0x%x,0x%x, layersz %5u,%5u tiling=%d\n",
util_format_name(layout->format),
slice->size0 / (slice->pitch * layout->cpp),
slice->offset, ubwc_slice->offset,
layout->layer_size, layout->ubwc_layer_size,
- tile_mode);
+ fdl_tile_mode(layout, level));
}
}
struct fdl_slice ubwc_slices[FDL_MAX_MIP_LEVELS];
uint32_t layer_size;
uint32_t ubwc_layer_size; /* in bytes */
+ bool ubwc : 1;
bool layer_first : 1; /* see above description */
/* Note that for tiled textures, beyond a certain mipmap level (ie.
static inline uint32_t
fdl_ubwc_offset(const struct fdl_layout *layout, unsigned level, unsigned layer)
{
- /* for now this doesn't do anything clever, but when UBWC is enabled
- * for multi layer/level images, it will.
- */
- if (layout->ubwc_layer_size) {
- assert(level == 0);
- assert(layer == 0);
- }
- return layout->ubwc_slices[0].offset;
+ const struct fdl_slice *slice = &layout->ubwc_slices[level];
+ return slice->offset + layer * layout->ubwc_layer_size;
}
static inline bool
fdl_level_linear(const struct fdl_layout *layout, int level)
{
+ if (layout->ubwc)
+ return false;
+
unsigned w = u_minify(layout->width0, level);
if (w < 16)
return true;
+
return false;
}
static inline bool
fdl_ubwc_enabled(const struct fdl_layout *layout, int level)
{
- return layout->ubwc_layer_size && fdl_tile_mode(layout, level);
+ return layout->ubwc;
}
void
fdl6_layout(struct fdl_layout *layout,
enum pipe_format format, uint32_t nr_samples,
uint32_t width0, uint32_t height0, uint32_t depth0,
- uint32_t mip_levels, uint32_t array_size, bool is_3d, bool ubwc);
+ uint32_t mip_levels, uint32_t array_size, bool is_3d);
void
fdl_dump_layout(struct fdl_layout *layout);
/* expect UBWC enabled if we asked for it */
assert(modifier != DRM_FORMAT_MOD_QCOM_COMPRESSED || ubwc_enabled);
+ image->layout.ubwc = ubwc_enabled;
+
fdl6_layout(&image->layout, vk_format_to_pipe_format(image->vk_format),
image->samples,
pCreateInfo->extent.width,
pCreateInfo->extent.depth,
pCreateInfo->mipLevels,
pCreateInfo->arrayLayers,
- pCreateInfo->imageType == VK_IMAGE_TYPE_3D,
- ubwc_enabled);
+ pCreateInfo->imageType == VK_IMAGE_TYPE_3D);
*pImage = tu_image_to_handle(image);
if (ubwc_enabled) {
struct fdl_slice *ubwc_slice = &rsc->layout.ubwc_slices[img->level];
+
+ uint32_t block_width, block_height;
+ fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);
+
OUT_RELOC(ring, rsc->bo, img->ubwc_offset, 0, 0);
OUT_RING(ring, A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(rsc->layout.ubwc_layer_size >> 2));
- OUT_RING(ring, A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(ubwc_slice->pitch));
+ OUT_RING(ring,
+ A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(ubwc_slice->pitch) |
+ A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(DIV_ROUND_UP(img->width, block_width))) |
+ A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(DIV_ROUND_UP(img->height, block_height))));
} else {
OUT_RING(ring, 0x00000000); /* texconst7 */
OUT_RING(ring, 0x00000000); /* texconst8 */
}
}
-uint32_t
-fd6_fill_ubwc_buffer_sizes(struct fd_resource *rsc)
-{
-#define RBG_TILE_WIDTH_ALIGNMENT 64
-#define RGB_TILE_HEIGHT_ALIGNMENT 16
-#define UBWC_PLANE_SIZE_ALIGNMENT 4096
-
- struct pipe_resource *prsc = &rsc->base;
- uint32_t width = prsc->width0;
- uint32_t height = prsc->height0;
-
- if (!ok_ubwc_format(rsc, prsc->format))
- return 0;
-
- /* limit things to simple single level 2d for now: */
- if ((prsc->depth0 != 1) || (prsc->array_size != 1) || (prsc->last_level != 0))
- return 0;
-
- uint32_t block_width, block_height;
- switch (rsc->layout.cpp) {
- case 2:
- case 4:
- block_width = 16;
- block_height = 4;
- break;
- case 8:
- block_width = 8;
- block_height = 4;
- break;
- case 16:
- block_width = 4;
- block_height = 4;
- break;
- default:
- return 0;
- }
-
- uint32_t meta_stride =
- ALIGN_POT(DIV_ROUND_UP(width, block_width), RBG_TILE_WIDTH_ALIGNMENT);
- uint32_t meta_height =
- ALIGN_POT(DIV_ROUND_UP(height, block_height), RGB_TILE_HEIGHT_ALIGNMENT);
- uint32_t meta_size =
- ALIGN_POT(meta_stride * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
-
- /* UBWC goes first, then color data.. this constraint is mainly only
- * because it is what the kernel expects for scanout. For non-2D we
- * could just use a separate UBWC buffer..
- */
- for (int level = 0; level <= prsc->last_level; level++) {
- struct fdl_slice *slice = fd_resource_slice(rsc, level);
- slice->offset += meta_size;
- }
-
- rsc->layout.ubwc_slices[0].offset = 0;
- rsc->layout.ubwc_slices[0].pitch = meta_stride;
- rsc->layout.ubwc_layer_size = meta_size;
- rsc->layout.tile_mode = TILE6_3;
-
- return meta_size;
-}
-
/**
* Ensure the rsc is in an ok state to be used with the specified format.
* This handles the case of UBWC buffers used with non-UBWC compatible
fd6_validate_format(struct fd_context *ctx, struct fd_resource *rsc,
enum pipe_format format)
{
- if (!rsc->layout.ubwc_layer_size)
+ if (!rsc->layout.ubwc)
return;
if (ok_ubwc_format(rsc, format))
if (!(fd_mesa_debug & FD_DBG_NOLRZ) && has_depth(rsc->base.format))
setup_lrz(rsc);
+ if (rsc->layout.ubwc && !ok_ubwc_format(rsc, rsc->base.format))
+ rsc->layout.ubwc = false;
+
fdl6_layout(&rsc->layout, prsc->format, fd_resource_nr_samples(prsc),
prsc->width0, prsc->height0, prsc->depth0,
prsc->last_level + 1, prsc->array_size,
- prsc->target == PIPE_TEXTURE_3D, false);
+ prsc->target == PIPE_TEXTURE_3D);
return rsc->layout.size;
}
#include "freedreno_resource.h"
-uint32_t fd6_fill_ubwc_buffer_sizes(struct fd_resource *rsc);
void fd6_validate_format(struct fd_context *ctx, struct fd_resource *rsc,
enum pipe_format format);
uint32_t fd6_setup_slices(struct fd_resource *rsc);
screen->setup_slices = fd6_setup_slices;
screen->tile_mode = fd6_tile_mode;
- screen->fill_ubwc_buffer_sizes = fd6_fill_ubwc_buffer_sizes;
static const uint64_t supported_modifiers[] = {
DRM_FORMAT_MOD_LINEAR,
cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a);
- if (so->ubwc_enabled) {
- so->texconst9 |= A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(rsc->layout.ubwc_layer_size >> 2);
- so->texconst10 |= A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(rsc->layout.ubwc_slices[lvl].pitch);
- }
-
so->texconst2 |= A6XX_TEX_CONST_2_TYPE(fd6_tex_type(cso->target));
switch (cso->target) {
}
if (so->ubwc_enabled) {
+ struct fdl_slice *ubwc_base_slice = &rsc->layout.ubwc_slices[lvl];
+
+ uint32_t block_width, block_height;
+ fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);
+
so->texconst3 |= A6XX_TEX_CONST_3_FLAG | A6XX_TEX_CONST_3_TILE_ALL;
+ so->texconst9 |= A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(rsc->layout.ubwc_layer_size >> 2);
+ so->texconst10 |=
+ A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(ubwc_base_slice->pitch) |
+ A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(DIV_ROUND_UP(u_minify(prsc->width0, lvl), block_width))) |
+ A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(DIV_ROUND_UP(u_minify(prsc->height0, lvl), block_height)));
}
return &so->base;
rsc->internal_format = format;
+ rsc->layout.ubwc = rsc->layout.tile_mode && is_a6xx(screen) && allow_ubwc;
+
if (prsc->target == PIPE_BUFFER) {
assert(prsc->format == PIPE_FORMAT_R8_UNORM);
size = prsc->width0;
size = screen->setup_slices(rsc);
}
- if (allow_ubwc && screen->fill_ubwc_buffer_sizes && rsc->layout.tile_mode)
- size += screen->fill_ubwc_buffer_sizes(rsc);
-
/* special case for hw-query buffer, which we need to allocate before we
* know the size:
*/
DBG("bad modifier: %"PRIx64, handle->modifier);
goto fail;
}
- debug_assert(screen->fill_ubwc_buffer_sizes);
- screen->fill_ubwc_buffer_sizes(rsc);
+ /* XXX UBWC setup */
} else if (handle->modifier &&
(handle->modifier != DRM_FORMAT_MOD_INVALID)) {
goto fail;
static inline uint32_t
fd_resource_ubwc_offset(struct fd_resource *rsc, unsigned level, unsigned layer)
{
- return fdl_ubwc_offset(&rsc->layout, level, layer);
+ uint32_t offset = fdl_ubwc_offset(&rsc->layout, level, layer);
+ debug_assert(offset < fd_bo_size(rsc->bo));
+ return offset;
}
/* This might be a5xx specific, but higher mipmap levels are always linear: */
*/
struct fd_pipe *pipe;
- uint32_t (*fill_ubwc_buffer_sizes)(struct fd_resource *rsc);
uint32_t (*setup_slices)(struct fd_resource *rsc);
unsigned (*tile_mode)(const struct pipe_resource *prsc);