From 857d9f3b02c95713d845c54bc6cc1faf877095bb Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 3 May 2019 13:10:22 -0700 Subject: [PATCH] freedreno/a6xx: UBWC fixes A few fixes that get UBWC working for the games/benchmarks where I noticed problems before (in particular manhattan, and stk (modulo image support for UBWC when compute shaders are used for post-process effects)): + fix the size of the UBWC meta buffer (ie, the offset to color pixel data) that is returned by ->fill_ubwc_buffer_sizes() + correct size/layout for 8 and 16 byte per pixel formats + limit the supported formats.. Not all formats that can be tiled can be compressed. Signed-off-by: Rob Clark --- .../drivers/freedreno/a6xx/fd6_resource.c | 87 ++++++++++++++++--- .../drivers/freedreno/a6xx/fd6_texture.c | 2 +- 2 files changed, 78 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_resource.c b/src/gallium/drivers/freedreno/a6xx/fd6_resource.c index e15a7ba44bb..ca475f30ef6 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_resource.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_resource.c @@ -26,6 +26,7 @@ */ #include "fd6_resource.h" +#include "fd6_format.h" #include "a6xx.xml.h" @@ -161,12 +162,51 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format forma return size; } +/* A subset of the valid tiled formats can be compressed. We do + * already require tiled in order to be compressed, but just because + * it can be tiled doesn't mean it can be compressed. 
+ */ +static bool +ok_ubwc_format(enum a6xx_color_fmt fmt) +{ + switch (fmt) { + case RB6_R10G10B10A2_UINT: + case RB6_R10G10B10A2_UNORM: + case RB6_R11G11B10_FLOAT: + case RB6_R16_FLOAT: + case RB6_R16G16B16A16_FLOAT: + case RB6_R16G16B16A16_SINT: + case RB6_R16G16B16A16_UINT: + case RB6_R16G16_FLOAT: + case RB6_R16G16_SINT: + case RB6_R16G16_UINT: + case RB6_R16_SINT: + case RB6_R16_UINT: + case RB6_R32G32B32A32_SINT: + case RB6_R32G32B32A32_UINT: + case RB6_R32G32_SINT: + case RB6_R32G32_UINT: + case RB6_R32_SINT: + case RB6_R32_UINT: + case RB6_R5G6B5_UNORM: + case RB6_R8G8B8A8_SINT: + case RB6_R8G8B8A8_UINT: + case RB6_R8G8B8A8_UNORM: + case RB6_R8G8B8_UNORM: + case RB6_R8G8_SINT: + case RB6_R8G8_UINT: + case RB6_R8G8_UNORM: + case RB6_X8Z24_UNORM: + return true; + default: + return false; + } +} + uint32_t fd6_fill_ubwc_buffer_sizes(struct fd_resource *rsc) { -#define RGB_TILE_WIDTH 16 #define RBG_TILE_WIDTH_ALIGNMENT 64 -#define RGB_TILE_HEIGHT 4 #define RGB_TILE_HEIGHT_ALIGNMENT 16 #define UBWC_PLANE_SIZE_ALIGNMENT 4096 @@ -174,23 +214,50 @@ fd6_fill_ubwc_buffer_sizes(struct fd_resource *rsc) uint32_t width = prsc->width0; uint32_t height = prsc->height0; + if (!ok_ubwc_format(fd6_pipe2color(prsc->format))) + return 0; + /* limit things to simple single level 2d for now: */ if ((prsc->depth0 != 1) || (prsc->array_size != 1) || (prsc->last_level != 0)) return 0; + uint32_t block_width, block_height; + switch (rsc->cpp) { + case 2: + case 4: + block_width = 16; + block_height = 4; + break; + case 8: + block_width = 8; + block_height = 4; + break; + case 16: + block_width = 4; + block_height = 4; + break; + default: + return 0; + } + uint32_t meta_stride = - ALIGN_POT(DIV_ROUND_UP(width, RGB_TILE_WIDTH), RBG_TILE_WIDTH_ALIGNMENT); - uint32_t meta_scanlines = - ALIGN_POT(DIV_ROUND_UP(height, RGB_TILE_HEIGHT), RGB_TILE_HEIGHT_ALIGNMENT); - uint32_t meta_plane = - ALIGN_POT(meta_stride * meta_scanlines, UBWC_PLANE_SIZE_ALIGNMENT); + ALIGN_POT(DIV_ROUND_UP(width, 
block_width), RBG_TILE_WIDTH_ALIGNMENT); + uint32_t meta_height = + ALIGN_POT(DIV_ROUND_UP(height, block_height), RGB_TILE_HEIGHT_ALIGNMENT); + uint32_t meta_size = + ALIGN_POT(meta_stride * meta_height, UBWC_PLANE_SIZE_ALIGNMENT); - rsc->offset = meta_plane; + /* UBWC goes first, then color data.. this constraint is mainly only + * because it is what the kernel expects for scanout. For non-2D we + * could just use a separate UBWC buffer.. + */ + rsc->ubwc_offset = 0; + rsc->offset = meta_size; rsc->ubwc_pitch = meta_stride; - rsc->ubwc_size = meta_plane >> 2; + rsc->ubwc_size = meta_size >> 2; /* in dwords??? */ rsc->tile_mode = TILE6_3; - return rsc->ubwc_size; + return meta_size; } uint32_t diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c index f780ebdf0aa..b2430cc6c3e 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c @@ -266,7 +266,7 @@ fd6_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, format, rsc->slices[lvl].pitch) * rsc->cpp); so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer); - so->ubwc_enabled = rsc->ubwc_size && u_minify(prsc->width0, lvl) >= 16; + so->ubwc_enabled = rsc->ubwc_size && !fd_resource_level_linear(prsc, lvl); } so->texconst0 |= fd6_tex_const_0(prsc, lvl, cso->format, -- 2.30.2