From 2e746bc63d1ab56c6006f328c21a77dc69d0b9a8 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 22 Jan 2018 09:14:25 +0800 Subject: [PATCH] broadcom/vc5: Enable UIF XOR on textures. This should increase performance by reducing SDRAM bank conflicts when crossing between UIF columns (particularly on power-of-two height textures). The uif_xor_disable setup is dropped, since we need to allow XOR on lower miplevels even when level 0 is XOR. The level 0 force UIF and level 0 XOR flags should handle setting XOR properly on imported buffers. --- src/gallium/drivers/vc5/vc5_resource.c | 15 +++++++++++-- src/gallium/drivers/vc5/vc5_state.c | 3 --- src/gallium/drivers/vc5/vc5_tiling.c | 29 ++++++++++++++++++++++++-- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/vc5/vc5_resource.c b/src/gallium/drivers/vc5/vc5_resource.c index 40697ce6de2..157eb1c1013 100644 --- a/src/gallium/drivers/vc5/vc5_resource.c +++ b/src/gallium/drivers/vc5/vc5_resource.c @@ -435,8 +435,6 @@ vc5_setup_slices(struct vc5_resource *rsc) level_width = align(level_width, 2 * uif_block_w); level_height = align(level_height, uif_block_h); } else { - slice->tiling = VC5_TILING_UIF_NO_XOR; - /* We align the width to a 4-block column of * UIF blocks, but we only align height to UIF * blocks. 
@@ -449,6 +447,19 @@ vc5_setup_slices(struct vc5_resource *rsc) slice->ub_pad = vc5_get_ub_pad(rsc, level_height); level_height += slice->ub_pad * uif_block_h; + + /* If the padding set us to be aligned to + * the page cache size, then the HW will use + * the XOR bit on odd columns to get us + * perfectly misaligned. + */ + if ((level_height / uif_block_h) % + (VC5_PAGE_CACHE_SIZE / + VC5_UIFBLOCK_ROW_SIZE) == 0) { + slice->tiling = VC5_TILING_UIF_XOR; + } else { + slice->tiling = VC5_TILING_UIF_NO_XOR; + } } } diff --git a/src/gallium/drivers/vc5/vc5_state.c b/src/gallium/drivers/vc5/vc5_state.c index a055d252b65..9d4d0893f8b 100644 --- a/src/gallium/drivers/vc5/vc5_state.c +++ b/src/gallium/drivers/vc5/vc5_state.c @@ -773,9 +773,6 @@ vc5_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, cso->format); } - tex.uif_xor_disable = (rsc->slices[0].tiling == - VC5_TILING_UIF_NO_XOR); - /* Since other platform devices may produce UIF images even * when they're not big enough for V3D to assume they're UIF, * we force images with level 0 as UIF to be always treated diff --git a/src/gallium/drivers/vc5/vc5_tiling.c b/src/gallium/drivers/vc5/vc5_tiling.c index 4d8757dbf62..cbd86d5566a 100644 --- a/src/gallium/drivers/vc5/vc5_tiling.c +++ b/src/gallium/drivers/vc5/vc5_tiling.c @@ -154,7 +154,8 @@ vc5_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h, * 4x4 groups, and those 4x4 groups are then stored in raster order. 
*/ static inline uint32_t -vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y) +vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y, + bool do_xor) { uint32_t utile_w = vc5_utile_width(cpp); uint32_t utile_h = vc5_utile_height(cpp); @@ -170,6 +171,9 @@ vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y) uint32_t mb_pixel_x = x - (mb_x << log2_mb_width); uint32_t mb_pixel_y = y - (mb_y << log2_mb_height); + if (do_xor && (mb_x / 4) & 1) + mb_y ^= 0x10; + uint32_t mb_h = align(image_h, 1 << log2_mb_height) >> log2_mb_height; uint32_t mb_id = ((mb_x / 4) * ((mb_h - 1) * 4)) + mb_x + mb_y * 4; @@ -193,6 +197,20 @@ vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y) return mb_pixel_address; } +static inline uint32_t +vc5_get_uif_xor_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return vc5_get_uif_pixel_offset(cpp, image_h, x, y, true); +} + +static inline uint32_t +vc5_get_uif_no_xor_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return vc5_get_uif_pixel_offset(cpp, image_h, x, y, false); +} + static inline void vc5_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride, void *cpu, uint32_t cpu_stride, @@ -289,11 +307,18 @@ vc5_move_tiled_image(void *gpu, uint32_t gpu_stride, bool is_load) { switch (tiling_format) { + case VC5_TILING_UIF_XOR: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_uif_xor_pixel_offset, + is_load); + break; case VC5_TILING_UIF_NO_XOR: vc5_move_pixels_general(gpu, gpu_stride, cpu, cpu_stride, cpp, image_h, box, - vc5_get_uif_pixel_offset, + vc5_get_uif_no_xor_pixel_offset, is_load); break; case VC5_TILING_UBLINEAR_2_COLUMN: -- 2.30.2