From 58ccadfc5c94295d3ab78444f851ca0b54b1bc31 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 27 Aug 2019 21:07:41 -0400 Subject: [PATCH] radeonsi: move HTILE allocation outside of radeonsi ac_surface computes it for amdgpu. radeon_drm_surface computes it for radeon. Acked-by: Pierre-Eric Pelloux-Prayer --- src/amd/common/ac_surface.c | 13 ++- src/amd/common/ac_surface.h | 1 + src/gallium/drivers/radeonsi/si_texture.c | 100 +++--------------- .../winsys/radeon/drm/radeon_drm_surface.c | 70 ++++++++++++ 4 files changed, 93 insertions(+), 91 deletions(-) diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index 2c2917d4a23..1d254ec3a78 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -338,11 +338,12 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib, } } - /* TC-compatible HTILE. */ + /* HTILE. */ if (!is_stencil && AddrSurfInfoIn->flags.depth && surf_level->mode == RADEON_SURF_MODE_2D && - level == 0) { + level == 0 && + !(surf->flags & RADEON_SURF_NO_HTILE)) { AddrHtileIn->flags.tcCompatible = AddrSurfInfoIn->flags.tcCompatible; AddrHtileIn->pitch = AddrSurfInfoOut->pitch; AddrHtileIn->height = AddrSurfInfoOut->height; @@ -1065,6 +1066,9 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, if (in->flags.depth) { assert(in->swizzleMode != ADDR_SW_LINEAR); + if (surf->flags & RADEON_SURF_NO_HTILE) + return 0; + /* HTILE */ ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0}; ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0}; @@ -1091,7 +1095,10 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, surf->htile_size = hout.htileBytes; surf->htile_slice_size = hout.sliceSize; surf->htile_alignment = hout.baseAlign; - } else { + return 0; + } + + { /* Compute tile swizzle for the color surface. * All *_X and *_T modes can use the swizzle. */ diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h index 53074c90faf..52aa63bff2e 100644 --- a/src/amd/common/ac_surface.h +++ b/src/amd/common/ac_surface.h @@ -72,6 +72,7 @@ enum radeon_micro_mode { #define RADEON_SURF_NO_RENDER_TARGET (1 << 27) #define RADEON_SURF_FORCE_SWIZZLE_MODE (1 << 28) #define RADEON_SURF_NO_FMASK (1 << 29) +#define RADEON_SURF_NO_HTILE (1 << 30) struct legacy_surf_level { uint64_t offset; diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index baa34d0b408..439de3d0ffa 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -253,9 +253,11 @@ static int si_init_surface(struct si_screen *sscreen, if (!is_flushed_depth && is_depth) { flags |= RADEON_SURF_ZBUFFER; - if (tc_compatible_htile && - (sscreen->info.chip_class >= GFX9 || - array_mode == RADEON_SURF_MODE_2D)) { + if (sscreen->debug_flags & DBG(NO_HYPERZ)) { + flags |= RADEON_SURF_NO_HTILE; + } else if (tc_compatible_htile && + (sscreen->info.chip_class >= GFX9 || + array_mode == RADEON_SURF_MODE_2D)) { /* TC-compatible HTILE only supports Z32_FLOAT. * GFX9 also supports Z16_UNORM. * On GFX8, promote Z16 to Z32. DB->CB copies will convert @@ -1067,85 +1069,6 @@ static void si_texture_destroy(struct pipe_screen *screen, static const struct u_resource_vtbl si_texture_vtbl; -static void si_texture_get_htile_size(struct si_screen *sscreen, - struct si_texture *tex) -{ - unsigned cl_width, cl_height, width, height; - unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align; - unsigned num_pipes = sscreen->info.num_tile_pipes; - - assert(sscreen->info.chip_class <= GFX8); - - tex->surface.htile_size = 0; - - if (tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D && - !sscreen->info.htile_cmask_support_1d_tiling) - return; - - /* Overalign HTILE on P2 configs to work around GPU hangs in - * piglit/depthstencil-render-miplevels 585. - * - * This has been confirmed to help Kabini & Stoney, where the hangs - * are always reproducible. I think I have seen the test hang - * on Carrizo too, though it was very rare there. - */ - if (sscreen->info.chip_class >= GFX7 && num_pipes < 4) - num_pipes = 4; - - switch (num_pipes) { - case 1: - cl_width = 32; - cl_height = 16; - break; - case 2: - cl_width = 32; - cl_height = 32; - break; - case 4: - cl_width = 64; - cl_height = 32; - break; - case 8: - cl_width = 64; - cl_height = 64; - break; - case 16: - cl_width = 128; - cl_height = 64; - break; - default: - assert(0); - return; - } - - width = align(tex->surface.u.legacy.level[0].nblk_x, cl_width * 8); - height = align(tex->surface.u.legacy.level[0].nblk_y, cl_height * 8); - - slice_elements = (width * height) / (8 * 8); - slice_bytes = slice_elements * 4; - - pipe_interleave_bytes = sscreen->info.pipe_interleave_bytes; - base_align = num_pipes * pipe_interleave_bytes; - - tex->surface.htile_alignment = base_align; - tex->surface.htile_size = - util_num_layers(&tex->buffer.b.b, 0) * - align(slice_bytes, base_align); -} - -static void si_texture_allocate_htile(struct si_screen *sscreen, - struct si_texture *tex) -{ - if (sscreen->info.chip_class <= GFX8 && !tex->tc_compatible_htile) - si_texture_get_htile_size(sscreen, tex); - - if (!tex->surface.htile_size) - return; - - tex->htile_offset = align(tex->size, tex->surface.htile_alignment); - tex->size = tex->htile_offset + tex->surface.htile_size; -} - void si_print_texture_info(struct si_screen *sscreen, struct si_texture *tex, struct u_log_context *log) { @@ -1370,12 +1293,12 @@ si_texture_create_object(struct pipe_screen *screen, tex->can_sample_s = !tex->surface.u.legacy.stencil_adjusted; } - if (!(base->flags & (SI_RESOURCE_FLAG_TRANSFER | - SI_RESOURCE_FLAG_FLUSHED_DEPTH))) { - tex->db_compatible = true; + tex->db_compatible = surface->flags & RADEON_SURF_ZBUFFER; - if (!(sscreen->debug_flags & DBG(NO_HYPERZ))) - si_texture_allocate_htile(sscreen, tex); + if (tex->surface.htile_size) { + tex->htile_offset = align64(tex->size, + tex->surface.htile_alignment); + tex->size = tex->htile_offset + tex->surface.htile_size; } } else { if (tex->surface.fmask_size) { @@ -1678,7 +1601,8 @@ struct pipe_resource *si_texture_create(struct pipe_screen *screen, } struct radeon_surf surface = {0}; - bool is_flushed_depth = templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH; + bool is_flushed_depth = templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH || + templ->flags & SI_RESOURCE_FLAG_TRANSFER; bool tc_compatible_htile = sscreen->info.chip_class >= GFX8 && /* There are issues with TC-compatible HTILE on Tonga (and diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_surface.c b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c index e09805784e0..332b9e4a428 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_surface.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c @@ -280,6 +280,72 @@ static void si_compute_cmask(const struct radeon_info *info, surf->cmask_size = align(slice_bytes, base_align) * num_layers; } +static void si_compute_htile(const struct radeon_info *info, + struct radeon_surf *surf, unsigned num_layers) +{ + unsigned cl_width, cl_height, width, height; + unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align; + unsigned num_pipes = info->num_tile_pipes; + + surf->htile_size = 0; + + if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) || + surf->flags & RADEON_SURF_NO_HTILE) + return; + + if (surf->u.legacy.level[0].mode == RADEON_SURF_MODE_1D && + !info->htile_cmask_support_1d_tiling) + return; + + /* Overalign HTILE on P2 configs to work around GPU hangs in + * piglit/depthstencil-render-miplevels 585. + * + * This has been confirmed to help Kabini & Stoney, where the hangs + * are always reproducible. I think I have seen the test hang + * on Carrizo too, though it was very rare there. + */ + if (info->chip_class >= GFX7 && num_pipes < 4) + num_pipes = 4; + + switch (num_pipes) { + case 1: + cl_width = 32; + cl_height = 16; + break; + case 2: + cl_width = 32; + cl_height = 32; + break; + case 4: + cl_width = 64; + cl_height = 32; + break; + case 8: + cl_width = 64; + cl_height = 64; + break; + case 16: + cl_width = 128; + cl_height = 64; + break; + default: + assert(0); + return; + } + + width = align(surf->u.legacy.level[0].nblk_x, cl_width * 8); + height = align(surf->u.legacy.level[0].nblk_y, cl_height * 8); + + slice_elements = (width * height) / (8 * 8); + slice_bytes = slice_elements * 4; + + pipe_interleave_bytes = info->pipe_interleave_bytes; + base_align = num_pipes * pipe_interleave_bytes; + + surf->htile_alignment = base_align; + surf->htile_size = num_layers * align(slice_bytes, base_align); +} + static int radeon_winsys_surface_init(struct radeon_winsys *rws, const struct pipe_resource *tex, unsigned flags, unsigned bpe, @@ -365,6 +431,10 @@ static int radeon_winsys_surface_init(struct radeon_winsys *rws, si_compute_cmask(&ws->info, &config, surf_ws); } + + if (ws->gen == DRV_SI) + si_compute_htile(&ws->info, surf_ws, util_num_layers(tex, 0)); + return 0; } -- 2.30.2