From a1b9eb62f60290e2d5aab403b1954aca7773a1e4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 9 Jun 2020 02:08:21 -0400 Subject: [PATCH] ac/surface: don't recompute the DCC retile map for imported textures The retile map is not used in this case, and the retile map computation takes 39% of CPU time when resizing a window. This brings it down to 23%. The dcc_retile_use_uint16 setting has to be derived from DCC sizes. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/common/ac_surface.c | 122 ++++++++++++++++++------------------ 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index d316216fd88..c6180865e23 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -1355,22 +1355,9 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1; assert(surf->u.gfx9.display_dcc_size <= surf->dcc_size); - /* Compute address mapping from non-displayable to displayable DCC. */ - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin = {}; - addrin.size = sizeof(addrin); - addrin.colorFlags.color = 1; - addrin.swizzleMode = din.swizzleMode; - addrin.resourceType = din.resourceType; - addrin.bpp = din.bpp; - addrin.unalignedWidth = din.unalignedWidth; - addrin.unalignedHeight = din.unalignedHeight; - addrin.numSlices = 1; - addrin.numMipLevels = 1; - addrin.numFrags = 1; - - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout = {}; - addrout.size = sizeof(addrout); - + surf->u.gfx9.dcc_retile_use_uint16 = + surf->u.gfx9.display_dcc_size <= UINT16_MAX + 1 && + surf->dcc_size <= UINT16_MAX + 1; surf->u.gfx9.dcc_retile_num_elements = DIV_ROUND_UP(in->width, dout.compressBlkWidth) * DIV_ROUND_UP(in->height, dout.compressBlkHeight) * 2; @@ -1378,53 +1365,66 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, surf->u.gfx9.dcc_retile_num_elements = align(surf->u.gfx9.dcc_retile_num_elements, 4); - surf->u.gfx9.dcc_retile_map = - malloc(surf->u.gfx9.dcc_retile_num_elements * 4); - if (!surf->u.gfx9.dcc_retile_map) - return ADDR_OUTOFMEMORY; - - unsigned index = 0; - surf->u.gfx9.dcc_retile_use_uint16 = true; - - for (unsigned y = 0; y < in->height; y += dout.compressBlkHeight) { - addrin.y = y; - - for (unsigned x = 0; x < in->width; x += dout.compressBlkWidth) { - addrin.x = x; - - /* Compute src DCC address */ - addrin.dccKeyFlags.pipeAligned = surf->u.gfx9.dcc.pipe_aligned; - addrin.dccKeyFlags.rbAligned = surf->u.gfx9.dcc.rb_aligned; - addrout.addr = 0; - - ret = Addr2ComputeDccAddrFromCoord(addrlib, &addrin, &addrout); - if (ret != ADDR_OK) - return ret; - - surf->u.gfx9.dcc_retile_map[index * 2] = addrout.addr; - if (addrout.addr > UINT16_MAX) - surf->u.gfx9.dcc_retile_use_uint16 = false; - - /* Compute dst DCC address */ - addrin.dccKeyFlags.pipeAligned = 0; - addrin.dccKeyFlags.rbAligned = 0; - addrout.addr = 0; - - ret = Addr2ComputeDccAddrFromCoord(addrlib, &addrin, &addrout); - if (ret != ADDR_OK) - return ret; - - surf->u.gfx9.dcc_retile_map[index * 2 + 1] = addrout.addr; - if (addrout.addr > UINT16_MAX) - surf->u.gfx9.dcc_retile_use_uint16 = false; - - assert(index * 2 + 1 < surf->u.gfx9.dcc_retile_num_elements); - index++; + if (!(surf->flags & RADEON_SURF_IMPORTED)) { + /* Compute address mapping from non-displayable to displayable DCC. */ + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin = {}; + addrin.size = sizeof(addrin); + addrin.colorFlags.color = 1; + addrin.swizzleMode = din.swizzleMode; + addrin.resourceType = din.resourceType; + addrin.bpp = din.bpp; + addrin.unalignedWidth = din.unalignedWidth; + addrin.unalignedHeight = din.unalignedHeight; + addrin.numSlices = 1; + addrin.numMipLevels = 1; + addrin.numFrags = 1; + + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout = {}; + addrout.size = sizeof(addrout); + + surf->u.gfx9.dcc_retile_map = + malloc(surf->u.gfx9.dcc_retile_num_elements * 4); + if (!surf->u.gfx9.dcc_retile_map) + return ADDR_OUTOFMEMORY; + + unsigned index = 0; + + for (unsigned y = 0; y < in->height; y += dout.compressBlkHeight) { + addrin.y = y; + + for (unsigned x = 0; x < in->width; x += dout.compressBlkWidth) { + addrin.x = x; + + /* Compute src DCC address */ + addrin.dccKeyFlags.pipeAligned = surf->u.gfx9.dcc.pipe_aligned; + addrin.dccKeyFlags.rbAligned = surf->u.gfx9.dcc.rb_aligned; + addrout.addr = 0; + + ret = Addr2ComputeDccAddrFromCoord(addrlib, &addrin, &addrout); + if (ret != ADDR_OK) + return ret; + + surf->u.gfx9.dcc_retile_map[index * 2] = addrout.addr; + + /* Compute dst DCC address */ + addrin.dccKeyFlags.pipeAligned = 0; + addrin.dccKeyFlags.rbAligned = 0; + addrout.addr = 0; + + ret = Addr2ComputeDccAddrFromCoord(addrlib, &addrin, &addrout); + if (ret != ADDR_OK) + return ret; + + surf->u.gfx9.dcc_retile_map[index * 2 + 1] = addrout.addr; + + assert(index * 2 + 1 < surf->u.gfx9.dcc_retile_num_elements); + index++; + } } + /* Fill the remaining pairs with the last one (for the compute shader). */ + for (unsigned i = index * 2; i < surf->u.gfx9.dcc_retile_num_elements; i++) + surf->u.gfx9.dcc_retile_map[i] = surf->u.gfx9.dcc_retile_map[i - 2]; } - /* Fill the remaining pairs with the last one (for the compute shader). */ - for (unsigned i = index * 2; i < surf->u.gfx9.dcc_retile_num_elements; i++) - surf->u.gfx9.dcc_retile_map[i] = surf->u.gfx9.dcc_retile_map[i - 2]; } } -- 2.30.2