X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcommon%2Fac_surface.c;h=81882576bafeb55c23c3081fd84a23f1489e74a4;hb=4f96747530be799e3ccd84ccf48df6d7fdbd0a03;hp=cd3d7b715cfe1268269ffd43d6a4fccbb9927af5;hpb=ddbd2f4c540425a30e36bf18df845b8f5c41495d;p=mesa.git diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index cd3d7b715cf..81882576baf 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -27,9 +27,10 @@ #include "ac_surface.h" #include "amd_family.h" -#include "amdgpu_id.h" +#include "addrlib/amdgpu_asic_addr.h" #include "ac_gpu_info.h" #include "util/macros.h" +#include "util/u_atomic.h" #include "util/u_math.h" #include @@ -48,90 +49,99 @@ #define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D #endif +static unsigned get_first(unsigned x, unsigned y) +{ + return x; +} + static void addrlib_family_rev_id(enum radeon_family family, - unsigned *addrlib_family, - unsigned *addrlib_revid) + unsigned *addrlib_family, + unsigned *addrlib_revid) { switch (family) { case CHIP_TAHITI: *addrlib_family = FAMILY_SI; - *addrlib_revid = SI_TAHITI_P_A0; + *addrlib_revid = get_first(AMDGPU_TAHITI_RANGE); break; case CHIP_PITCAIRN: *addrlib_family = FAMILY_SI; - *addrlib_revid = SI_PITCAIRN_PM_A0; + *addrlib_revid = get_first(AMDGPU_PITCAIRN_RANGE); break; case CHIP_VERDE: *addrlib_family = FAMILY_SI; - *addrlib_revid = SI_CAPEVERDE_M_A0; + *addrlib_revid = get_first(AMDGPU_CAPEVERDE_RANGE); break; case CHIP_OLAND: *addrlib_family = FAMILY_SI; - *addrlib_revid = SI_OLAND_M_A0; + *addrlib_revid = get_first(AMDGPU_OLAND_RANGE); break; case CHIP_HAINAN: *addrlib_family = FAMILY_SI; - *addrlib_revid = SI_HAINAN_V_A0; + *addrlib_revid = get_first(AMDGPU_HAINAN_RANGE); break; case CHIP_BONAIRE: *addrlib_family = FAMILY_CI; - *addrlib_revid = CI_BONAIRE_M_A0; + *addrlib_revid = get_first(AMDGPU_BONAIRE_RANGE); break; case CHIP_KAVERI: *addrlib_family = FAMILY_KV; - *addrlib_revid = KV_SPECTRE_A0; + *addrlib_revid = get_first(AMDGPU_SPECTRE_RANGE); break; case CHIP_KABINI: *addrlib_family = FAMILY_KV; - *addrlib_revid = KB_KALINDI_A0; + *addrlib_revid = get_first(AMDGPU_KALINDI_RANGE); break; case CHIP_HAWAII: *addrlib_family = FAMILY_CI; - *addrlib_revid = CI_HAWAII_P_A0; + *addrlib_revid = get_first(AMDGPU_HAWAII_RANGE); break; case CHIP_MULLINS: *addrlib_family = FAMILY_KV; - *addrlib_revid = ML_GODAVARI_A0; + *addrlib_revid = get_first(AMDGPU_GODAVARI_RANGE); break; case CHIP_TONGA: *addrlib_family = FAMILY_VI; - *addrlib_revid = VI_TONGA_P_A0; + *addrlib_revid = get_first(AMDGPU_TONGA_RANGE); break; case CHIP_ICELAND: *addrlib_family = FAMILY_VI; - *addrlib_revid = VI_ICELAND_M_A0; + *addrlib_revid = get_first(AMDGPU_ICELAND_RANGE); break; case CHIP_CARRIZO: *addrlib_family = FAMILY_CZ; - *addrlib_revid = CARRIZO_A0; + *addrlib_revid = get_first(AMDGPU_CARRIZO_RANGE); break; case CHIP_STONEY: *addrlib_family = FAMILY_CZ; - *addrlib_revid = STONEY_A0; + *addrlib_revid = get_first(AMDGPU_STONEY_RANGE); break; case CHIP_FIJI: *addrlib_family = FAMILY_VI; - *addrlib_revid = VI_FIJI_P_A0; + *addrlib_revid = get_first(AMDGPU_FIJI_RANGE); break; case CHIP_POLARIS10: *addrlib_family = FAMILY_VI; - *addrlib_revid = VI_POLARIS10_P_A0; + *addrlib_revid = get_first(AMDGPU_POLARIS10_RANGE); break; case CHIP_POLARIS11: *addrlib_family = FAMILY_VI; - *addrlib_revid = VI_POLARIS11_M_A0; + *addrlib_revid = get_first(AMDGPU_POLARIS11_RANGE); break; case CHIP_POLARIS12: *addrlib_family = FAMILY_VI; - *addrlib_revid = VI_POLARIS12_V_A0; + *addrlib_revid = get_first(AMDGPU_POLARIS12_RANGE); break; case CHIP_VEGA10: *addrlib_family = FAMILY_AI; - *addrlib_revid = AI_VEGA10_P_A0; + *addrlib_revid = get_first(AMDGPU_VEGA10_RANGE); + break; + case CHIP_VEGA12: + *addrlib_family = FAMILY_AI; + *addrlib_revid = get_first(AMDGPU_VEGA12_RANGE); break; case CHIP_RAVEN: *addrlib_family = FAMILY_RV; - *addrlib_revid = RAVEN_A0; + *addrlib_revid = get_first(AMDGPU_RAVEN_RANGE); break; default: fprintf(stderr, "amdgpu: Unknown family.\n"); @@ -157,7 +167,7 @@ ADDR_HANDLE amdgpu_addr_create(const struct radeon_info *info, ADDR_CREATE_OUTPUT addrCreateOutput = {0}; ADDR_REGISTER_VALUE regValue = {0}; ADDR_CREATE_FLAGS createFlags = {{0}}; - ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0}; + ADDR_GET_MAX_ALINGMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0}; ADDR_E_RETURNCODE addrRet; addrCreateInput.size = sizeof(ADDR_CREATE_INPUT); @@ -166,7 +176,7 @@ ADDR_HANDLE amdgpu_addr_create(const struct radeon_info *info, regValue.gbAddrConfig = amdinfo->gb_addr_cfg; createFlags.value = 0; - addrlib_family_rev_id(info->family, &addrCreateInput.chipFamily, &addrCreateInput.chipRevision); + addrlib_family_rev_id(info->family, &addrCreateInput.chipFamily, &addrCreateInput.chipRevision); if (addrCreateInput.chipFamily == FAMILY_UNKNOWN) return NULL; @@ -257,6 +267,18 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib, AddrSurfInfoIn->width = u_minify(config->info.width, level); AddrSurfInfoIn->height = u_minify(config->info.height, level); + /* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics, + * because GFX9 needs linear alignment of 256 bytes. + */ + if (config->info.levels == 1 && + AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED && + AddrSurfInfoIn->bpp) { + unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8); + + assert(util_is_power_of_two(AddrSurfInfoIn->bpp)); + AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment); + } + if (config->is_3d) AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level); else if (config->is_cube) @@ -286,7 +308,7 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib, surf_level = is_stencil ? &surf->u.legacy.stencil_level[level] : &surf->u.legacy.level[level]; surf_level->offset = align64(surf->surf_size, AddrSurfInfoOut->baseAlign); - surf_level->slice_size = AddrSurfInfoOut->sliceSize; + surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4; surf_level->nblk_x = AddrSurfInfoOut->pitch; surf_level->nblk_y = AddrSurfInfoOut->height; @@ -394,12 +416,16 @@ static unsigned cik_get_macro_tile_index(struct radeon_surf *surf) } /** + * This must be called after the first level is computed. + * * Copy surface-global settings like pipe/bank config from level 0 surface - * computation. + * computation, and compute tile swizzle. */ -static void gfx6_surface_settings(const struct radeon_info* info, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* csio, - struct radeon_surf *surf) +static int gfx6_surface_settings(ADDR_HANDLE addrlib, + const struct radeon_info *info, + const struct ac_surf_config *config, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* csio, + struct radeon_surf *surf) { surf->surf_alignment = csio->baseAlign; surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1; @@ -416,6 +442,36 @@ static void gfx6_surface_settings(const struct radeon_info* info, } else { surf->u.legacy.macro_tile_index = 0; } + + /* Compute tile swizzle. */ + /* TODO: fix tile swizzle with mipmapping for SI */ + if ((info->chip_class >= CIK || config->info.levels == 1) && + config->info.surf_index && + surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D && + !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) && + (config->info.samples > 1 || !(surf->flags & RADEON_SURF_SCANOUT))) { + ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0}; + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0}; + + AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT); + AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT); + + AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1; + AddrBaseSwizzleIn.tileIndex = csio->tileIndex; + AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex; + AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo; + AddrBaseSwizzleIn.tileMode = csio->tileMode; + + int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, + &AddrBaseSwizzleOut); + if (r != ADDR_OK) + return r; + + assert(AddrBaseSwizzleOut.tileSwizzle <= + u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8)); + surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle; + } + return 0; } /** @@ -534,22 +590,42 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, info->chip_class >= VI && !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && !(surf->flags & RADEON_SURF_DISABLE_DCC) && - !compressed && AddrDccIn.numSamples <= 1 && + !compressed && ((config->info.array_size == 1 && config->info.depth == 1) || config->info.levels == 1); AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0; AddrSurfInfoIn.flags.compressZ = AddrSurfInfoIn.flags.depth; - /* noStencil = 0 can result in a depth part that is incompatible with - * mipmapped texturing. So set noStencil = 1 when mipmaps are requested (in - * this case, we may end up setting stencil_adjusted). + /* On CI/VI, the DB uses the same pitch and tile mode (except tilesplit) + * for Z and stencil. This can cause a number of problems which we work + * around here: + * + * - a depth part that is incompatible with mipmapped texturing + * - at least on Stoney, entirely incompatible Z/S aspects (e.g. + * incorrect tiling applied to the stencil part, stencil buffer + * memory accesses that go out of bounds) even without mipmapping * - * TODO: update addrlib to a newer version, remove this, and - * use flags.matchStencilTileCfg = 1 as an alternative fix. + * Some piglit tests that are prone to different types of related + * failures: + * ./bin/ext_framebuffer_multisample-upsample 2 stencil + * ./bin/framebuffer-blit-levels {draw,read} stencil + * ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample} + * ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw} + * ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8 */ - if (config->info.levels > 1) + int stencil_tile_idx = -1; + + if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil && + (config->info.levels > 1 || info->family == CHIP_STONEY)) { + /* Compute stencilTileIdx that is compatible with the (depth) + * tileIdx. This degrades the depth surface if necessary to + * ensure that a matching stencilTileIdx exists. */ + AddrSurfInfoIn.flags.matchStencilTileCfg = 1; + + /* Keep the depth mip-tail compatible with texturing. */ AddrSurfInfoIn.flags.noStencil = 1; + } /* Set preferred macrotile parameters. This is usually required * for shared resources. This is for 2D tiling only. */ @@ -608,6 +684,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, } } + surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER); surf->num_dcc_levels = 0; surf->surf_size = 0; surf->dcc_size = 0; @@ -631,12 +708,36 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, if (level > 0) continue; - gfx6_surface_settings(info, &AddrSurfInfoOut, surf); + /* Check that we actually got a TC-compatible HTILE if + * we requested it (only for level 0, since we're not + * supporting HTILE on higher mip levels anyway). */ + assert(AddrSurfInfoOut.tcCompatible || + !AddrSurfInfoIn.flags.tcCompatible || + AddrSurfInfoIn.flags.matchStencilTileCfg); + + if (AddrSurfInfoIn.flags.matchStencilTileCfg) { + if (!AddrSurfInfoOut.tcCompatible) { + AddrSurfInfoIn.flags.tcCompatible = 0; + surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE; + } + + AddrSurfInfoIn.flags.matchStencilTileCfg = 0; + AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex; + stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx; + + assert(stencil_tile_idx >= 0); + } + + r = gfx6_surface_settings(addrlib, info, config, + &AddrSurfInfoOut, surf); + if (r) + return r; } } /* Calculate texture layout information for stencil. */ if (surf->flags & RADEON_SURF_SBUFFER) { + AddrSurfInfoIn.tileIndex = stencil_tile_idx; AddrSurfInfoIn.bpp = 8; AddrSurfInfoIn.flags.depth = 0; AddrSurfInfoIn.flags.stencil = 1; @@ -663,8 +764,12 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, } if (level == 0) { - if (only_stencil) - gfx6_surface_settings(info, &AddrSurfInfoOut, surf); + if (only_stencil) { + r = gfx6_surface_settings(addrlib, info, config, + &AddrSurfInfoOut, surf); + if (r) + return r; + } /* For 2D modes only. */ if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) { @@ -680,9 +785,16 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, * complicated. */ if (surf->dcc_size && config->info.levels > 1) { + /* The smallest miplevels that are never compressed by DCC + * still read the DCC buffer via TC if the base level uses DCC, + * and for some reason the DCC buffer needs to be larger if + * the miptree uses non-zero tile_swizzle. Otherwise there are + * VM faults. + * + * "dcc_alignment * 4" was determined by trial and error. + */ surf->dcc_size = align64(surf->surf_size >> 8, - info->pipe_interleave_bytes * - info->num_tile_pipes); + surf->dcc_alignment * 4); } /* Make sure HTILE covers the whole miptree, because the shader reads @@ -692,20 +804,9 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, surf->htile_size *= 2; surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED; - - /* workout base swizzle */ - if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) { - ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0}; - ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0}; - - AddrBaseSwizzleIn.surfIndex = config->info.surf_index; - AddrBaseSwizzleIn.tileIndex = AddrSurfInfoIn.tileIndex; - AddrBaseSwizzleIn.macroModeIndex = AddrSurfInfoOut.macroModeIndex; - AddrBaseSwizzleIn.pTileInfo = AddrSurfInfoOut.pTileInfo; - AddrBaseSwizzleIn.tileMode = AddrSurfInfoOut.tileMode; - AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, &AddrBaseSwizzleOut); - surf->u.legacy.tile_swizzle = AddrBaseSwizzleOut.tileSwizzle; - } + surf->is_displayable = surf->is_linear || + surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY || + surf->micro_tile_mode == RADEON_MICRO_MODE_ROTATED; return 0; } @@ -752,6 +853,7 @@ gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, } static int gfx9_compute_miptree(ADDR_HANDLE addrlib, + const struct ac_surf_config *config, struct radeon_surf *surf, bool compressed, ADDR2_COMPUTE_SURFACE_INFO_INPUT *in) { @@ -764,7 +866,7 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, ret = Addr2ComputeSurfaceInfo(addrlib, in, &out); if (ret != ADDR_OK) - return ret; + return ret; if (in->flags.stencil) { surf->u.gfx9.stencil.swizzle_mode = in->swizzleMode; @@ -807,8 +909,8 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT); hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT); - hin.hTileFlags.pipeAligned = 1; - hin.hTileFlags.rbAligned = 1; + hin.hTileFlags.pipeAligned = !in->flags.metaPipeUnaligned; + hin.hTileFlags.rbAligned = !in->flags.metaRbUnaligned; hin.depthFlags = in->flags; hin.swizzleMode = in->swizzleMode; hin.unalignedWidth = in->width; @@ -826,21 +928,51 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, surf->htile_slice_size = hout.sliceSize; surf->htile_alignment = hout.baseAlign; } else { + /* Compute tile swizzle for the color surface. + * All *_X and *_T modes can use the swizzle. + */ + if (config->info.surf_index && + in->swizzleMode >= ADDR_SW_64KB_Z_T && + !out.mipChainInTail && + !(surf->flags & RADEON_SURF_SHAREABLE) && + (in->numSamples > 1 || !(surf->flags & RADEON_SURF_SCANOUT))) { + ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0}; + ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0}; + + xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT); + xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT); + + xin.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1; + xin.flags = in->flags; + xin.swizzleMode = in->swizzleMode; + xin.resourceType = in->resourceType; + xin.format = in->format; + xin.numSamples = in->numSamples; + xin.numFrags = in->numFrags; + + ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout); + if (ret != ADDR_OK) + return ret; + + assert(xout.pipeBankXor <= + u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8)); + surf->tile_swizzle = xout.pipeBankXor; + } + /* DCC */ if (!(surf->flags & RADEON_SURF_DISABLE_DCC) && - !(surf->flags & RADEON_SURF_SCANOUT) && !compressed && - in->swizzleMode != ADDR_SW_LINEAR && - /* TODO: We could support DCC with MSAA. */ - in->numSamples == 1) { + in->swizzleMode != ADDR_SW_LINEAR) { ADDR2_COMPUTE_DCCINFO_INPUT din = {0}; ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0}; + ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {}; din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT); dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT); + dout.pMipInfo = meta_mip_info; - din.dccKeyFlags.pipeAligned = 1; - din.dccKeyFlags.rbAligned = 1; + din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned; + din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned; din.colorFlags = in->flags; din.resourceType = in->resourceType; din.swizzleMode = in->swizzleMode; @@ -861,6 +993,39 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, surf->u.gfx9.dcc_pitch_max = dout.pitch - 1; surf->dcc_size = dout.dccRamSize; surf->dcc_alignment = dout.dccRamBaseAlign; + surf->num_dcc_levels = in->numMipLevels; + + /* Disable DCC for levels that are in the mip tail. + * + * There are two issues that this is intended to + * address: + * + * 1. Multiple mip levels may share a cache line. This + * can lead to corruption when switching between + * rendering to different mip levels because the + * RBs don't maintain coherency. + * + * 2. Texturing with metadata after rendering sometimes + * fails with corruption, probably for a similar + * reason. + * + * Working around these issues for all levels in the + * mip tail may be overly conservative, but it's what + * Vulkan does. + * + * Alternative solutions that also work but are worse: + * - Disable DCC entirely. + * - Flush TC L2 after rendering. + */ + for (unsigned i = 0; i < in->numMipLevels; i++) { + if (meta_mip_info[i].inMiptail) { + surf->num_dcc_levels = i; + break; + } + } + + if (!surf->num_dcc_levels) + surf->dcc_size = 0; } /* FMASK */ @@ -889,6 +1054,34 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, surf->u.gfx9.fmask.epitch = fout.pitch - 1; surf->u.gfx9.fmask_size = fout.fmaskBytes; surf->u.gfx9.fmask_alignment = fout.baseAlign; + + /* Compute tile swizzle for the FMASK surface. */ + if (config->info.fmask_surf_index && + fin.swizzleMode >= ADDR_SW_64KB_Z_T && + !(surf->flags & RADEON_SURF_SHAREABLE)) { + ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0}; + ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0}; + + xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT); + xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT); + + /* This counter starts from 1 instead of 0. */ + xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index); + xin.flags = in->flags; + xin.swizzleMode = in->swizzleMode; + xin.resourceType = in->resourceType; + xin.format = in->format; + xin.numSamples = in->numSamples; + xin.numFrags = in->numFrags; + + ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout); + if (ret != ADDR_OK) + return ret; + + assert(xout.pipeBankXor <= + u_bit_consecutive(0, sizeof(surf->u.gfx9.fmask_tile_swizzle) * 8)); + surf->u.gfx9.fmask_tile_swizzle = xout.pipeBankXor; + } } /* CMASK */ @@ -899,8 +1092,14 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT); cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT); - cin.cMaskFlags.pipeAligned = 1; - cin.cMaskFlags.rbAligned = 1; + if (in->numSamples) { + /* FMASK is always aligned. */ + cin.cMaskFlags.pipeAligned = 1; + cin.cMaskFlags.rbAligned = 1; + } else { + cin.cMaskFlags.pipeAligned = !in->flags.metaPipeUnaligned; + cin.cMaskFlags.rbAligned = !in->flags.metaRbUnaligned; + } cin.colorFlags = in->flags; cin.resourceType = in->resourceType; cin.unalignedWidth = in->width; @@ -927,6 +1126,7 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, } static int gfx9_compute_surface(ADDR_HANDLE addrlib, + const struct radeon_info *info, const struct ac_surf_config *config, enum radeon_surf_mode mode, struct radeon_surf *surf) @@ -955,6 +1155,32 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib, assert(0); } } else { + switch (surf->bpe) { + case 1: + assert(!(surf->flags & RADEON_SURF_ZBUFFER)); + AddrSurfInfoIn.format = ADDR_FMT_8; + break; + case 2: + assert(surf->flags & RADEON_SURF_ZBUFFER || + !(surf->flags & RADEON_SURF_SBUFFER)); + AddrSurfInfoIn.format = ADDR_FMT_16; + break; + case 4: + assert(surf->flags & RADEON_SURF_ZBUFFER || + !(surf->flags & RADEON_SURF_SBUFFER)); + AddrSurfInfoIn.format = ADDR_FMT_32; + break; + case 8: + assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); + AddrSurfInfoIn.format = ADDR_FMT_32_32; + break; + case 16: + assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); + AddrSurfInfoIn.format = ADDR_FMT_32_32_32_32; + break; + default: + assert(0); + } AddrSurfInfoIn.bpp = surf->bpe * 8; } @@ -988,6 +1214,10 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib, else AddrSurfInfoIn.numSlices = config->info.array_size; + /* This is propagated to HTILE/DCC/CMASK. */ + AddrSurfInfoIn.flags.metaPipeUnaligned = 0; + AddrSurfInfoIn.flags.metaRbUnaligned = 0; + switch (mode) { case RADEON_SURF_MODE_LINEAR_ALIGNED: assert(config->info.samples <= 1); @@ -997,6 +1227,11 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib, case RADEON_SURF_MODE_1D: case RADEON_SURF_MODE_2D: + if (surf->flags & RADEON_SURF_IMPORTED) { + AddrSurfInfoIn.swizzleMode = surf->u.gfx9.surf.swizzle_mode; + break; + } + r = gfx9_get_preferred_swizzle_mode(addrlib, &AddrSurfInfoIn, false, &AddrSurfInfoIn.swizzleMode); if (r) @@ -1008,7 +1243,9 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib, } surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType; + surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER); + surf->num_dcc_levels = 0; surf->surf_size = 0; surf->dcc_size = 0; surf->htile_size = 0; @@ -1019,23 +1256,40 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib, surf->u.gfx9.cmask_size = 0; /* Calculate texture layout information. */ - r = gfx9_compute_miptree(addrlib, surf, compressed, &AddrSurfInfoIn); + r = gfx9_compute_miptree(addrlib, config, surf, compressed, + &AddrSurfInfoIn); if (r) return r; /* Calculate texture layout information for stencil. */ if (surf->flags & RADEON_SURF_SBUFFER) { - AddrSurfInfoIn.bpp = 8; - AddrSurfInfoIn.flags.depth = 0; AddrSurfInfoIn.flags.stencil = 1; + AddrSurfInfoIn.bpp = 8; + AddrSurfInfoIn.format = ADDR_FMT_8; + + if (!AddrSurfInfoIn.flags.depth) { + r = gfx9_get_preferred_swizzle_mode(addrlib, &AddrSurfInfoIn, false, + &AddrSurfInfoIn.swizzleMode); + if (r) + return r; + } else + AddrSurfInfoIn.flags.depth = 0; - r = gfx9_compute_miptree(addrlib, surf, compressed, &AddrSurfInfoIn); + r = gfx9_compute_miptree(addrlib, config, surf, compressed, + &AddrSurfInfoIn); if (r) return r; } surf->is_linear = surf->u.gfx9.surf.swizzle_mode == ADDR_SW_LINEAR; - surf->num_dcc_levels = surf->dcc_size ? config->info.levels : 0; + + /* Query whether the surface is displayable. */ + bool displayable = false; + r = Addr2IsValidDisplaySwizzleMode(addrlib, surf->u.gfx9.surf.swizzle_mode, + surf->bpe * 8, &displayable); + if (r) + return r; + surf->is_displayable = displayable; switch (surf->u.gfx9.surf.swizzle_mode) { /* S = standard. */ @@ -1090,6 +1344,10 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib, assert(0); } + /* Temporary workaround to prevent VM faults and hangs. */ + if (info->family == CHIP_VEGA12) + surf->u.gfx9.fmask_size *= 8; + return 0; } @@ -1105,7 +1363,7 @@ int ac_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info, return r; if (info->chip_class >= GFX9) - return gfx9_compute_surface(addrlib, config, mode, surf); + return gfx9_compute_surface(addrlib, info, config, mode, surf); else return gfx6_compute_surface(addrlib, info, config, mode, surf); }