ac/surface: add radeon_surf::has_stencil for convenience
[mesa.git] / src / amd / common / ac_surface.c
index b20d8189cc8687fd546479c6e3e8443c7d7a81f6..c6ff57362f79455eae6b98cb20c4fdf111afb498 100644 (file)
  */
 
 #include "ac_surface.h"
+#include "amd_family.h"
 #include "amdgpu_id.h"
+#include "ac_gpu_info.h"
 #include "util/macros.h"
+#include "util/u_atomic.h"
 #include "util/u_math.h"
 
+#include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <amdgpu.h>
@@ -146,22 +150,24 @@ static ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT * pInpu
        return ADDR_OK;
 }
 
-ADDR_HANDLE amdgpu_addr_create(enum radeon_family family,
-                              const struct amdgpu_gpu_info *info)
+ADDR_HANDLE amdgpu_addr_create(const struct radeon_info *info,
+                              const struct amdgpu_gpu_info *amdinfo,
+                              uint64_t *max_alignment)
 {
        ADDR_CREATE_INPUT addrCreateInput = {0};
        ADDR_CREATE_OUTPUT addrCreateOutput = {0};
        ADDR_REGISTER_VALUE regValue = {0};
        ADDR_CREATE_FLAGS createFlags = {{0}};
+       ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0};
        ADDR_E_RETURNCODE addrRet;
 
        addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
        addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);
 
-       regValue.gbAddrConfig = info->gb_addr_cfg;
+       regValue.gbAddrConfig = amdinfo->gb_addr_cfg;
        createFlags.value = 0;
 
-       addrlib_family_rev_id(family, &addrCreateInput.chipFamily, &addrCreateInput.chipRevision);
+       addrlib_family_rev_id(info->family, &addrCreateInput.chipFamily, &addrCreateInput.chipRevision);
        if (addrCreateInput.chipFamily == FAMILY_UNKNOWN)
                return NULL;
 
@@ -169,18 +175,18 @@ ADDR_HANDLE amdgpu_addr_create(enum radeon_family family,
                addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND;
                regValue.blockVarSizeLog2 = 0;
        } else {
-               regValue.noOfBanks = info->mc_arb_ramcfg & 0x3;
-               regValue.noOfRanks = (info->mc_arb_ramcfg & 0x4) >> 2;
+               regValue.noOfBanks = amdinfo->mc_arb_ramcfg & 0x3;
+               regValue.noOfRanks = (amdinfo->mc_arb_ramcfg & 0x4) >> 2;
 
-               regValue.backendDisables = info->enabled_rb_pipes_mask;
-               regValue.pTileConfig = info->gb_tile_mode;
-               regValue.noOfEntries = ARRAY_SIZE(info->gb_tile_mode);
+               regValue.backendDisables = amdinfo->enabled_rb_pipes_mask;
+               regValue.pTileConfig = amdinfo->gb_tile_mode;
+               regValue.noOfEntries = ARRAY_SIZE(amdinfo->gb_tile_mode);
                if (addrCreateInput.chipFamily == FAMILY_SI) {
                        regValue.pMacroTileConfig = NULL;
                        regValue.noOfMacroEntries = 0;
                } else {
-                       regValue.pMacroTileConfig = info->gb_macro_tile_mode;
-                       regValue.noOfMacroEntries = ARRAY_SIZE(info->gb_macro_tile_mode);
+                       regValue.pMacroTileConfig = amdinfo->gb_macro_tile_mode;
+                       regValue.noOfMacroEntries = ARRAY_SIZE(amdinfo->gb_macro_tile_mode);
                }
 
                createFlags.useTileIndex = 1;
@@ -199,9 +205,41 @@ ADDR_HANDLE amdgpu_addr_create(enum radeon_family family,
        if (addrRet != ADDR_OK)
                return NULL;
 
+       if (max_alignment) {
+               addrRet = AddrGetMaxAlignments(addrCreateOutput.hLib, &addrGetMaxAlignmentsOutput);
+               if (addrRet == ADDR_OK){
+                       *max_alignment = addrGetMaxAlignmentsOutput.baseAlign;
+               }
+       }
        return addrCreateOutput.hLib;
 }
 
+static int surf_config_sanity(const struct ac_surf_config *config)
+{
+       /* Validate a surface config.
+        *
+        * Returns 0 when the config is acceptable and -EINVAL when:
+        *  - any of width, height, depth, array_size or levels is 0
+        *    (all dimensions must be at least 1),
+        *  - the sample count is not one of 0, 1, 2, 4 or 8,
+        *  - a 3D surface has array_size > 1, or
+        *  - a cube surface has depth > 1.
+        */
+       if (!config->info.width || !config->info.height || !config->info.depth ||
+           !config->info.array_size || !config->info.levels)
+               return -EINVAL;
+
+       switch (config->info.samples) {
+       case 0:
+       case 1:
+       case 2:
+       case 4:
+       case 8:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if (config->is_3d && config->info.array_size > 1)
+               return -EINVAL;
+       if (config->is_cube && config->info.depth > 1)
+               return -EINVAL;
+
+       return 0;
+}
+
 static int gfx6_compute_level(ADDR_HANDLE addrlib,
                              const struct ac_surf_config *config,
                              struct radeon_surf *surf, bool is_stencil,
@@ -220,6 +258,18 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib,
        AddrSurfInfoIn->width = u_minify(config->info.width, level);
        AddrSurfInfoIn->height = u_minify(config->info.height, level);
 
+       /* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics,
+        * because GFX9 needs linear alignment of 256 bytes.
+        */
+       if (config->info.levels == 1 &&
+           AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED &&
+           AddrSurfInfoIn->bpp) {
+               unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8);
+
+               assert(util_is_power_of_two(AddrSurfInfoIn->bpp));
+               AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment);
+       }
+
        if (config->is_3d)
                AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level);
        else if (config->is_cube)
@@ -302,10 +352,9 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib,
        /* TC-compatible HTILE. */
        if (!is_stencil &&
            AddrSurfInfoIn->flags.depth &&
-           AddrSurfInfoIn->flags.tcCompatible &&
            surf_level->mode == RADEON_SURF_MODE_2D &&
            level == 0) {
-               AddrHtileIn->flags.tcCompatible = 1;
+               AddrHtileIn->flags.tcCompatible = AddrSurfInfoIn->flags.tcCompatible;
                AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
                AddrHtileIn->height = AddrSurfInfoOut->height;
                AddrHtileIn->numSlices = AddrSurfInfoOut->depth;
@@ -321,6 +370,7 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib,
 
                if (ret == ADDR_OK) {
                        surf->htile_size = AddrHtileOut->htileBytes;
+                       surf->htile_slice_size = AddrHtileOut->sliceSize;
                        surf->htile_alignment = AddrHtileOut->baseAlign;
                }
        }
@@ -332,11 +382,11 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib,
 #define   G_009910_MICRO_TILE_MODE_NEW(x)      (((x) >> 22) & 0x07)
 
 static void gfx6_set_micro_tile_mode(struct radeon_surf *surf,
-                                    const struct amdgpu_gpu_info *amdinfo)
+                                    const struct radeon_info *info)
 {
-       uint32_t tile_mode = amdinfo->gb_tile_mode[surf->u.legacy.tiling_index[0]];
+       uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]];
 
-       if (amdinfo->family_id >= AMDGPU_FAMILY_CI)
+       if (info->chip_class >= CIK)
                surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);
        else
                surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
@@ -356,6 +406,65 @@ static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
        return index;
 }
 
+/**
+ * This must be called after the first level is computed.
+ *
+ * Copy surface-global settings like pipe/bank config from level 0 surface
+ * computation, and compute tile swizzle.
+ *
+ * \param addrlib  addrlib handle, passed to AddrComputeBaseSwizzle()
+ * \param info     GPU info; chip_class is read here and the struct is
+ *                 forwarded to gfx6_set_micro_tile_mode()
+ * \param config   surface config; info.levels, info.samples and
+ *                 info.surf_index are read
+ * \param csio     addrlib surface-info output of the level 0 computation
+ * \param surf     surface to update: surf_alignment and the u.legacy tiling
+ *                 fields are always written, tile_swizzle only when the
+ *                 swizzle conditions below hold
+ * \return 0 on success, or the non-ADDR_OK code returned by
+ *         AddrComputeBaseSwizzle()
+ */
+static int gfx6_surface_settings(ADDR_HANDLE addrlib,
+                                const struct radeon_info *info,
+                                const struct ac_surf_config *config,
+                                ADDR_COMPUTE_SURFACE_INFO_OUTPUT* csio,
+                                struct radeon_surf *surf)
+{
+       surf->surf_alignment = csio->baseAlign;
+       surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1;
+       gfx6_set_micro_tile_mode(surf, info);
+
+       /* For 2D modes only. Any other tile mode just gets
+        * macro_tile_index = 0 and leaves the bank fields untouched.
+        */
+       if (csio->tileMode >= ADDR_TM_2D_TILED_THIN1) {
+               surf->u.legacy.bankw = csio->pTileInfo->bankWidth;
+               surf->u.legacy.bankh = csio->pTileInfo->bankHeight;
+               surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio;
+               surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes;
+               surf->u.legacy.num_banks = csio->pTileInfo->banks;
+               surf->u.legacy.macro_tile_index = csio->macroModeIndex;
+       } else {
+               surf->u.legacy.macro_tile_index = 0;
+       }
+
+       /* Compute tile swizzle.
+        *
+        * Only done when a surf_index counter is provided, level 0 uses
+        * RADEON_SURF_MODE_2D, the surface has neither
+        * RADEON_SURF_Z_OR_SBUFFER nor RADEON_SURF_SHAREABLE set, and it
+        * is either multisampled or not a scanout surface. Chips older
+        * than CIK additionally require a single mip level (see the TODO
+        * below).
+        *
+        * Each swizzled surface bumps the counter with
+        * p_atomic_inc_return() and passes the result minus 1 to addrlib
+        * as surfIndex.
+        */
+       /* TODO: fix tile swizzle with mipmapping for SI */
+       if ((info->chip_class >= CIK || config->info.levels == 1) &&
+           config->info.surf_index &&
+           surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
+           !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) &&
+           (config->info.samples > 1 || !(surf->flags & RADEON_SURF_SCANOUT))) {
+               ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
+               ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
+
+               AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
+               AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
+
+               AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
+               AddrBaseSwizzleIn.tileIndex = csio->tileIndex;
+               AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex;
+               AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo;
+               AddrBaseSwizzleIn.tileMode = csio->tileMode;
+
+               int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn,
+                                              &AddrBaseSwizzleOut);
+               if (r != ADDR_OK)
+                       return r;
+
+               assert(AddrBaseSwizzleOut.tileSwizzle <=
+                      u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
+               surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
+       }
+       return 0;
+}
+
 /**
  * Fill in the tiling information in \p surf based on the given surface config.
  *
@@ -363,6 +472,7 @@ static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
  * blk_w, blk_h, bpe, flags.
  */
 static int gfx6_compute_surface(ADDR_HANDLE addrlib,
+                               const struct radeon_info *info,
                                const struct ac_surf_config *config,
                                enum radeon_surf_mode mode,
                                struct radeon_surf *surf)
@@ -468,7 +578,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
         *   driver team).
         */
        AddrSurfInfoIn.flags.dccCompatible =
-               config->chip_class >= VI &&
+               info->chip_class >= VI &&
                !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
                !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
                !compressed && AddrDccIn.numSamples <= 1 &&
@@ -517,7 +627,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
                assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
                assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);
 
-               if (config->chip_class == SI) {
+               if (info->chip_class == SI) {
                        if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) {
                                if (surf->bpe == 2)
                                        AddrSurfInfoIn.tileIndex = 11; /* 16bpp */
@@ -545,37 +655,34 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
                }
        }
 
+       surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
        surf->num_dcc_levels = 0;
        surf->surf_size = 0;
        surf->dcc_size = 0;
        surf->dcc_alignment = 1;
        surf->htile_size = 0;
+       surf->htile_slice_size = 0;
        surf->htile_alignment = 1;
 
+       const bool only_stencil = (surf->flags & RADEON_SURF_SBUFFER) &&
+                                 !(surf->flags & RADEON_SURF_ZBUFFER);
+
        /* Calculate texture layout information. */
-       for (level = 0; level < config->info.levels; level++) {
-               r = gfx6_compute_level(addrlib, config, surf, false, level, compressed,
-                                      &AddrSurfInfoIn, &AddrSurfInfoOut,
-                                      &AddrDccIn, &AddrDccOut, &AddrHtileIn, &AddrHtileOut);
-               if (r)
-                       return r;
+       if (!only_stencil) {
+               for (level = 0; level < config->info.levels; level++) {
+                       r = gfx6_compute_level(addrlib, config, surf, false, level, compressed,
+                                              &AddrSurfInfoIn, &AddrSurfInfoOut,
+                                              &AddrDccIn, &AddrDccOut, &AddrHtileIn, &AddrHtileOut);
+                       if (r)
+                               return r;
 
-               if (level == 0) {
-                       surf->surf_alignment = AddrSurfInfoOut.baseAlign;
-                       surf->u.legacy.pipe_config = AddrSurfInfoOut.pTileInfo->pipeConfig - 1;
-                       gfx6_set_micro_tile_mode(surf, config->amdinfo);
-
-                       /* For 2D modes only. */
-                       if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
-                               surf->u.legacy.bankw = AddrSurfInfoOut.pTileInfo->bankWidth;
-                               surf->u.legacy.bankh = AddrSurfInfoOut.pTileInfo->bankHeight;
-                               surf->u.legacy.mtilea = AddrSurfInfoOut.pTileInfo->macroAspectRatio;
-                               surf->u.legacy.tile_split = AddrSurfInfoOut.pTileInfo->tileSplitBytes;
-                               surf->u.legacy.num_banks = AddrSurfInfoOut.pTileInfo->banks;
-                               surf->u.legacy.macro_tile_index = AddrSurfInfoOut.macroModeIndex;
-                       } else {
-                               surf->u.legacy.macro_tile_index = 0;
-                       }
+                       if (level > 0)
+                               continue;
+
+                       r = gfx6_surface_settings(addrlib, info, config,
+                                                 &AddrSurfInfoOut, surf);
+                       if (r)
+                               return r;
                }
        }
 
@@ -597,11 +704,23 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
                                return r;
 
                        /* DB uses the depth pitch for both stencil and depth. */
-                       if (surf->u.legacy.stencil_level[level].nblk_x !=
-                           surf->u.legacy.level[level].nblk_x)
-                               surf->u.legacy.stencil_adjusted = true;
+                       if (!only_stencil) {
+                               if (surf->u.legacy.stencil_level[level].nblk_x !=
+                                   surf->u.legacy.level[level].nblk_x)
+                                       surf->u.legacy.stencil_adjusted = true;
+                       } else {
+                               surf->u.legacy.level[level].nblk_x =
+                                       surf->u.legacy.stencil_level[level].nblk_x;
+                       }
 
                        if (level == 0) {
+                               if (only_stencil) {
+                                       r = gfx6_surface_settings(addrlib, info, config,
+                                                                 &AddrSurfInfoOut, surf);
+                                       if (r)
+                                               return r;
+                               }
+
                                /* For 2D modes only. */
                                if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
                                        surf->u.legacy.stencil_tile_split =
@@ -616,9 +735,16 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
         * complicated.
         */
        if (surf->dcc_size && config->info.levels > 1) {
+               /* The smallest miplevels that are never compressed by DCC
+                * still read the DCC buffer via TC if the base level uses DCC,
+                * and for some reason the DCC buffer needs to be larger if
+                * the miptree uses non-zero tile_swizzle. Otherwise there are
+                * VM faults.
+                *
+                * "dcc_alignment * 4" was determined by trial and error.
+                */
                surf->dcc_size = align64(surf->surf_size >> 8,
-                                        config->pipe_interleave_bytes *
-                                        config->num_tile_pipes);
+                                        surf->dcc_alignment * 4);
        }
 
        /* Make sure HTILE covers the whole miptree, because the shader reads
@@ -745,6 +871,7 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
                surf->u.gfx9.htile.rb_aligned = hin.hTileFlags.rbAligned;
                surf->u.gfx9.htile.pipe_aligned = hin.hTileFlags.pipeAligned;
                surf->htile_size = hout.htileBytes;
+               surf->htile_slice_size = hout.sliceSize;
                surf->htile_alignment = hout.baseAlign;
        } else {
                /* DCC */
@@ -782,6 +909,23 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
                        surf->u.gfx9.dcc_pitch_max = dout.pitch - 1;
                        surf->dcc_size = dout.dccRamSize;
                        surf->dcc_alignment = dout.dccRamBaseAlign;
+                       surf->num_dcc_levels = in->numMipLevels;
+
+                       /* Disable DCC for the smallest levels. It seems to be
+                        * required for DCC readability between CB and shaders
+                        * when TC L2 isn't flushed. This was guessed.
+                        *
+                        * Alternative solutions that also work but are worse:
+                        * - Disable DCC.
+                        * - Flush TC L2 after rendering.
+                        */
+                       for (unsigned i = 1; i < in->numMipLevels; i++) {
+                               if (mip_info[i].pitch *
+                                   mip_info[i].height * surf->bpe < 1024) {
+                                       surf->num_dcc_levels = i;
+                                       break;
+                               }
+                       }
                }
 
                /* FMASK */
@@ -882,7 +1026,9 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
        AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
        AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
        AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
-       AddrSurfInfoIn.flags.texture = 1;
+       /* flags.texture currently refers to TC-compatible HTILE */
+       AddrSurfInfoIn.flags.texture = AddrSurfInfoIn.flags.color ||
+                                      surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
        AddrSurfInfoIn.flags.opt4space = 1;
 
        AddrSurfInfoIn.numMipLevels = config->info.levels;
@@ -916,6 +1062,11 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
 
        case RADEON_SURF_MODE_1D:
        case RADEON_SURF_MODE_2D:
+               if (surf->flags & RADEON_SURF_IMPORTED) {
+                       AddrSurfInfoIn.swizzleMode = surf->u.gfx9.surf.swizzle_mode;
+                       break;
+               }
+
                r = gfx9_get_preferred_swizzle_mode(addrlib, &AddrSurfInfoIn, false,
                                                    &AddrSurfInfoIn.swizzleMode);
                if (r)
@@ -927,10 +1078,13 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
        }
 
        surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType;
+       surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
 
+       surf->num_dcc_levels = 0;
        surf->surf_size = 0;
        surf->dcc_size = 0;
        surf->htile_size = 0;
+       surf->htile_slice_size = 0;
        surf->u.gfx9.surf_offset = 0;
        surf->u.gfx9.stencil_offset = 0;
        surf->u.gfx9.fmask_size = 0;
@@ -953,7 +1107,6 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
        }
 
        surf->is_linear = surf->u.gfx9.surf.swizzle_mode == ADDR_SW_LINEAR;
-       surf->num_dcc_levels = surf->dcc_size ? config->info.levels : 0;
 
        switch (surf->u.gfx9.surf.swizzle_mode) {
                /* S = standard. */
@@ -1011,13 +1164,19 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
        return 0;
 }
 
-int ac_compute_surface(ADDR_HANDLE addrlib,
+int ac_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info,
                       const struct ac_surf_config *config,
                       enum radeon_surf_mode mode,
                       struct radeon_surf *surf)
 {
-       if (config->chip_class >= GFX9)
+       int r; /* Entry point: sanity-check the config, then dispatch on
+               * info->chip_class to the GFX9 or the GFX6 layout code and
+               * return that function's result. */
+
+       r = surf_config_sanity(config); /* reject invalid configs before any layout work */
+       if (r)
+               return r; /* propagate the sanity check's error code unchanged */
+
+       if (info->chip_class >= GFX9) /* GFX9 and newer; everything older takes the else branch */
                return gfx9_compute_surface(addrlib, config, mode, surf);
        else
-               return gfx6_compute_surface(addrlib, config, mode, surf);
+               return gfx6_compute_surface(addrlib, info, config, mode, surf);
 }