aco: Fix integer overflows when emitting parallel copies during RA
[mesa.git] / src / amd / common / ac_surface.c
1 /*
2 * Copyright © 2011 Red Hat All Rights Reserved.
3 * Copyright © 2017 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
16 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
18 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * The above copyright notice and this permission notice (including the
24 * next paragraph) shall be included in all copies or substantial portions
25 * of the Software.
26 */
27
28 #include "ac_surface.h"
29 #include "amd_family.h"
30 #include "addrlib/src/amdgpu_asic_addr.h"
31 #include "ac_gpu_info.h"
32 #include "util/hash_table.h"
33 #include "util/macros.h"
34 #include "util/simple_mtx.h"
35 #include "util/u_atomic.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "sid.h"
39
40 #include <errno.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <amdgpu.h>
44 #include "drm-uapi/amdgpu_drm.h"
45
46 #include "addrlib/inc/addrinterface.h"
47
48 #ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
49 #define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
50 #endif
51
52 #ifndef CIASICIDGFXENGINE_ARCTICISLAND
53 #define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D
54 #endif
55
struct ac_addrlib {
	/* Handle of the underlying AMD addrlib instance. */
	ADDR_HANDLE handle;

	/* The cache of DCC retile maps for reuse when allocating images of
	 * similar sizes.
	 */
	simple_mtx_t dcc_retile_map_lock;
	/* dcc_retile_map_key -> retile map payload; guarded by
	 * dcc_retile_map_lock. Entries are owned by the cache and freed in
	 * ac_addrlib_destroy.
	 */
	struct hash_table *dcc_retile_maps;
	/* dcc_retile_tile_key -> dcc_retile_tile_data; also guarded by
	 * dcc_retile_map_lock (see ac_compute_dcc_retile_tile_indices).
	 */
	struct hash_table *dcc_retile_tile_indices;
};
66
/* Key identifying one cached DCC retile map.
 *
 * The key is hashed and compared as raw bytes (dcc_retile_map_hash_key /
 * dcc_retile_map_keys_equal), so callers must zero the whole struct
 * (padding included) before filling in the fields.
 */
struct dcc_retile_map_key {
	enum radeon_family family;
	unsigned retile_width;
	unsigned retile_height;
	bool rb_aligned;
	bool pipe_aligned;
	unsigned dcc_retile_num_elements;
	/* Full addrlib input, so maps are only shared between identical requests. */
	ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT input;
};
76
77 static uint32_t dcc_retile_map_hash_key(const void *key)
78 {
79 return _mesa_hash_data(key, sizeof(struct dcc_retile_map_key));
80 }
81
82 static bool dcc_retile_map_keys_equal(const void *a, const void *b)
83 {
84 return memcmp(a, b, sizeof(struct dcc_retile_map_key)) == 0;
85 }
86
/* hash_table destroy callback: the cache owns both the key and the map. */
static void dcc_retile_map_free(struct hash_entry *entry)
{
	free((void*)entry->key);
	free(entry->data);
}
92
/* Key identifying one cached per-tile DCC index table.
 *
 * Hashed and compared as raw bytes (dcc_retile_tile_hash_key /
 * dcc_retile_tile_keys_equal), so the struct must be fully zeroed
 * (padding included) before the fields are filled in.
 */
struct dcc_retile_tile_key {
	enum radeon_family family;
	unsigned bpp;
	unsigned swizzle_mode;
	bool rb_aligned;
	bool pipe_aligned;
};
100
/* Cached DCC element offsets within one meta block for a given
 * (family, bpp, swizzle mode, alignment) combination.
 */
struct dcc_retile_tile_data {
	unsigned tile_width_log2;  /* log2 of elements per tile row */
	unsigned tile_height_log2; /* log2 of tile rows */
	uint16_t *data;            /* one offset per element; heap-allocated, owned by the cache */
};
106
107 static uint32_t dcc_retile_tile_hash_key(const void *key)
108 {
109 return _mesa_hash_data(key, sizeof(struct dcc_retile_tile_key));
110 }
111
112 static bool dcc_retile_tile_keys_equal(const void *a, const void *b)
113 {
114 return memcmp(a, b, sizeof(struct dcc_retile_tile_key)) == 0;
115 }
116
/* hash_table destroy callback: frees the key, the index array inside the
 * payload, and the payload struct itself.
 */
static void dcc_retile_tile_free(struct hash_entry *entry)
{
	free((void*)entry->key);
	free(((struct dcc_retile_tile_data*)entry->data)->data);
	free(entry->data);
}
123
124 /* Assumes dcc_retile_map_lock is taken. */
125 static const struct dcc_retile_tile_data *
126 ac_compute_dcc_retile_tile_indices(struct ac_addrlib *addrlib,
127 const struct radeon_info *info,
128 unsigned bpp, unsigned swizzle_mode,
129 bool rb_aligned, bool pipe_aligned)
130 {
131 struct dcc_retile_tile_key key = (struct dcc_retile_tile_key) {
132 .family = info->family,
133 .bpp = bpp,
134 .swizzle_mode = swizzle_mode,
135 .rb_aligned = rb_aligned,
136 .pipe_aligned = pipe_aligned
137 };
138
139 struct hash_entry *entry = _mesa_hash_table_search(addrlib->dcc_retile_tile_indices, &key);
140 if (entry)
141 return entry->data;
142
143 ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
144 ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
145 din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
146 dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
147
148 din.dccKeyFlags.pipeAligned = pipe_aligned;
149 din.dccKeyFlags.rbAligned = rb_aligned;
150 din.resourceType = ADDR_RSRC_TEX_2D;
151 din.swizzleMode = swizzle_mode;
152 din.bpp = bpp;
153 din.unalignedWidth = 1;
154 din.unalignedHeight = 1;
155 din.numSlices = 1;
156 din.numFrags = 1;
157 din.numMipLevels = 1;
158
159 ADDR_E_RETURNCODE ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
160 if (ret != ADDR_OK)
161 return NULL;
162
163 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin = {0};
164 addrin.size = sizeof(addrin);
165 addrin.swizzleMode = swizzle_mode;
166 addrin.resourceType = ADDR_RSRC_TEX_2D;
167 addrin.bpp = bpp;
168 addrin.numSlices = 1;
169 addrin.numMipLevels = 1;
170 addrin.numFrags = 1;
171 addrin.pitch = dout.pitch;
172 addrin.height = dout.height;
173 addrin.compressBlkWidth = dout.compressBlkWidth;
174 addrin.compressBlkHeight = dout.compressBlkHeight;
175 addrin.compressBlkDepth = dout.compressBlkDepth;
176 addrin.metaBlkWidth = dout.metaBlkWidth;
177 addrin.metaBlkHeight = dout.metaBlkHeight;
178 addrin.metaBlkDepth = dout.metaBlkDepth;
179 addrin.dccKeyFlags.pipeAligned = pipe_aligned;
180 addrin.dccKeyFlags.rbAligned = rb_aligned;
181
182 unsigned w = dout.metaBlkWidth / dout.compressBlkWidth;
183 unsigned h = dout.metaBlkHeight / dout.compressBlkHeight;
184 uint16_t *indices = malloc(w * h * sizeof (uint16_t));
185 if (!indices)
186 return NULL;
187
188 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout = {};
189 addrout.size = sizeof(addrout);
190
191 for (unsigned y = 0; y < h; ++y) {
192 addrin.y = y * dout.compressBlkHeight;
193 for (unsigned x = 0; x < w; ++x) {
194 addrin.x = x * dout.compressBlkWidth;
195 addrout.addr = 0;
196
197 if (Addr2ComputeDccAddrFromCoord(addrlib->handle, &addrin, &addrout) != ADDR_OK) {
198 free(indices);
199 return NULL;
200 }
201 indices[y * w + x] = addrout.addr;
202 }
203 }
204
205 struct dcc_retile_tile_data *data = calloc(1, sizeof(*data));
206 if (!data) {
207 free(indices);
208 return NULL;
209 }
210
211 data->tile_width_log2 = util_logbase2(w);
212 data->tile_height_log2 = util_logbase2(h);
213 data->data = indices;
214
215 struct dcc_retile_tile_key *heap_key = mem_dup(&key, sizeof(key));
216 if (!heap_key) {
217 free(data);
218 free(indices);
219 return NULL;
220 }
221
222 entry = _mesa_hash_table_insert(addrlib->dcc_retile_tile_indices, heap_key, data);
223 if (!entry) {
224 free(heap_key);
225 free(data);
226 free(indices);
227 }
228 return data;
229 }
230
231 static uint32_t ac_compute_retile_tile_addr(const struct dcc_retile_tile_data *tile,
232 unsigned stride, unsigned x, unsigned y)
233 {
234 unsigned x_mask = (1u << tile->tile_width_log2) - 1;
235 unsigned y_mask = (1u << tile->tile_height_log2) - 1;
236 unsigned tile_size_log2 = tile->tile_width_log2 + tile->tile_height_log2;
237
238 unsigned base = ((y >> tile->tile_height_log2) * stride + (x >> tile->tile_width_log2)) << tile_size_log2;
239 unsigned offset_in_tile = tile->data[((y & y_mask) << tile->tile_width_log2) + (x & x_mask)];
240 return base + offset_in_tile;
241 }
242
243 static uint32_t *ac_compute_dcc_retile_map(struct ac_addrlib *addrlib,
244 const struct radeon_info *info,
245 unsigned retile_width, unsigned retile_height,
246 bool rb_aligned, bool pipe_aligned, bool use_uint16,
247 unsigned dcc_retile_num_elements,
248 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT *in)
249 {
250 unsigned dcc_retile_map_size = dcc_retile_num_elements * (use_uint16 ? 2 : 4);
251 struct dcc_retile_map_key key;
252
253 assert(in->numFrags == 1 && in->numSlices == 1 && in->numMipLevels == 1);
254
255 memset(&key, 0, sizeof(key));
256 key.family = info->family;
257 key.retile_width = retile_width;
258 key.retile_height = retile_height;
259 key.rb_aligned = rb_aligned;
260 key.pipe_aligned = pipe_aligned;
261 key.dcc_retile_num_elements = dcc_retile_num_elements;
262 memcpy(&key.input, in, sizeof(*in));
263
264 simple_mtx_lock(&addrlib->dcc_retile_map_lock);
265
266 /* If we have already computed this retile map, get it from the hash table. */
267 struct hash_entry *entry = _mesa_hash_table_search(addrlib->dcc_retile_maps, &key);
268 if (entry) {
269 uint32_t *map = entry->data;
270 simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
271 return map;
272 }
273
274 const struct dcc_retile_tile_data *src_tile =
275 ac_compute_dcc_retile_tile_indices(addrlib, info, in->bpp,
276 in->swizzleMode,
277 rb_aligned, pipe_aligned);
278 const struct dcc_retile_tile_data *dst_tile =
279 ac_compute_dcc_retile_tile_indices(addrlib, info, in->bpp,
280 in->swizzleMode, false, false);
281 if (!src_tile || !dst_tile) {
282 simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
283 return NULL;
284 }
285
286 void *dcc_retile_map = malloc(dcc_retile_map_size);
287 if (!dcc_retile_map) {
288 simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
289 return NULL;
290 }
291
292 unsigned index = 0;
293 unsigned w = DIV_ROUND_UP(retile_width, in->compressBlkWidth);
294 unsigned h = DIV_ROUND_UP(retile_height, in->compressBlkHeight);
295 unsigned src_stride = DIV_ROUND_UP(w, 1u << src_tile->tile_width_log2);
296 unsigned dst_stride = DIV_ROUND_UP(w, 1u << dst_tile->tile_width_log2);
297
298 for (unsigned y = 0; y < h; ++y) {
299 for (unsigned x = 0; x < w; ++x) {
300 unsigned src_addr = ac_compute_retile_tile_addr(src_tile, src_stride, x, y);
301 unsigned dst_addr = ac_compute_retile_tile_addr(dst_tile, dst_stride, x, y);
302
303 if (use_uint16) {
304 ((uint16_t*)dcc_retile_map)[2 * index] = src_addr;
305 ((uint16_t*)dcc_retile_map)[2 * index + 1] = dst_addr;
306 } else {
307 ((uint32_t*)dcc_retile_map)[2 * index] = src_addr;
308 ((uint32_t*)dcc_retile_map)[2 * index + 1] = dst_addr;
309 }
310 ++index;
311 }
312 }
313
314 /* Fill the remaining pairs with the last one (for the compute shader). */
315 for (unsigned i = index * 2; i < dcc_retile_num_elements; i++) {
316 if (use_uint16)
317 ((uint16_t*)dcc_retile_map)[i] = ((uint16_t*)dcc_retile_map)[i - 2];
318 else
319 ((uint32_t*)dcc_retile_map)[i] = ((uint32_t*)dcc_retile_map)[i - 2];
320 }
321
322 /* Insert the retile map into the hash table, so that it can be reused and
323 * the computation can be skipped for similar image sizes.
324 */
325 _mesa_hash_table_insert(addrlib->dcc_retile_maps,
326 mem_dup(&key, sizeof(key)), dcc_retile_map);
327
328 simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
329 return dcc_retile_map;
330 }
331
/* Addrlib system-memory callback: allocate from the heap. */
static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput)
{
	return malloc(pInput->sizeInBytes);
}
336
/* Addrlib system-memory callback: release memory from allocSysMem. */
static ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT * pInput)
{
	free(pInput->pVirtAddr);
	return ADDR_OK;
}
342
343 struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info,
344 const struct amdgpu_gpu_info *amdinfo,
345 uint64_t *max_alignment)
346 {
347 ADDR_CREATE_INPUT addrCreateInput = {0};
348 ADDR_CREATE_OUTPUT addrCreateOutput = {0};
349 ADDR_REGISTER_VALUE regValue = {0};
350 ADDR_CREATE_FLAGS createFlags = {{0}};
351 ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0};
352 ADDR_E_RETURNCODE addrRet;
353
354 addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
355 addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);
356
357 regValue.gbAddrConfig = amdinfo->gb_addr_cfg;
358 createFlags.value = 0;
359
360 addrCreateInput.chipFamily = info->family_id;
361 addrCreateInput.chipRevision = info->chip_external_rev;
362
363 if (addrCreateInput.chipFamily == FAMILY_UNKNOWN)
364 return NULL;
365
366 if (addrCreateInput.chipFamily >= FAMILY_AI) {
367 addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND;
368 } else {
369 regValue.noOfBanks = amdinfo->mc_arb_ramcfg & 0x3;
370 regValue.noOfRanks = (amdinfo->mc_arb_ramcfg & 0x4) >> 2;
371
372 regValue.backendDisables = amdinfo->enabled_rb_pipes_mask;
373 regValue.pTileConfig = amdinfo->gb_tile_mode;
374 regValue.noOfEntries = ARRAY_SIZE(amdinfo->gb_tile_mode);
375 if (addrCreateInput.chipFamily == FAMILY_SI) {
376 regValue.pMacroTileConfig = NULL;
377 regValue.noOfMacroEntries = 0;
378 } else {
379 regValue.pMacroTileConfig = amdinfo->gb_macro_tile_mode;
380 regValue.noOfMacroEntries = ARRAY_SIZE(amdinfo->gb_macro_tile_mode);
381 }
382
383 createFlags.useTileIndex = 1;
384 createFlags.useHtileSliceAlign = 1;
385
386 addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
387 }
388
389 addrCreateInput.callbacks.allocSysMem = allocSysMem;
390 addrCreateInput.callbacks.freeSysMem = freeSysMem;
391 addrCreateInput.callbacks.debugPrint = 0;
392 addrCreateInput.createFlags = createFlags;
393 addrCreateInput.regValue = regValue;
394
395 addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput);
396 if (addrRet != ADDR_OK)
397 return NULL;
398
399 if (max_alignment) {
400 addrRet = AddrGetMaxAlignments(addrCreateOutput.hLib, &addrGetMaxAlignmentsOutput);
401 if (addrRet == ADDR_OK){
402 *max_alignment = addrGetMaxAlignmentsOutput.baseAlign;
403 }
404 }
405
406 struct ac_addrlib *addrlib = calloc(1, sizeof(struct ac_addrlib));
407 if (!addrlib) {
408 AddrDestroy(addrCreateOutput.hLib);
409 return NULL;
410 }
411
412 addrlib->handle = addrCreateOutput.hLib;
413 simple_mtx_init(&addrlib->dcc_retile_map_lock, mtx_plain);
414 addrlib->dcc_retile_maps = _mesa_hash_table_create(NULL, dcc_retile_map_hash_key,
415 dcc_retile_map_keys_equal);
416 addrlib->dcc_retile_tile_indices = _mesa_hash_table_create(NULL, dcc_retile_tile_hash_key,
417 dcc_retile_tile_keys_equal);
418 return addrlib;
419 }
420
421 void ac_addrlib_destroy(struct ac_addrlib *addrlib)
422 {
423 AddrDestroy(addrlib->handle);
424 simple_mtx_destroy(&addrlib->dcc_retile_map_lock);
425 _mesa_hash_table_destroy(addrlib->dcc_retile_maps, dcc_retile_map_free);
426 _mesa_hash_table_destroy(addrlib->dcc_retile_tile_indices, dcc_retile_tile_free);
427 free(addrlib);
428 }
429
430 static int surf_config_sanity(const struct ac_surf_config *config,
431 unsigned flags)
432 {
433 /* FMASK is allocated together with the color surface and can't be
434 * allocated separately.
435 */
436 assert(!(flags & RADEON_SURF_FMASK));
437 if (flags & RADEON_SURF_FMASK)
438 return -EINVAL;
439
440 /* all dimension must be at least 1 ! */
441 if (!config->info.width || !config->info.height || !config->info.depth ||
442 !config->info.array_size || !config->info.levels)
443 return -EINVAL;
444
445 switch (config->info.samples) {
446 case 0:
447 case 1:
448 case 2:
449 case 4:
450 case 8:
451 break;
452 case 16:
453 if (flags & RADEON_SURF_Z_OR_SBUFFER)
454 return -EINVAL;
455 break;
456 default:
457 return -EINVAL;
458 }
459
460 if (!(flags & RADEON_SURF_Z_OR_SBUFFER)) {
461 switch (config->info.storage_samples) {
462 case 0:
463 case 1:
464 case 2:
465 case 4:
466 case 8:
467 break;
468 default:
469 return -EINVAL;
470 }
471 }
472
473 if (config->is_3d && config->info.array_size > 1)
474 return -EINVAL;
475 if (config->is_cube && config->info.depth > 1)
476 return -EINVAL;
477
478 return 0;
479 }
480
/* Compute the layout of one mip level via addrlib (GFX6-GFX8 path) and
 * accumulate the results into surf: level offset/pitch/height, running
 * surface size, tiling indices, and — for level-compatible surfaces —
 * DCC and HTILE metadata sizes.
 *
 * Returns 0 on success, or the non-zero ADDR_E_RETURNCODE on failure.
 */
static int gfx6_compute_level(ADDR_HANDLE addrlib,
			      const struct ac_surf_config *config,
			      struct radeon_surf *surf, bool is_stencil,
			      unsigned level, bool compressed,
			      ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
			      ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
			      ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
			      ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
			      ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
			      ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
{
	struct legacy_surf_level *surf_level;
	ADDR_E_RETURNCODE ret;

	AddrSurfInfoIn->mipLevel = level;
	AddrSurfInfoIn->width = u_minify(config->info.width, level);
	AddrSurfInfoIn->height = u_minify(config->info.height, level);

	/* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics,
	 * because GFX9 needs linear alignment of 256 bytes.
	 */
	if (config->info.levels == 1 &&
	    AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED &&
	    AddrSurfInfoIn->bpp &&
	    util_is_power_of_two_or_zero(AddrSurfInfoIn->bpp)) {
		unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8);

		AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment);
	}

	/* addrlib assumes the bytes/pixel is a divisor of 64, which is not
	 * true for r32g32b32 formats. */
	if (AddrSurfInfoIn->bpp == 96) {
		assert(config->info.levels == 1);
		assert(AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED);

		/* The least common multiple of 64 bytes and 12 bytes/pixel is
		 * 192 bytes, or 16 pixels. */
		AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, 16);
	}

	/* Slice count: minified depth for 3D, 6 faces for cubes, else the
	 * array size.
	 */
	if (config->is_3d)
		AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level);
	else if (config->is_cube)
		AddrSurfInfoIn->numSlices = 6;
	else
		AddrSurfInfoIn->numSlices = config->info.array_size;

	if (level > 0) {
		/* Set the base level pitch. This is needed for calculation
		 * of non-zero levels. */
		if (is_stencil)
			AddrSurfInfoIn->basePitch = surf->u.legacy.stencil_level[0].nblk_x;
		else
			AddrSurfInfoIn->basePitch = surf->u.legacy.level[0].nblk_x;

		/* Convert blocks to pixels for compressed formats. */
		if (compressed)
			AddrSurfInfoIn->basePitch *= surf->blk_w;
	}

	ret = AddrComputeSurfaceInfo(addrlib,
				     AddrSurfInfoIn,
				     AddrSurfInfoOut);
	if (ret != ADDR_OK) {
		return ret;
	}

	/* Record the level layout; the level starts at the current aligned
	 * end of the surface.
	 */
	surf_level = is_stencil ? &surf->u.legacy.stencil_level[level] : &surf->u.legacy.level[level];
	surf_level->offset = align64(surf->surf_size, AddrSurfInfoOut->baseAlign);
	surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4;
	surf_level->nblk_x = AddrSurfInfoOut->pitch;
	surf_level->nblk_y = AddrSurfInfoOut->height;

	switch (AddrSurfInfoOut->tileMode) {
	case ADDR_TM_LINEAR_ALIGNED:
		surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
		break;
	case ADDR_TM_1D_TILED_THIN1:
		surf_level->mode = RADEON_SURF_MODE_1D;
		break;
	case ADDR_TM_2D_TILED_THIN1:
		surf_level->mode = RADEON_SURF_MODE_2D;
		break;
	default:
		assert(0);
	}

	if (is_stencil)
		surf->u.legacy.stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex;
	else
		surf->u.legacy.tiling_index[level] = AddrSurfInfoOut->tileIndex;

	surf->surf_size = surf_level->offset + AddrSurfInfoOut->surfSize;

	/* Clear DCC fields at the beginning. */
	surf_level->dcc_offset = 0;

	/* The previous level's flag tells us if we can use DCC for this level. */
	if (AddrSurfInfoIn->flags.dccCompatible &&
	    (level == 0 || AddrDccOut->subLvlCompressible)) {
		bool prev_level_clearable = level == 0 ||
					    AddrDccOut->dccRamSizeAligned;

		AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
		AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
		AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
		AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
		AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;

		ret = AddrComputeDccInfo(addrlib,
					 AddrDccIn,
					 AddrDccOut);

		if (ret == ADDR_OK) {
			surf_level->dcc_offset = surf->dcc_size;
			surf->num_dcc_levels = level + 1;
			surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize;
			surf->dcc_alignment = MAX2(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign);

			/* If the DCC size of a subresource (1 mip level or 1 slice)
			 * is not aligned, the DCC memory layout is not contiguous for
			 * that subresource, which means we can't use fast clear.
			 *
			 * We only do fast clears for whole mipmap levels. If we did
			 * per-slice fast clears, the same restriction would apply.
			 * (i.e. only compute the slice size and see if it's aligned)
			 *
			 * The last level can be non-contiguous and still be clearable
			 * if it's interleaved with the next level that doesn't exist.
			 */
			if (AddrDccOut->dccRamSizeAligned ||
			    (prev_level_clearable && level == config->info.levels - 1))
				surf_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize;
			else
				surf_level->dcc_fast_clear_size = 0;

			/* Compute the DCC slice size because addrlib doesn't
			 * provide this info. As DCC memory is linear (each
			 * slice is the same size) it's easy to compute.
			 */
			surf->dcc_slice_size = AddrDccOut->dccRamSize / config->info.array_size;

			/* For arrays, we have to compute the DCC info again
			 * with one slice size to get a correct fast clear
			 * size.
			 */
			if (config->info.array_size > 1) {
				AddrDccIn->colorSurfSize = AddrSurfInfoOut->sliceSize;
				AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
				AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
				AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
				AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;

				ret = AddrComputeDccInfo(addrlib,
							 AddrDccIn, AddrDccOut);
				if (ret == ADDR_OK) {
					/* If the DCC memory isn't properly
					 * aligned, the data are interleaved
					 * across slices.
					 */
					if (AddrDccOut->dccRamSizeAligned)
						surf_level->dcc_slice_fast_clear_size = AddrDccOut->dccFastClearSize;
					else
						surf_level->dcc_slice_fast_clear_size = 0;
				}

				/* Contiguous layers require per-slice DCC to be
				 * fast-clearable; otherwise disable DCC entirely.
				 */
				if (surf->flags & RADEON_SURF_CONTIGUOUS_DCC_LAYERS &&
				    surf->dcc_slice_size != surf_level->dcc_slice_fast_clear_size) {
					surf->dcc_size = 0;
					surf->num_dcc_levels = 0;
					AddrDccOut->subLvlCompressible = false;
				}
			} else {
				surf_level->dcc_slice_fast_clear_size = surf_level->dcc_fast_clear_size;
			}
		}
	}

	/* HTILE: only for the depth aspect of 2D-tiled depth surfaces,
	 * computed once at level 0.
	 */
	if (!is_stencil &&
	    AddrSurfInfoIn->flags.depth &&
	    surf_level->mode == RADEON_SURF_MODE_2D &&
	    level == 0 &&
	    !(surf->flags & RADEON_SURF_NO_HTILE)) {
		AddrHtileIn->flags.tcCompatible = AddrSurfInfoOut->tcCompatible;
		AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
		AddrHtileIn->height = AddrSurfInfoOut->height;
		AddrHtileIn->numSlices = AddrSurfInfoOut->depth;
		AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8;
		AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8;
		AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo;
		AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex;
		AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;

		ret = AddrComputeHtileInfo(addrlib,
					   AddrHtileIn,
					   AddrHtileOut);

		if (ret == ADDR_OK) {
			surf->htile_size = AddrHtileOut->htileBytes;
			surf->htile_slice_size = AddrHtileOut->sliceSize;
			surf->htile_alignment = AddrHtileOut->baseAlign;
		}
	}

	return 0;
}
689
690 static void gfx6_set_micro_tile_mode(struct radeon_surf *surf,
691 const struct radeon_info *info)
692 {
693 uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]];
694
695 if (info->chip_class >= GFX7)
696 surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);
697 else
698 surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
699 }
700
701 static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
702 {
703 unsigned index, tileb;
704
705 tileb = 8 * 8 * surf->bpe;
706 tileb = MIN2(surf->u.legacy.tile_split, tileb);
707
708 for (index = 0; tileb > 64; index++)
709 tileb >>= 1;
710
711 assert(index < 16);
712 return index;
713 }
714
715 static bool get_display_flag(const struct ac_surf_config *config,
716 const struct radeon_surf *surf)
717 {
718 unsigned num_channels = config->info.num_channels;
719 unsigned bpe = surf->bpe;
720
721 if (!config->is_3d &&
722 !config->is_cube &&
723 !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
724 surf->flags & RADEON_SURF_SCANOUT &&
725 config->info.samples <= 1 &&
726 surf->blk_w <= 2 && surf->blk_h == 1) {
727 /* subsampled */
728 if (surf->blk_w == 2 && surf->blk_h == 1)
729 return true;
730
731 if (/* RGBA8 or RGBA16F */
732 (bpe >= 4 && bpe <= 8 && num_channels == 4) ||
733 /* R5G6B5 or R5G5B5A1 */
734 (bpe == 2 && num_channels >= 3) ||
735 /* C8 palette */
736 (bpe == 1 && num_channels == 1))
737 return true;
738 }
739 return false;
740 }
741
742 /**
743 * This must be called after the first level is computed.
744 *
745 * Copy surface-global settings like pipe/bank config from level 0 surface
746 * computation, and compute tile swizzle.
747 */
static int gfx6_surface_settings(ADDR_HANDLE addrlib,
				 const struct radeon_info *info,
				 const struct ac_surf_config *config,
				 ADDR_COMPUTE_SURFACE_INFO_OUTPUT* csio,
				 struct radeon_surf *surf)
{
	/* Copy the surface-global settings from the level 0 output. */
	surf->surf_alignment = csio->baseAlign;
	surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1;
	gfx6_set_micro_tile_mode(surf, info);

	/* For 2D modes only. */
	if (csio->tileMode >= ADDR_TM_2D_TILED_THIN1) {
		surf->u.legacy.bankw = csio->pTileInfo->bankWidth;
		surf->u.legacy.bankh = csio->pTileInfo->bankHeight;
		surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio;
		surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes;
		surf->u.legacy.num_banks = csio->pTileInfo->banks;
		surf->u.legacy.macro_tile_index = csio->macroModeIndex;
	} else {
		surf->u.legacy.macro_tile_index = 0;
	}

	/* Compute tile swizzle. */
	/* TODO: fix tile swizzle with mipmapping for GFX6 */
	if ((info->chip_class >= GFX7 || config->info.levels == 1) &&
	    config->info.surf_index &&
	    surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
	    !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) &&
	    !get_display_flag(config, surf)) {
		ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
		ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};

		AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
		AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);

		/* Each allocated surface gets a distinct swizzle, derived from
		 * an atomically incremented per-screen counter.
		 */
		AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
		AddrBaseSwizzleIn.tileIndex = csio->tileIndex;
		AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex;
		AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo;
		AddrBaseSwizzleIn.tileMode = csio->tileMode;

		int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn,
					       &AddrBaseSwizzleOut);
		if (r != ADDR_OK)
			return r;

		/* The swizzle must fit in the storage field. */
		assert(AddrBaseSwizzleOut.tileSwizzle <=
		       u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
		surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
	}
	return 0;
}
800
801 static void ac_compute_cmask(const struct radeon_info *info,
802 const struct ac_surf_config *config,
803 struct radeon_surf *surf)
804 {
805 unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;
806 unsigned num_pipes = info->num_tile_pipes;
807 unsigned cl_width, cl_height;
808
809 if (surf->flags & RADEON_SURF_Z_OR_SBUFFER || surf->is_linear ||
810 (config->info.samples >= 2 && !surf->fmask_size))
811 return;
812
813 assert(info->chip_class <= GFX8);
814
815 switch (num_pipes) {
816 case 2:
817 cl_width = 32;
818 cl_height = 16;
819 break;
820 case 4:
821 cl_width = 32;
822 cl_height = 32;
823 break;
824 case 8:
825 cl_width = 64;
826 cl_height = 32;
827 break;
828 case 16: /* Hawaii */
829 cl_width = 64;
830 cl_height = 64;
831 break;
832 default:
833 assert(0);
834 return;
835 }
836
837 unsigned base_align = num_pipes * pipe_interleave_bytes;
838
839 unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width*8);
840 unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height*8);
841 unsigned slice_elements = (width * height) / (8*8);
842
843 /* Each element of CMASK is a nibble. */
844 unsigned slice_bytes = slice_elements / 2;
845
846 surf->u.legacy.cmask_slice_tile_max = (width * height) / (128*128);
847 if (surf->u.legacy.cmask_slice_tile_max)
848 surf->u.legacy.cmask_slice_tile_max -= 1;
849
850 unsigned num_layers;
851 if (config->is_3d)
852 num_layers = config->info.depth;
853 else if (config->is_cube)
854 num_layers = 6;
855 else
856 num_layers = config->info.array_size;
857
858 surf->cmask_alignment = MAX2(256, base_align);
859 surf->cmask_slice_size = align(slice_bytes, base_align);
860 surf->cmask_size = surf->cmask_slice_size * num_layers;
861 }
862
863 /**
864 * Fill in the tiling information in \p surf based on the given surface config.
865 *
866 * The following fields of \p surf must be initialized by the caller:
867 * blk_w, blk_h, bpe, flags.
868 */
869 static int gfx6_compute_surface(ADDR_HANDLE addrlib,
870 const struct radeon_info *info,
871 const struct ac_surf_config *config,
872 enum radeon_surf_mode mode,
873 struct radeon_surf *surf)
874 {
875 unsigned level;
876 bool compressed;
877 ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
878 ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
879 ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
880 ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
881 ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
882 ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
883 ADDR_TILEINFO AddrTileInfoIn = {0};
884 ADDR_TILEINFO AddrTileInfoOut = {0};
885 int r;
886
887 AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
888 AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
889 AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
890 AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
891 AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT);
892 AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT);
893 AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
894
895 compressed = surf->blk_w == 4 && surf->blk_h == 4;
896
897 /* MSAA requires 2D tiling. */
898 if (config->info.samples > 1)
899 mode = RADEON_SURF_MODE_2D;
900
901 /* DB doesn't support linear layouts. */
902 if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) &&
903 mode < RADEON_SURF_MODE_1D)
904 mode = RADEON_SURF_MODE_1D;
905
906 /* Set the requested tiling mode. */
907 switch (mode) {
908 case RADEON_SURF_MODE_LINEAR_ALIGNED:
909 AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED;
910 break;
911 case RADEON_SURF_MODE_1D:
912 AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1;
913 break;
914 case RADEON_SURF_MODE_2D:
915 AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1;
916 break;
917 default:
918 assert(0);
919 }
920
921 /* The format must be set correctly for the allocation of compressed
922 * textures to work. In other cases, setting the bpp is sufficient.
923 */
924 if (compressed) {
925 switch (surf->bpe) {
926 case 8:
927 AddrSurfInfoIn.format = ADDR_FMT_BC1;
928 break;
929 case 16:
930 AddrSurfInfoIn.format = ADDR_FMT_BC3;
931 break;
932 default:
933 assert(0);
934 }
935 }
936 else {
937 AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8;
938 }
939
940 AddrDccIn.numSamples = AddrSurfInfoIn.numSamples =
941 MAX2(1, config->info.samples);
942 AddrSurfInfoIn.tileIndex = -1;
943
944 if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
945 AddrDccIn.numSamples = AddrSurfInfoIn.numFrags =
946 MAX2(1, config->info.storage_samples);
947 }
948
949 /* Set the micro tile type. */
950 if (surf->flags & RADEON_SURF_SCANOUT)
951 AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
952 else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
953 AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
954 else
955 AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;
956
957 AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
958 AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
959 AddrSurfInfoIn.flags.cube = config->is_cube;
960 AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
961 AddrSurfInfoIn.flags.pow2Pad = config->info.levels > 1;
962 AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
963
964 /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
965 * requested, because TC-compatible HTILE requires 2D tiling.
966 */
967 AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible &&
968 !AddrSurfInfoIn.flags.fmask &&
969 config->info.samples <= 1 &&
970 !(surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE);
971
972 /* DCC notes:
973 * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
974 * with samples >= 4.
975 * - Mipmapped array textures have low performance (discovered by a closed
976 * driver team).
977 */
978 AddrSurfInfoIn.flags.dccCompatible =
979 info->chip_class >= GFX8 &&
980 info->has_graphics && /* disable DCC on compute-only chips */
981 !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
982 !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
983 !compressed &&
984 ((config->info.array_size == 1 && config->info.depth == 1) ||
985 config->info.levels == 1);
986
987 AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;
988 AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
989
990 /* On GFX7-GFX8, the DB uses the same pitch and tile mode (except tilesplit)
991 * for Z and stencil. This can cause a number of problems which we work
992 * around here:
993 *
994 * - a depth part that is incompatible with mipmapped texturing
995 * - at least on Stoney, entirely incompatible Z/S aspects (e.g.
996 * incorrect tiling applied to the stencil part, stencil buffer
997 * memory accesses that go out of bounds) even without mipmapping
998 *
999 * Some piglit tests that are prone to different types of related
1000 * failures:
1001 * ./bin/ext_framebuffer_multisample-upsample 2 stencil
1002 * ./bin/framebuffer-blit-levels {draw,read} stencil
1003 * ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample}
1004 * ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw}
1005 * ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8
1006 */
1007 int stencil_tile_idx = -1;
1008
1009 if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil &&
1010 (config->info.levels > 1 || info->family == CHIP_STONEY)) {
1011 /* Compute stencilTileIdx that is compatible with the (depth)
1012 * tileIdx. This degrades the depth surface if necessary to
1013 * ensure that a matching stencilTileIdx exists. */
1014 AddrSurfInfoIn.flags.matchStencilTileCfg = 1;
1015
1016 /* Keep the depth mip-tail compatible with texturing. */
1017 AddrSurfInfoIn.flags.noStencil = 1;
1018 }
1019
1020 /* Set preferred macrotile parameters. This is usually required
1021 * for shared resources. This is for 2D tiling only. */
1022 if (AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 &&
1023 surf->u.legacy.bankw && surf->u.legacy.bankh &&
1024 surf->u.legacy.mtilea && surf->u.legacy.tile_split) {
1025 /* If any of these parameters are incorrect, the calculation
1026 * will fail. */
1027 AddrTileInfoIn.banks = surf->u.legacy.num_banks;
1028 AddrTileInfoIn.bankWidth = surf->u.legacy.bankw;
1029 AddrTileInfoIn.bankHeight = surf->u.legacy.bankh;
1030 AddrTileInfoIn.macroAspectRatio = surf->u.legacy.mtilea;
1031 AddrTileInfoIn.tileSplitBytes = surf->u.legacy.tile_split;
1032 AddrTileInfoIn.pipeConfig = surf->u.legacy.pipe_config + 1; /* +1 compared to GB_TILE_MODE */
1033 AddrSurfInfoIn.flags.opt4Space = 0;
1034 AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;
1035
1036 /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
1037 * the tile index, because we are expected to know it if
1038 * we know the other parameters.
1039 *
1040 * This is something that can easily be fixed in Addrlib.
1041 * For now, just figure it out here.
1042 * Note that only 2D_TILE_THIN1 is handled here.
1043 */
1044 assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
1045 assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);
1046
1047 if (info->chip_class == GFX6) {
1048 if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) {
1049 if (surf->bpe == 2)
1050 AddrSurfInfoIn.tileIndex = 11; /* 16bpp */
1051 else
1052 AddrSurfInfoIn.tileIndex = 12; /* 32bpp */
1053 } else {
1054 if (surf->bpe == 1)
1055 AddrSurfInfoIn.tileIndex = 14; /* 8bpp */
1056 else if (surf->bpe == 2)
1057 AddrSurfInfoIn.tileIndex = 15; /* 16bpp */
1058 else if (surf->bpe == 4)
1059 AddrSurfInfoIn.tileIndex = 16; /* 32bpp */
1060 else
1061 AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */
1062 }
1063 } else {
1064 /* GFX7 - GFX8 */
1065 if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE)
1066 AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
1067 else
1068 AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
1069
1070 /* Addrlib doesn't set this if tileIndex is forced like above. */
1071 AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);
1072 }
1073 }
1074
1075 surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
1076 surf->num_dcc_levels = 0;
1077 surf->surf_size = 0;
1078 surf->dcc_size = 0;
1079 surf->dcc_alignment = 1;
1080 surf->htile_size = 0;
1081 surf->htile_slice_size = 0;
1082 surf->htile_alignment = 1;
1083
1084 const bool only_stencil = (surf->flags & RADEON_SURF_SBUFFER) &&
1085 !(surf->flags & RADEON_SURF_ZBUFFER);
1086
1087 /* Calculate texture layout information. */
1088 if (!only_stencil) {
1089 for (level = 0; level < config->info.levels; level++) {
1090 r = gfx6_compute_level(addrlib, config, surf, false, level, compressed,
1091 &AddrSurfInfoIn, &AddrSurfInfoOut,
1092 &AddrDccIn, &AddrDccOut, &AddrHtileIn, &AddrHtileOut);
1093 if (r)
1094 return r;
1095
1096 if (level > 0)
1097 continue;
1098
1099 if (!AddrSurfInfoOut.tcCompatible) {
1100 AddrSurfInfoIn.flags.tcCompatible = 0;
1101 surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
1102 }
1103
1104 if (AddrSurfInfoIn.flags.matchStencilTileCfg) {
1105 AddrSurfInfoIn.flags.matchStencilTileCfg = 0;
1106 AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex;
1107 stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx;
1108
1109 assert(stencil_tile_idx >= 0);
1110 }
1111
1112 r = gfx6_surface_settings(addrlib, info, config,
1113 &AddrSurfInfoOut, surf);
1114 if (r)
1115 return r;
1116 }
1117 }
1118
1119 /* Calculate texture layout information for stencil. */
1120 if (surf->flags & RADEON_SURF_SBUFFER) {
1121 AddrSurfInfoIn.tileIndex = stencil_tile_idx;
1122 AddrSurfInfoIn.bpp = 8;
1123 AddrSurfInfoIn.flags.depth = 0;
1124 AddrSurfInfoIn.flags.stencil = 1;
1125 AddrSurfInfoIn.flags.tcCompatible = 0;
1126 /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
1127 AddrTileInfoIn.tileSplitBytes = surf->u.legacy.stencil_tile_split;
1128
1129 for (level = 0; level < config->info.levels; level++) {
1130 r = gfx6_compute_level(addrlib, config, surf, true, level, compressed,
1131 &AddrSurfInfoIn, &AddrSurfInfoOut,
1132 &AddrDccIn, &AddrDccOut,
1133 NULL, NULL);
1134 if (r)
1135 return r;
1136
1137 /* DB uses the depth pitch for both stencil and depth. */
1138 if (!only_stencil) {
1139 if (surf->u.legacy.stencil_level[level].nblk_x !=
1140 surf->u.legacy.level[level].nblk_x)
1141 surf->u.legacy.stencil_adjusted = true;
1142 } else {
1143 surf->u.legacy.level[level].nblk_x =
1144 surf->u.legacy.stencil_level[level].nblk_x;
1145 }
1146
1147 if (level == 0) {
1148 if (only_stencil) {
1149 r = gfx6_surface_settings(addrlib, info, config,
1150 &AddrSurfInfoOut, surf);
1151 if (r)
1152 return r;
1153 }
1154
1155 /* For 2D modes only. */
1156 if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
1157 surf->u.legacy.stencil_tile_split =
1158 AddrSurfInfoOut.pTileInfo->tileSplitBytes;
1159 }
1160 }
1161 }
1162 }
1163
1164 /* Compute FMASK. */
1165 if (config->info.samples >= 2 && AddrSurfInfoIn.flags.color &&
1166 info->has_graphics && !(surf->flags & RADEON_SURF_NO_FMASK)) {
1167 ADDR_COMPUTE_FMASK_INFO_INPUT fin = {0};
1168 ADDR_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
1169 ADDR_TILEINFO fmask_tile_info = {};
1170
1171 fin.size = sizeof(fin);
1172 fout.size = sizeof(fout);
1173
1174 fin.tileMode = AddrSurfInfoOut.tileMode;
1175 fin.pitch = AddrSurfInfoOut.pitch;
1176 fin.height = config->info.height;
1177 fin.numSlices = AddrSurfInfoIn.numSlices;
1178 fin.numSamples = AddrSurfInfoIn.numSamples;
1179 fin.numFrags = AddrSurfInfoIn.numFrags;
1180 fin.tileIndex = -1;
1181 fout.pTileInfo = &fmask_tile_info;
1182
1183 r = AddrComputeFmaskInfo(addrlib, &fin, &fout);
1184 if (r)
1185 return r;
1186
1187 surf->fmask_size = fout.fmaskBytes;
1188 surf->fmask_alignment = fout.baseAlign;
1189 surf->fmask_tile_swizzle = 0;
1190
1191 surf->u.legacy.fmask.slice_tile_max =
1192 (fout.pitch * fout.height) / 64;
1193 if (surf->u.legacy.fmask.slice_tile_max)
1194 surf->u.legacy.fmask.slice_tile_max -= 1;
1195
1196 surf->u.legacy.fmask.tiling_index = fout.tileIndex;
1197 surf->u.legacy.fmask.bankh = fout.pTileInfo->bankHeight;
1198 surf->u.legacy.fmask.pitch_in_pixels = fout.pitch;
1199 surf->u.legacy.fmask.slice_size = fout.sliceSize;
1200
1201 /* Compute tile swizzle for FMASK. */
1202 if (config->info.fmask_surf_index &&
1203 !(surf->flags & RADEON_SURF_SHAREABLE)) {
1204 ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin = {0};
1205 ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout = {0};
1206
1207 xin.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
1208 xout.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
1209
1210 /* This counter starts from 1 instead of 0. */
1211 xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
1212 xin.tileIndex = fout.tileIndex;
1213 xin.macroModeIndex = fout.macroModeIndex;
1214 xin.pTileInfo = fout.pTileInfo;
1215 xin.tileMode = fin.tileMode;
1216
1217 int r = AddrComputeBaseSwizzle(addrlib, &xin, &xout);
1218 if (r != ADDR_OK)
1219 return r;
1220
1221 assert(xout.tileSwizzle <=
1222 u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
1223 surf->fmask_tile_swizzle = xout.tileSwizzle;
1224 }
1225 }
1226
1227 /* Recalculate the whole DCC miptree size including disabled levels.
1228 * This is what addrlib does, but calling addrlib would be a lot more
1229 * complicated.
1230 */
1231 if (surf->dcc_size && config->info.levels > 1) {
1232 /* The smallest miplevels that are never compressed by DCC
1233 * still read the DCC buffer via TC if the base level uses DCC,
1234 * and for some reason the DCC buffer needs to be larger if
1235 * the miptree uses non-zero tile_swizzle. Otherwise there are
1236 * VM faults.
1237 *
1238 * "dcc_alignment * 4" was determined by trial and error.
1239 */
1240 surf->dcc_size = align64(surf->surf_size >> 8,
1241 surf->dcc_alignment * 4);
1242 }
1243
1244 /* Make sure HTILE covers the whole miptree, because the shader reads
1245 * TC-compatible HTILE even for levels where it's disabled by DB.
1246 */
1247 if (surf->htile_size && config->info.levels > 1 &&
1248 surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) {
1249 /* MSAA can't occur with levels > 1, so ignore the sample count. */
1250 const unsigned total_pixels = surf->surf_size / surf->bpe;
1251 const unsigned htile_block_size = 8 * 8;
1252 const unsigned htile_element_size = 4;
1253
1254 surf->htile_size = (total_pixels / htile_block_size) *
1255 htile_element_size;
1256 surf->htile_size = align(surf->htile_size, surf->htile_alignment);
1257 } else if (!surf->htile_size) {
1258 /* Unset this if HTILE is not present. */
1259 surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
1260 }
1261
1262 surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED;
1263 surf->is_displayable = surf->is_linear ||
1264 surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY ||
1265 surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER;
1266
1267 /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
1268 * used at the same time. This case is not currently expected to occur
1269 * because we don't use rotated. Enforce this restriction on all chips
1270 * to facilitate testing.
1271 */
1272 if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER) {
1273 assert(!"rotate micro tile mode is unsupported");
1274 return ADDR_ERROR;
1275 }
1276
1277 ac_compute_cmask(info, config, surf);
1278 return 0;
1279 }
1280
1281 /* This is only called when expecting a tiled layout. */
1282 static int
1283 gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib,
1284 struct radeon_surf *surf,
1285 ADDR2_COMPUTE_SURFACE_INFO_INPUT *in,
1286 bool is_fmask, AddrSwizzleMode *swizzle_mode)
1287 {
1288 ADDR_E_RETURNCODE ret;
1289 ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0};
1290 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout = {0};
1291
1292 sin.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT);
1293 sout.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT);
1294
1295 sin.flags = in->flags;
1296 sin.resourceType = in->resourceType;
1297 sin.format = in->format;
1298 sin.resourceLoction = ADDR_RSRC_LOC_INVIS;
1299 /* TODO: We could allow some of these: */
1300 sin.forbiddenBlock.micro = 1; /* don't allow the 256B swizzle modes */
1301 sin.forbiddenBlock.var = 1; /* don't allow the variable-sized swizzle modes */
1302 sin.bpp = in->bpp;
1303 sin.width = in->width;
1304 sin.height = in->height;
1305 sin.numSlices = in->numSlices;
1306 sin.numMipLevels = in->numMipLevels;
1307 sin.numSamples = in->numSamples;
1308 sin.numFrags = in->numFrags;
1309
1310 if (is_fmask) {
1311 sin.flags.display = 0;
1312 sin.flags.color = 0;
1313 sin.flags.fmask = 1;
1314 }
1315
1316 if (surf->flags & RADEON_SURF_FORCE_MICRO_TILE_MODE) {
1317 sin.forbiddenBlock.linear = 1;
1318
1319 if (surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)
1320 sin.preferredSwSet.sw_D = 1;
1321 else if (surf->micro_tile_mode == RADEON_MICRO_MODE_STANDARD)
1322 sin.preferredSwSet.sw_S = 1;
1323 else if (surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH)
1324 sin.preferredSwSet.sw_Z = 1;
1325 else if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER)
1326 sin.preferredSwSet.sw_R = 1;
1327 }
1328
1329 ret = Addr2GetPreferredSurfaceSetting(addrlib, &sin, &sout);
1330 if (ret != ADDR_OK)
1331 return ret;
1332
1333 *swizzle_mode = sout.swizzleMode;
1334 return 0;
1335 }
1336
1337 static bool is_dcc_supported_by_CB(const struct radeon_info *info, unsigned sw_mode)
1338 {
1339 if (info->chip_class >= GFX10)
1340 return sw_mode == ADDR_SW_64KB_Z_X || sw_mode == ADDR_SW_64KB_R_X;
1341
1342 return sw_mode != ADDR_SW_LINEAR;
1343 }
1344
1345 ASSERTED static bool is_dcc_supported_by_L2(const struct radeon_info *info,
1346 const struct radeon_surf *surf)
1347 {
1348 if (info->chip_class <= GFX9) {
1349 /* Only independent 64B blocks are supported. */
1350 return surf->u.gfx9.dcc.independent_64B_blocks &&
1351 !surf->u.gfx9.dcc.independent_128B_blocks &&
1352 surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B;
1353 }
1354
1355 if (info->family == CHIP_NAVI10) {
1356 /* Only independent 128B blocks are supported. */
1357 return !surf->u.gfx9.dcc.independent_64B_blocks &&
1358 surf->u.gfx9.dcc.independent_128B_blocks &&
1359 surf->u.gfx9.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B;
1360 }
1361
1362 if (info->family == CHIP_NAVI12 ||
1363 info->family == CHIP_NAVI14) {
1364 /* Either 64B or 128B can be used, but not both.
1365 * If 64B is used, DCC image stores are unsupported.
1366 */
1367 return surf->u.gfx9.dcc.independent_64B_blocks !=
1368 surf->u.gfx9.dcc.independent_128B_blocks &&
1369 (!surf->u.gfx9.dcc.independent_64B_blocks ||
1370 surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B) &&
1371 (!surf->u.gfx9.dcc.independent_128B_blocks ||
1372 surf->u.gfx9.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B);
1373 }
1374
1375 /* 128B is recommended, but 64B can be set too if needed for 4K by DCN.
1376 * Since there is no reason to ever disable 128B, require it.
1377 * DCC image stores are always supported.
1378 */
1379 return surf->u.gfx9.dcc.independent_128B_blocks &&
1380 surf->u.gfx9.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B;
1381 }
1382
1383 static bool is_dcc_supported_by_DCN(const struct radeon_info *info,
1384 const struct ac_surf_config *config,
1385 const struct radeon_surf *surf,
1386 bool rb_aligned, bool pipe_aligned)
1387 {
1388 if (!info->use_display_dcc_unaligned &&
1389 !info->use_display_dcc_with_retile_blit)
1390 return false;
1391
1392 /* 16bpp and 64bpp are more complicated, so they are disallowed for now. */
1393 if (surf->bpe != 4)
1394 return false;
1395
1396 /* Handle unaligned DCC. */
1397 if (info->use_display_dcc_unaligned &&
1398 (rb_aligned || pipe_aligned))
1399 return false;
1400
1401 switch (info->chip_class) {
1402 case GFX9:
1403 /* There are more constraints, but we always set
1404 * INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B,
1405 * which always works.
1406 */
1407 assert(surf->u.gfx9.dcc.independent_64B_blocks &&
1408 surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
1409 return true;
1410 case GFX10:
1411 case GFX10_3:
1412 /* DCN requires INDEPENDENT_128B_BLOCKS = 0 only on Navi1x. */
1413 if (info->chip_class == GFX10 &&
1414 surf->u.gfx9.dcc.independent_128B_blocks)
1415 return false;
1416
1417 /* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1. */
1418 return ((config->info.width <= 2560 &&
1419 config->info.height <= 2560) ||
1420 (surf->u.gfx9.dcc.independent_64B_blocks &&
1421 surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B));
1422 default:
1423 unreachable("unhandled chip");
1424 return false;
1425 }
1426 }
1427
/* Compute the layout of one GFX9+ miptree plane via addrlib and fill in the
 * matching fields of "surf".
 *
 * When in->flags.stencil is set, only the stencil plane is computed and it is
 * appended after the already-computed depth plane (surf->surf_size must
 * already hold the depth size). Otherwise the main surface is computed,
 * followed by its metadata surfaces as applicable: HTILE (depth), tile
 * swizzle, DCC (including the displayable-DCC retile map), FMASK and CMASK.
 *
 * Returns 0 on success or an addrlib ADDR_E_RETURNCODE error code.
 */
static int gfx9_compute_miptree(struct ac_addrlib *addrlib,
                                const struct radeon_info *info,
                                const struct ac_surf_config *config,
                                struct radeon_surf *surf, bool compressed,
                                ADDR2_COMPUTE_SURFACE_INFO_INPUT *in)
{
   ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {};
   ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
   ADDR_E_RETURNCODE ret;

   out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
   out.pMipInfo = mip_info;

   /* Query the main surface layout (size, alignment, pitch, per-mip info). */
   ret = Addr2ComputeSurfaceInfo(addrlib->handle, in, &out);
   if (ret != ADDR_OK)
      return ret;

   if (in->flags.stencil) {
      /* Stencil plane: record its swizzle/epitch and append it after the
       * depth plane; no metadata surfaces are computed for it here.
       */
      surf->u.gfx9.stencil.swizzle_mode = in->swizzleMode;
      surf->u.gfx9.stencil.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 :
                                                         out.mipChainPitch - 1;
      surf->surf_alignment = MAX2(surf->surf_alignment, out.baseAlign);
      surf->u.gfx9.stencil_offset = align(surf->surf_size, out.baseAlign);
      surf->surf_size = surf->u.gfx9.stencil_offset + out.surfSize;
      return 0;
   }

   surf->u.gfx9.surf.swizzle_mode = in->swizzleMode;
   surf->u.gfx9.surf.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 :
                                                   out.mipChainPitch - 1;

   /* CMASK fast clear uses these even if FMASK isn't allocated.
    * FMASK only supports the Z swizzle modes, whose numbers are multiples of 4.
    */
   surf->u.gfx9.fmask.swizzle_mode = surf->u.gfx9.surf.swizzle_mode & ~0x3;
   surf->u.gfx9.fmask.epitch = surf->u.gfx9.surf.epitch;

   surf->u.gfx9.surf_slice_size = out.sliceSize;
   surf->u.gfx9.surf_pitch = out.pitch;
   surf->u.gfx9.surf_height = out.height;
   surf->surf_size = out.surfSize;
   surf->surf_alignment = out.baseAlign;

   /* Linear block-compressed-like formats (blk_w > 1) are addressed in
    * elements, not pixels; rescale pitch/slice/size accordingly.
    */
   if (!compressed && surf->blk_w > 1 && out.pitch == out.pixelPitch &&
       surf->u.gfx9.surf.swizzle_mode == ADDR_SW_LINEAR) {
      /* Adjust surf_pitch to be in elements units not in pixels */
      surf->u.gfx9.surf_pitch =
         align(surf->u.gfx9.surf_pitch / surf->blk_w, 256 / surf->bpe);
      surf->u.gfx9.surf.epitch = MAX2(surf->u.gfx9.surf.epitch,
                                      surf->u.gfx9.surf_pitch * surf->blk_w - 1);
      /* The surface is really a surf->bpe bytes per pixel surface even if we
       * use it as a surf->bpe bytes per element one.
       * Adjust surf_slice_size and surf_size to reflect the change
       * made to surf_pitch.
       */
      surf->u.gfx9.surf_slice_size = MAX2(
         surf->u.gfx9.surf_slice_size,
         surf->u.gfx9.surf_pitch * out.height * surf->bpe * surf->blk_w);
      surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices;
   }

   /* Only linear surfaces keep per-level offsets/pitches; tiled mip chains
    * are addressed through the swizzle mode instead.
    */
   if (in->swizzleMode == ADDR_SW_LINEAR) {
      for (unsigned i = 0; i < in->numMipLevels; i++) {
         surf->u.gfx9.offset[i] = mip_info[i].offset;
         surf->u.gfx9.pitch[i] = mip_info[i].pitch;
      }
   }

   if (in->flags.depth) {
      assert(in->swizzleMode != ADDR_SW_LINEAR);

      if (surf->flags & RADEON_SURF_NO_HTILE)
         return 0;

      /* HTILE */
      ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
      ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};

      hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);
      hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);

      assert(in->flags.metaPipeUnaligned == 0);
      assert(in->flags.metaRbUnaligned == 0);

      /* Depth HTILE is always fully pipe- and RB-aligned here. */
      hin.hTileFlags.pipeAligned = 1;
      hin.hTileFlags.rbAligned = 1;
      hin.depthFlags = in->flags;
      hin.swizzleMode = in->swizzleMode;
      hin.unalignedWidth = in->width;
      hin.unalignedHeight = in->height;
      hin.numSlices = in->numSlices;
      hin.numMipLevels = in->numMipLevels;
      hin.firstMipIdInTail = out.firstMipIdInTail;

      ret = Addr2ComputeHtileInfo(addrlib->handle, &hin, &hout);
      if (ret != ADDR_OK)
         return ret;

      surf->htile_size = hout.htileBytes;
      surf->htile_slice_size = hout.sliceSize;
      surf->htile_alignment = hout.baseAlign;
      /* Depth surfaces get no DCC/FMASK/CMASK; done. */
      return 0;
   }

   {
      /* Compute tile swizzle for the color surface.
       * All *_X and *_T modes can use the swizzle.
       */
      if (config->info.surf_index &&
          in->swizzleMode >= ADDR_SW_64KB_Z_T &&
          !out.mipChainInTail &&
          !(surf->flags & RADEON_SURF_SHAREABLE) &&
          !in->flags.display) {
         ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
         ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};

         xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
         xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);

         /* surf_index counts from 1 (p_atomic_inc_return pre-increments),
          * so subtract 1 to get a 0-based index.
          */
         xin.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
         xin.flags = in->flags;
         xin.swizzleMode = in->swizzleMode;
         xin.resourceType = in->resourceType;
         xin.format = in->format;
         xin.numSamples = in->numSamples;
         xin.numFrags = in->numFrags;

         ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
         if (ret != ADDR_OK)
            return ret;

         /* The xor value must fit in the tile_swizzle field. */
         assert(xout.pipeBankXor <=
                u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
         surf->tile_swizzle = xout.pipeBankXor;
      }

      /* DCC */
      if (info->has_graphics &&
          !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
          !compressed &&
          is_dcc_supported_by_CB(info, in->swizzleMode) &&
          (!in->flags.display ||
           is_dcc_supported_by_DCN(info, config, surf,
                                   !in->flags.metaRbUnaligned,
                                   !in->flags.metaPipeUnaligned))) {
         ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
         ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
         ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {};

         din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
         dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
         dout.pMipInfo = meta_mip_info;

         din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned;
         din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned;
         din.resourceType = in->resourceType;
         din.swizzleMode = in->swizzleMode;
         din.bpp = in->bpp;
         din.unalignedWidth = in->width;
         din.unalignedHeight = in->height;
         din.numSlices = in->numSlices;
         din.numFrags = in->numFrags;
         din.numMipLevels = in->numMipLevels;
         din.dataSurfaceSize = out.surfSize;
         din.firstMipIdInTail = out.firstMipIdInTail;

         ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
         if (ret != ADDR_OK)
            return ret;

         surf->u.gfx9.dcc.rb_aligned = din.dccKeyFlags.rbAligned;
         surf->u.gfx9.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned;
         surf->u.gfx9.dcc_block_width = dout.compressBlkWidth;
         surf->u.gfx9.dcc_block_height = dout.compressBlkHeight;
         surf->u.gfx9.dcc_block_depth = dout.compressBlkDepth;
         surf->dcc_size = dout.dccRamSize;
         surf->dcc_alignment = dout.dccRamBaseAlign;
         surf->num_dcc_levels = in->numMipLevels;

         /* Disable DCC for levels that are in the mip tail.
          *
          * There are two issues that this is intended to
          * address:
          *
          * 1. Multiple mip levels may share a cache line. This
          *    can lead to corruption when switching between
          *    rendering to different mip levels because the
          *    RBs don't maintain coherency.
          *
          * 2. Texturing with metadata after rendering sometimes
          *    fails with corruption, probably for a similar
          *    reason.
          *
          * Working around these issues for all levels in the
          * mip tail may be overly conservative, but it's what
          * Vulkan does.
          *
          * Alternative solutions that also work but are worse:
          * - Disable DCC entirely.
          * - Flush TC L2 after rendering.
          */
         for (unsigned i = 0; i < in->numMipLevels; i++) {
            if (meta_mip_info[i].inMiptail) {
               /* GFX10 can only compress the first level
                * in the mip tail.
                *
                * TODO: Try to do the same thing for gfx9
                *       if there are no regressions.
                */
               if (info->chip_class >= GFX10)
                  surf->num_dcc_levels = i + 1;
               else
                  surf->num_dcc_levels = i;
               break;
            }
         }

         if (!surf->num_dcc_levels)
            surf->dcc_size = 0;

         /* By default the displayable DCC is the same as the regular DCC;
          * it is overwritten below when a retile blit is used.
          */
         surf->u.gfx9.display_dcc_size = surf->dcc_size;
         surf->u.gfx9.display_dcc_alignment = surf->dcc_alignment;
         surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1;

         /* Compute displayable DCC. */
         if (in->flags.display &&
             surf->num_dcc_levels &&
             info->use_display_dcc_with_retile_blit) {
            /* Compute displayable DCC info. */
            /* Reuse din/dout: only the alignment flags change for the
             * unaligned (displayable) variant.
             */
            din.dccKeyFlags.pipeAligned = 0;
            din.dccKeyFlags.rbAligned = 0;

            assert(din.numSlices == 1);
            assert(din.numMipLevels == 1);
            assert(din.numFrags == 1);
            assert(surf->tile_swizzle == 0);
            assert(surf->u.gfx9.dcc.pipe_aligned ||
                   surf->u.gfx9.dcc.rb_aligned);

            ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
            if (ret != ADDR_OK)
               return ret;

            surf->u.gfx9.display_dcc_size = dout.dccRamSize;
            surf->u.gfx9.display_dcc_alignment = dout.dccRamBaseAlign;
            surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1;
            assert(surf->u.gfx9.display_dcc_size <= surf->dcc_size);

            /* 16-bit retile-map elements suffice when both DCC buffers
             * are addressable with 16 bits.
             */
            surf->u.gfx9.dcc_retile_use_uint16 =
               surf->u.gfx9.display_dcc_size <= UINT16_MAX + 1 &&
               surf->dcc_size <= UINT16_MAX + 1;

            /* Align the retile map size to get more hash table hits and
             * decrease the maximum memory footprint when all retile maps
             * are cached in the hash table.
             */
            unsigned retile_dim[2] = {in->width, in->height};

            for (unsigned i = 0; i < 2; i++) {
               /* Increase the alignment as the size increases.
                * Greater alignment increases retile compute work,
                * but decreases maximum memory footprint for the cache.
                *
                * With this alignment, the worst case memory footprint of
                * the cache is:
                *   1920x1080: 55 MB
                *   2560x1440: 99 MB
                *   3840x2160: 305 MB
                *
                * The worst case size in MB can be computed in Haskell as follows:
                *   (sum (map get_retile_size (map get_dcc_size (deduplicate (map align_pair
                *   [(i*16,j*16) | i <- [1..maxwidth`div`16], j <- [1..maxheight`div`16]]))))) `div` 1024^2
                *   where
                *     alignment x = if x <= 512 then 16 else if x <= 1024 then 32 else if x <= 2048 then 64 else 128
                *     align x = (x + (alignment x) - 1) `div` (alignment x) * (alignment x)
                *     align_pair e = (align (fst e), align (snd e))
                *     deduplicate = map head . groupBy (\ a b -> ((fst a) == (fst b)) && ((snd a) == (snd b))) . sortBy compare
                *     get_dcc_size e = ((fst e) * (snd e) * bpp) `div` 256
                *     get_retile_size dcc_size = dcc_size * 2 * (if dcc_size <= 2^16 then 2 else 4)
                *     bpp = 4; maxwidth = 3840; maxheight = 2160
                */
               if (retile_dim[i] <= 512)
                  retile_dim[i] = align(retile_dim[i], 16);
               else if (retile_dim[i] <= 1024)
                  retile_dim[i] = align(retile_dim[i], 32);
               else if (retile_dim[i] <= 2048)
                  retile_dim[i] = align(retile_dim[i], 64);
               else
                  retile_dim[i] = align(retile_dim[i], 128);

               /* Don't align more than the DCC pixel alignment. */
               assert(dout.metaBlkWidth >= 128 && dout.metaBlkHeight >= 128);
            }

            /* Each retile-map entry is a (src, dst) element pair, hence * 2. */
            surf->u.gfx9.dcc_retile_num_elements =
               DIV_ROUND_UP(retile_dim[0], dout.compressBlkWidth) *
               DIV_ROUND_UP(retile_dim[1], dout.compressBlkHeight) * 2;
            /* Align the size to 4 (for the compute shader). */
            surf->u.gfx9.dcc_retile_num_elements =
               align(surf->u.gfx9.dcc_retile_num_elements, 4);

            if (!(surf->flags & RADEON_SURF_IMPORTED)) {
               /* Compute address mapping from non-displayable to displayable DCC. */
               ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin;
               memset(&addrin, 0, sizeof(addrin));
               addrin.size = sizeof(addrin);
               addrin.swizzleMode = din.swizzleMode;
               addrin.resourceType = din.resourceType;
               addrin.bpp = din.bpp;
               addrin.numSlices = 1;
               addrin.numMipLevels = 1;
               addrin.numFrags = 1;
               addrin.pitch = dout.pitch;
               addrin.height = dout.height;
               addrin.compressBlkWidth = dout.compressBlkWidth;
               addrin.compressBlkHeight = dout.compressBlkHeight;
               addrin.compressBlkDepth = dout.compressBlkDepth;
               addrin.metaBlkWidth = dout.metaBlkWidth;
               addrin.metaBlkHeight = dout.metaBlkHeight;
               addrin.metaBlkDepth = dout.metaBlkDepth;
               addrin.dccRamSliceSize = 0; /* Don't care for non-layered images. */

               /* Returned map is owned by surf (freed elsewhere); may come
                * from a cache — see ac_compute_dcc_retile_map.
                */
               surf->u.gfx9.dcc_retile_map =
                  ac_compute_dcc_retile_map(addrlib, info,
                                            retile_dim[0], retile_dim[1],
                                            surf->u.gfx9.dcc.rb_aligned,
                                            surf->u.gfx9.dcc.pipe_aligned,
                                            surf->u.gfx9.dcc_retile_use_uint16,
                                            surf->u.gfx9.dcc_retile_num_elements,
                                            &addrin);
               if (!surf->u.gfx9.dcc_retile_map)
                  return ADDR_OUTOFMEMORY;
            }
         }
      }

      /* FMASK */
      if (in->numSamples > 1 && info->has_graphics &&
          !(surf->flags & RADEON_SURF_NO_FMASK)) {
         ADDR2_COMPUTE_FMASK_INFO_INPUT fin = {0};
         ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout = {0};

         fin.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT);
         fout.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT);

         /* FMASK picks its own swizzle mode (Z family). */
         ret = gfx9_get_preferred_swizzle_mode(addrlib->handle, surf, in,
                                               true, &fin.swizzleMode);
         if (ret != ADDR_OK)
            return ret;

         fin.unalignedWidth = in->width;
         fin.unalignedHeight = in->height;
         fin.numSlices = in->numSlices;
         fin.numSamples = in->numSamples;
         fin.numFrags = in->numFrags;

         ret = Addr2ComputeFmaskInfo(addrlib->handle, &fin, &fout);
         if (ret != ADDR_OK)
            return ret;

         surf->u.gfx9.fmask.swizzle_mode = fin.swizzleMode;
         surf->u.gfx9.fmask.epitch = fout.pitch - 1;
         surf->fmask_size = fout.fmaskBytes;
         surf->fmask_alignment = fout.baseAlign;

         /* Compute tile swizzle for the FMASK surface. */
         if (config->info.fmask_surf_index &&
             fin.swizzleMode >= ADDR_SW_64KB_Z_T &&
             !(surf->flags & RADEON_SURF_SHAREABLE)) {
            ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
            ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};

            xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
            xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);

            /* This counter starts from 1 instead of 0. */
            xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
            xin.flags = in->flags;
            xin.swizzleMode = fin.swizzleMode;
            xin.resourceType = in->resourceType;
            xin.format = in->format;
            xin.numSamples = in->numSamples;
            xin.numFrags = in->numFrags;

            ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
            if (ret != ADDR_OK)
               return ret;

            assert(xout.pipeBankXor <=
                   u_bit_consecutive(0, sizeof(surf->fmask_tile_swizzle) * 8));
            surf->fmask_tile_swizzle = xout.pipeBankXor;
         }
      }

      /* CMASK -- on GFX10 only for FMASK */
      if (in->swizzleMode != ADDR_SW_LINEAR &&
          in->resourceType == ADDR_RSRC_TEX_2D &&
          ((info->chip_class <= GFX9 &&
            in->numSamples == 1 &&
            in->flags.metaPipeUnaligned == 0 &&
            in->flags.metaRbUnaligned == 0) ||
           (surf->fmask_size && in->numSamples >= 2))) {
         ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0};
         ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0};

         cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT);
         cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT);

         assert(in->flags.metaPipeUnaligned == 0);
         assert(in->flags.metaRbUnaligned == 0);

         cin.cMaskFlags.pipeAligned = 1;
         cin.cMaskFlags.rbAligned = 1;
         cin.resourceType = in->resourceType;
         cin.unalignedWidth = in->width;
         cin.unalignedHeight = in->height;
         cin.numSlices = in->numSlices;

         /* MSAA CMASK covers FMASK, so it uses FMASK's swizzle mode. */
         if (in->numSamples > 1)
            cin.swizzleMode = surf->u.gfx9.fmask.swizzle_mode;
         else
            cin.swizzleMode = in->swizzleMode;

         ret = Addr2ComputeCmaskInfo(addrlib->handle, &cin, &cout);
         if (ret != ADDR_OK)
            return ret;

         surf->cmask_size = cout.cmaskBytes;
         surf->cmask_alignment = cout.baseAlign;
      }
   }

   return 0;
}
1862
/* Compute the full layout of a GFX9+ surface: the main mip tree, the
 * optional stencil plane, and all metadata surfaces (HTILE, FMASK, CMASK,
 * DCC) via addrlib.
 *
 * Inputs come from "config" (dimensions, sample counts, levels) and from
 * "surf" (bpe, blk_w/h, RADEON_SURF_* flags). On success the size,
 * alignment, and swizzle fields of "surf" are filled in and 0 is returned;
 * otherwise a non-zero addrlib error code is returned.
 */
static int gfx9_compute_surface(struct ac_addrlib *addrlib,
				const struct radeon_info *info,
				const struct ac_surf_config *config,
				enum radeon_surf_mode mode,
				struct radeon_surf *surf)
{
	bool compressed;
	ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
	int r;

	AddrSurfInfoIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);

	/* 4x4 blocks identify BC-compressed formats in this code path. */
	compressed = surf->blk_w == 4 && surf->blk_h == 4;

	/* The format must be set correctly for the allocation of compressed
	 * textures to work. In other cases, setting the bpp is sufficient. */
	if (compressed) {
		switch (surf->bpe) {
		case 8:
			AddrSurfInfoIn.format = ADDR_FMT_BC1;
			break;
		case 16:
			AddrSurfInfoIn.format = ADDR_FMT_BC3;
			break;
		default:
			assert(0);
		}
	} else {
		switch (surf->bpe) {
		case 1:
			assert(!(surf->flags & RADEON_SURF_ZBUFFER));
			AddrSurfInfoIn.format = ADDR_FMT_8;
			break;
		case 2:
			assert(surf->flags & RADEON_SURF_ZBUFFER ||
			       !(surf->flags & RADEON_SURF_SBUFFER));
			AddrSurfInfoIn.format = ADDR_FMT_16;
			break;
		case 4:
			assert(surf->flags & RADEON_SURF_ZBUFFER ||
			       !(surf->flags & RADEON_SURF_SBUFFER));
			AddrSurfInfoIn.format = ADDR_FMT_32;
			break;
		case 8:
			assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
			AddrSurfInfoIn.format = ADDR_FMT_32_32;
			break;
		case 12:
			assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
			AddrSurfInfoIn.format = ADDR_FMT_32_32_32;
			break;
		case 16:
			assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
			AddrSurfInfoIn.format = ADDR_FMT_32_32_32_32;
			break;
		default:
			assert(0);
		}
		AddrSurfInfoIn.bpp = surf->bpe * 8;
	}

	bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
	AddrSurfInfoIn.flags.color = is_color_surface &&
	                             !(surf->flags & RADEON_SURF_NO_RENDER_TARGET);
	AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
	AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
	/* flags.texture currently refers to TC-compatible HTILE */
	AddrSurfInfoIn.flags.texture = is_color_surface ||
	                               surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
	AddrSurfInfoIn.flags.opt4space = 1;

	AddrSurfInfoIn.numMipLevels = config->info.levels;
	AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
	AddrSurfInfoIn.numFrags = AddrSurfInfoIn.numSamples;

	/* Color surfaces may store fewer fragments than samples (EQAA). */
	if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER))
		AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);

	/* GFX9 doesn't support 1D depth textures, so allocate all 1D textures
	 * as 2D to avoid having shader variants for 1D vs 2D, so all shaders
	 * must sample 1D textures as 2D. */
	if (config->is_3d)
		AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D;
	else if (info->chip_class != GFX9 && config->is_1d)
		AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_1D;
	else
		AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_2D;

	AddrSurfInfoIn.width = config->info.width;
	AddrSurfInfoIn.height = config->info.height;

	if (config->is_3d)
		AddrSurfInfoIn.numSlices = config->info.depth;
	else if (config->is_cube)
		AddrSurfInfoIn.numSlices = 6;
	else
		AddrSurfInfoIn.numSlices = config->info.array_size;

	/* This is propagated to DCC. It must be 0 for HTILE and CMASK. */
	AddrSurfInfoIn.flags.metaPipeUnaligned = 0;
	AddrSurfInfoIn.flags.metaRbUnaligned = 0;

	/* Optimal values for the L2 cache. */
	if (info->chip_class == GFX9) {
		surf->u.gfx9.dcc.independent_64B_blocks = 1;
		surf->u.gfx9.dcc.independent_128B_blocks = 0;
		surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
	} else if (info->chip_class >= GFX10) {
		surf->u.gfx9.dcc.independent_64B_blocks = 0;
		surf->u.gfx9.dcc.independent_128B_blocks = 1;
		surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
	}

	if (AddrSurfInfoIn.flags.display) {
		/* The display hardware can only read DCC with RB_ALIGNED=0 and
		 * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED.
		 *
		 * The CB block requires RB_ALIGNED=1 except 1 RB chips.
		 * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes
		 * after rendering, so PIPE_ALIGNED=1 is recommended.
		 */
		if (info->use_display_dcc_unaligned) {
			AddrSurfInfoIn.flags.metaPipeUnaligned = 1;
			AddrSurfInfoIn.flags.metaRbUnaligned = 1;
		}

		/* Adjust DCC settings to meet DCN requirements. */
		if (info->use_display_dcc_unaligned ||
		    info->use_display_dcc_with_retile_blit) {
			/* Only Navi12/14 support independent 64B blocks in L2,
			 * but without DCC image stores.
			 */
			if (info->family == CHIP_NAVI12 ||
			    info->family == CHIP_NAVI14) {
				surf->u.gfx9.dcc.independent_64B_blocks = 1;
				surf->u.gfx9.dcc.independent_128B_blocks = 0;
				surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
			}

			if (info->chip_class >= GFX10_3) {
				surf->u.gfx9.dcc.independent_64B_blocks = 1;
				surf->u.gfx9.dcc.independent_128B_blocks = 1;
				surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
			}
		}
	}

	/* Pick the swizzle mode: linear, imported/forced, or the addrlib
	 * preference for tiled surfaces. */
	switch (mode) {
	case RADEON_SURF_MODE_LINEAR_ALIGNED:
		assert(config->info.samples <= 1);
		assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
		AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR;
		break;

	case RADEON_SURF_MODE_1D:
	case RADEON_SURF_MODE_2D:
		if (surf->flags & RADEON_SURF_IMPORTED ||
		    (info->chip_class >= GFX10 &&
		     surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE)) {
			AddrSurfInfoIn.swizzleMode = surf->u.gfx9.surf.swizzle_mode;
			break;
		}

		r = gfx9_get_preferred_swizzle_mode(addrlib->handle, surf, &AddrSurfInfoIn,
						    false, &AddrSurfInfoIn.swizzleMode);
		if (r)
			return r;
		break;

	default:
		assert(0);
	}

	surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType;
	surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);

	/* Reset all output fields; the gfx9_compute_miptree calls below
	 * accumulate into them. */
	surf->num_dcc_levels = 0;
	surf->surf_size = 0;
	surf->fmask_size = 0;
	surf->dcc_size = 0;
	surf->htile_size = 0;
	surf->htile_slice_size = 0;
	surf->u.gfx9.surf_offset = 0;
	surf->u.gfx9.stencil_offset = 0;
	surf->cmask_size = 0;
	surf->u.gfx9.dcc_retile_use_uint16 = false;
	surf->u.gfx9.dcc_retile_num_elements = 0;
	surf->u.gfx9.dcc_retile_map = NULL;

	/* Calculate texture layout information. */
	r = gfx9_compute_miptree(addrlib, info, config, surf, compressed,
				 &AddrSurfInfoIn);
	if (r)
		return r;

	/* Calculate texture layout information for stencil. */
	if (surf->flags & RADEON_SURF_SBUFFER) {
		AddrSurfInfoIn.flags.stencil = 1;
		AddrSurfInfoIn.bpp = 8;
		AddrSurfInfoIn.format = ADDR_FMT_8;

		if (!AddrSurfInfoIn.flags.depth) {
			r = gfx9_get_preferred_swizzle_mode(addrlib->handle, surf, &AddrSurfInfoIn,
							    false, &AddrSurfInfoIn.swizzleMode);
			if (r)
				return r;
		} else
			AddrSurfInfoIn.flags.depth = 0;

		r = gfx9_compute_miptree(addrlib, info, config, surf, compressed,
					 &AddrSurfInfoIn);
		if (r)
			return r;
	}

	surf->is_linear = surf->u.gfx9.surf.swizzle_mode == ADDR_SW_LINEAR;

	/* Query whether the surface is displayable. */
	/* This is only useful for surfaces that are allocated without SCANOUT. */
	bool displayable = false;
	if (!config->is_3d && !config->is_cube) {
		r = Addr2IsValidDisplaySwizzleMode(addrlib->handle, surf->u.gfx9.surf.swizzle_mode,
						   surf->bpe * 8, &displayable);
		if (r)
			return r;

		/* Display needs unaligned DCC. */
		if (surf->num_dcc_levels &&
		    (!is_dcc_supported_by_DCN(info, config, surf,
					      surf->u.gfx9.dcc.rb_aligned,
					      surf->u.gfx9.dcc.pipe_aligned) ||
		     /* Don't set is_displayable if displayable DCC is missing. */
		     (info->use_display_dcc_with_retile_blit &&
		      !surf->u.gfx9.dcc_retile_num_elements)))
			displayable = false;
	}
	surf->is_displayable = displayable;

	/* Validate that we allocated a displayable surface if requested. */
	assert(!AddrSurfInfoIn.flags.display || surf->is_displayable);

	/* Validate that DCC is set up correctly. */
	if (surf->num_dcc_levels) {
		assert(is_dcc_supported_by_L2(info, surf));
		if (AddrSurfInfoIn.flags.color)
			assert(is_dcc_supported_by_CB(info, surf->u.gfx9.surf.swizzle_mode));
		if (AddrSurfInfoIn.flags.display) {
			assert(is_dcc_supported_by_DCN(info, config, surf,
						       surf->u.gfx9.dcc.rb_aligned,
						       surf->u.gfx9.dcc.pipe_aligned));
		}
	}

	/* Sanity-check that DCC was enabled whenever it should have been. */
	if (info->has_graphics &&
	    !compressed &&
	    !config->is_3d &&
	    config->info.levels == 1 &&
	    AddrSurfInfoIn.flags.color &&
	    !surf->is_linear &&
	    surf->surf_alignment >= 64 * 1024 && /* 64KB tiling */
	    !(surf->flags & (RADEON_SURF_DISABLE_DCC |
			     RADEON_SURF_FORCE_SWIZZLE_MODE |
			     RADEON_SURF_FORCE_MICRO_TILE_MODE))) {
		/* Validate that DCC is enabled if DCN can do it. */
		if ((info->use_display_dcc_unaligned ||
		     info->use_display_dcc_with_retile_blit) &&
		    AddrSurfInfoIn.flags.display &&
		    surf->bpe == 4) {
			assert(surf->num_dcc_levels);
		}

		/* Validate that non-scanout DCC is always enabled. */
		if (!AddrSurfInfoIn.flags.display)
			assert(surf->num_dcc_levels);
	}

	if (!surf->htile_size) {
		/* Unset this if HTILE is not present. */
		surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
	}

	/* Derive the micro tile mode from the chosen swizzle mode. */
	switch (surf->u.gfx9.surf.swizzle_mode) {
	/* S = standard. */
	case ADDR_SW_256B_S:
	case ADDR_SW_4KB_S:
	case ADDR_SW_64KB_S:
	case ADDR_SW_64KB_S_T:
	case ADDR_SW_4KB_S_X:
	case ADDR_SW_64KB_S_X:
		surf->micro_tile_mode = RADEON_MICRO_MODE_STANDARD;
		break;

	/* D = display. */
	case ADDR_SW_LINEAR:
	case ADDR_SW_256B_D:
	case ADDR_SW_4KB_D:
	case ADDR_SW_64KB_D:
	case ADDR_SW_64KB_D_T:
	case ADDR_SW_4KB_D_X:
	case ADDR_SW_64KB_D_X:
		surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY;
		break;

	/* R = rotated (gfx9), render target (gfx10). */
	case ADDR_SW_256B_R:
	case ADDR_SW_4KB_R:
	case ADDR_SW_64KB_R:
	case ADDR_SW_64KB_R_T:
	case ADDR_SW_4KB_R_X:
	case ADDR_SW_64KB_R_X:
	case ADDR_SW_VAR_R_X:
		/* The rotated micro tile mode doesn't work if both CMASK and RB+ are
		 * used at the same time. We currently do not use rotated
		 * in gfx9.
		 */
		assert(info->chip_class >= GFX10 ||
		       !"rotate micro tile mode is unsupported");
		surf->micro_tile_mode = RADEON_MICRO_MODE_RENDER;
		break;

	/* Z = depth. */
	case ADDR_SW_4KB_Z:
	case ADDR_SW_64KB_Z:
	case ADDR_SW_64KB_Z_T:
	case ADDR_SW_4KB_Z_X:
	case ADDR_SW_64KB_Z_X:
	case ADDR_SW_VAR_Z_X:
		surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH;
		break;

	default:
		assert(0);
	}

	return 0;
}
2199
/* Entry point: compute the layout of a surface for any supported GPU.
 *
 * Dispatches to the GFX6 or GFX9+ path, then packs the main surface and
 * all of its metadata allocations into a single buffer by assigning
 * aligned offsets in a fixed order:
 *   surface -> HTILE -> FMASK -> CMASK -> displayable DCC + retile map -> DCC.
 * total_size and alignment are accumulated as each allocation is placed.
 * Returns 0 on success, non-zero on error.
 */
int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
		       const struct ac_surf_config *config,
		       enum radeon_surf_mode mode,
		       struct radeon_surf *surf)
{
	int r;

	r = surf_config_sanity(config, surf->flags);
	if (r)
		return r;

	if (info->chip_class >= GFX9)
		r = gfx9_compute_surface(addrlib, info, config, mode, surf);
	else
		r = gfx6_compute_surface(addrlib->handle, info, config, mode, surf);

	if (r)
		return r;

	/* Determine the memory layout of multiple allocations in one buffer. */
	surf->total_size = surf->surf_size;
	surf->alignment = surf->surf_alignment;

	if (surf->htile_size) {
		surf->htile_offset = align64(surf->total_size, surf->htile_alignment);
		surf->total_size = surf->htile_offset + surf->htile_size;
		surf->alignment = MAX2(surf->alignment, surf->htile_alignment);
	}

	if (surf->fmask_size) {
		assert(config->info.samples >= 2);
		surf->fmask_offset = align64(surf->total_size, surf->fmask_alignment);
		surf->total_size = surf->fmask_offset + surf->fmask_size;
		surf->alignment = MAX2(surf->alignment, surf->fmask_alignment);
	}

	/* Single-sample CMASK is in a separate buffer. */
	if (surf->cmask_size && config->info.samples >= 2) {
		surf->cmask_offset = align64(surf->total_size, surf->cmask_alignment);
		surf->total_size = surf->cmask_offset + surf->cmask_size;
		surf->alignment = MAX2(surf->alignment, surf->cmask_alignment);
	}

	if (surf->is_displayable)
		surf->flags |= RADEON_SURF_SCANOUT;

	if (surf->dcc_size &&
	    /* dcc_size is computed on GFX9+ only if it's displayable. */
	    (info->chip_class >= GFX9 || !get_display_flag(config, surf))) {
		/* It's better when displayable DCC is immediately after
		 * the image due to hw-specific reasons.
		 */
		if (info->chip_class >= GFX9 &&
		    surf->u.gfx9.dcc_retile_num_elements) {
			/* Add space for the displayable DCC buffer. */
			surf->display_dcc_offset =
				align64(surf->total_size, surf->u.gfx9.display_dcc_alignment);
			surf->total_size = surf->display_dcc_offset +
					   surf->u.gfx9.display_dcc_size;

			/* Add space for the DCC retile buffer. (16-bit or 32-bit elements) */
			surf->dcc_retile_map_offset =
				align64(surf->total_size, info->tcc_cache_line_size);

			if (surf->u.gfx9.dcc_retile_use_uint16) {
				surf->total_size = surf->dcc_retile_map_offset +
						   surf->u.gfx9.dcc_retile_num_elements * 2;
			} else {
				surf->total_size = surf->dcc_retile_map_offset +
						   surf->u.gfx9.dcc_retile_num_elements * 4;
			}
		}

		surf->dcc_offset = align64(surf->total_size, surf->dcc_alignment);
		surf->total_size = surf->dcc_offset + surf->dcc_size;
		surf->alignment = MAX2(surf->alignment, surf->dcc_alignment);
	}

	return 0;
}
2280
2281 /* This is meant to be used for disabling DCC. */
2282 void ac_surface_zero_dcc_fields(struct radeon_surf *surf)
2283 {
2284 surf->dcc_offset = 0;
2285 surf->display_dcc_offset = 0;
2286 surf->dcc_retile_map_offset = 0;
2287 }
2288
/* Decode the 3-bit TILE_SPLIT tiling-flag encoding into a byte count.
 * Out-of-range encodings decode to 1024 bytes.
 */
static unsigned eg_tile_split(unsigned tile_split)
{
	static const unsigned split_bytes[] = {
		64, 128, 256, 512, 1024, 2048, 4096,
	};

	if (tile_split < sizeof(split_bytes) / sizeof(split_bytes[0]))
		return split_bytes[tile_split];

	return 1024;
}
2303
/* Encode a tile-split byte count back into the 3-bit TILE_SPLIT
 * tiling-flag field. Unrecognized byte counts encode as 4 (1024 bytes).
 */
static unsigned eg_tile_split_rev(unsigned bytes)
{
	static const unsigned split_bytes[] = {
		64, 128, 256, 512, 1024, 2048, 4096,
	};

	for (unsigned i = 0; i < sizeof(split_bytes) / sizeof(split_bytes[0]); i++) {
		if (split_bytes[i] == bytes)
			return i;
	}

	return 4; /* 1024 bytes */
}
2317
2318 #define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
2319 #define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK 0x3
2320
2321 /* This should be called before ac_compute_surface. */
2322 void ac_surface_set_bo_metadata(const struct radeon_info *info,
2323 struct radeon_surf *surf, uint64_t tiling_flags,
2324 enum radeon_surf_mode *mode)
2325 {
2326 bool scanout;
2327
2328 if (info->chip_class >= GFX9) {
2329 surf->u.gfx9.surf.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
2330 surf->u.gfx9.dcc.independent_64B_blocks = AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B);
2331 surf->u.gfx9.dcc.independent_128B_blocks = AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_128B);
2332 surf->u.gfx9.dcc.max_compressed_block_size = AMDGPU_TILING_GET(tiling_flags, DCC_MAX_COMPRESSED_BLOCK_SIZE);
2333 surf->u.gfx9.display_dcc_pitch_max = AMDGPU_TILING_GET(tiling_flags, DCC_PITCH_MAX);
2334 scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
2335 *mode = surf->u.gfx9.surf.swizzle_mode > 0 ? RADEON_SURF_MODE_2D : RADEON_SURF_MODE_LINEAR_ALIGNED;
2336 } else {
2337 surf->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
2338 surf->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
2339 surf->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
2340 surf->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
2341 surf->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
2342 surf->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
2343 scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
2344
2345 if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
2346 *mode = RADEON_SURF_MODE_2D;
2347 else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
2348 *mode = RADEON_SURF_MODE_1D;
2349 else
2350 *mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
2351 }
2352
2353 if (scanout)
2354 surf->flags |= RADEON_SURF_SCANOUT;
2355 else
2356 surf->flags &= ~RADEON_SURF_SCANOUT;
2357 }
2358
2359 void ac_surface_get_bo_metadata(const struct radeon_info *info,
2360 struct radeon_surf *surf, uint64_t *tiling_flags)
2361 {
2362 *tiling_flags = 0;
2363
2364 if (info->chip_class >= GFX9) {
2365 uint64_t dcc_offset = 0;
2366
2367 if (surf->dcc_offset) {
2368 dcc_offset = surf->display_dcc_offset ? surf->display_dcc_offset
2369 : surf->dcc_offset;
2370 assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24));
2371 }
2372
2373 *tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, surf->u.gfx9.surf.swizzle_mode);
2374 *tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, dcc_offset >> 8);
2375 *tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, surf->u.gfx9.display_dcc_pitch_max);
2376 *tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, surf->u.gfx9.dcc.independent_64B_blocks);
2377 *tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, surf->u.gfx9.dcc.independent_128B_blocks);
2378 *tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, surf->u.gfx9.dcc.max_compressed_block_size);
2379 *tiling_flags |= AMDGPU_TILING_SET(SCANOUT, (surf->flags & RADEON_SURF_SCANOUT) != 0);
2380 } else {
2381 if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)
2382 *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
2383 else if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D)
2384 *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
2385 else
2386 *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
2387
2388 *tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, surf->u.legacy.pipe_config);
2389 *tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(surf->u.legacy.bankw));
2390 *tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(surf->u.legacy.bankh));
2391 if (surf->u.legacy.tile_split)
2392 *tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(surf->u.legacy.tile_split));
2393 *tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(surf->u.legacy.mtilea));
2394 *tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(surf->u.legacy.num_banks)-1);
2395
2396 if (surf->flags & RADEON_SURF_SCANOUT)
2397 *tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
2398 else
2399 *tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
2400 }
2401 }
2402
2403 static uint32_t ac_get_umd_metadata_word1(const struct radeon_info *info)
2404 {
2405 return (ATI_VENDOR_ID << 16) | info->pci_id;
2406 }
2407
/* This should be called after ac_compute_surface. */
/* Import tiling/DCC state from UMD metadata attached to a shared BO.
 *
 * metadata[0..1] are the header (version, vendor/PCI id) and
 * metadata[2..9] hold the image descriptor of the exporting driver.
 * Unusable metadata (non-zero plane offset, too small, bad version or
 * PCI id) is tolerated: DCC is disabled and true is returned.
 * Returns false only when the descriptor's sample/mip information
 * contradicts what the caller specified.
 */
bool ac_surface_set_umd_metadata(const struct radeon_info *info,
				 struct radeon_surf *surf,
				 unsigned num_storage_samples,
				 unsigned num_mipmap_levels,
				 unsigned size_metadata,
				 uint32_t metadata[64])
{
	uint32_t *desc = &metadata[2];
	uint64_t offset;

	if (info->chip_class >= GFX9)
		offset = surf->u.gfx9.surf_offset;
	else
		offset = surf->u.legacy.level[0].offset;

	if (offset || /* Non-zero planes ignore metadata. */
	    size_metadata < 10 * 4 || /* at least 2(header) + 8(desc) dwords */
	    metadata[0] == 0 || /* invalid version number */
	    metadata[1] != ac_get_umd_metadata_word1(info)) /* invalid PCI ID */ {
		/* Disable DCC because it might not be enabled. */
		ac_surface_zero_dcc_fields(surf);

		/* Don't report an error if the texture comes from an incompatible driver,
		 * but this might not work.
		 */
		return true;
	}

	/* Validate that sample counts and the number of mipmap levels match. */
	/* For MSAA resources, LAST_LEVEL stores log2(samples) instead of
	 * the last mip level. */
	unsigned desc_last_level = G_008F1C_LAST_LEVEL(desc[3]);
	unsigned type = G_008F1C_TYPE(desc[3]);

	if (type == V_008F1C_SQ_RSRC_IMG_2D_MSAA || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		unsigned log_samples = util_logbase2(MAX2(1, num_storage_samples));

		if (desc_last_level != log_samples) {
			fprintf(stderr,
				"amdgpu: invalid MSAA texture import, "
				"metadata has log2(samples) = %u, the caller set %u\n",
				desc_last_level, log_samples);
			return false;
		}
	} else {
		if (desc_last_level != num_mipmap_levels - 1) {
			fprintf(stderr,
				"amdgpu: invalid mipmapped texture import, "
				"metadata has last_level = %u, the caller set %u\n",
				desc_last_level, num_mipmap_levels - 1);
			return false;
		}
	}

	if (info->chip_class >= GFX8 && G_008F28_COMPRESSION_EN(desc[6])) {
		/* Read DCC information. The DCC address is split across
		 * descriptor dwords differently on each generation. */
		switch (info->chip_class) {
		case GFX8:
			surf->dcc_offset = (uint64_t)desc[7] << 8;
			break;

		case GFX9:
			surf->dcc_offset =
				((uint64_t)desc[7] << 8) | ((uint64_t)G_008F24_META_DATA_ADDRESS(desc[5]) << 40);
			surf->u.gfx9.dcc.pipe_aligned = G_008F24_META_PIPE_ALIGNED(desc[5]);
			surf->u.gfx9.dcc.rb_aligned = G_008F24_META_RB_ALIGNED(desc[5]);

			/* If DCC is unaligned, this can only be a displayable image. */
			if (!surf->u.gfx9.dcc.pipe_aligned && !surf->u.gfx9.dcc.rb_aligned)
				assert(surf->is_displayable);
			break;

		case GFX10:
		case GFX10_3:
			surf->dcc_offset =
				((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) | ((uint64_t)desc[7] << 16);
			surf->u.gfx9.dcc.pipe_aligned = G_00A018_META_PIPE_ALIGNED(desc[6]);
			break;

		default:
			assert(0);
			return false;
		}
	} else {
		/* Disable DCC. dcc_offset is always set by texture_from_handle
		 * and must be cleared here.
		 */
		ac_surface_zero_dcc_fields(surf);
	}

	return true;
}
2499
/* Export the surface layout as UMD metadata for sharing a BO.
 *
 * "desc" is the 8-dword image descriptor of the whole resource; its base
 * address is cleared and the DCC address fields are rewritten as offsets
 * relative to the start of the buffer before it is copied into
 * metadata[2..9]. *size_metadata receives the total size in bytes.
 */
void ac_surface_get_umd_metadata(const struct radeon_info *info,
				 struct radeon_surf *surf,
				 unsigned num_mipmap_levels,
				 uint32_t desc[8],
				 unsigned *size_metadata, uint32_t metadata[64])
{
	/* Clear the base address and set the relative DCC offset. */
	desc[0] = 0;
	desc[1] &= C_008F14_BASE_ADDRESS_HI;

	/* The DCC offset lives in different descriptor dwords per generation. */
	switch (info->chip_class) {
	case GFX6:
	case GFX7:
		break;
	case GFX8:
		desc[7] = surf->dcc_offset >> 8;
		break;
	case GFX9:
		desc[7] = surf->dcc_offset >> 8;
		desc[5] &= C_008F24_META_DATA_ADDRESS;
		desc[5] |= S_008F24_META_DATA_ADDRESS(surf->dcc_offset >> 40);
		break;
	case GFX10:
	case GFX10_3:
		desc[6] &= C_00A018_META_DATA_ADDRESS_LO;
		desc[6] |= S_00A018_META_DATA_ADDRESS_LO(surf->dcc_offset >> 8);
		desc[7] = surf->dcc_offset >> 16;
		break;
	default:
		assert(0);
	}

	/* Metadata image format version 1:
	 * [0] = 1 (metadata format identifier)
	 * [1] = (VENDOR_ID << 16) | PCI_ID
	 * [2:9] = image descriptor for the whole resource
	 *         [2] is always 0, because the base address is cleared
	 *         [9] is the DCC offset bits [39:8] from the beginning of
	 *             the buffer
	 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
	 */

	metadata[0] = 1; /* metadata image format version 1 */

	/* Tiling modes are ambiguous without a PCI ID. */
	metadata[1] = ac_get_umd_metadata_word1(info);

	/* Dwords [2:9] contain the image descriptor. */
	memcpy(&metadata[2], desc, 8 * 4);
	*size_metadata = 10 * 4;

	/* Dwords [10:..] contain the mipmap level offsets. */
	if (info->chip_class <= GFX8) {
		for (unsigned i = 0; i < num_mipmap_levels; i++)
			metadata[10 + i] = surf->u.legacy.level[i].offset >> 8;

		*size_metadata += num_mipmap_levels * 4;
	}
}
2559
/* Override the surface's base offset and row pitch, e.g. for imported
 * buffers whose layout was decided by another process.
 *
 * A pitch of 0 leaves the pitch untouched; the slice size is recomputed
 * from the new pitch when one is given. All metadata offsets (HTILE,
 * FMASK, CMASK, DCC, displayable DCC, retile map) are shifted by
 * "offset" as well, but only if they were previously non-zero.
 */
void ac_surface_override_offset_stride(const struct radeon_info *info,
				       struct radeon_surf *surf,
				       unsigned num_mipmap_levels,
				       uint64_t offset, unsigned pitch)
{
	if (info->chip_class >= GFX9) {
		if (pitch) {
			surf->u.gfx9.surf_pitch = pitch;
			/* epitch is only overridden for single-level surfaces;
			 * presumably multi-level layouts keep addrlib's value —
			 * TODO(review): confirm. */
			if (num_mipmap_levels == 1)
				surf->u.gfx9.surf.epitch = pitch - 1;
			surf->u.gfx9.surf_slice_size =
				(uint64_t)pitch * surf->u.gfx9.surf_height * surf->bpe;
		}
		surf->u.gfx9.surf_offset = offset;
		if (surf->u.gfx9.stencil_offset)
			surf->u.gfx9.stencil_offset += offset;
	} else {
		if (pitch) {
			surf->u.legacy.level[0].nblk_x = pitch;
			/* slice_size_dw is stored in dwords, hence the /4. */
			surf->u.legacy.level[0].slice_size_dw =
				((uint64_t)pitch * surf->u.legacy.level[0].nblk_y * surf->bpe) / 4;
		}

		if (offset) {
			/* Legacy paths store per-level absolute offsets; shift them all. */
			for (unsigned i = 0; i < ARRAY_SIZE(surf->u.legacy.level); ++i)
				surf->u.legacy.level[i].offset += offset;
		}
	}

	/* Shift all metadata allocations that exist. */
	if (surf->htile_offset)
		surf->htile_offset += offset;
	if (surf->fmask_offset)
		surf->fmask_offset += offset;
	if (surf->cmask_offset)
		surf->cmask_offset += offset;
	if (surf->dcc_offset)
		surf->dcc_offset += offset;
	if (surf->display_dcc_offset)
		surf->display_dcc_offset += offset;
	if (surf->dcc_retile_map_offset)
		surf->dcc_retile_map_offset += offset;
}