/*
 * Copyright © 2011 Red Hat All Rights Reserved.
 * Copyright © 2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
28 #include "ac_surface.h"
29 #include "amd_family.h"
30 #include "addrlib/src/amdgpu_asic_addr.h"
31 #include "ac_gpu_info.h"
32 #include "util/hash_table.h"
33 #include "util/macros.h"
34 #include "util/simple_mtx.h"
35 #include "util/u_atomic.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
44 #include "drm-uapi/amdgpu_drm.h"
46 #include "addrlib/inc/addrinterface.h"
48 #ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
49 #define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
52 #ifndef CIASICIDGFXENGINE_ARCTICISLAND
53 #define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D
/* The cache of DCC retile maps for reuse when allocating images of
 * similar sizes (comment truncated in this chunk).
 *
 * NOTE(review): these are referenced later as addrlib->dcc_retile_map_lock
 * etc., so they are fields of struct ac_addrlib; the enclosing struct
 * declaration is not visible in this chunk.
 */
/* Protects the two retile caches below. */
simple_mtx_t dcc_retile_map_lock;
/* Maps struct dcc_retile_map_key -> heap-allocated retile map. */
struct hash_table *dcc_retile_maps;
/* Maps struct dcc_retile_tile_key -> struct dcc_retile_tile_data. */
struct hash_table *dcc_retile_tile_indices;
/* Hash-table key identifying one DCC retile map. It is hashed and
 * compared byte-wise, so callers must fully zero it (memset) before
 * filling it in — padding bytes take part in the comparison. */
struct dcc_retile_map_key {
   enum radeon_family family;
   unsigned retile_width;
   unsigned retile_height;
   /* NOTE(review): the rb_aligned/pipe_aligned members assigned by
    * ac_compute_dcc_retile_map, and the closing brace of this struct,
    * appear to be elided from this chunk. */
   unsigned dcc_retile_num_elements;
   ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT input;
77 static uint32_t dcc_retile_map_hash_key(const void *key
)
79 return _mesa_hash_data(key
, sizeof(struct dcc_retile_map_key
));
82 static bool dcc_retile_map_keys_equal(const void *a
, const void *b
)
84 return memcmp(a
, b
, sizeof(struct dcc_retile_map_key
)) == 0;
87 static void dcc_retile_map_free(struct hash_entry
*entry
)
89 free((void*)entry
->key
);
/* Hash-table key identifying one per-metablock tile-index table
 * (see ac_compute_dcc_retile_tile_indices). Compared byte-wise. */
struct dcc_retile_tile_key {
   enum radeon_family family;
   /* NOTE(review): a bpp member, the rb_aligned/pipe_aligned members
    * initialized by ac_compute_dcc_retile_tile_indices, and the closing
    * brace of this struct appear to be elided from this chunk. */
   unsigned swizzle_mode;
/* Cached per-metablock DCC offset table: dimensions are stored as log2
 * of the block counts (see util_logbase2 usage in
 * ac_compute_dcc_retile_tile_indices). */
struct dcc_retile_tile_data {
   unsigned tile_width_log2;
   unsigned tile_height_log2;
   /* NOTE(review): the uint16_t *data member (the offset array filled in
    * ac_compute_dcc_retile_tile_indices and freed in
    * dcc_retile_tile_free) and the closing brace appear to be elided
    * from this chunk. */
107 static uint32_t dcc_retile_tile_hash_key(const void *key
)
109 return _mesa_hash_data(key
, sizeof(struct dcc_retile_tile_key
));
112 static bool dcc_retile_tile_keys_equal(const void *a
, const void *b
)
114 return memcmp(a
, b
, sizeof(struct dcc_retile_tile_key
)) == 0;
117 static void dcc_retile_tile_free(struct hash_entry
*entry
)
119 free((void*)entry
->key
);
120 free(((struct dcc_retile_tile_data
*)entry
->data
)->data
);
/* Assumes dcc_retile_map_lock is taken. */
/* Compute (and cache in addrlib->dcc_retile_tile_indices) the table of
 * DCC byte offsets for every compressed block within one metablock, for
 * the given swizzle mode and rb/pipe alignment flags.
 *
 * NOTE(review): several lines of this function appear to be elided from
 * this chunk (the cache-hit early return after the hash lookup, error
 * checks on the addrlib calls and the allocations, and various closing
 * braces). The visible tokens are preserved as-is below.
 */
static const struct dcc_retile_tile_data *
ac_compute_dcc_retile_tile_indices(struct ac_addrlib *addrlib,
                                   const struct radeon_info *info,
                                   unsigned bpp, unsigned swizzle_mode,
                                   bool rb_aligned, bool pipe_aligned)
   /* Key layout must match dcc_retile_tile_keys_equal's byte-wise compare. */
   struct dcc_retile_tile_key key = (struct dcc_retile_tile_key) {
      .family = info->family,
      .swizzle_mode = swizzle_mode,
      .rb_aligned = rb_aligned,
      .pipe_aligned = pipe_aligned
   /* Reuse a previously computed table if one is cached. */
   struct hash_entry *entry =
      _mesa_hash_table_search(addrlib->dcc_retile_tile_indices, &key);

   /* Ask addrlib for the DCC metablock / compressed-block geometry. */
   ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
   ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
   din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
   dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);

   din.dccKeyFlags.pipeAligned = pipe_aligned;
   din.dccKeyFlags.rbAligned = rb_aligned;
   din.resourceType = ADDR_RSRC_TEX_2D;
   din.swizzleMode = swizzle_mode;
   /* A 1x1 single-mip query is enough: we only need the block geometry. */
   din.unalignedWidth = 1;
   din.unalignedHeight = 1;
   din.numMipLevels = 1;

   ADDR_E_RETURNCODE ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);

   /* Query the DCC byte address of each compressed block in one metablock. */
   ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin = {0};
   addrin.size = sizeof(addrin);
   addrin.swizzleMode = swizzle_mode;
   addrin.resourceType = ADDR_RSRC_TEX_2D;
   addrin.numSlices = 1;
   addrin.numMipLevels = 1;
   addrin.pitch = dout.pitch;
   addrin.height = dout.height;
   addrin.compressBlkWidth = dout.compressBlkWidth;
   addrin.compressBlkHeight = dout.compressBlkHeight;
   addrin.compressBlkDepth = dout.compressBlkDepth;
   addrin.metaBlkWidth = dout.metaBlkWidth;
   addrin.metaBlkHeight = dout.metaBlkHeight;
   addrin.metaBlkDepth = dout.metaBlkDepth;
   addrin.dccKeyFlags.pipeAligned = pipe_aligned;
   addrin.dccKeyFlags.rbAligned = rb_aligned;

   /* Table dimensions in compressed blocks per metablock. */
   unsigned w = dout.metaBlkWidth / dout.compressBlkWidth;
   unsigned h = dout.metaBlkHeight / dout.compressBlkHeight;
   uint16_t *indices = malloc(w * h * sizeof(uint16_t));

   ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout = {};
   addrout.size = sizeof(addrout);

   for (unsigned y = 0; y < h; ++y) {
      addrin.y = y * dout.compressBlkHeight;
      for (unsigned x = 0; x < w; ++x) {
         addrin.x = x * dout.compressBlkWidth;

         if (Addr2ComputeDccAddrFromCoord(addrlib->handle, &addrin,
                                          &addrout) != ADDR_OK) {

         /* NOTE(review): addrout.addr is truncated to uint16_t here —
          * the per-metablock offsets are assumed to fit in 16 bits;
          * confirm against the elided range checks. */
         indices[y * w + x] = addrout.addr;

   struct dcc_retile_tile_data *data = calloc(1, sizeof(*data));

   data->tile_width_log2 = util_logbase2(w);
   data->tile_height_log2 = util_logbase2(h);
   data->data = indices;

   /* The cache keeps its own copy of the key. */
   struct dcc_retile_tile_key *heap_key = mem_dup(&key, sizeof(key));

   entry = _mesa_hash_table_insert(addrlib->dcc_retile_tile_indices,
                                   heap_key, data);
231 static uint32_t ac_compute_retile_tile_addr(const struct dcc_retile_tile_data
*tile
,
232 unsigned stride
, unsigned x
, unsigned y
)
234 unsigned x_mask
= (1u << tile
->tile_width_log2
) - 1;
235 unsigned y_mask
= (1u << tile
->tile_height_log2
) - 1;
236 unsigned tile_size_log2
= tile
->tile_width_log2
+ tile
->tile_height_log2
;
238 unsigned base
= ((y
>> tile
->tile_height_log2
) * stride
+ (x
>> tile
->tile_width_log2
)) << tile_size_log2
;
239 unsigned offset_in_tile
= tile
->data
[((y
& y_mask
) << tile
->tile_width_log2
) + (x
& x_mask
)];
240 return base
+ offset_in_tile
;
/* Compute (or fetch from the dcc_retile_maps cache) the retile map used
 * by the DCC retile compute shader: (src offset, dst offset) pairs, one
 * per compressed block, stored as uint16 or uint32 elements.
 *
 * NOTE(review): this chunk is missing several original lines (the
 * cache-hit early return after the hash lookup — 'map' is otherwise
 * unused — the declaration of the running 'index' counter, the
 * use_uint16 branches around the stores, error-path returns, and
 * closing braces). Visible tokens are preserved as-is.
 */
static uint32_t *ac_compute_dcc_retile_map(struct ac_addrlib *addrlib,
                                           const struct radeon_info *info,
                                           unsigned retile_width, unsigned retile_height,
                                           bool rb_aligned, bool pipe_aligned, bool use_uint16,
                                           unsigned dcc_retile_num_elements,
                                           const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT *in)
   unsigned dcc_retile_map_size = dcc_retile_num_elements * (use_uint16 ? 2 : 4);
   struct dcc_retile_map_key key;

   assert(in->numFrags == 1 && in->numSlices == 1 && in->numMipLevels == 1);

   /* memset is required: the key is hashed and compared byte-wise. */
   memset(&key, 0, sizeof(key));
   key.family = info->family;
   key.retile_width = retile_width;
   key.retile_height = retile_height;
   key.rb_aligned = rb_aligned;
   key.pipe_aligned = pipe_aligned;
   key.dcc_retile_num_elements = dcc_retile_num_elements;
   memcpy(&key.input, in, sizeof(*in));

   simple_mtx_lock(&addrlib->dcc_retile_map_lock);

   /* If we have already computed this retile map, get it from the hash table. */
   struct hash_entry *entry = _mesa_hash_table_search(addrlib->dcc_retile_maps, &key);
   uint32_t *map = entry->data;
   simple_mtx_unlock(&addrlib->dcc_retile_map_lock);

   /* Offset tables for the aligned (src) and unaligned (dst) layouts.
    * NOTE(review): the src call appears to be missing an argument
    * (compare the dst call below) due to elided lines — confirm. */
   const struct dcc_retile_tile_data *src_tile =
      ac_compute_dcc_retile_tile_indices(addrlib, info, in->bpp,
                                         rb_aligned, pipe_aligned);
   const struct dcc_retile_tile_data *dst_tile =
      ac_compute_dcc_retile_tile_indices(addrlib, info, in->bpp,
                                         in->swizzleMode, false, false);
   if (!src_tile || !dst_tile) {
      simple_mtx_unlock(&addrlib->dcc_retile_map_lock);

   void *dcc_retile_map = malloc(dcc_retile_map_size);
   if (!dcc_retile_map) {
      simple_mtx_unlock(&addrlib->dcc_retile_map_lock);

   /* Dimensions in compressed blocks, and strides in metablocks. */
   unsigned w = DIV_ROUND_UP(retile_width, in->compressBlkWidth);
   unsigned h = DIV_ROUND_UP(retile_height, in->compressBlkHeight);
   unsigned src_stride = DIV_ROUND_UP(w, 1u << src_tile->tile_width_log2);
   unsigned dst_stride = DIV_ROUND_UP(w, 1u << dst_tile->tile_width_log2);

   for (unsigned y = 0; y < h; ++y) {
      for (unsigned x = 0; x < w; ++x) {
         unsigned src_addr = ac_compute_retile_tile_addr(src_tile, src_stride, x, y);
         unsigned dst_addr = ac_compute_retile_tile_addr(dst_tile, dst_stride, x, y);

         /* Element pairs: [2*i] = src, [2*i+1] = dst (uint16 or uint32
          * depending on use_uint16; branch openers elided here). */
         ((uint16_t*)dcc_retile_map)[2 * index] = src_addr;
         ((uint16_t*)dcc_retile_map)[2 * index + 1] = dst_addr;
         ((uint32_t*)dcc_retile_map)[2 * index] = src_addr;
         ((uint32_t*)dcc_retile_map)[2 * index + 1] = dst_addr;

   /* Fill the remaining pairs with the last one (for the compute shader). */
   for (unsigned i = index * 2; i < dcc_retile_num_elements; i++) {
      ((uint16_t*)dcc_retile_map)[i] = ((uint16_t*)dcc_retile_map)[i - 2];
      ((uint32_t*)dcc_retile_map)[i] = ((uint32_t*)dcc_retile_map)[i - 2];

   /* Insert the retile map into the hash table, so that it can be reused and
    * the computation can be skipped for similar image sizes.
    */
   _mesa_hash_table_insert(addrlib->dcc_retile_maps,
                           mem_dup(&key, sizeof(key)), dcc_retile_map);

   simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
   return dcc_retile_map;
332 static void *ADDR_API
allocSysMem(const ADDR_ALLOCSYSMEM_INPUT
* pInput
)
334 return malloc(pInput
->sizeInBytes
);
337 static ADDR_E_RETURNCODE ADDR_API
freeSysMem(const ADDR_FREESYSMEM_INPUT
* pInput
)
339 free(pInput
->pVirtAddr
);
/* Create the shared addrlib wrapper: instantiate AMD addrlib for this
 * chip (programming the legacy GFX6-GFX8 tiling registers where needed),
 * query the maximum base alignment into *max_alignment, and set up the
 * DCC retile caches.
 *
 * NOTE(review): several original lines are elided from this chunk
 * (the early "return NULL" paths, the else branches of the chip-family
 * and SI checks, the NULL check of the calloc result, the final
 * "return addrlib;", and closing braces). Visible tokens preserved.
 */
struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info,
                                     const struct amdgpu_gpu_info *amdinfo,
                                     uint64_t *max_alignment)
   ADDR_CREATE_INPUT addrCreateInput = {0};
   ADDR_CREATE_OUTPUT addrCreateOutput = {0};
   ADDR_REGISTER_VALUE regValue = {0};
   ADDR_CREATE_FLAGS createFlags = {{0}};
   ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0};
   ADDR_E_RETURNCODE addrRet;

   addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
   addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);

   regValue.gbAddrConfig = amdinfo->gb_addr_cfg;
   createFlags.value = 0;

   addrCreateInput.chipFamily = info->family_id;
   addrCreateInput.chipRevision = info->chip_external_rev;

   /* Unknown chip family: bail out (return elided in this chunk). */
   if (addrCreateInput.chipFamily == FAMILY_UNKNOWN)

   if (addrCreateInput.chipFamily >= FAMILY_AI) {
      addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND;
      /* Pre-AI path: program the legacy tiling register values. */
      regValue.noOfBanks = amdinfo->mc_arb_ramcfg & 0x3;
      regValue.noOfRanks = (amdinfo->mc_arb_ramcfg & 0x4) >> 2;

      regValue.backendDisables = amdinfo->enabled_rb_pipes_mask;
      regValue.pTileConfig = amdinfo->gb_tile_mode;
      regValue.noOfEntries = ARRAY_SIZE(amdinfo->gb_tile_mode);
      if (addrCreateInput.chipFamily == FAMILY_SI) {
         /* SI has no macrotile mode array. */
         regValue.pMacroTileConfig = NULL;
         regValue.noOfMacroEntries = 0;
         regValue.pMacroTileConfig = amdinfo->gb_macro_tile_mode;
         regValue.noOfMacroEntries = ARRAY_SIZE(amdinfo->gb_macro_tile_mode);

      createFlags.useTileIndex = 1;
      createFlags.useHtileSliceAlign = 1;

      addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;

   addrCreateInput.callbacks.allocSysMem = allocSysMem;
   addrCreateInput.callbacks.freeSysMem = freeSysMem;
   addrCreateInput.callbacks.debugPrint = 0;
   addrCreateInput.createFlags = createFlags;
   addrCreateInput.regValue = regValue;

   addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput);
   if (addrRet != ADDR_OK)

   addrRet = AddrGetMaxAlignments(addrCreateOutput.hLib, &addrGetMaxAlignmentsOutput);
   if (addrRet == ADDR_OK){
      *max_alignment = addrGetMaxAlignmentsOutput.baseAlign;

   struct ac_addrlib *addrlib = calloc(1, sizeof(struct ac_addrlib));
   /* On allocation failure, the addrlib handle must be destroyed again
    * (the surrounding NULL check is elided in this chunk). */
   AddrDestroy(addrCreateOutput.hLib);

   addrlib->handle = addrCreateOutput.hLib;
   simple_mtx_init(&addrlib->dcc_retile_map_lock, mtx_plain);
   addrlib->dcc_retile_maps = _mesa_hash_table_create(NULL, dcc_retile_map_hash_key,
                                                      dcc_retile_map_keys_equal);
   addrlib->dcc_retile_tile_indices = _mesa_hash_table_create(NULL, dcc_retile_tile_hash_key,
                                                              dcc_retile_tile_keys_equal);
421 void ac_addrlib_destroy(struct ac_addrlib
*addrlib
)
423 AddrDestroy(addrlib
->handle
);
424 simple_mtx_destroy(&addrlib
->dcc_retile_map_lock
);
425 _mesa_hash_table_destroy(addrlib
->dcc_retile_maps
, dcc_retile_map_free
);
426 _mesa_hash_table_destroy(addrlib
->dcc_retile_tile_indices
, dcc_retile_tile_free
);
/* Validate an ac_surf_config before surface computation.
 *
 * NOTE(review): this chunk is missing the rest of the signature (a
 * 'flags' parameter, used below), the return statements, the bodies of
 * both switch statements, and closing braces. Visible tokens preserved.
 */
static int surf_config_sanity(const struct ac_surf_config *config,
   /* FMASK is allocated together with the color surface and can't be
    * allocated separately.
    */
   assert(!(flags & RADEON_SURF_FMASK));
   if (flags & RADEON_SURF_FMASK)

   /* all dimension must be at least 1 ! */
   if (!config->info.width || !config->info.height || !config->info.depth ||
       !config->info.array_size || !config->info.levels)

   /* Sample-count validation (cases elided in this chunk). */
   switch (config->info.samples) {

   if (flags & RADEON_SURF_Z_OR_SBUFFER)

   if (!(flags & RADEON_SURF_Z_OR_SBUFFER)) {
      switch (config->info.storage_samples) {

   /* Reject inconsistent 3D/cube dimension combinations
    * (the rejection returns are elided in this chunk). */
   if (config->is_3d && config->info.array_size > 1)
   if (config->is_cube && config->info.depth > 1)
/* Compute one mip level of a GFX6-GFX8 (legacy) surface: query addrlib
 * for the level layout and append it to surf, plus per-level DCC info
 * and (for depth surfaces) HTILE info.
 *
 * NOTE(review): many original lines are elided from this chunk (the
 * opening brace, several if/else openers, the argument lists of
 * AddrComputeSurfaceInfo/AddrComputeDccInfo/AddrComputeHtileInfo calls,
 * break statements, error returns, and closing braces). Visible tokens
 * are preserved as-is.
 */
static int gfx6_compute_level(ADDR_HANDLE addrlib,
                              const struct ac_surf_config *config,
                              struct radeon_surf *surf, bool is_stencil,
                              unsigned level, bool compressed,
                              ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
                              ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
                              ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
                              ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
                              ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
                              ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
   struct legacy_surf_level *surf_level;
   ADDR_E_RETURNCODE ret;

   AddrSurfInfoIn->mipLevel = level;
   AddrSurfInfoIn->width = u_minify(config->info.width, level);
   AddrSurfInfoIn->height = u_minify(config->info.height, level);

   /* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics,
    * because GFX9 needs linear alignment of 256 bytes.
    */
   if (config->info.levels == 1 &&
       AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED &&
       AddrSurfInfoIn->bpp &&
       util_is_power_of_two_or_zero(AddrSurfInfoIn->bpp)) {
      unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8);

      AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment);

   /* addrlib assumes the bytes/pixel is a divisor of 64, which is not
    * true for r32g32b32 formats. */
   if (AddrSurfInfoIn->bpp == 96) {
      assert(config->info.levels == 1);
      assert(AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED);

      /* The least common multiple of 64 bytes and 12 bytes/pixel is
       * 192 bytes, or 16 pixels. */
      AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, 16);

   /* Slice count: 3D vs cube vs array (the leading if is elided here). */
   AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level);
   else if (config->is_cube)
      AddrSurfInfoIn->numSlices = 6;
      AddrSurfInfoIn->numSlices = config->info.array_size;

   /* Set the base level pitch. This is needed for calculation
    * of non-zero levels. */
   /* (is_stencil selection; the surrounding level > 0 / if-else lines
    * are elided in this chunk.) */
   AddrSurfInfoIn->basePitch = surf->u.legacy.stencil_level[0].nblk_x;
   AddrSurfInfoIn->basePitch = surf->u.legacy.level[0].nblk_x;

   /* Convert blocks to pixels for compressed formats. */
   AddrSurfInfoIn->basePitch *= surf->blk_w;

   ret = AddrComputeSurfaceInfo(addrlib,
   if (ret != ADDR_OK) {

   surf_level = is_stencil ? &surf->u.legacy.stencil_level[level]
                           : &surf->u.legacy.level[level];
   surf_level->offset = align64(surf->surf_size, AddrSurfInfoOut->baseAlign);
   surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4;
   surf_level->nblk_x = AddrSurfInfoOut->pitch;
   surf_level->nblk_y = AddrSurfInfoOut->height;

   /* Translate the addrlib tile mode to the radeon_surf mode enum
    * (break statements elided in this chunk). */
   switch (AddrSurfInfoOut->tileMode) {
   case ADDR_TM_LINEAR_ALIGNED:
      surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
   case ADDR_TM_1D_TILED_THIN1:
      surf_level->mode = RADEON_SURF_MODE_1D;
   case ADDR_TM_2D_TILED_THIN1:
      surf_level->mode = RADEON_SURF_MODE_2D;

   /* Record the tile index (is_stencil if/else elided in this chunk). */
   surf->u.legacy.stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex;
   surf->u.legacy.tiling_index[level] = AddrSurfInfoOut->tileIndex;

   surf->surf_size = surf_level->offset + AddrSurfInfoOut->surfSize;

   /* Clear DCC fields at the beginning. */
   surf_level->dcc_offset = 0;

   /* The previous level's flag tells us if we can use DCC for this level. */
   if (AddrSurfInfoIn->flags.dccCompatible &&
       (level == 0 || AddrDccOut->subLvlCompressible)) {
      bool prev_level_clearable = level == 0 ||
                                  AddrDccOut->dccRamSizeAligned;

      AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
      AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
      AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
      AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
      AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;

      ret = AddrComputeDccInfo(addrlib,
      if (ret == ADDR_OK) {
         surf_level->dcc_offset = surf->dcc_size;
         surf->num_dcc_levels = level + 1;
         surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize;
         surf->dcc_alignment = MAX2(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign);

         /* If the DCC size of a subresource (1 mip level or 1 slice)
          * is not aligned, the DCC memory layout is not contiguous for
          * that subresource, which means we can't use fast clear.
          *
          * We only do fast clears for whole mipmap levels. If we did
          * per-slice fast clears, the same restriction would apply.
          * (i.e. only compute the slice size and see if it's aligned)
          *
          * The last level can be non-contiguous and still be clearable
          * if it's interleaved with the next level that doesn't exist.
          */
         if (AddrDccOut->dccRamSizeAligned ||
             (prev_level_clearable && level == config->info.levels - 1))
            surf_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize;
            surf_level->dcc_fast_clear_size = 0;

         /* Compute the DCC slice size because addrlib doesn't
          * provide this info. As DCC memory is linear (each
          * slice is the same size) it's easy to compute.
          */
         surf->dcc_slice_size = AddrDccOut->dccRamSize / config->info.array_size;

         /* For arrays, we have to compute the DCC info again
          * with one slice size to get a correct fast clear
          */
         if (config->info.array_size > 1) {
            AddrDccIn->colorSurfSize = AddrSurfInfoOut->sliceSize;
            AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
            AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
            AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
            AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;

            ret = AddrComputeDccInfo(addrlib,
                                     AddrDccIn, AddrDccOut);
            if (ret == ADDR_OK) {
               /* If the DCC memory isn't properly
                * aligned, the data are interleaved
                */
               if (AddrDccOut->dccRamSizeAligned)
                  surf_level->dcc_slice_fast_clear_size = AddrDccOut->dccFastClearSize;
                  surf_level->dcc_slice_fast_clear_size = 0;

            if (surf->flags & RADEON_SURF_CONTIGUOUS_DCC_LAYERS &&
                surf->dcc_slice_size != surf_level->dcc_slice_fast_clear_size) {
               surf->num_dcc_levels = 0;
               AddrDccOut->subLvlCompressible = false;
            surf_level->dcc_slice_fast_clear_size = surf_level->dcc_fast_clear_size;

   /* HTILE (the start of this condition is elided in this chunk). */
       AddrSurfInfoIn->flags.depth &&
       surf_level->mode == RADEON_SURF_MODE_2D &&
       !(surf->flags & RADEON_SURF_NO_HTILE)) {
      AddrHtileIn->flags.tcCompatible = AddrSurfInfoOut->tcCompatible;
      AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
      AddrHtileIn->height = AddrSurfInfoOut->height;
      AddrHtileIn->numSlices = AddrSurfInfoOut->depth;
      AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8;
      AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8;
      AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo;
      AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex;
      AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;

      ret = AddrComputeHtileInfo(addrlib,
      if (ret == ADDR_OK) {
         surf->htile_size = AddrHtileOut->htileBytes;
         surf->htile_slice_size = AddrHtileOut->sliceSize;
         surf->htile_alignment = AddrHtileOut->baseAlign;
690 static void gfx6_set_micro_tile_mode(struct radeon_surf
*surf
,
691 const struct radeon_info
*info
)
693 uint32_t tile_mode
= info
->si_tile_mode_array
[surf
->u
.legacy
.tiling_index
[0]];
695 if (info
->chip_class
>= GFX7
)
696 surf
->micro_tile_mode
= G_009910_MICRO_TILE_MODE_NEW(tile_mode
);
698 surf
->micro_tile_mode
= G_009910_MICRO_TILE_MODE(tile_mode
);
701 static unsigned cik_get_macro_tile_index(struct radeon_surf
*surf
)
703 unsigned index
, tileb
;
705 tileb
= 8 * 8 * surf
->bpe
;
706 tileb
= MIN2(surf
->u
.legacy
.tile_split
, tileb
);
708 for (index
= 0; tileb
> 64; index
++)
/* Decide whether addrlib should be asked for a displayable micro-tile
 * layout: only for non-3D, non-depth, scanout-flagged, single-sample
 * surfaces with small block sizes and a scanout-capable bpe/channel
 * combination.
 *
 * NOTE(review): the return statements (and possibly part of the outer
 * condition) are elided from this chunk. Visible tokens preserved.
 */
static bool get_display_flag(const struct ac_surf_config *config,
                             const struct radeon_surf *surf)
   unsigned num_channels = config->info.num_channels;
   unsigned bpe = surf->bpe;

   if (!config->is_3d &&
       !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
       surf->flags & RADEON_SURF_SCANOUT &&
       config->info.samples <= 1 &&
       surf->blk_w <= 2 && surf->blk_h == 1) {
      /* Subsampled (2x1 block) formats. */
      if (surf->blk_w == 2 && surf->blk_h == 1)

      if (/* RGBA8 or RGBA16F */
          (bpe >= 4 && bpe <= 8 && num_channels == 4) ||
          /* R5G6B5 or R5G5B5A1 */
          (bpe == 2 && num_channels >= 3) ||
          (bpe == 1 && num_channels == 1))
/* This must be called after the first level is computed.
 *
 * Copy surface-global settings like pipe/bank config from level 0 surface
 * computation, and compute tile swizzle.
 *
 * NOTE(review): the comment delimiters, an else line, the return
 * statements and the closing braces are elided from this chunk.
 * Visible tokens preserved.
 */
static int gfx6_surface_settings(ADDR_HANDLE addrlib,
                                 const struct radeon_info *info,
                                 const struct ac_surf_config *config,
                                 ADDR_COMPUTE_SURFACE_INFO_OUTPUT* csio,
                                 struct radeon_surf *surf)
   surf->surf_alignment = csio->baseAlign;
   /* pipeConfig is stored off-by-one vs the register encoding (see the
    * matching "+1" when feeding it back in gfx6_compute_surface). */
   surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1;
   gfx6_set_micro_tile_mode(surf, info);

   /* For 2D modes only. */
   if (csio->tileMode >= ADDR_TM_2D_TILED_THIN1) {
      surf->u.legacy.bankw = csio->pTileInfo->bankWidth;
      surf->u.legacy.bankh = csio->pTileInfo->bankHeight;
      surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio;
      surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes;
      surf->u.legacy.num_banks = csio->pTileInfo->banks;
      surf->u.legacy.macro_tile_index = csio->macroModeIndex;
      /* Non-2D fallback (the 'else' opener is elided in this chunk). */
      surf->u.legacy.macro_tile_index = 0;

   /* Compute tile swizzle. */
   /* TODO: fix tile swizzle with mipmapping for GFX6 */
   if ((info->chip_class >= GFX7 || config->info.levels == 1) &&
       config->info.surf_index &&
       surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
       !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) &&
       !get_display_flag(config, surf)) {
      ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
      ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};

      AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
      AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);

      /* Each new surface gets a distinct index from the shared counter. */
      AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
      AddrBaseSwizzleIn.tileIndex = csio->tileIndex;
      AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex;
      AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo;
      AddrBaseSwizzleIn.tileMode = csio->tileMode;

      int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn,
                                     &AddrBaseSwizzleOut);

      /* The swizzle must fit in the (narrow) tile_swizzle field. */
      assert(AddrBaseSwizzleOut.tileSwizzle <=
             u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
      surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
/* Compute the CMASK size, alignment and per-slice layout for a
 * GFX6-GFX8 color surface, storing the results in surf.
 *
 * NOTE(review): this chunk is missing the early return body, the switch
 * over num_pipes that selects cl_width/cl_height (only the "case 16"
 * label is visible), the num_layers declaration and its if/else, and
 * several braces. Visible tokens preserved.
 */
static void ac_compute_cmask(const struct radeon_info *info,
                             const struct ac_surf_config *config,
                             struct radeon_surf *surf)
   unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;
   unsigned num_pipes = info->num_tile_pipes;
   unsigned cl_width, cl_height;

   /* No CMASK for depth/stencil, linear, or (FMASK-less) MSAA surfaces. */
   if (surf->flags & RADEON_SURF_Z_OR_SBUFFER || surf->is_linear ||
       (config->info.samples >= 2 && !surf->fmask_size))

   /* This legacy path only applies to GFX8 and older. */
   assert(info->chip_class <= GFX8);

   case 16: /* Hawaii */

   unsigned base_align = num_pipes * pipe_interleave_bytes;

   unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width*8);
   unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height*8);
   unsigned slice_elements = (width * height) / (8*8);

   /* Each element of CMASK is a nibble. */
   unsigned slice_bytes = slice_elements / 2;

   surf->u.legacy.cmask_slice_tile_max = (width * height) / (128*128);
   if (surf->u.legacy.cmask_slice_tile_max)
      surf->u.legacy.cmask_slice_tile_max -= 1;

   /* Layer count: 3D depth vs cube vs array (if/else elided here). */
   num_layers = config->info.depth;
   else if (config->is_cube)
   num_layers = config->info.array_size;

   surf->cmask_alignment = MAX2(256, base_align);
   surf->cmask_slice_size = align(slice_bytes, base_align);
   surf->cmask_size = surf->cmask_slice_size * num_layers;
864 * Fill in the tiling information in \p surf based on the given surface config.
866 * The following fields of \p surf must be initialized by the caller:
867 * blk_w, blk_h, bpe, flags.
869 static int gfx6_compute_surface(ADDR_HANDLE addrlib
,
870 const struct radeon_info
*info
,
871 const struct ac_surf_config
*config
,
872 enum radeon_surf_mode mode
,
873 struct radeon_surf
*surf
)
877 ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn
= {0};
878 ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut
= {0};
879 ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn
= {0};
880 ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut
= {0};
881 ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn
= {0};
882 ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut
= {0};
883 ADDR_TILEINFO AddrTileInfoIn
= {0};
884 ADDR_TILEINFO AddrTileInfoOut
= {0};
887 AddrSurfInfoIn
.size
= sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT
);
888 AddrSurfInfoOut
.size
= sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT
);
889 AddrDccIn
.size
= sizeof(ADDR_COMPUTE_DCCINFO_INPUT
);
890 AddrDccOut
.size
= sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT
);
891 AddrHtileIn
.size
= sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT
);
892 AddrHtileOut
.size
= sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT
);
893 AddrSurfInfoOut
.pTileInfo
= &AddrTileInfoOut
;
895 compressed
= surf
->blk_w
== 4 && surf
->blk_h
== 4;
897 /* MSAA requires 2D tiling. */
898 if (config
->info
.samples
> 1)
899 mode
= RADEON_SURF_MODE_2D
;
901 /* DB doesn't support linear layouts. */
902 if (surf
->flags
& (RADEON_SURF_Z_OR_SBUFFER
) &&
903 mode
< RADEON_SURF_MODE_1D
)
904 mode
= RADEON_SURF_MODE_1D
;
906 /* Set the requested tiling mode. */
908 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
909 AddrSurfInfoIn
.tileMode
= ADDR_TM_LINEAR_ALIGNED
;
911 case RADEON_SURF_MODE_1D
:
912 AddrSurfInfoIn
.tileMode
= ADDR_TM_1D_TILED_THIN1
;
914 case RADEON_SURF_MODE_2D
:
915 AddrSurfInfoIn
.tileMode
= ADDR_TM_2D_TILED_THIN1
;
921 /* The format must be set correctly for the allocation of compressed
922 * textures to work. In other cases, setting the bpp is sufficient.
927 AddrSurfInfoIn
.format
= ADDR_FMT_BC1
;
930 AddrSurfInfoIn
.format
= ADDR_FMT_BC3
;
937 AddrDccIn
.bpp
= AddrSurfInfoIn
.bpp
= surf
->bpe
* 8;
940 AddrDccIn
.numSamples
= AddrSurfInfoIn
.numSamples
=
941 MAX2(1, config
->info
.samples
);
942 AddrSurfInfoIn
.tileIndex
= -1;
944 if (!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
)) {
945 AddrDccIn
.numSamples
= AddrSurfInfoIn
.numFrags
=
946 MAX2(1, config
->info
.storage_samples
);
949 /* Set the micro tile type. */
950 if (surf
->flags
& RADEON_SURF_SCANOUT
)
951 AddrSurfInfoIn
.tileType
= ADDR_DISPLAYABLE
;
952 else if (surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
)
953 AddrSurfInfoIn
.tileType
= ADDR_DEPTH_SAMPLE_ORDER
;
955 AddrSurfInfoIn
.tileType
= ADDR_NON_DISPLAYABLE
;
957 AddrSurfInfoIn
.flags
.color
= !(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
);
958 AddrSurfInfoIn
.flags
.depth
= (surf
->flags
& RADEON_SURF_ZBUFFER
) != 0;
959 AddrSurfInfoIn
.flags
.cube
= config
->is_cube
;
960 AddrSurfInfoIn
.flags
.display
= get_display_flag(config
, surf
);
961 AddrSurfInfoIn
.flags
.pow2Pad
= config
->info
.levels
> 1;
962 AddrSurfInfoIn
.flags
.tcCompatible
= (surf
->flags
& RADEON_SURF_TC_COMPATIBLE_HTILE
) != 0;
964 /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
965 * requested, because TC-compatible HTILE requires 2D tiling.
967 AddrSurfInfoIn
.flags
.opt4Space
= !AddrSurfInfoIn
.flags
.tcCompatible
&&
968 !AddrSurfInfoIn
.flags
.fmask
&&
969 config
->info
.samples
<= 1 &&
970 !(surf
->flags
& RADEON_SURF_FORCE_SWIZZLE_MODE
);
973 * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
975 * - Mipmapped array textures have low performance (discovered by a closed
978 AddrSurfInfoIn
.flags
.dccCompatible
=
979 info
->chip_class
>= GFX8
&&
980 info
->has_graphics
&& /* disable DCC on compute-only chips */
981 !(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
) &&
982 !(surf
->flags
& RADEON_SURF_DISABLE_DCC
) &&
984 ((config
->info
.array_size
== 1 && config
->info
.depth
== 1) ||
985 config
->info
.levels
== 1);
987 AddrSurfInfoIn
.flags
.noStencil
= (surf
->flags
& RADEON_SURF_SBUFFER
) == 0;
988 AddrSurfInfoIn
.flags
.compressZ
= !!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
);
990 /* On GFX7-GFX8, the DB uses the same pitch and tile mode (except tilesplit)
991 * for Z and stencil. This can cause a number of problems which we work
994 * - a depth part that is incompatible with mipmapped texturing
995 * - at least on Stoney, entirely incompatible Z/S aspects (e.g.
996 * incorrect tiling applied to the stencil part, stencil buffer
997 * memory accesses that go out of bounds) even without mipmapping
999 * Some piglit tests that are prone to different types of related
1001 * ./bin/ext_framebuffer_multisample-upsample 2 stencil
1002 * ./bin/framebuffer-blit-levels {draw,read} stencil
1003 * ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample}
1004 * ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw}
1005 * ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8
1007 int stencil_tile_idx
= -1;
1009 if (AddrSurfInfoIn
.flags
.depth
&& !AddrSurfInfoIn
.flags
.noStencil
&&
1010 (config
->info
.levels
> 1 || info
->family
== CHIP_STONEY
)) {
1011 /* Compute stencilTileIdx that is compatible with the (depth)
1012 * tileIdx. This degrades the depth surface if necessary to
1013 * ensure that a matching stencilTileIdx exists. */
1014 AddrSurfInfoIn
.flags
.matchStencilTileCfg
= 1;
1016 /* Keep the depth mip-tail compatible with texturing. */
1017 AddrSurfInfoIn
.flags
.noStencil
= 1;
1020 /* Set preferred macrotile parameters. This is usually required
1021 * for shared resources. This is for 2D tiling only. */
1022 if (AddrSurfInfoIn
.tileMode
>= ADDR_TM_2D_TILED_THIN1
&&
1023 surf
->u
.legacy
.bankw
&& surf
->u
.legacy
.bankh
&&
1024 surf
->u
.legacy
.mtilea
&& surf
->u
.legacy
.tile_split
) {
1025 /* If any of these parameters are incorrect, the calculation
1027 AddrTileInfoIn
.banks
= surf
->u
.legacy
.num_banks
;
1028 AddrTileInfoIn
.bankWidth
= surf
->u
.legacy
.bankw
;
1029 AddrTileInfoIn
.bankHeight
= surf
->u
.legacy
.bankh
;
1030 AddrTileInfoIn
.macroAspectRatio
= surf
->u
.legacy
.mtilea
;
1031 AddrTileInfoIn
.tileSplitBytes
= surf
->u
.legacy
.tile_split
;
1032 AddrTileInfoIn
.pipeConfig
= surf
->u
.legacy
.pipe_config
+ 1; /* +1 compared to GB_TILE_MODE */
1033 AddrSurfInfoIn
.flags
.opt4Space
= 0;
1034 AddrSurfInfoIn
.pTileInfo
= &AddrTileInfoIn
;
1036 /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
1037 * the tile index, because we are expected to know it if
1038 * we know the other parameters.
1040 * This is something that can easily be fixed in Addrlib.
1041 * For now, just figure it out here.
1042 * Note that only 2D_TILE_THIN1 is handled here.
1044 assert(!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
));
1045 assert(AddrSurfInfoIn
.tileMode
== ADDR_TM_2D_TILED_THIN1
);
1047 if (info
->chip_class
== GFX6
) {
1048 if (AddrSurfInfoIn
.tileType
== ADDR_DISPLAYABLE
) {
1050 AddrSurfInfoIn
.tileIndex
= 11; /* 16bpp */
1052 AddrSurfInfoIn
.tileIndex
= 12; /* 32bpp */
1055 AddrSurfInfoIn
.tileIndex
= 14; /* 8bpp */
1056 else if (surf
->bpe
== 2)
1057 AddrSurfInfoIn
.tileIndex
= 15; /* 16bpp */
1058 else if (surf
->bpe
== 4)
1059 AddrSurfInfoIn
.tileIndex
= 16; /* 32bpp */
1061 AddrSurfInfoIn
.tileIndex
= 17; /* 64bpp (and 128bpp) */
1065 if (AddrSurfInfoIn
.tileType
== ADDR_DISPLAYABLE
)
1066 AddrSurfInfoIn
.tileIndex
= 10; /* 2D displayable */
1068 AddrSurfInfoIn
.tileIndex
= 14; /* 2D non-displayable */
1070 /* Addrlib doesn't set this if tileIndex is forced like above. */
1071 AddrSurfInfoOut
.macroModeIndex
= cik_get_macro_tile_index(surf
);
1075 surf
->has_stencil
= !!(surf
->flags
& RADEON_SURF_SBUFFER
);
1076 surf
->num_dcc_levels
= 0;
1077 surf
->surf_size
= 0;
1079 surf
->dcc_alignment
= 1;
1080 surf
->htile_size
= 0;
1081 surf
->htile_slice_size
= 0;
1082 surf
->htile_alignment
= 1;
1084 const bool only_stencil
= (surf
->flags
& RADEON_SURF_SBUFFER
) &&
1085 !(surf
->flags
& RADEON_SURF_ZBUFFER
);
1087 /* Calculate texture layout information. */
1088 if (!only_stencil
) {
1089 for (level
= 0; level
< config
->info
.levels
; level
++) {
1090 r
= gfx6_compute_level(addrlib
, config
, surf
, false, level
, compressed
,
1091 &AddrSurfInfoIn
, &AddrSurfInfoOut
,
1092 &AddrDccIn
, &AddrDccOut
, &AddrHtileIn
, &AddrHtileOut
);
1099 if (!AddrSurfInfoOut
.tcCompatible
) {
1100 AddrSurfInfoIn
.flags
.tcCompatible
= 0;
1101 surf
->flags
&= ~RADEON_SURF_TC_COMPATIBLE_HTILE
;
1104 if (AddrSurfInfoIn
.flags
.matchStencilTileCfg
) {
1105 AddrSurfInfoIn
.flags
.matchStencilTileCfg
= 0;
1106 AddrSurfInfoIn
.tileIndex
= AddrSurfInfoOut
.tileIndex
;
1107 stencil_tile_idx
= AddrSurfInfoOut
.stencilTileIdx
;
1109 assert(stencil_tile_idx
>= 0);
1112 r
= gfx6_surface_settings(addrlib
, info
, config
,
1113 &AddrSurfInfoOut
, surf
);
1119 /* Calculate texture layout information for stencil. */
1120 if (surf
->flags
& RADEON_SURF_SBUFFER
) {
1121 AddrSurfInfoIn
.tileIndex
= stencil_tile_idx
;
1122 AddrSurfInfoIn
.bpp
= 8;
1123 AddrSurfInfoIn
.flags
.depth
= 0;
1124 AddrSurfInfoIn
.flags
.stencil
= 1;
1125 AddrSurfInfoIn
.flags
.tcCompatible
= 0;
1126 /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
1127 AddrTileInfoIn
.tileSplitBytes
= surf
->u
.legacy
.stencil_tile_split
;
1129 for (level
= 0; level
< config
->info
.levels
; level
++) {
1130 r
= gfx6_compute_level(addrlib
, config
, surf
, true, level
, compressed
,
1131 &AddrSurfInfoIn
, &AddrSurfInfoOut
,
1132 &AddrDccIn
, &AddrDccOut
,
1137 /* DB uses the depth pitch for both stencil and depth. */
1138 if (!only_stencil
) {
1139 if (surf
->u
.legacy
.stencil_level
[level
].nblk_x
!=
1140 surf
->u
.legacy
.level
[level
].nblk_x
)
1141 surf
->u
.legacy
.stencil_adjusted
= true;
1143 surf
->u
.legacy
.level
[level
].nblk_x
=
1144 surf
->u
.legacy
.stencil_level
[level
].nblk_x
;
1149 r
= gfx6_surface_settings(addrlib
, info
, config
,
1150 &AddrSurfInfoOut
, surf
);
1155 /* For 2D modes only. */
1156 if (AddrSurfInfoOut
.tileMode
>= ADDR_TM_2D_TILED_THIN1
) {
1157 surf
->u
.legacy
.stencil_tile_split
=
1158 AddrSurfInfoOut
.pTileInfo
->tileSplitBytes
;
1164 /* Compute FMASK. */
1165 if (config
->info
.samples
>= 2 && AddrSurfInfoIn
.flags
.color
&&
1166 info
->has_graphics
&& !(surf
->flags
& RADEON_SURF_NO_FMASK
)) {
1167 ADDR_COMPUTE_FMASK_INFO_INPUT fin
= {0};
1168 ADDR_COMPUTE_FMASK_INFO_OUTPUT fout
= {0};
1169 ADDR_TILEINFO fmask_tile_info
= {};
1171 fin
.size
= sizeof(fin
);
1172 fout
.size
= sizeof(fout
);
1174 fin
.tileMode
= AddrSurfInfoOut
.tileMode
;
1175 fin
.pitch
= AddrSurfInfoOut
.pitch
;
1176 fin
.height
= config
->info
.height
;
1177 fin
.numSlices
= AddrSurfInfoIn
.numSlices
;
1178 fin
.numSamples
= AddrSurfInfoIn
.numSamples
;
1179 fin
.numFrags
= AddrSurfInfoIn
.numFrags
;
1181 fout
.pTileInfo
= &fmask_tile_info
;
1183 r
= AddrComputeFmaskInfo(addrlib
, &fin
, &fout
);
1187 surf
->fmask_size
= fout
.fmaskBytes
;
1188 surf
->fmask_alignment
= fout
.baseAlign
;
1189 surf
->fmask_tile_swizzle
= 0;
1191 surf
->u
.legacy
.fmask
.slice_tile_max
=
1192 (fout
.pitch
* fout
.height
) / 64;
1193 if (surf
->u
.legacy
.fmask
.slice_tile_max
)
1194 surf
->u
.legacy
.fmask
.slice_tile_max
-= 1;
1196 surf
->u
.legacy
.fmask
.tiling_index
= fout
.tileIndex
;
1197 surf
->u
.legacy
.fmask
.bankh
= fout
.pTileInfo
->bankHeight
;
1198 surf
->u
.legacy
.fmask
.pitch_in_pixels
= fout
.pitch
;
1199 surf
->u
.legacy
.fmask
.slice_size
= fout
.sliceSize
;
1201 /* Compute tile swizzle for FMASK. */
1202 if (config
->info
.fmask_surf_index
&&
1203 !(surf
->flags
& RADEON_SURF_SHAREABLE
)) {
1204 ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin
= {0};
1205 ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout
= {0};
1207 xin
.size
= sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT
);
1208 xout
.size
= sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT
);
1210 /* This counter starts from 1 instead of 0. */
1211 xin
.surfIndex
= p_atomic_inc_return(config
->info
.fmask_surf_index
);
1212 xin
.tileIndex
= fout
.tileIndex
;
1213 xin
.macroModeIndex
= fout
.macroModeIndex
;
1214 xin
.pTileInfo
= fout
.pTileInfo
;
1215 xin
.tileMode
= fin
.tileMode
;
1217 int r
= AddrComputeBaseSwizzle(addrlib
, &xin
, &xout
);
1221 assert(xout
.tileSwizzle
<=
1222 u_bit_consecutive(0, sizeof(surf
->tile_swizzle
) * 8));
1223 surf
->fmask_tile_swizzle
= xout
.tileSwizzle
;
1227 /* Recalculate the whole DCC miptree size including disabled levels.
1228 * This is what addrlib does, but calling addrlib would be a lot more
1231 if (surf
->dcc_size
&& config
->info
.levels
> 1) {
1232 /* The smallest miplevels that are never compressed by DCC
1233 * still read the DCC buffer via TC if the base level uses DCC,
1234 * and for some reason the DCC buffer needs to be larger if
1235 * the miptree uses non-zero tile_swizzle. Otherwise there are
1238 * "dcc_alignment * 4" was determined by trial and error.
1240 surf
->dcc_size
= align64(surf
->surf_size
>> 8,
1241 surf
->dcc_alignment
* 4);
1244 /* Make sure HTILE covers the whole miptree, because the shader reads
1245 * TC-compatible HTILE even for levels where it's disabled by DB.
1247 if (surf
->htile_size
&& config
->info
.levels
> 1 &&
1248 surf
->flags
& RADEON_SURF_TC_COMPATIBLE_HTILE
) {
1249 /* MSAA can't occur with levels > 1, so ignore the sample count. */
1250 const unsigned total_pixels
= surf
->surf_size
/ surf
->bpe
;
1251 const unsigned htile_block_size
= 8 * 8;
1252 const unsigned htile_element_size
= 4;
1254 surf
->htile_size
= (total_pixels
/ htile_block_size
) *
1256 surf
->htile_size
= align(surf
->htile_size
, surf
->htile_alignment
);
1257 } else if (!surf
->htile_size
) {
1258 /* Unset this if HTILE is not present. */
1259 surf
->flags
&= ~RADEON_SURF_TC_COMPATIBLE_HTILE
;
1262 surf
->is_linear
= surf
->u
.legacy
.level
[0].mode
== RADEON_SURF_MODE_LINEAR_ALIGNED
;
1263 surf
->is_displayable
= surf
->is_linear
||
1264 surf
->micro_tile_mode
== RADEON_MICRO_MODE_DISPLAY
||
1265 surf
->micro_tile_mode
== RADEON_MICRO_MODE_RENDER
;
1267 /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
1268 * used at the same time. This case is not currently expected to occur
1269 * because we don't use rotated. Enforce this restriction on all chips
1270 * to facilitate testing.
1272 if (surf
->micro_tile_mode
== RADEON_MICRO_MODE_RENDER
) {
1273 assert(!"rotate micro tile mode is unsupported");
1277 ac_compute_cmask(info
, config
, surf
);
/* This is only called when expecting a tiled layout.
 *
 * Ask addrlib for the preferred swizzle (tiling) mode for a GFX9+ surface.
 * The selection is driven by the surface flags, format, dimensions and
 * sample counts copied from *in; the result is written to *swizzle_mode.
 *
 * \param addrlib       addrlib handle for this device
 * \param surf          surface being laid out (read-only here: flags and
 *                      micro_tile_mode steer the preference)
 * \param in            the surface-info input whose fields describe the image
 * \param is_fmask      true when choosing a mode for the FMASK metadata
 *                      surface rather than the color surface itself
 * \param swizzle_mode  receives the chosen mode on success
 * \return ADDR_OK (0) on success, an addrlib error code otherwise.
 */
static int
gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib,
				struct radeon_surf *surf,
				ADDR2_COMPUTE_SURFACE_INFO_INPUT *in,
				bool is_fmask, AddrSwizzleMode *swizzle_mode)
{
	ADDR_E_RETURNCODE ret;
	ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0};
	ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout = {0};

	sin.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT);
	sout.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT);

	sin.flags = in->flags;
	sin.resourceType = in->resourceType;
	sin.format = in->format;
	/* Assume the surface lives in invisible VRAM; affects addrlib's choice. */
	sin.resourceLoction = ADDR_RSRC_LOC_INVIS;
	/* TODO: We could allow some of these: */
	sin.forbiddenBlock.micro = 1; /* don't allow the 256B swizzle modes */
	sin.forbiddenBlock.var = 1; /* don't allow the variable-sized swizzle modes */
	sin.bpp = in->bpp; /* NOTE(review): reconstructed line — verify against upstream */
	sin.width = in->width;
	sin.height = in->height;
	sin.numSlices = in->numSlices;
	sin.numMipLevels = in->numMipLevels;
	sin.numSamples = in->numSamples;
	sin.numFrags = in->numFrags;

	if (is_fmask) {
		/* FMASK is neither displayable nor a color surface in addrlib terms. */
		sin.flags.display = 0;
		sin.flags.color = 0;
		sin.flags.fmask = 1;
	}

	if (surf->flags & RADEON_SURF_FORCE_MICRO_TILE_MODE) {
		/* The caller pinned the micro tile mode; forbid linear and
		 * restrict addrlib to the matching swizzle family. */
		sin.forbiddenBlock.linear = 1;

		if (surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)
			sin.preferredSwSet.sw_D = 1;
		else if (surf->micro_tile_mode == RADEON_MICRO_MODE_STANDARD)
			sin.preferredSwSet.sw_S = 1;
		else if (surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH)
			sin.preferredSwSet.sw_Z = 1;
		else if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER)
			sin.preferredSwSet.sw_R = 1;
	}

	ret = Addr2GetPreferredSurfaceSetting(addrlib, &sin, &sout);
	if (ret != ADDR_OK)
		return ret;

	*swizzle_mode = sout.swizzleMode;
	return 0;
}
1337 static bool is_dcc_supported_by_CB(const struct radeon_info
*info
, unsigned sw_mode
)
1339 if (info
->chip_class
>= GFX10
)
1340 return sw_mode
== ADDR_SW_64KB_Z_X
|| sw_mode
== ADDR_SW_64KB_R_X
;
1342 return sw_mode
!= ADDR_SW_LINEAR
;
1345 ASSERTED
static bool is_dcc_supported_by_L2(const struct radeon_info
*info
,
1346 const struct radeon_surf
*surf
)
1348 if (info
->chip_class
<= GFX9
) {
1349 /* Only independent 64B blocks are supported. */
1350 return surf
->u
.gfx9
.dcc
.independent_64B_blocks
&&
1351 !surf
->u
.gfx9
.dcc
.independent_128B_blocks
&&
1352 surf
->u
.gfx9
.dcc
.max_compressed_block_size
== V_028C78_MAX_BLOCK_SIZE_64B
;
1355 if (info
->family
== CHIP_NAVI10
) {
1356 /* Only independent 128B blocks are supported. */
1357 return !surf
->u
.gfx9
.dcc
.independent_64B_blocks
&&
1358 surf
->u
.gfx9
.dcc
.independent_128B_blocks
&&
1359 surf
->u
.gfx9
.dcc
.max_compressed_block_size
<= V_028C78_MAX_BLOCK_SIZE_128B
;
1362 if (info
->family
== CHIP_NAVI12
||
1363 info
->family
== CHIP_NAVI14
) {
1364 /* Either 64B or 128B can be used, but not both.
1365 * If 64B is used, DCC image stores are unsupported.
1367 return surf
->u
.gfx9
.dcc
.independent_64B_blocks
!=
1368 surf
->u
.gfx9
.dcc
.independent_128B_blocks
&&
1369 (!surf
->u
.gfx9
.dcc
.independent_64B_blocks
||
1370 surf
->u
.gfx9
.dcc
.max_compressed_block_size
== V_028C78_MAX_BLOCK_SIZE_64B
) &&
1371 (!surf
->u
.gfx9
.dcc
.independent_128B_blocks
||
1372 surf
->u
.gfx9
.dcc
.max_compressed_block_size
<= V_028C78_MAX_BLOCK_SIZE_128B
);
1375 /* 128B is recommended, but 64B can be set too if needed for 4K by DCN.
1376 * Since there is no reason to ever disable 128B, require it.
1377 * DCC image stores are always supported.
1379 return surf
->u
.gfx9
.dcc
.independent_128B_blocks
&&
1380 surf
->u
.gfx9
.dcc
.max_compressed_block_size
<= V_028C78_MAX_BLOCK_SIZE_128B
;
/* Return whether the display controller (DCN) can scan out a DCC-compressed
 * surface with the given metadata alignment.
 *
 * \param rb_aligned    DCC key is RB-aligned
 * \param pipe_aligned  DCC key is pipe (L2-cache) aligned
 * \return true when at least one display-DCC strategy (fully unaligned DCC,
 *         or a retile blit from aligned to unaligned DCC) is usable.
 */
static bool is_dcc_supported_by_DCN(const struct radeon_info *info,
				    const struct ac_surf_config *config,
				    const struct radeon_surf *surf,
				    bool rb_aligned, bool pipe_aligned)
{
	/* Without either display-DCC strategy enabled, scanout of DCC is off. */
	if (!info->use_display_dcc_unaligned &&
	    !info->use_display_dcc_with_retile_blit)
		return false;

	/* 16bpp and 64bpp are more complicated, so they are disallowed for now. */
	/* NOTE(review): reconstructed check — upstream restricts this path to 32bpp. */
	if (surf->bpe != 4)
		return false;

	/* Handle unaligned DCC. DCN can only read RB/pipe-unaligned DCC directly,
	 * so the unaligned strategy is incompatible with any alignment. */
	if (info->use_display_dcc_unaligned &&
	    (rb_aligned || pipe_aligned))
		return false;

	switch (info->chip_class) {
	case GFX9:
		/* There are more constraints, but we always set
		 * INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B,
		 * which always works.
		 */
		assert(surf->u.gfx9.dcc.independent_64B_blocks &&
		       surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
		return true;
	case GFX10:
	case GFX10_3:
		/* DCN requires INDEPENDENT_128B_BLOCKS = 0 only on Navi1x. */
		if (info->chip_class == GFX10 &&
		    surf->u.gfx9.dcc.independent_128B_blocks)
			return false;

		/* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1. */
		return ((config->info.width <= 2560 &&
			 config->info.height <= 2560) ||
			(surf->u.gfx9.dcc.independent_64B_blocks &&
			 surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B));
	default:
		unreachable("unhandled chip");
		return false;
	}
}
/* Compute the full GFX9+ miptree layout for one aspect of a surface.
 *
 * Fills in the main surface layout (or the stencil layout when
 * in->flags.stencil is set) and, for the color path, the associated
 * metadata surfaces: DCC (plus displayable DCC and the retile map),
 * FMASK and CMASK. For the depth path it computes HTILE instead.
 *
 * \param addrlib     wrapped addrlib instance (handle + retile-map cache)
 * \param info        chip information
 * \param config      image dimensions/samples/levels
 * \param surf        output surface; layout fields are written here
 * \param compressed  true for block-compressed (BCn) formats
 * \param in          pre-filled addrlib surface-info input (swizzle mode,
 *                    flags, sizes); also reused to derive metadata queries
 * \return ADDR_OK (0) on success, an addrlib error code on failure.
 *
 * NOTE(review): the error-check lines after each addrlib call and the
 * closing braces were reconstructed during reformatting — verify against
 * upstream before relying on exact control flow.
 */
static int gfx9_compute_miptree(struct ac_addrlib *addrlib,
				const struct radeon_info *info,
				const struct ac_surf_config *config,
				struct radeon_surf *surf, bool compressed,
				ADDR2_COMPUTE_SURFACE_INFO_INPUT *in)
{
	ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {};
	ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
	ADDR_E_RETURNCODE ret;

	out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
	out.pMipInfo = mip_info;

	ret = Addr2ComputeSurfaceInfo(addrlib->handle, in, &out);
	if (ret != ADDR_OK)
		return ret;

	if (in->flags.stencil) {
		/* Stencil aspect: append it after the depth surface and record
		 * its own swizzle mode and epitch, then we are done. */
		surf->u.gfx9.stencil.swizzle_mode = in->swizzleMode;
		surf->u.gfx9.stencil.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 :
								   out.mipChainPitch - 1;
		surf->surf_alignment = MAX2(surf->surf_alignment, out.baseAlign);
		surf->u.gfx9.stencil_offset = align(surf->surf_size, out.baseAlign);
		surf->surf_size = surf->u.gfx9.stencil_offset + out.surfSize;
		return 0;
	}

	surf->u.gfx9.surf.swizzle_mode = in->swizzleMode;
	surf->u.gfx9.surf.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 :
							out.mipChainPitch - 1;

	/* CMASK fast clear uses these even if FMASK isn't allocated.
	 * FMASK only supports the Z swizzle modes, whose numbers are multiples of 4.
	 */
	surf->u.gfx9.fmask.swizzle_mode = surf->u.gfx9.surf.swizzle_mode & ~0x3;
	surf->u.gfx9.fmask.epitch = surf->u.gfx9.surf.epitch;

	surf->u.gfx9.surf_slice_size = out.sliceSize;
	surf->u.gfx9.surf_pitch = out.pitch;
	surf->u.gfx9.surf_height = out.height;
	surf->surf_size = out.surfSize;
	surf->surf_alignment = out.baseAlign;

	if (!compressed && surf->blk_w > 1 && out.pitch == out.pixelPitch &&
	    surf->u.gfx9.surf.swizzle_mode == ADDR_SW_LINEAR) {
		/* Adjust surf_pitch to be in elements units not in pixels */
		surf->u.gfx9.surf_pitch =
			align(surf->u.gfx9.surf_pitch / surf->blk_w, 256 / surf->bpe);
		surf->u.gfx9.surf.epitch = MAX2(surf->u.gfx9.surf.epitch,
						surf->u.gfx9.surf_pitch * surf->blk_w - 1);
		/* The surface is really a surf->bpe bytes per pixel surface even if we
		 * use it as a surf->bpe bytes per element one.
		 * Adjust surf_slice_size and surf_size to reflect the change
		 * made to surf_pitch.
		 */
		surf->u.gfx9.surf_slice_size = MAX2(
			surf->u.gfx9.surf_slice_size,
			surf->u.gfx9.surf_pitch * out.height * surf->bpe * surf->blk_w);
		surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices;
	}

	if (in->swizzleMode == ADDR_SW_LINEAR) {
		/* Per-level offsets/pitches are only tracked for linear layouts. */
		for (unsigned i = 0; i < in->numMipLevels; i++) {
			surf->u.gfx9.offset[i] = mip_info[i].offset;
			surf->u.gfx9.pitch[i] = mip_info[i].pitch;
		}
	}

	if (in->flags.depth) {
		assert(in->swizzleMode != ADDR_SW_LINEAR);

		if (surf->flags & RADEON_SURF_NO_HTILE)
			return 0;

		/* HTILE */
		ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
		ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};

		hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);
		hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);

		/* HTILE must be RB- and pipe-aligned (see the caller's setup). */
		assert(in->flags.metaPipeUnaligned == 0);
		assert(in->flags.metaRbUnaligned == 0);

		hin.hTileFlags.pipeAligned = 1;
		hin.hTileFlags.rbAligned = 1;
		hin.depthFlags = in->flags;
		hin.swizzleMode = in->swizzleMode;
		hin.unalignedWidth = in->width;
		hin.unalignedHeight = in->height;
		hin.numSlices = in->numSlices;
		hin.numMipLevels = in->numMipLevels;
		hin.firstMipIdInTail = out.firstMipIdInTail;

		ret = Addr2ComputeHtileInfo(addrlib->handle, &hin, &hout);
		if (ret != ADDR_OK)
			return ret;

		surf->htile_size = hout.htileBytes;
		surf->htile_slice_size = hout.sliceSize;
		surf->htile_alignment = hout.baseAlign;
	} else {
		/* Compute tile swizzle for the color surface.
		 * All *_X and *_T modes can use the swizzle.
		 */
		if (config->info.surf_index &&
		    in->swizzleMode >= ADDR_SW_64KB_Z_T &&
		    !out.mipChainInTail &&
		    !(surf->flags & RADEON_SURF_SHAREABLE) &&
		    !in->flags.display) {
			ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
			ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};

			xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
			xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);

			xin.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
			xin.flags = in->flags;
			xin.swizzleMode = in->swizzleMode;
			xin.resourceType = in->resourceType;
			xin.format = in->format;
			xin.numSamples = in->numSamples;
			xin.numFrags = in->numFrags;

			ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
			if (ret != ADDR_OK)
				return ret;

			/* tile_swizzle is a small bitfield; the xor must fit. */
			assert(xout.pipeBankXor <=
			       u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
			surf->tile_swizzle = xout.pipeBankXor;
		}

		/* DCC */
		if (info->has_graphics &&
		    !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
		    !compressed &&
		    is_dcc_supported_by_CB(info, in->swizzleMode) &&
		    (!in->flags.display ||
		     is_dcc_supported_by_DCN(info, config, surf,
					     !in->flags.metaRbUnaligned,
					     !in->flags.metaPipeUnaligned))) {
			ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
			ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
			ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {};

			din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
			dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
			dout.pMipInfo = meta_mip_info;

			din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned;
			din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned;
			din.resourceType = in->resourceType;
			din.swizzleMode = in->swizzleMode;
			din.bpp = in->bpp; /* NOTE(review): reconstructed line — verify */
			din.unalignedWidth = in->width;
			din.unalignedHeight = in->height;
			din.numSlices = in->numSlices;
			din.numFrags = in->numFrags;
			din.numMipLevels = in->numMipLevels;
			din.dataSurfaceSize = out.surfSize;
			din.firstMipIdInTail = out.firstMipIdInTail;

			ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
			if (ret != ADDR_OK)
				return ret;

			surf->u.gfx9.dcc.rb_aligned = din.dccKeyFlags.rbAligned;
			surf->u.gfx9.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned;
			surf->u.gfx9.dcc_block_width = dout.compressBlkWidth;
			surf->u.gfx9.dcc_block_height = dout.compressBlkHeight;
			surf->u.gfx9.dcc_block_depth = dout.compressBlkDepth;
			surf->dcc_size = dout.dccRamSize;
			surf->dcc_alignment = dout.dccRamBaseAlign;
			surf->num_dcc_levels = in->numMipLevels;

			/* Disable DCC for levels that are in the mip tail.
			 *
			 * There are two issues that this is intended to
			 * address:
			 *
			 * 1. Multiple mip levels may share a cache line. This
			 *    can lead to corruption when switching between
			 *    rendering to different mip levels because the
			 *    RBs don't maintain coherency.
			 *
			 * 2. Texturing with metadata after rendering sometimes
			 *    fails with corruption, probably for a similar
			 *    reason.
			 *
			 * Working around these issues for all levels in the
			 * mip tail may be overly conservative, but it's what
			 * works.
			 *
			 * Alternative solutions that also work but are worse:
			 * - Disable DCC entirely.
			 * - Flush TC L2 after rendering.
			 */
			for (unsigned i = 0; i < in->numMipLevels; i++) {
				if (meta_mip_info[i].inMiptail) {
					/* GFX10 can only compress the first level
					 * in the mip tail.
					 *
					 * TODO: Try to do the same thing for gfx9
					 *       if there are no regressions.
					 */
					if (info->chip_class >= GFX10)
						surf->num_dcc_levels = i + 1;
					else
						surf->num_dcc_levels = i;
					break;
				}
			}

			if (!surf->num_dcc_levels)
				surf->dcc_size = 0;

			surf->u.gfx9.display_dcc_size = surf->dcc_size;
			surf->u.gfx9.display_dcc_alignment = surf->dcc_alignment;
			surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1;

			/* Compute displayable DCC. */
			if (in->flags.display &&
			    surf->num_dcc_levels &&
			    info->use_display_dcc_with_retile_blit) {
				/* Compute displayable DCC info. */
				din.dccKeyFlags.pipeAligned = 0;
				din.dccKeyFlags.rbAligned = 0;

				assert(din.numSlices == 1);
				assert(din.numMipLevels == 1);
				assert(din.numFrags == 1);
				assert(surf->tile_swizzle == 0);
				assert(surf->u.gfx9.dcc.pipe_aligned ||
				       surf->u.gfx9.dcc.rb_aligned);

				ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
				if (ret != ADDR_OK)
					return ret;

				surf->u.gfx9.display_dcc_size = dout.dccRamSize;
				surf->u.gfx9.display_dcc_alignment = dout.dccRamBaseAlign;
				surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1;
				assert(surf->u.gfx9.display_dcc_size <= surf->dcc_size);

				/* 16-bit retile map entries suffice iff both DCC
				 * buffers fit in 2^16 bytes. */
				surf->u.gfx9.dcc_retile_use_uint16 =
					surf->u.gfx9.display_dcc_size <= UINT16_MAX + 1 &&
					surf->dcc_size <= UINT16_MAX + 1;

				/* Align the retile map size to get more hash table hits and
				 * decrease the maximum memory footprint when all retile maps
				 * are cached in the hash table.
				 */
				unsigned retile_dim[2] = {in->width, in->height};

				for (unsigned i = 0; i < 2; i++) {
					/* Increase the alignment as the size increases.
					 * Greater alignment increases retile compute work,
					 * but decreases maximum memory footprint for the cache.
					 */
					if (retile_dim[i] <= 512)
						retile_dim[i] = align(retile_dim[i], 16);
					else if (retile_dim[i] <= 1024)
						retile_dim[i] = align(retile_dim[i], 32);
					else if (retile_dim[i] <= 2048)
						retile_dim[i] = align(retile_dim[i], 64);
					else
						retile_dim[i] = align(retile_dim[i], 128);

					/* Don't align more than the DCC pixel alignment. */
					assert(dout.metaBlkWidth >= 128 && dout.metaBlkHeight >= 128);
				}

				surf->u.gfx9.dcc_retile_num_elements =
					DIV_ROUND_UP(retile_dim[0], dout.compressBlkWidth) *
					DIV_ROUND_UP(retile_dim[1], dout.compressBlkHeight) * 2;
				/* Align the size to 4 (for the compute shader). */
				surf->u.gfx9.dcc_retile_num_elements =
					align(surf->u.gfx9.dcc_retile_num_elements, 4);

				if (!(surf->flags & RADEON_SURF_IMPORTED)) {
					/* Compute address mapping from non-displayable to displayable DCC. */
					ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin;
					memset(&addrin, 0, sizeof(addrin));
					addrin.size = sizeof(addrin);
					addrin.swizzleMode = din.swizzleMode;
					addrin.resourceType = din.resourceType;
					addrin.bpp = din.bpp;
					addrin.numSlices = 1;
					addrin.numMipLevels = 1;
					addrin.numFrags = 1;
					addrin.pitch = dout.pitch;
					addrin.height = dout.height;
					addrin.compressBlkWidth = dout.compressBlkWidth;
					addrin.compressBlkHeight = dout.compressBlkHeight;
					addrin.compressBlkDepth = dout.compressBlkDepth;
					addrin.metaBlkWidth = dout.metaBlkWidth;
					addrin.metaBlkHeight = dout.metaBlkHeight;
					addrin.metaBlkDepth = dout.metaBlkDepth;
					addrin.dccRamSliceSize = 0; /* Don't care for non-layered images. */

					surf->u.gfx9.dcc_retile_map =
						ac_compute_dcc_retile_map(addrlib, info,
									  retile_dim[0], retile_dim[1],
									  surf->u.gfx9.dcc.rb_aligned,
									  surf->u.gfx9.dcc.pipe_aligned,
									  surf->u.gfx9.dcc_retile_use_uint16,
									  surf->u.gfx9.dcc_retile_num_elements,
									  &addrin);
					if (!surf->u.gfx9.dcc_retile_map)
						return ADDR_OUTOFMEMORY;
				}
			}
		}

		/* FMASK */
		if (in->numSamples > 1 && info->has_graphics &&
		    !(surf->flags & RADEON_SURF_NO_FMASK)) {
			ADDR2_COMPUTE_FMASK_INFO_INPUT fin = {0};
			ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout = {0};

			fin.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT);
			fout.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT);

			ret = gfx9_get_preferred_swizzle_mode(addrlib->handle, surf, in,
							      true, &fin.swizzleMode);
			if (ret != ADDR_OK)
				return ret;

			fin.unalignedWidth = in->width;
			fin.unalignedHeight = in->height;
			fin.numSlices = in->numSlices;
			fin.numSamples = in->numSamples;
			fin.numFrags = in->numFrags;

			ret = Addr2ComputeFmaskInfo(addrlib->handle, &fin, &fout);
			if (ret != ADDR_OK)
				return ret;

			surf->u.gfx9.fmask.swizzle_mode = fin.swizzleMode;
			surf->u.gfx9.fmask.epitch = fout.pitch - 1;
			surf->fmask_size = fout.fmaskBytes;
			surf->fmask_alignment = fout.baseAlign;

			/* Compute tile swizzle for the FMASK surface. */
			if (config->info.fmask_surf_index &&
			    fin.swizzleMode >= ADDR_SW_64KB_Z_T &&
			    !(surf->flags & RADEON_SURF_SHAREABLE)) {
				ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
				ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};

				xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
				xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);

				/* This counter starts from 1 instead of 0. */
				xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
				xin.flags = in->flags;
				xin.swizzleMode = fin.swizzleMode;
				xin.resourceType = in->resourceType;
				xin.format = in->format;
				xin.numSamples = in->numSamples;
				xin.numFrags = in->numFrags;

				ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
				if (ret != ADDR_OK)
					return ret;

				assert(xout.pipeBankXor <=
				       u_bit_consecutive(0, sizeof(surf->fmask_tile_swizzle) * 8));
				surf->fmask_tile_swizzle = xout.pipeBankXor;
			}
		}

		/* CMASK -- on GFX10 only for FMASK */
		if (in->swizzleMode != ADDR_SW_LINEAR &&
		    in->resourceType == ADDR_RSRC_TEX_2D &&
		    ((info->chip_class <= GFX9 &&
		      in->numSamples == 1 &&
		      in->flags.metaPipeUnaligned == 0 &&
		      in->flags.metaRbUnaligned == 0) ||
		     (surf->fmask_size && in->numSamples >= 2))) {
			ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0};
			ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0};

			cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT);
			cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT);

			assert(in->flags.metaPipeUnaligned == 0);
			assert(in->flags.metaRbUnaligned == 0);

			cin.cMaskFlags.pipeAligned = 1;
			cin.cMaskFlags.rbAligned = 1;
			cin.resourceType = in->resourceType;
			cin.unalignedWidth = in->width;
			cin.unalignedHeight = in->height;
			cin.numSlices = in->numSlices;

			/* With MSAA, CMASK is tied to FMASK's swizzle mode. */
			if (in->numSamples > 1)
				cin.swizzleMode = surf->u.gfx9.fmask.swizzle_mode;
			else
				cin.swizzleMode = in->swizzleMode;

			ret = Addr2ComputeCmaskInfo(addrlib->handle, &cin, &cout);
			if (ret != ADDR_OK)
				return ret;

			surf->cmask_size = cout.cmaskBytes;
			surf->cmask_alignment = cout.baseAlign;
		}
	}

	return 0;
}
1863 static int gfx9_compute_surface(struct ac_addrlib
*addrlib
,
1864 const struct radeon_info
*info
,
1865 const struct ac_surf_config
*config
,
1866 enum radeon_surf_mode mode
,
1867 struct radeon_surf
*surf
)
1870 ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn
= {0};
1873 AddrSurfInfoIn
.size
= sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT
);
1875 compressed
= surf
->blk_w
== 4 && surf
->blk_h
== 4;
1877 /* The format must be set correctly for the allocation of compressed
1878 * textures to work. In other cases, setting the bpp is sufficient. */
1880 switch (surf
->bpe
) {
1882 AddrSurfInfoIn
.format
= ADDR_FMT_BC1
;
1885 AddrSurfInfoIn
.format
= ADDR_FMT_BC3
;
1891 switch (surf
->bpe
) {
1893 assert(!(surf
->flags
& RADEON_SURF_ZBUFFER
));
1894 AddrSurfInfoIn
.format
= ADDR_FMT_8
;
1897 assert(surf
->flags
& RADEON_SURF_ZBUFFER
||
1898 !(surf
->flags
& RADEON_SURF_SBUFFER
));
1899 AddrSurfInfoIn
.format
= ADDR_FMT_16
;
1902 assert(surf
->flags
& RADEON_SURF_ZBUFFER
||
1903 !(surf
->flags
& RADEON_SURF_SBUFFER
));
1904 AddrSurfInfoIn
.format
= ADDR_FMT_32
;
1907 assert(!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
));
1908 AddrSurfInfoIn
.format
= ADDR_FMT_32_32
;
1911 assert(!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
));
1912 AddrSurfInfoIn
.format
= ADDR_FMT_32_32_32
;
1915 assert(!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
));
1916 AddrSurfInfoIn
.format
= ADDR_FMT_32_32_32_32
;
1921 AddrSurfInfoIn
.bpp
= surf
->bpe
* 8;
1924 bool is_color_surface
= !(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
);
1925 AddrSurfInfoIn
.flags
.color
= is_color_surface
&&
1926 !(surf
->flags
& RADEON_SURF_NO_RENDER_TARGET
);
1927 AddrSurfInfoIn
.flags
.depth
= (surf
->flags
& RADEON_SURF_ZBUFFER
) != 0;
1928 AddrSurfInfoIn
.flags
.display
= get_display_flag(config
, surf
);
1929 /* flags.texture currently refers to TC-compatible HTILE */
1930 AddrSurfInfoIn
.flags
.texture
= is_color_surface
||
1931 surf
->flags
& RADEON_SURF_TC_COMPATIBLE_HTILE
;
1932 AddrSurfInfoIn
.flags
.opt4space
= 1;
1934 AddrSurfInfoIn
.numMipLevels
= config
->info
.levels
;
1935 AddrSurfInfoIn
.numSamples
= MAX2(1, config
->info
.samples
);
1936 AddrSurfInfoIn
.numFrags
= AddrSurfInfoIn
.numSamples
;
1938 if (!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
))
1939 AddrSurfInfoIn
.numFrags
= MAX2(1, config
->info
.storage_samples
);
1941 /* GFX9 doesn't support 1D depth textures, so allocate all 1D textures
1942 * as 2D to avoid having shader variants for 1D vs 2D, so all shaders
1943 * must sample 1D textures as 2D. */
1945 AddrSurfInfoIn
.resourceType
= ADDR_RSRC_TEX_3D
;
1946 else if (info
->chip_class
!= GFX9
&& config
->is_1d
)
1947 AddrSurfInfoIn
.resourceType
= ADDR_RSRC_TEX_1D
;
1949 AddrSurfInfoIn
.resourceType
= ADDR_RSRC_TEX_2D
;
1951 AddrSurfInfoIn
.width
= config
->info
.width
;
1952 AddrSurfInfoIn
.height
= config
->info
.height
;
1955 AddrSurfInfoIn
.numSlices
= config
->info
.depth
;
1956 else if (config
->is_cube
)
1957 AddrSurfInfoIn
.numSlices
= 6;
1959 AddrSurfInfoIn
.numSlices
= config
->info
.array_size
;
1961 /* This is propagated to DCC. It must be 0 for HTILE and CMASK. */
1962 AddrSurfInfoIn
.flags
.metaPipeUnaligned
= 0;
1963 AddrSurfInfoIn
.flags
.metaRbUnaligned
= 0;
1965 /* Optimal values for the L2 cache. */
1966 if (info
->chip_class
== GFX9
) {
1967 surf
->u
.gfx9
.dcc
.independent_64B_blocks
= 1;
1968 surf
->u
.gfx9
.dcc
.independent_128B_blocks
= 0;
1969 surf
->u
.gfx9
.dcc
.max_compressed_block_size
= V_028C78_MAX_BLOCK_SIZE_64B
;
1970 } else if (info
->chip_class
>= GFX10
) {
1971 surf
->u
.gfx9
.dcc
.independent_64B_blocks
= 0;
1972 surf
->u
.gfx9
.dcc
.independent_128B_blocks
= 1;
1973 surf
->u
.gfx9
.dcc
.max_compressed_block_size
= V_028C78_MAX_BLOCK_SIZE_128B
;
1976 if (AddrSurfInfoIn
.flags
.display
) {
1977 /* The display hardware can only read DCC with RB_ALIGNED=0 and
1978 * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED.
1980 * The CB block requires RB_ALIGNED=1 except 1 RB chips.
1981 * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes
1982 * after rendering, so PIPE_ALIGNED=1 is recommended.
1984 if (info
->use_display_dcc_unaligned
) {
1985 AddrSurfInfoIn
.flags
.metaPipeUnaligned
= 1;
1986 AddrSurfInfoIn
.flags
.metaRbUnaligned
= 1;
1989 /* Adjust DCC settings to meet DCN requirements. */
1990 if (info
->use_display_dcc_unaligned
||
1991 info
->use_display_dcc_with_retile_blit
) {
1992 /* Only Navi12/14 support independent 64B blocks in L2,
1993 * but without DCC image stores.
1995 if (info
->family
== CHIP_NAVI12
||
1996 info
->family
== CHIP_NAVI14
) {
1997 surf
->u
.gfx9
.dcc
.independent_64B_blocks
= 1;
1998 surf
->u
.gfx9
.dcc
.independent_128B_blocks
= 0;
1999 surf
->u
.gfx9
.dcc
.max_compressed_block_size
= V_028C78_MAX_BLOCK_SIZE_64B
;
2002 if (info
->chip_class
>= GFX10_3
) {
2003 surf
->u
.gfx9
.dcc
.independent_64B_blocks
= 1;
2004 surf
->u
.gfx9
.dcc
.independent_128B_blocks
= 1;
2005 surf
->u
.gfx9
.dcc
.max_compressed_block_size
= V_028C78_MAX_BLOCK_SIZE_64B
;
2011 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
2012 assert(config
->info
.samples
<= 1);
2013 assert(!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
));
2014 AddrSurfInfoIn
.swizzleMode
= ADDR_SW_LINEAR
;
2017 case RADEON_SURF_MODE_1D
:
2018 case RADEON_SURF_MODE_2D
:
2019 if (surf
->flags
& RADEON_SURF_IMPORTED
||
2020 (info
->chip_class
>= GFX10
&&
2021 surf
->flags
& RADEON_SURF_FORCE_SWIZZLE_MODE
)) {
2022 AddrSurfInfoIn
.swizzleMode
= surf
->u
.gfx9
.surf
.swizzle_mode
;
2026 r
= gfx9_get_preferred_swizzle_mode(addrlib
->handle
, surf
, &AddrSurfInfoIn
,
2027 false, &AddrSurfInfoIn
.swizzleMode
);
2036 surf
->u
.gfx9
.resource_type
= AddrSurfInfoIn
.resourceType
;
2037 surf
->has_stencil
= !!(surf
->flags
& RADEON_SURF_SBUFFER
);
2039 surf
->num_dcc_levels
= 0;
2040 surf
->surf_size
= 0;
2041 surf
->fmask_size
= 0;
2043 surf
->htile_size
= 0;
2044 surf
->htile_slice_size
= 0;
2045 surf
->u
.gfx9
.surf_offset
= 0;
2046 surf
->u
.gfx9
.stencil_offset
= 0;
2047 surf
->cmask_size
= 0;
2048 surf
->u
.gfx9
.dcc_retile_use_uint16
= false;
2049 surf
->u
.gfx9
.dcc_retile_num_elements
= 0;
2050 surf
->u
.gfx9
.dcc_retile_map
= NULL
;
2052 /* Calculate texture layout information. */
2053 r
= gfx9_compute_miptree(addrlib
, info
, config
, surf
, compressed
,
2058 /* Calculate texture layout information for stencil. */
2059 if (surf
->flags
& RADEON_SURF_SBUFFER
) {
2060 AddrSurfInfoIn
.flags
.stencil
= 1;
2061 AddrSurfInfoIn
.bpp
= 8;
2062 AddrSurfInfoIn
.format
= ADDR_FMT_8
;
2064 if (!AddrSurfInfoIn
.flags
.depth
) {
2065 r
= gfx9_get_preferred_swizzle_mode(addrlib
->handle
, surf
, &AddrSurfInfoIn
,
2066 false, &AddrSurfInfoIn
.swizzleMode
);
2070 AddrSurfInfoIn
.flags
.depth
= 0;
2072 r
= gfx9_compute_miptree(addrlib
, info
, config
, surf
, compressed
,
2078 surf
->is_linear
= surf
->u
.gfx9
.surf
.swizzle_mode
== ADDR_SW_LINEAR
;
2080 /* Query whether the surface is displayable. */
2081 /* This is only useful for surfaces that are allocated without SCANOUT. */
2082 bool displayable
= false;
2083 if (!config
->is_3d
&& !config
->is_cube
) {
2084 r
= Addr2IsValidDisplaySwizzleMode(addrlib
->handle
, surf
->u
.gfx9
.surf
.swizzle_mode
,
2085 surf
->bpe
* 8, &displayable
);
2089 /* Display needs unaligned DCC. */
2090 if (surf
->num_dcc_levels
&&
2091 (!is_dcc_supported_by_DCN(info
, config
, surf
,
2092 surf
->u
.gfx9
.dcc
.rb_aligned
,
2093 surf
->u
.gfx9
.dcc
.pipe_aligned
) ||
2094 /* Don't set is_displayable if displayable DCC is missing. */
2095 (info
->use_display_dcc_with_retile_blit
&&
2096 !surf
->u
.gfx9
.dcc_retile_num_elements
)))
2097 displayable
= false;
2099 surf
->is_displayable
= displayable
;
2101 /* Validate that we allocated a displayable surface if requested. */
2102 assert(!AddrSurfInfoIn
.flags
.display
|| surf
->is_displayable
);
2104 /* Validate that DCC is set up correctly. */
2105 if (surf
->num_dcc_levels
) {
2106 assert(is_dcc_supported_by_L2(info
, surf
));
2107 if (AddrSurfInfoIn
.flags
.color
)
2108 assert(is_dcc_supported_by_CB(info
, surf
->u
.gfx9
.surf
.swizzle_mode
));
2109 if (AddrSurfInfoIn
.flags
.display
) {
2110 assert(is_dcc_supported_by_DCN(info
, config
, surf
,
2111 surf
->u
.gfx9
.dcc
.rb_aligned
,
2112 surf
->u
.gfx9
.dcc
.pipe_aligned
));
2116 if (info
->has_graphics
&&
2119 config
->info
.levels
== 1 &&
2120 AddrSurfInfoIn
.flags
.color
&&
2122 surf
->surf_alignment
>= 64 * 1024 && /* 64KB tiling */
2123 !(surf
->flags
& (RADEON_SURF_DISABLE_DCC
|
2124 RADEON_SURF_FORCE_SWIZZLE_MODE
|
2125 RADEON_SURF_FORCE_MICRO_TILE_MODE
))) {
2126 /* Validate that DCC is enabled if DCN can do it. */
2127 if ((info
->use_display_dcc_unaligned
||
2128 info
->use_display_dcc_with_retile_blit
) &&
2129 AddrSurfInfoIn
.flags
.display
&&
2131 assert(surf
->num_dcc_levels
);
2134 /* Validate that non-scanout DCC is always enabled. */
2135 if (!AddrSurfInfoIn
.flags
.display
)
2136 assert(surf
->num_dcc_levels
);
2139 if (!surf
->htile_size
) {
2140 /* Unset this if HTILE is not present. */
2141 surf
->flags
&= ~RADEON_SURF_TC_COMPATIBLE_HTILE
;
2144 switch (surf
->u
.gfx9
.surf
.swizzle_mode
) {
2146 case ADDR_SW_256B_S
:
2148 case ADDR_SW_64KB_S
:
2149 case ADDR_SW_64KB_S_T
:
2150 case ADDR_SW_4KB_S_X
:
2151 case ADDR_SW_64KB_S_X
:
2152 surf
->micro_tile_mode
= RADEON_MICRO_MODE_STANDARD
;
2156 case ADDR_SW_LINEAR
:
2157 case ADDR_SW_256B_D
:
2159 case ADDR_SW_64KB_D
:
2160 case ADDR_SW_64KB_D_T
:
2161 case ADDR_SW_4KB_D_X
:
2162 case ADDR_SW_64KB_D_X
:
2163 surf
->micro_tile_mode
= RADEON_MICRO_MODE_DISPLAY
;
2166 /* R = rotated (gfx9), render target (gfx10). */
2167 case ADDR_SW_256B_R
:
2169 case ADDR_SW_64KB_R
:
2170 case ADDR_SW_64KB_R_T
:
2171 case ADDR_SW_4KB_R_X
:
2172 case ADDR_SW_64KB_R_X
:
2173 case ADDR_SW_VAR_R_X
:
2174 /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
2175 * used at the same time. We currently do not use rotated
2178 assert(info
->chip_class
>= GFX10
||
2179 !"rotate micro tile mode is unsupported");
2180 surf
->micro_tile_mode
= RADEON_MICRO_MODE_RENDER
;
2185 case ADDR_SW_64KB_Z
:
2186 case ADDR_SW_64KB_Z_T
:
2187 case ADDR_SW_4KB_Z_X
:
2188 case ADDR_SW_64KB_Z_X
:
2189 case ADDR_SW_VAR_Z_X
:
2190 surf
->micro_tile_mode
= RADEON_MICRO_MODE_DEPTH
;
2200 int ac_compute_surface(struct ac_addrlib
*addrlib
, const struct radeon_info
*info
,
2201 const struct ac_surf_config
*config
,
2202 enum radeon_surf_mode mode
,
2203 struct radeon_surf
*surf
)
2207 r
= surf_config_sanity(config
, surf
->flags
);
2211 if (info
->chip_class
>= GFX9
)
2212 r
= gfx9_compute_surface(addrlib
, info
, config
, mode
, surf
);
2214 r
= gfx6_compute_surface(addrlib
->handle
, info
, config
, mode
, surf
);
2219 /* Determine the memory layout of multiple allocations in one buffer. */
2220 surf
->total_size
= surf
->surf_size
;
2221 surf
->alignment
= surf
->surf_alignment
;
2223 if (surf
->htile_size
) {
2224 surf
->htile_offset
= align64(surf
->total_size
, surf
->htile_alignment
);
2225 surf
->total_size
= surf
->htile_offset
+ surf
->htile_size
;
2226 surf
->alignment
= MAX2(surf
->alignment
, surf
->htile_alignment
);
2229 if (surf
->fmask_size
) {
2230 assert(config
->info
.samples
>= 2);
2231 surf
->fmask_offset
= align64(surf
->total_size
, surf
->fmask_alignment
);
2232 surf
->total_size
= surf
->fmask_offset
+ surf
->fmask_size
;
2233 surf
->alignment
= MAX2(surf
->alignment
, surf
->fmask_alignment
);
2236 /* Single-sample CMASK is in a separate buffer. */
2237 if (surf
->cmask_size
&& config
->info
.samples
>= 2) {
2238 surf
->cmask_offset
= align64(surf
->total_size
, surf
->cmask_alignment
);
2239 surf
->total_size
= surf
->cmask_offset
+ surf
->cmask_size
;
2240 surf
->alignment
= MAX2(surf
->alignment
, surf
->cmask_alignment
);
2243 if (surf
->is_displayable
)
2244 surf
->flags
|= RADEON_SURF_SCANOUT
;
2246 if (surf
->dcc_size
&&
2247 /* dcc_size is computed on GFX9+ only if it's displayable. */
2248 (info
->chip_class
>= GFX9
|| !get_display_flag(config
, surf
))) {
2249 /* It's better when displayable DCC is immediately after
2250 * the image due to hw-specific reasons.
2252 if (info
->chip_class
>= GFX9
&&
2253 surf
->u
.gfx9
.dcc_retile_num_elements
) {
2254 /* Add space for the displayable DCC buffer. */
2255 surf
->display_dcc_offset
=
2256 align64(surf
->total_size
, surf
->u
.gfx9
.display_dcc_alignment
);
2257 surf
->total_size
= surf
->display_dcc_offset
+
2258 surf
->u
.gfx9
.display_dcc_size
;
2260 /* Add space for the DCC retile buffer. (16-bit or 32-bit elements) */
2261 surf
->dcc_retile_map_offset
=
2262 align64(surf
->total_size
, info
->tcc_cache_line_size
);
2264 if (surf
->u
.gfx9
.dcc_retile_use_uint16
) {
2265 surf
->total_size
= surf
->dcc_retile_map_offset
+
2266 surf
->u
.gfx9
.dcc_retile_num_elements
* 2;
2268 surf
->total_size
= surf
->dcc_retile_map_offset
+
2269 surf
->u
.gfx9
.dcc_retile_num_elements
* 4;
2273 surf
->dcc_offset
= align64(surf
->total_size
, surf
->dcc_alignment
);
2274 surf
->total_size
= surf
->dcc_offset
+ surf
->dcc_size
;
2275 surf
->alignment
= MAX2(surf
->alignment
, surf
->dcc_alignment
);
2281 /* This is meant to be used for disabling DCC. */
2282 void ac_surface_zero_dcc_fields(struct radeon_surf
*surf
)
2284 surf
->dcc_offset
= 0;
2285 surf
->display_dcc_offset
= 0;
2286 surf
->dcc_retile_map_offset
= 0;
/* Decode the 3-bit Evergreen TILE_SPLIT field into a byte count.
 * Unknown codes fall through to the 1024-byte setting. */
static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0:
      tile_split = 64;
      break;
   case 1:
      tile_split = 128;
      break;
   case 2:
      tile_split = 256;
      break;
   case 3:
      tile_split = 512;
      break;
   default:
   case 4:
      tile_split = 1024;
      break;
   case 5:
      tile_split = 2048;
      break;
   case 6:
      tile_split = 4096;
      break;
   }
   return tile_split;
}
/* Encode a tile-split byte count back into the 3-bit Evergreen
 * TILE_SPLIT field (inverse of eg_tile_split). Unknown sizes map to
 * the 1024-byte code. */
static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:
      return 0;
   case 128:
      return 1;
   case 256:
      return 2;
   case 512:
      return 3;
   default:
   case 1024:
      return 4;
   case 2048:
      return 5;
   case 4096:
      return 6;
   }
}
2318 #define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
2319 #define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK 0x3
2321 /* This should be called before ac_compute_surface. */
2322 void ac_surface_set_bo_metadata(const struct radeon_info
*info
,
2323 struct radeon_surf
*surf
, uint64_t tiling_flags
,
2324 enum radeon_surf_mode
*mode
)
2328 if (info
->chip_class
>= GFX9
) {
2329 surf
->u
.gfx9
.surf
.swizzle_mode
= AMDGPU_TILING_GET(tiling_flags
, SWIZZLE_MODE
);
2330 surf
->u
.gfx9
.dcc
.independent_64B_blocks
= AMDGPU_TILING_GET(tiling_flags
, DCC_INDEPENDENT_64B
);
2331 surf
->u
.gfx9
.dcc
.independent_128B_blocks
= AMDGPU_TILING_GET(tiling_flags
, DCC_INDEPENDENT_128B
);
2332 surf
->u
.gfx9
.dcc
.max_compressed_block_size
= AMDGPU_TILING_GET(tiling_flags
, DCC_MAX_COMPRESSED_BLOCK_SIZE
);
2333 surf
->u
.gfx9
.display_dcc_pitch_max
= AMDGPU_TILING_GET(tiling_flags
, DCC_PITCH_MAX
);
2334 scanout
= AMDGPU_TILING_GET(tiling_flags
, SCANOUT
);
2335 *mode
= surf
->u
.gfx9
.surf
.swizzle_mode
> 0 ? RADEON_SURF_MODE_2D
: RADEON_SURF_MODE_LINEAR_ALIGNED
;
2337 surf
->u
.legacy
.pipe_config
= AMDGPU_TILING_GET(tiling_flags
, PIPE_CONFIG
);
2338 surf
->u
.legacy
.bankw
= 1 << AMDGPU_TILING_GET(tiling_flags
, BANK_WIDTH
);
2339 surf
->u
.legacy
.bankh
= 1 << AMDGPU_TILING_GET(tiling_flags
, BANK_HEIGHT
);
2340 surf
->u
.legacy
.tile_split
= eg_tile_split(AMDGPU_TILING_GET(tiling_flags
, TILE_SPLIT
));
2341 surf
->u
.legacy
.mtilea
= 1 << AMDGPU_TILING_GET(tiling_flags
, MACRO_TILE_ASPECT
);
2342 surf
->u
.legacy
.num_banks
= 2 << AMDGPU_TILING_GET(tiling_flags
, NUM_BANKS
);
2343 scanout
= AMDGPU_TILING_GET(tiling_flags
, MICRO_TILE_MODE
) == 0; /* DISPLAY */
2345 if (AMDGPU_TILING_GET(tiling_flags
, ARRAY_MODE
) == 4) /* 2D_TILED_THIN1 */
2346 *mode
= RADEON_SURF_MODE_2D
;
2347 else if (AMDGPU_TILING_GET(tiling_flags
, ARRAY_MODE
) == 2) /* 1D_TILED_THIN1 */
2348 *mode
= RADEON_SURF_MODE_1D
;
2350 *mode
= RADEON_SURF_MODE_LINEAR_ALIGNED
;
2354 surf
->flags
|= RADEON_SURF_SCANOUT
;
2356 surf
->flags
&= ~RADEON_SURF_SCANOUT
;
2359 void ac_surface_get_bo_metadata(const struct radeon_info
*info
,
2360 struct radeon_surf
*surf
, uint64_t *tiling_flags
)
2364 if (info
->chip_class
>= GFX9
) {
2365 uint64_t dcc_offset
= 0;
2367 if (surf
->dcc_offset
) {
2368 dcc_offset
= surf
->display_dcc_offset
? surf
->display_dcc_offset
2370 assert((dcc_offset
>> 8) != 0 && (dcc_offset
>> 8) < (1 << 24));
2373 *tiling_flags
|= AMDGPU_TILING_SET(SWIZZLE_MODE
, surf
->u
.gfx9
.surf
.swizzle_mode
);
2374 *tiling_flags
|= AMDGPU_TILING_SET(DCC_OFFSET_256B
, dcc_offset
>> 8);
2375 *tiling_flags
|= AMDGPU_TILING_SET(DCC_PITCH_MAX
, surf
->u
.gfx9
.display_dcc_pitch_max
);
2376 *tiling_flags
|= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B
, surf
->u
.gfx9
.dcc
.independent_64B_blocks
);
2377 *tiling_flags
|= AMDGPU_TILING_SET(DCC_INDEPENDENT_128B
, surf
->u
.gfx9
.dcc
.independent_128B_blocks
);
2378 *tiling_flags
|= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE
, surf
->u
.gfx9
.dcc
.max_compressed_block_size
);
2379 *tiling_flags
|= AMDGPU_TILING_SET(SCANOUT
, (surf
->flags
& RADEON_SURF_SCANOUT
) != 0);
2381 if (surf
->u
.legacy
.level
[0].mode
>= RADEON_SURF_MODE_2D
)
2382 *tiling_flags
|= AMDGPU_TILING_SET(ARRAY_MODE
, 4); /* 2D_TILED_THIN1 */
2383 else if (surf
->u
.legacy
.level
[0].mode
>= RADEON_SURF_MODE_1D
)
2384 *tiling_flags
|= AMDGPU_TILING_SET(ARRAY_MODE
, 2); /* 1D_TILED_THIN1 */
2386 *tiling_flags
|= AMDGPU_TILING_SET(ARRAY_MODE
, 1); /* LINEAR_ALIGNED */
2388 *tiling_flags
|= AMDGPU_TILING_SET(PIPE_CONFIG
, surf
->u
.legacy
.pipe_config
);
2389 *tiling_flags
|= AMDGPU_TILING_SET(BANK_WIDTH
, util_logbase2(surf
->u
.legacy
.bankw
));
2390 *tiling_flags
|= AMDGPU_TILING_SET(BANK_HEIGHT
, util_logbase2(surf
->u
.legacy
.bankh
));
2391 if (surf
->u
.legacy
.tile_split
)
2392 *tiling_flags
|= AMDGPU_TILING_SET(TILE_SPLIT
, eg_tile_split_rev(surf
->u
.legacy
.tile_split
));
2393 *tiling_flags
|= AMDGPU_TILING_SET(MACRO_TILE_ASPECT
, util_logbase2(surf
->u
.legacy
.mtilea
));
2394 *tiling_flags
|= AMDGPU_TILING_SET(NUM_BANKS
, util_logbase2(surf
->u
.legacy
.num_banks
)-1);
2396 if (surf
->flags
& RADEON_SURF_SCANOUT
)
2397 *tiling_flags
|= AMDGPU_TILING_SET(MICRO_TILE_MODE
, 0); /* DISPLAY_MICRO_TILING */
2399 *tiling_flags
|= AMDGPU_TILING_SET(MICRO_TILE_MODE
, 1); /* THIN_MICRO_TILING */
2403 static uint32_t ac_get_umd_metadata_word1(const struct radeon_info
*info
)
2405 return (ATI_VENDOR_ID
<< 16) | info
->pci_id
;
2408 /* This should be called after ac_compute_surface. */
2409 bool ac_surface_set_umd_metadata(const struct radeon_info
*info
,
2410 struct radeon_surf
*surf
,
2411 unsigned num_storage_samples
,
2412 unsigned num_mipmap_levels
,
2413 unsigned size_metadata
,
2414 uint32_t metadata
[64])
2416 uint32_t *desc
= &metadata
[2];
2419 if (info
->chip_class
>= GFX9
)
2420 offset
= surf
->u
.gfx9
.surf_offset
;
2422 offset
= surf
->u
.legacy
.level
[0].offset
;
2424 if (offset
|| /* Non-zero planes ignore metadata. */
2425 size_metadata
< 10 * 4 || /* at least 2(header) + 8(desc) dwords */
2426 metadata
[0] == 0 || /* invalid version number */
2427 metadata
[1] != ac_get_umd_metadata_word1(info
)) /* invalid PCI ID */ {
2428 /* Disable DCC because it might not be enabled. */
2429 ac_surface_zero_dcc_fields(surf
);
2431 /* Don't report an error if the texture comes from an incompatible driver,
2432 * but this might not work.
2437 /* Validate that sample counts and the number of mipmap levels match. */
2438 unsigned desc_last_level
= G_008F1C_LAST_LEVEL(desc
[3]);
2439 unsigned type
= G_008F1C_TYPE(desc
[3]);
2441 if (type
== V_008F1C_SQ_RSRC_IMG_2D_MSAA
|| type
== V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
) {
2442 unsigned log_samples
= util_logbase2(MAX2(1, num_storage_samples
));
2444 if (desc_last_level
!= log_samples
) {
2446 "amdgpu: invalid MSAA texture import, "
2447 "metadata has log2(samples) = %u, the caller set %u\n",
2448 desc_last_level
, log_samples
);
2452 if (desc_last_level
!= num_mipmap_levels
- 1) {
2454 "amdgpu: invalid mipmapped texture import, "
2455 "metadata has last_level = %u, the caller set %u\n",
2456 desc_last_level
, num_mipmap_levels
- 1);
2461 if (info
->chip_class
>= GFX8
&& G_008F28_COMPRESSION_EN(desc
[6])) {
2462 /* Read DCC information. */
2463 switch (info
->chip_class
) {
2465 surf
->dcc_offset
= (uint64_t)desc
[7] << 8;
2470 ((uint64_t)desc
[7] << 8) | ((uint64_t)G_008F24_META_DATA_ADDRESS(desc
[5]) << 40);
2471 surf
->u
.gfx9
.dcc
.pipe_aligned
= G_008F24_META_PIPE_ALIGNED(desc
[5]);
2472 surf
->u
.gfx9
.dcc
.rb_aligned
= G_008F24_META_RB_ALIGNED(desc
[5]);
2474 /* If DCC is unaligned, this can only be a displayable image. */
2475 if (!surf
->u
.gfx9
.dcc
.pipe_aligned
&& !surf
->u
.gfx9
.dcc
.rb_aligned
)
2476 assert(surf
->is_displayable
);
2482 ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc
[6]) << 8) | ((uint64_t)desc
[7] << 16);
2483 surf
->u
.gfx9
.dcc
.pipe_aligned
= G_00A018_META_PIPE_ALIGNED(desc
[6]);
2491 /* Disable DCC. dcc_offset is always set by texture_from_handle
2492 * and must be cleared here.
2494 ac_surface_zero_dcc_fields(surf
);
2500 void ac_surface_get_umd_metadata(const struct radeon_info
*info
,
2501 struct radeon_surf
*surf
,
2502 unsigned num_mipmap_levels
,
2504 unsigned *size_metadata
, uint32_t metadata
[64])
2506 /* Clear the base address and set the relative DCC offset. */
2508 desc
[1] &= C_008F14_BASE_ADDRESS_HI
;
2510 switch (info
->chip_class
) {
2515 desc
[7] = surf
->dcc_offset
>> 8;
2518 desc
[7] = surf
->dcc_offset
>> 8;
2519 desc
[5] &= C_008F24_META_DATA_ADDRESS
;
2520 desc
[5] |= S_008F24_META_DATA_ADDRESS(surf
->dcc_offset
>> 40);
2524 desc
[6] &= C_00A018_META_DATA_ADDRESS_LO
;
2525 desc
[6] |= S_00A018_META_DATA_ADDRESS_LO(surf
->dcc_offset
>> 8);
2526 desc
[7] = surf
->dcc_offset
>> 16;
2532 /* Metadata image format format version 1:
2533 * [0] = 1 (metadata format identifier)
2534 * [1] = (VENDOR_ID << 16) | PCI_ID
2535 * [2:9] = image descriptor for the whole resource
2536 * [2] is always 0, because the base address is cleared
2537 * [9] is the DCC offset bits [39:8] from the beginning of
2539 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
2542 metadata
[0] = 1; /* metadata image format version 1 */
2544 /* Tiling modes are ambiguous without a PCI ID. */
2545 metadata
[1] = ac_get_umd_metadata_word1(info
);
2547 /* Dwords [2:9] contain the image descriptor. */
2548 memcpy(&metadata
[2], desc
, 8 * 4);
2549 *size_metadata
= 10 * 4;
2551 /* Dwords [10:..] contain the mipmap level offsets. */
2552 if (info
->chip_class
<= GFX8
) {
2553 for (unsigned i
= 0; i
< num_mipmap_levels
; i
++)
2554 metadata
[10 + i
] = surf
->u
.legacy
.level
[i
].offset
>> 8;
2556 *size_metadata
+= num_mipmap_levels
* 4;
2560 void ac_surface_override_offset_stride(const struct radeon_info
*info
,
2561 struct radeon_surf
*surf
,
2562 unsigned num_mipmap_levels
,
2563 uint64_t offset
, unsigned pitch
)
2565 if (info
->chip_class
>= GFX9
) {
2567 surf
->u
.gfx9
.surf_pitch
= pitch
;
2568 if (num_mipmap_levels
== 1)
2569 surf
->u
.gfx9
.surf
.epitch
= pitch
- 1;
2570 surf
->u
.gfx9
.surf_slice_size
=
2571 (uint64_t)pitch
* surf
->u
.gfx9
.surf_height
* surf
->bpe
;
2573 surf
->u
.gfx9
.surf_offset
= offset
;
2574 if (surf
->u
.gfx9
.stencil_offset
)
2575 surf
->u
.gfx9
.stencil_offset
+= offset
;
2578 surf
->u
.legacy
.level
[0].nblk_x
= pitch
;
2579 surf
->u
.legacy
.level
[0].slice_size_dw
=
2580 ((uint64_t)pitch
* surf
->u
.legacy
.level
[0].nblk_y
* surf
->bpe
) / 4;
2584 for (unsigned i
= 0; i
< ARRAY_SIZE(surf
->u
.legacy
.level
); ++i
)
2585 surf
->u
.legacy
.level
[i
].offset
+= offset
;
2589 if (surf
->htile_offset
)
2590 surf
->htile_offset
+= offset
;
2591 if (surf
->fmask_offset
)
2592 surf
->fmask_offset
+= offset
;
2593 if (surf
->cmask_offset
)
2594 surf
->cmask_offset
+= offset
;
2595 if (surf
->dcc_offset
)
2596 surf
->dcc_offset
+= offset
;
2597 if (surf
->display_dcc_offset
)
2598 surf
->display_dcc_offset
+= offset
;
2599 if (surf
->dcc_retile_map_offset
)
2600 surf
->dcc_retile_map_offset
+= offset
;