2 * Copyright © 2011 Red Hat All Rights Reserved.
3 * Copyright © 2017 Advanced Micro Devices, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
16 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
18 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 * The above copyright notice and this permission notice (including the
24 * next paragraph) shall be included in all copies or substantial portions
28 #include "ac_surface.h"
29 #include "amd_family.h"
30 #include "addrlib/src/amdgpu_asic_addr.h"
31 #include "ac_gpu_info.h"
32 #include "util/macros.h"
33 #include "util/u_atomic.h"
34 #include "util/u_math.h"
41 #include "drm-uapi/amdgpu_drm.h"
43 #include "addrlib/inc/addrinterface.h"
45 #ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
46 #define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
49 #ifndef CIASICIDGFXENGINE_ARCTICISLAND
50 #define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D
53 static void *ADDR_API
allocSysMem(const ADDR_ALLOCSYSMEM_INPUT
* pInput
)
55 return malloc(pInput
->sizeInBytes
);
58 static ADDR_E_RETURNCODE ADDR_API
freeSysMem(const ADDR_FREESYSMEM_INPUT
* pInput
)
60 free(pInput
->pVirtAddr
);
64 ADDR_HANDLE
amdgpu_addr_create(const struct radeon_info
*info
,
65 const struct amdgpu_gpu_info
*amdinfo
,
66 uint64_t *max_alignment
)
68 ADDR_CREATE_INPUT addrCreateInput
= {0};
69 ADDR_CREATE_OUTPUT addrCreateOutput
= {0};
70 ADDR_REGISTER_VALUE regValue
= {0};
71 ADDR_CREATE_FLAGS createFlags
= {{0}};
72 ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput
= {0};
73 ADDR_E_RETURNCODE addrRet
;
75 addrCreateInput
.size
= sizeof(ADDR_CREATE_INPUT
);
76 addrCreateOutput
.size
= sizeof(ADDR_CREATE_OUTPUT
);
78 regValue
.gbAddrConfig
= amdinfo
->gb_addr_cfg
;
79 createFlags
.value
= 0;
81 addrCreateInput
.chipFamily
= info
->family_id
;
82 addrCreateInput
.chipRevision
= info
->chip_external_rev
;
84 if (addrCreateInput
.chipFamily
== FAMILY_UNKNOWN
)
87 if (addrCreateInput
.chipFamily
>= FAMILY_AI
) {
88 addrCreateInput
.chipEngine
= CIASICIDGFXENGINE_ARCTICISLAND
;
90 regValue
.noOfBanks
= amdinfo
->mc_arb_ramcfg
& 0x3;
91 regValue
.noOfRanks
= (amdinfo
->mc_arb_ramcfg
& 0x4) >> 2;
93 regValue
.backendDisables
= amdinfo
->enabled_rb_pipes_mask
;
94 regValue
.pTileConfig
= amdinfo
->gb_tile_mode
;
95 regValue
.noOfEntries
= ARRAY_SIZE(amdinfo
->gb_tile_mode
);
96 if (addrCreateInput
.chipFamily
== FAMILY_SI
) {
97 regValue
.pMacroTileConfig
= NULL
;
98 regValue
.noOfMacroEntries
= 0;
100 regValue
.pMacroTileConfig
= amdinfo
->gb_macro_tile_mode
;
101 regValue
.noOfMacroEntries
= ARRAY_SIZE(amdinfo
->gb_macro_tile_mode
);
104 createFlags
.useTileIndex
= 1;
105 createFlags
.useHtileSliceAlign
= 1;
107 addrCreateInput
.chipEngine
= CIASICIDGFXENGINE_SOUTHERNISLAND
;
110 addrCreateInput
.callbacks
.allocSysMem
= allocSysMem
;
111 addrCreateInput
.callbacks
.freeSysMem
= freeSysMem
;
112 addrCreateInput
.callbacks
.debugPrint
= 0;
113 addrCreateInput
.createFlags
= createFlags
;
114 addrCreateInput
.regValue
= regValue
;
116 addrRet
= AddrCreate(&addrCreateInput
, &addrCreateOutput
);
117 if (addrRet
!= ADDR_OK
)
121 addrRet
= AddrGetMaxAlignments(addrCreateOutput
.hLib
, &addrGetMaxAlignmentsOutput
);
122 if (addrRet
== ADDR_OK
){
123 *max_alignment
= addrGetMaxAlignmentsOutput
.baseAlign
;
126 return addrCreateOutput
.hLib
;
129 static int surf_config_sanity(const struct ac_surf_config
*config
,
132 /* FMASK is allocated together with the color surface and can't be
133 * allocated separately.
135 assert(!(flags
& RADEON_SURF_FMASK
));
136 if (flags
& RADEON_SURF_FMASK
)
139 /* all dimension must be at least 1 ! */
140 if (!config
->info
.width
|| !config
->info
.height
|| !config
->info
.depth
||
141 !config
->info
.array_size
|| !config
->info
.levels
)
144 switch (config
->info
.samples
) {
152 if (flags
& RADEON_SURF_Z_OR_SBUFFER
)
159 if (!(flags
& RADEON_SURF_Z_OR_SBUFFER
)) {
160 switch (config
->info
.storage_samples
) {
172 if (config
->is_3d
&& config
->info
.array_size
> 1)
174 if (config
->is_cube
&& config
->info
.depth
> 1)
180 static int gfx6_compute_level(ADDR_HANDLE addrlib
,
181 const struct ac_surf_config
*config
,
182 struct radeon_surf
*surf
, bool is_stencil
,
183 unsigned level
, bool compressed
,
184 ADDR_COMPUTE_SURFACE_INFO_INPUT
*AddrSurfInfoIn
,
185 ADDR_COMPUTE_SURFACE_INFO_OUTPUT
*AddrSurfInfoOut
,
186 ADDR_COMPUTE_DCCINFO_INPUT
*AddrDccIn
,
187 ADDR_COMPUTE_DCCINFO_OUTPUT
*AddrDccOut
,
188 ADDR_COMPUTE_HTILE_INFO_INPUT
*AddrHtileIn
,
189 ADDR_COMPUTE_HTILE_INFO_OUTPUT
*AddrHtileOut
)
191 struct legacy_surf_level
*surf_level
;
192 ADDR_E_RETURNCODE ret
;
194 AddrSurfInfoIn
->mipLevel
= level
;
195 AddrSurfInfoIn
->width
= u_minify(config
->info
.width
, level
);
196 AddrSurfInfoIn
->height
= u_minify(config
->info
.height
, level
);
198 /* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics,
199 * because GFX9 needs linear alignment of 256 bytes.
201 if (config
->info
.levels
== 1 &&
202 AddrSurfInfoIn
->tileMode
== ADDR_TM_LINEAR_ALIGNED
&&
203 AddrSurfInfoIn
->bpp
&&
204 util_is_power_of_two_or_zero(AddrSurfInfoIn
->bpp
)) {
205 unsigned alignment
= 256 / (AddrSurfInfoIn
->bpp
/ 8);
207 AddrSurfInfoIn
->width
= align(AddrSurfInfoIn
->width
, alignment
);
210 /* addrlib assumes the bytes/pixel is a divisor of 64, which is not
211 * true for r32g32b32 formats. */
212 if (AddrSurfInfoIn
->bpp
== 96) {
213 assert(config
->info
.levels
== 1);
214 assert(AddrSurfInfoIn
->tileMode
== ADDR_TM_LINEAR_ALIGNED
);
216 /* The least common multiple of 64 bytes and 12 bytes/pixel is
217 * 192 bytes, or 16 pixels. */
218 AddrSurfInfoIn
->width
= align(AddrSurfInfoIn
->width
, 16);
222 AddrSurfInfoIn
->numSlices
= u_minify(config
->info
.depth
, level
);
223 else if (config
->is_cube
)
224 AddrSurfInfoIn
->numSlices
= 6;
226 AddrSurfInfoIn
->numSlices
= config
->info
.array_size
;
229 /* Set the base level pitch. This is needed for calculation
230 * of non-zero levels. */
232 AddrSurfInfoIn
->basePitch
= surf
->u
.legacy
.stencil_level
[0].nblk_x
;
234 AddrSurfInfoIn
->basePitch
= surf
->u
.legacy
.level
[0].nblk_x
;
236 /* Convert blocks to pixels for compressed formats. */
238 AddrSurfInfoIn
->basePitch
*= surf
->blk_w
;
241 ret
= AddrComputeSurfaceInfo(addrlib
,
244 if (ret
!= ADDR_OK
) {
248 surf_level
= is_stencil
? &surf
->u
.legacy
.stencil_level
[level
] : &surf
->u
.legacy
.level
[level
];
249 surf_level
->offset
= align64(surf
->surf_size
, AddrSurfInfoOut
->baseAlign
);
250 surf_level
->slice_size_dw
= AddrSurfInfoOut
->sliceSize
/ 4;
251 surf_level
->nblk_x
= AddrSurfInfoOut
->pitch
;
252 surf_level
->nblk_y
= AddrSurfInfoOut
->height
;
254 switch (AddrSurfInfoOut
->tileMode
) {
255 case ADDR_TM_LINEAR_ALIGNED
:
256 surf_level
->mode
= RADEON_SURF_MODE_LINEAR_ALIGNED
;
258 case ADDR_TM_1D_TILED_THIN1
:
259 surf_level
->mode
= RADEON_SURF_MODE_1D
;
261 case ADDR_TM_2D_TILED_THIN1
:
262 surf_level
->mode
= RADEON_SURF_MODE_2D
;
269 surf
->u
.legacy
.stencil_tiling_index
[level
] = AddrSurfInfoOut
->tileIndex
;
271 surf
->u
.legacy
.tiling_index
[level
] = AddrSurfInfoOut
->tileIndex
;
273 surf
->surf_size
= surf_level
->offset
+ AddrSurfInfoOut
->surfSize
;
275 /* Clear DCC fields at the beginning. */
276 surf_level
->dcc_offset
= 0;
278 /* The previous level's flag tells us if we can use DCC for this level. */
279 if (AddrSurfInfoIn
->flags
.dccCompatible
&&
280 (level
== 0 || AddrDccOut
->subLvlCompressible
)) {
281 bool prev_level_clearable
= level
== 0 ||
282 AddrDccOut
->dccRamSizeAligned
;
284 AddrDccIn
->colorSurfSize
= AddrSurfInfoOut
->surfSize
;
285 AddrDccIn
->tileMode
= AddrSurfInfoOut
->tileMode
;
286 AddrDccIn
->tileInfo
= *AddrSurfInfoOut
->pTileInfo
;
287 AddrDccIn
->tileIndex
= AddrSurfInfoOut
->tileIndex
;
288 AddrDccIn
->macroModeIndex
= AddrSurfInfoOut
->macroModeIndex
;
290 ret
= AddrComputeDccInfo(addrlib
,
294 if (ret
== ADDR_OK
) {
295 surf_level
->dcc_offset
= surf
->dcc_size
;
296 surf
->num_dcc_levels
= level
+ 1;
297 surf
->dcc_size
= surf_level
->dcc_offset
+ AddrDccOut
->dccRamSize
;
298 surf
->dcc_alignment
= MAX2(surf
->dcc_alignment
, AddrDccOut
->dccRamBaseAlign
);
300 /* If the DCC size of a subresource (1 mip level or 1 slice)
301 * is not aligned, the DCC memory layout is not contiguous for
302 * that subresource, which means we can't use fast clear.
304 * We only do fast clears for whole mipmap levels. If we did
305 * per-slice fast clears, the same restriction would apply.
306 * (i.e. only compute the slice size and see if it's aligned)
308 * The last level can be non-contiguous and still be clearable
309 * if it's interleaved with the next level that doesn't exist.
311 if (AddrDccOut
->dccRamSizeAligned
||
312 (prev_level_clearable
&& level
== config
->info
.levels
- 1))
313 surf_level
->dcc_fast_clear_size
= AddrDccOut
->dccFastClearSize
;
315 surf_level
->dcc_fast_clear_size
= 0;
317 /* Compute the DCC slice size because addrlib doesn't
318 * provide this info. As DCC memory is linear (each
319 * slice is the same size) it's easy to compute.
321 surf
->dcc_slice_size
= AddrDccOut
->dccRamSize
/ config
->info
.array_size
;
323 /* For arrays, we have to compute the DCC info again
324 * with one slice size to get a correct fast clear
327 if (config
->info
.array_size
> 1) {
328 AddrDccIn
->colorSurfSize
= AddrSurfInfoOut
->sliceSize
;
329 AddrDccIn
->tileMode
= AddrSurfInfoOut
->tileMode
;
330 AddrDccIn
->tileInfo
= *AddrSurfInfoOut
->pTileInfo
;
331 AddrDccIn
->tileIndex
= AddrSurfInfoOut
->tileIndex
;
332 AddrDccIn
->macroModeIndex
= AddrSurfInfoOut
->macroModeIndex
;
334 ret
= AddrComputeDccInfo(addrlib
,
335 AddrDccIn
, AddrDccOut
);
336 if (ret
== ADDR_OK
) {
337 /* If the DCC memory isn't properly
338 * aligned, the data are interleaved
341 if (AddrDccOut
->dccRamSizeAligned
)
342 surf_level
->dcc_slice_fast_clear_size
= AddrDccOut
->dccFastClearSize
;
344 surf_level
->dcc_slice_fast_clear_size
= 0;
347 if (surf
->flags
& RADEON_SURF_CONTIGUOUS_DCC_LAYERS
&&
348 surf
->dcc_slice_size
!= surf_level
->dcc_slice_fast_clear_size
) {
350 surf
->num_dcc_levels
= 0;
351 AddrDccOut
->subLvlCompressible
= false;
354 surf_level
->dcc_slice_fast_clear_size
= surf_level
->dcc_fast_clear_size
;
361 AddrSurfInfoIn
->flags
.depth
&&
362 surf_level
->mode
== RADEON_SURF_MODE_2D
&&
364 !(surf
->flags
& RADEON_SURF_NO_HTILE
)) {
365 AddrHtileIn
->flags
.tcCompatible
= AddrSurfInfoOut
->tcCompatible
;
366 AddrHtileIn
->pitch
= AddrSurfInfoOut
->pitch
;
367 AddrHtileIn
->height
= AddrSurfInfoOut
->height
;
368 AddrHtileIn
->numSlices
= AddrSurfInfoOut
->depth
;
369 AddrHtileIn
->blockWidth
= ADDR_HTILE_BLOCKSIZE_8
;
370 AddrHtileIn
->blockHeight
= ADDR_HTILE_BLOCKSIZE_8
;
371 AddrHtileIn
->pTileInfo
= AddrSurfInfoOut
->pTileInfo
;
372 AddrHtileIn
->tileIndex
= AddrSurfInfoOut
->tileIndex
;
373 AddrHtileIn
->macroModeIndex
= AddrSurfInfoOut
->macroModeIndex
;
375 ret
= AddrComputeHtileInfo(addrlib
,
379 if (ret
== ADDR_OK
) {
380 surf
->htile_size
= AddrHtileOut
->htileBytes
;
381 surf
->htile_slice_size
= AddrHtileOut
->sliceSize
;
382 surf
->htile_alignment
= AddrHtileOut
->baseAlign
;
389 static void gfx6_set_micro_tile_mode(struct radeon_surf
*surf
,
390 const struct radeon_info
*info
)
392 uint32_t tile_mode
= info
->si_tile_mode_array
[surf
->u
.legacy
.tiling_index
[0]];
394 if (info
->chip_class
>= GFX7
)
395 surf
->micro_tile_mode
= G_009910_MICRO_TILE_MODE_NEW(tile_mode
);
397 surf
->micro_tile_mode
= G_009910_MICRO_TILE_MODE(tile_mode
);
400 static unsigned cik_get_macro_tile_index(struct radeon_surf
*surf
)
402 unsigned index
, tileb
;
404 tileb
= 8 * 8 * surf
->bpe
;
405 tileb
= MIN2(surf
->u
.legacy
.tile_split
, tileb
);
407 for (index
= 0; tileb
> 64; index
++)
414 static bool get_display_flag(const struct ac_surf_config
*config
,
415 const struct radeon_surf
*surf
)
417 unsigned num_channels
= config
->info
.num_channels
;
418 unsigned bpe
= surf
->bpe
;
420 if (!config
->is_3d
&&
422 !(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
) &&
423 surf
->flags
& RADEON_SURF_SCANOUT
&&
424 config
->info
.samples
<= 1 &&
425 surf
->blk_w
<= 2 && surf
->blk_h
== 1) {
427 if (surf
->blk_w
== 2 && surf
->blk_h
== 1)
430 if (/* RGBA8 or RGBA16F */
431 (bpe
>= 4 && bpe
<= 8 && num_channels
== 4) ||
432 /* R5G6B5 or R5G5B5A1 */
433 (bpe
== 2 && num_channels
>= 3) ||
435 (bpe
== 1 && num_channels
== 1))
442 * This must be called after the first level is computed.
444 * Copy surface-global settings like pipe/bank config from level 0 surface
445 * computation, and compute tile swizzle.
447 static int gfx6_surface_settings(ADDR_HANDLE addrlib
,
448 const struct radeon_info
*info
,
449 const struct ac_surf_config
*config
,
450 ADDR_COMPUTE_SURFACE_INFO_OUTPUT
* csio
,
451 struct radeon_surf
*surf
)
453 surf
->surf_alignment
= csio
->baseAlign
;
454 surf
->u
.legacy
.pipe_config
= csio
->pTileInfo
->pipeConfig
- 1;
455 gfx6_set_micro_tile_mode(surf
, info
);
457 /* For 2D modes only. */
458 if (csio
->tileMode
>= ADDR_TM_2D_TILED_THIN1
) {
459 surf
->u
.legacy
.bankw
= csio
->pTileInfo
->bankWidth
;
460 surf
->u
.legacy
.bankh
= csio
->pTileInfo
->bankHeight
;
461 surf
->u
.legacy
.mtilea
= csio
->pTileInfo
->macroAspectRatio
;
462 surf
->u
.legacy
.tile_split
= csio
->pTileInfo
->tileSplitBytes
;
463 surf
->u
.legacy
.num_banks
= csio
->pTileInfo
->banks
;
464 surf
->u
.legacy
.macro_tile_index
= csio
->macroModeIndex
;
466 surf
->u
.legacy
.macro_tile_index
= 0;
469 /* Compute tile swizzle. */
470 /* TODO: fix tile swizzle with mipmapping for GFX6 */
471 if ((info
->chip_class
>= GFX7
|| config
->info
.levels
== 1) &&
472 config
->info
.surf_index
&&
473 surf
->u
.legacy
.level
[0].mode
== RADEON_SURF_MODE_2D
&&
474 !(surf
->flags
& (RADEON_SURF_Z_OR_SBUFFER
| RADEON_SURF_SHAREABLE
)) &&
475 !get_display_flag(config
, surf
)) {
476 ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn
= {0};
477 ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut
= {0};
479 AddrBaseSwizzleIn
.size
= sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT
);
480 AddrBaseSwizzleOut
.size
= sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT
);
482 AddrBaseSwizzleIn
.surfIndex
= p_atomic_inc_return(config
->info
.surf_index
) - 1;
483 AddrBaseSwizzleIn
.tileIndex
= csio
->tileIndex
;
484 AddrBaseSwizzleIn
.macroModeIndex
= csio
->macroModeIndex
;
485 AddrBaseSwizzleIn
.pTileInfo
= csio
->pTileInfo
;
486 AddrBaseSwizzleIn
.tileMode
= csio
->tileMode
;
488 int r
= AddrComputeBaseSwizzle(addrlib
, &AddrBaseSwizzleIn
,
489 &AddrBaseSwizzleOut
);
493 assert(AddrBaseSwizzleOut
.tileSwizzle
<=
494 u_bit_consecutive(0, sizeof(surf
->tile_swizzle
) * 8));
495 surf
->tile_swizzle
= AddrBaseSwizzleOut
.tileSwizzle
;
500 static void ac_compute_cmask(const struct radeon_info
*info
,
501 const struct ac_surf_config
*config
,
502 struct radeon_surf
*surf
)
504 unsigned pipe_interleave_bytes
= info
->pipe_interleave_bytes
;
505 unsigned num_pipes
= info
->num_tile_pipes
;
506 unsigned cl_width
, cl_height
;
508 if (surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
|| surf
->is_linear
||
509 (config
->info
.samples
>= 2 && !surf
->fmask_size
))
512 assert(info
->chip_class
<= GFX8
);
527 case 16: /* Hawaii */
536 unsigned base_align
= num_pipes
* pipe_interleave_bytes
;
538 unsigned width
= align(surf
->u
.legacy
.level
[0].nblk_x
, cl_width
*8);
539 unsigned height
= align(surf
->u
.legacy
.level
[0].nblk_y
, cl_height
*8);
540 unsigned slice_elements
= (width
* height
) / (8*8);
542 /* Each element of CMASK is a nibble. */
543 unsigned slice_bytes
= slice_elements
/ 2;
545 surf
->u
.legacy
.cmask_slice_tile_max
= (width
* height
) / (128*128);
546 if (surf
->u
.legacy
.cmask_slice_tile_max
)
547 surf
->u
.legacy
.cmask_slice_tile_max
-= 1;
551 num_layers
= config
->info
.depth
;
552 else if (config
->is_cube
)
555 num_layers
= config
->info
.array_size
;
557 surf
->cmask_alignment
= MAX2(256, base_align
);
558 surf
->cmask_slice_size
= align(slice_bytes
, base_align
);
559 surf
->cmask_size
= surf
->cmask_slice_size
* num_layers
;
563 * Fill in the tiling information in \p surf based on the given surface config.
565 * The following fields of \p surf must be initialized by the caller:
566 * blk_w, blk_h, bpe, flags.
568 static int gfx6_compute_surface(ADDR_HANDLE addrlib
,
569 const struct radeon_info
*info
,
570 const struct ac_surf_config
*config
,
571 enum radeon_surf_mode mode
,
572 struct radeon_surf
*surf
)
576 ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn
= {0};
577 ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut
= {0};
578 ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn
= {0};
579 ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut
= {0};
580 ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn
= {0};
581 ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut
= {0};
582 ADDR_TILEINFO AddrTileInfoIn
= {0};
583 ADDR_TILEINFO AddrTileInfoOut
= {0};
586 AddrSurfInfoIn
.size
= sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT
);
587 AddrSurfInfoOut
.size
= sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT
);
588 AddrDccIn
.size
= sizeof(ADDR_COMPUTE_DCCINFO_INPUT
);
589 AddrDccOut
.size
= sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT
);
590 AddrHtileIn
.size
= sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT
);
591 AddrHtileOut
.size
= sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT
);
592 AddrSurfInfoOut
.pTileInfo
= &AddrTileInfoOut
;
594 compressed
= surf
->blk_w
== 4 && surf
->blk_h
== 4;
596 /* MSAA requires 2D tiling. */
597 if (config
->info
.samples
> 1)
598 mode
= RADEON_SURF_MODE_2D
;
600 /* DB doesn't support linear layouts. */
601 if (surf
->flags
& (RADEON_SURF_Z_OR_SBUFFER
) &&
602 mode
< RADEON_SURF_MODE_1D
)
603 mode
= RADEON_SURF_MODE_1D
;
605 /* Set the requested tiling mode. */
607 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
608 AddrSurfInfoIn
.tileMode
= ADDR_TM_LINEAR_ALIGNED
;
610 case RADEON_SURF_MODE_1D
:
611 AddrSurfInfoIn
.tileMode
= ADDR_TM_1D_TILED_THIN1
;
613 case RADEON_SURF_MODE_2D
:
614 AddrSurfInfoIn
.tileMode
= ADDR_TM_2D_TILED_THIN1
;
620 /* The format must be set correctly for the allocation of compressed
621 * textures to work. In other cases, setting the bpp is sufficient.
626 AddrSurfInfoIn
.format
= ADDR_FMT_BC1
;
629 AddrSurfInfoIn
.format
= ADDR_FMT_BC3
;
636 AddrDccIn
.bpp
= AddrSurfInfoIn
.bpp
= surf
->bpe
* 8;
639 AddrDccIn
.numSamples
= AddrSurfInfoIn
.numSamples
=
640 MAX2(1, config
->info
.samples
);
641 AddrSurfInfoIn
.tileIndex
= -1;
643 if (!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
)) {
644 AddrDccIn
.numSamples
= AddrSurfInfoIn
.numFrags
=
645 MAX2(1, config
->info
.storage_samples
);
648 /* Set the micro tile type. */
649 if (surf
->flags
& RADEON_SURF_SCANOUT
)
650 AddrSurfInfoIn
.tileType
= ADDR_DISPLAYABLE
;
651 else if (surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
)
652 AddrSurfInfoIn
.tileType
= ADDR_DEPTH_SAMPLE_ORDER
;
654 AddrSurfInfoIn
.tileType
= ADDR_NON_DISPLAYABLE
;
656 AddrSurfInfoIn
.flags
.color
= !(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
);
657 AddrSurfInfoIn
.flags
.depth
= (surf
->flags
& RADEON_SURF_ZBUFFER
) != 0;
658 AddrSurfInfoIn
.flags
.cube
= config
->is_cube
;
659 AddrSurfInfoIn
.flags
.display
= get_display_flag(config
, surf
);
660 AddrSurfInfoIn
.flags
.pow2Pad
= config
->info
.levels
> 1;
661 AddrSurfInfoIn
.flags
.tcCompatible
= (surf
->flags
& RADEON_SURF_TC_COMPATIBLE_HTILE
) != 0;
663 /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
664 * requested, because TC-compatible HTILE requires 2D tiling.
666 AddrSurfInfoIn
.flags
.opt4Space
= !AddrSurfInfoIn
.flags
.tcCompatible
&&
667 !AddrSurfInfoIn
.flags
.fmask
&&
668 config
->info
.samples
<= 1 &&
669 !(surf
->flags
& RADEON_SURF_FORCE_SWIZZLE_MODE
);
672 * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
674 * - Mipmapped array textures have low performance (discovered by a closed
677 AddrSurfInfoIn
.flags
.dccCompatible
=
678 info
->chip_class
>= GFX8
&&
679 info
->has_graphics
&& /* disable DCC on compute-only chips */
680 !(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
) &&
681 !(surf
->flags
& RADEON_SURF_DISABLE_DCC
) &&
683 ((config
->info
.array_size
== 1 && config
->info
.depth
== 1) ||
684 config
->info
.levels
== 1);
686 AddrSurfInfoIn
.flags
.noStencil
= (surf
->flags
& RADEON_SURF_SBUFFER
) == 0;
687 AddrSurfInfoIn
.flags
.compressZ
= !!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
);
689 /* On GFX7-GFX8, the DB uses the same pitch and tile mode (except tilesplit)
690 * for Z and stencil. This can cause a number of problems which we work
693 * - a depth part that is incompatible with mipmapped texturing
694 * - at least on Stoney, entirely incompatible Z/S aspects (e.g.
695 * incorrect tiling applied to the stencil part, stencil buffer
696 * memory accesses that go out of bounds) even without mipmapping
698 * Some piglit tests that are prone to different types of related
700 * ./bin/ext_framebuffer_multisample-upsample 2 stencil
701 * ./bin/framebuffer-blit-levels {draw,read} stencil
702 * ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample}
703 * ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw}
704 * ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8
706 int stencil_tile_idx
= -1;
708 if (AddrSurfInfoIn
.flags
.depth
&& !AddrSurfInfoIn
.flags
.noStencil
&&
709 (config
->info
.levels
> 1 || info
->family
== CHIP_STONEY
)) {
710 /* Compute stencilTileIdx that is compatible with the (depth)
711 * tileIdx. This degrades the depth surface if necessary to
712 * ensure that a matching stencilTileIdx exists. */
713 AddrSurfInfoIn
.flags
.matchStencilTileCfg
= 1;
715 /* Keep the depth mip-tail compatible with texturing. */
716 AddrSurfInfoIn
.flags
.noStencil
= 1;
719 /* Set preferred macrotile parameters. This is usually required
720 * for shared resources. This is for 2D tiling only. */
721 if (AddrSurfInfoIn
.tileMode
>= ADDR_TM_2D_TILED_THIN1
&&
722 surf
->u
.legacy
.bankw
&& surf
->u
.legacy
.bankh
&&
723 surf
->u
.legacy
.mtilea
&& surf
->u
.legacy
.tile_split
) {
724 /* If any of these parameters are incorrect, the calculation
726 AddrTileInfoIn
.banks
= surf
->u
.legacy
.num_banks
;
727 AddrTileInfoIn
.bankWidth
= surf
->u
.legacy
.bankw
;
728 AddrTileInfoIn
.bankHeight
= surf
->u
.legacy
.bankh
;
729 AddrTileInfoIn
.macroAspectRatio
= surf
->u
.legacy
.mtilea
;
730 AddrTileInfoIn
.tileSplitBytes
= surf
->u
.legacy
.tile_split
;
731 AddrTileInfoIn
.pipeConfig
= surf
->u
.legacy
.pipe_config
+ 1; /* +1 compared to GB_TILE_MODE */
732 AddrSurfInfoIn
.flags
.opt4Space
= 0;
733 AddrSurfInfoIn
.pTileInfo
= &AddrTileInfoIn
;
735 /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
736 * the tile index, because we are expected to know it if
737 * we know the other parameters.
739 * This is something that can easily be fixed in Addrlib.
740 * For now, just figure it out here.
741 * Note that only 2D_TILE_THIN1 is handled here.
743 assert(!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
));
744 assert(AddrSurfInfoIn
.tileMode
== ADDR_TM_2D_TILED_THIN1
);
746 if (info
->chip_class
== GFX6
) {
747 if (AddrSurfInfoIn
.tileType
== ADDR_DISPLAYABLE
) {
749 AddrSurfInfoIn
.tileIndex
= 11; /* 16bpp */
751 AddrSurfInfoIn
.tileIndex
= 12; /* 32bpp */
754 AddrSurfInfoIn
.tileIndex
= 14; /* 8bpp */
755 else if (surf
->bpe
== 2)
756 AddrSurfInfoIn
.tileIndex
= 15; /* 16bpp */
757 else if (surf
->bpe
== 4)
758 AddrSurfInfoIn
.tileIndex
= 16; /* 32bpp */
760 AddrSurfInfoIn
.tileIndex
= 17; /* 64bpp (and 128bpp) */
764 if (AddrSurfInfoIn
.tileType
== ADDR_DISPLAYABLE
)
765 AddrSurfInfoIn
.tileIndex
= 10; /* 2D displayable */
767 AddrSurfInfoIn
.tileIndex
= 14; /* 2D non-displayable */
769 /* Addrlib doesn't set this if tileIndex is forced like above. */
770 AddrSurfInfoOut
.macroModeIndex
= cik_get_macro_tile_index(surf
);
774 surf
->has_stencil
= !!(surf
->flags
& RADEON_SURF_SBUFFER
);
775 surf
->num_dcc_levels
= 0;
778 surf
->dcc_alignment
= 1;
779 surf
->htile_size
= 0;
780 surf
->htile_slice_size
= 0;
781 surf
->htile_alignment
= 1;
783 const bool only_stencil
= (surf
->flags
& RADEON_SURF_SBUFFER
) &&
784 !(surf
->flags
& RADEON_SURF_ZBUFFER
);
786 /* Calculate texture layout information. */
788 for (level
= 0; level
< config
->info
.levels
; level
++) {
789 r
= gfx6_compute_level(addrlib
, config
, surf
, false, level
, compressed
,
790 &AddrSurfInfoIn
, &AddrSurfInfoOut
,
791 &AddrDccIn
, &AddrDccOut
, &AddrHtileIn
, &AddrHtileOut
);
798 if (!AddrSurfInfoOut
.tcCompatible
) {
799 AddrSurfInfoIn
.flags
.tcCompatible
= 0;
800 surf
->flags
&= ~RADEON_SURF_TC_COMPATIBLE_HTILE
;
803 if (AddrSurfInfoIn
.flags
.matchStencilTileCfg
) {
804 AddrSurfInfoIn
.flags
.matchStencilTileCfg
= 0;
805 AddrSurfInfoIn
.tileIndex
= AddrSurfInfoOut
.tileIndex
;
806 stencil_tile_idx
= AddrSurfInfoOut
.stencilTileIdx
;
808 assert(stencil_tile_idx
>= 0);
811 r
= gfx6_surface_settings(addrlib
, info
, config
,
812 &AddrSurfInfoOut
, surf
);
818 /* Calculate texture layout information for stencil. */
819 if (surf
->flags
& RADEON_SURF_SBUFFER
) {
820 AddrSurfInfoIn
.tileIndex
= stencil_tile_idx
;
821 AddrSurfInfoIn
.bpp
= 8;
822 AddrSurfInfoIn
.flags
.depth
= 0;
823 AddrSurfInfoIn
.flags
.stencil
= 1;
824 AddrSurfInfoIn
.flags
.tcCompatible
= 0;
825 /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
826 AddrTileInfoIn
.tileSplitBytes
= surf
->u
.legacy
.stencil_tile_split
;
828 for (level
= 0; level
< config
->info
.levels
; level
++) {
829 r
= gfx6_compute_level(addrlib
, config
, surf
, true, level
, compressed
,
830 &AddrSurfInfoIn
, &AddrSurfInfoOut
,
831 &AddrDccIn
, &AddrDccOut
,
836 /* DB uses the depth pitch for both stencil and depth. */
838 if (surf
->u
.legacy
.stencil_level
[level
].nblk_x
!=
839 surf
->u
.legacy
.level
[level
].nblk_x
)
840 surf
->u
.legacy
.stencil_adjusted
= true;
842 surf
->u
.legacy
.level
[level
].nblk_x
=
843 surf
->u
.legacy
.stencil_level
[level
].nblk_x
;
848 r
= gfx6_surface_settings(addrlib
, info
, config
,
849 &AddrSurfInfoOut
, surf
);
854 /* For 2D modes only. */
855 if (AddrSurfInfoOut
.tileMode
>= ADDR_TM_2D_TILED_THIN1
) {
856 surf
->u
.legacy
.stencil_tile_split
=
857 AddrSurfInfoOut
.pTileInfo
->tileSplitBytes
;
864 if (config
->info
.samples
>= 2 && AddrSurfInfoIn
.flags
.color
&&
865 info
->has_graphics
&& !(surf
->flags
& RADEON_SURF_NO_FMASK
)) {
866 ADDR_COMPUTE_FMASK_INFO_INPUT fin
= {0};
867 ADDR_COMPUTE_FMASK_INFO_OUTPUT fout
= {0};
868 ADDR_TILEINFO fmask_tile_info
= {};
870 fin
.size
= sizeof(fin
);
871 fout
.size
= sizeof(fout
);
873 fin
.tileMode
= AddrSurfInfoOut
.tileMode
;
874 fin
.pitch
= AddrSurfInfoOut
.pitch
;
875 fin
.height
= config
->info
.height
;
876 fin
.numSlices
= AddrSurfInfoIn
.numSlices
;
877 fin
.numSamples
= AddrSurfInfoIn
.numSamples
;
878 fin
.numFrags
= AddrSurfInfoIn
.numFrags
;
880 fout
.pTileInfo
= &fmask_tile_info
;
882 r
= AddrComputeFmaskInfo(addrlib
, &fin
, &fout
);
886 surf
->fmask_size
= fout
.fmaskBytes
;
887 surf
->fmask_alignment
= fout
.baseAlign
;
888 surf
->fmask_tile_swizzle
= 0;
890 surf
->u
.legacy
.fmask
.slice_tile_max
=
891 (fout
.pitch
* fout
.height
) / 64;
892 if (surf
->u
.legacy
.fmask
.slice_tile_max
)
893 surf
->u
.legacy
.fmask
.slice_tile_max
-= 1;
895 surf
->u
.legacy
.fmask
.tiling_index
= fout
.tileIndex
;
896 surf
->u
.legacy
.fmask
.bankh
= fout
.pTileInfo
->bankHeight
;
897 surf
->u
.legacy
.fmask
.pitch_in_pixels
= fout
.pitch
;
898 surf
->u
.legacy
.fmask
.slice_size
= fout
.sliceSize
;
900 /* Compute tile swizzle for FMASK. */
901 if (config
->info
.fmask_surf_index
&&
902 !(surf
->flags
& RADEON_SURF_SHAREABLE
)) {
903 ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin
= {0};
904 ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout
= {0};
906 xin
.size
= sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT
);
907 xout
.size
= sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT
);
909 /* This counter starts from 1 instead of 0. */
910 xin
.surfIndex
= p_atomic_inc_return(config
->info
.fmask_surf_index
);
911 xin
.tileIndex
= fout
.tileIndex
;
912 xin
.macroModeIndex
= fout
.macroModeIndex
;
913 xin
.pTileInfo
= fout
.pTileInfo
;
914 xin
.tileMode
= fin
.tileMode
;
916 int r
= AddrComputeBaseSwizzle(addrlib
, &xin
, &xout
);
920 assert(xout
.tileSwizzle
<=
921 u_bit_consecutive(0, sizeof(surf
->tile_swizzle
) * 8));
922 surf
->fmask_tile_swizzle
= xout
.tileSwizzle
;
926 /* Recalculate the whole DCC miptree size including disabled levels.
927 * This is what addrlib does, but calling addrlib would be a lot more
930 if (surf
->dcc_size
&& config
->info
.levels
> 1) {
931 /* The smallest miplevels that are never compressed by DCC
932 * still read the DCC buffer via TC if the base level uses DCC,
933 * and for some reason the DCC buffer needs to be larger if
934 * the miptree uses non-zero tile_swizzle. Otherwise there are
937 * "dcc_alignment * 4" was determined by trial and error.
939 surf
->dcc_size
= align64(surf
->surf_size
>> 8,
940 surf
->dcc_alignment
* 4);
943 /* Make sure HTILE covers the whole miptree, because the shader reads
944 * TC-compatible HTILE even for levels where it's disabled by DB.
946 if (surf
->htile_size
&& config
->info
.levels
> 1 &&
947 surf
->flags
& RADEON_SURF_TC_COMPATIBLE_HTILE
) {
948 /* MSAA can't occur with levels > 1, so ignore the sample count. */
949 const unsigned total_pixels
= surf
->surf_size
/ surf
->bpe
;
950 const unsigned htile_block_size
= 8 * 8;
951 const unsigned htile_element_size
= 4;
953 surf
->htile_size
= (total_pixels
/ htile_block_size
) *
955 surf
->htile_size
= align(surf
->htile_size
, surf
->htile_alignment
);
956 } else if (!surf
->htile_size
) {
957 /* Unset this if HTILE is not present. */
958 surf
->flags
&= ~RADEON_SURF_TC_COMPATIBLE_HTILE
;
961 surf
->is_linear
= surf
->u
.legacy
.level
[0].mode
== RADEON_SURF_MODE_LINEAR_ALIGNED
;
962 surf
->is_displayable
= surf
->is_linear
||
963 surf
->micro_tile_mode
== RADEON_MICRO_MODE_DISPLAY
||
964 surf
->micro_tile_mode
== RADEON_MICRO_MODE_RENDER
;
966 /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
967 * used at the same time. This case is not currently expected to occur
968 * because we don't use rotated. Enforce this restriction on all chips
969 * to facilitate testing.
971 if (surf
->micro_tile_mode
== RADEON_MICRO_MODE_RENDER
) {
972 assert(!"rotate micro tile mode is unsupported");
976 ac_compute_cmask(info
, config
, surf
);
980 /* This is only called when expecting a tiled layout. */
982 gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib
,
983 struct radeon_surf
*surf
,
984 ADDR2_COMPUTE_SURFACE_INFO_INPUT
*in
,
985 bool is_fmask
, AddrSwizzleMode
*swizzle_mode
)
987 ADDR_E_RETURNCODE ret
;
988 ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin
= {0};
989 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout
= {0};
991 sin
.size
= sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT
);
992 sout
.size
= sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT
);
994 sin
.flags
= in
->flags
;
995 sin
.resourceType
= in
->resourceType
;
996 sin
.format
= in
->format
;
997 sin
.resourceLoction
= ADDR_RSRC_LOC_INVIS
;
998 /* TODO: We could allow some of these: */
999 sin
.forbiddenBlock
.micro
= 1; /* don't allow the 256B swizzle modes */
1000 sin
.forbiddenBlock
.var
= 1; /* don't allow the variable-sized swizzle modes */
1002 sin
.width
= in
->width
;
1003 sin
.height
= in
->height
;
1004 sin
.numSlices
= in
->numSlices
;
1005 sin
.numMipLevels
= in
->numMipLevels
;
1006 sin
.numSamples
= in
->numSamples
;
1007 sin
.numFrags
= in
->numFrags
;
1010 sin
.flags
.display
= 0;
1011 sin
.flags
.color
= 0;
1012 sin
.flags
.fmask
= 1;
1015 if (surf
->flags
& RADEON_SURF_FORCE_MICRO_TILE_MODE
) {
1016 sin
.forbiddenBlock
.linear
= 1;
1018 if (surf
->micro_tile_mode
== RADEON_MICRO_MODE_DISPLAY
)
1019 sin
.preferredSwSet
.sw_D
= 1;
1020 else if (surf
->micro_tile_mode
== RADEON_MICRO_MODE_STANDARD
)
1021 sin
.preferredSwSet
.sw_S
= 1;
1022 else if (surf
->micro_tile_mode
== RADEON_MICRO_MODE_DEPTH
)
1023 sin
.preferredSwSet
.sw_Z
= 1;
1024 else if (surf
->micro_tile_mode
== RADEON_MICRO_MODE_RENDER
)
1025 sin
.preferredSwSet
.sw_R
= 1;
1028 ret
= Addr2GetPreferredSurfaceSetting(addrlib
, &sin
, &sout
);
1032 *swizzle_mode
= sout
.swizzleMode
;
1036 static bool is_dcc_supported_by_CB(const struct radeon_info
*info
, unsigned sw_mode
)
1038 if (info
->chip_class
>= GFX10
)
1039 return sw_mode
== ADDR_SW_64KB_Z_X
|| sw_mode
== ADDR_SW_64KB_R_X
;
1041 return sw_mode
!= ADDR_SW_LINEAR
;
1044 ASSERTED
static bool is_dcc_supported_by_L2(const struct radeon_info
*info
,
1045 const struct radeon_surf
*surf
)
1047 if (info
->chip_class
<= GFX9
) {
1048 /* Only independent 64B blocks are supported. */
1049 return surf
->u
.gfx9
.dcc
.independent_64B_blocks
&&
1050 !surf
->u
.gfx9
.dcc
.independent_128B_blocks
&&
1051 surf
->u
.gfx9
.dcc
.max_compressed_block_size
== V_028C78_MAX_BLOCK_SIZE_64B
;
1054 if (info
->family
== CHIP_NAVI10
) {
1055 /* Only independent 128B blocks are supported. */
1056 return !surf
->u
.gfx9
.dcc
.independent_64B_blocks
&&
1057 surf
->u
.gfx9
.dcc
.independent_128B_blocks
&&
1058 surf
->u
.gfx9
.dcc
.max_compressed_block_size
<= V_028C78_MAX_BLOCK_SIZE_128B
;
1061 if (info
->family
== CHIP_NAVI12
||
1062 info
->family
== CHIP_NAVI14
) {
1063 /* Either 64B or 128B can be used, but not both.
1064 * If 64B is used, DCC image stores are unsupported.
1066 return surf
->u
.gfx9
.dcc
.independent_64B_blocks
!=
1067 surf
->u
.gfx9
.dcc
.independent_128B_blocks
&&
1068 (!surf
->u
.gfx9
.dcc
.independent_64B_blocks
||
1069 surf
->u
.gfx9
.dcc
.max_compressed_block_size
== V_028C78_MAX_BLOCK_SIZE_64B
) &&
1070 (!surf
->u
.gfx9
.dcc
.independent_128B_blocks
||
1071 surf
->u
.gfx9
.dcc
.max_compressed_block_size
<= V_028C78_MAX_BLOCK_SIZE_128B
);
1074 /* 128B is recommended, but 64B can be set too if needed for 4K by DCN.
1075 * Since there is no reason to ever disable 128B, require it.
1076 * DCC image stores are always supported.
1078 return surf
->u
.gfx9
.dcc
.independent_128B_blocks
&&
1079 surf
->u
.gfx9
.dcc
.max_compressed_block_size
<= V_028C78_MAX_BLOCK_SIZE_128B
;
1082 static bool is_dcc_supported_by_DCN(const struct radeon_info
*info
,
1083 const struct ac_surf_config
*config
,
1084 const struct radeon_surf
*surf
,
1085 bool rb_aligned
, bool pipe_aligned
)
1087 if (!info
->use_display_dcc_unaligned
&&
1088 !info
->use_display_dcc_with_retile_blit
)
1091 /* 16bpp and 64bpp are more complicated, so they are disallowed for now. */
1095 /* Handle unaligned DCC. */
1096 if (info
->use_display_dcc_unaligned
&&
1097 (rb_aligned
|| pipe_aligned
))
1100 switch (info
->chip_class
) {
1102 /* There are more constraints, but we always set
1103 * INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B,
1104 * which always works.
1106 assert(surf
->u
.gfx9
.dcc
.independent_64B_blocks
&&
1107 surf
->u
.gfx9
.dcc
.max_compressed_block_size
== V_028C78_MAX_BLOCK_SIZE_64B
);
1111 /* DCN requires INDEPENDENT_128B_BLOCKS = 0 only on Navi1x. */
1112 if (info
->chip_class
== GFX10
&&
1113 surf
->u
.gfx9
.dcc
.independent_128B_blocks
)
1116 /* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1. */
1117 return ((config
->info
.width
<= 2560 &&
1118 config
->info
.height
<= 2560) ||
1119 (surf
->u
.gfx9
.dcc
.independent_64B_blocks
&&
1120 surf
->u
.gfx9
.dcc
.max_compressed_block_size
== V_028C78_MAX_BLOCK_SIZE_64B
));
1122 unreachable("unhandled chip");
1127 static int gfx9_compute_miptree(ADDR_HANDLE addrlib
,
1128 const struct radeon_info
*info
,
1129 const struct ac_surf_config
*config
,
1130 struct radeon_surf
*surf
, bool compressed
,
1131 ADDR2_COMPUTE_SURFACE_INFO_INPUT
*in
)
1133 ADDR2_MIP_INFO mip_info
[RADEON_SURF_MAX_LEVELS
] = {};
1134 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out
= {0};
1135 ADDR_E_RETURNCODE ret
;
1137 out
.size
= sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
);
1138 out
.pMipInfo
= mip_info
;
1140 ret
= Addr2ComputeSurfaceInfo(addrlib
, in
, &out
);
1144 if (in
->flags
.stencil
) {
1145 surf
->u
.gfx9
.stencil
.swizzle_mode
= in
->swizzleMode
;
1146 surf
->u
.gfx9
.stencil
.epitch
= out
.epitchIsHeight
? out
.mipChainHeight
- 1 :
1147 out
.mipChainPitch
- 1;
1148 surf
->surf_alignment
= MAX2(surf
->surf_alignment
, out
.baseAlign
);
1149 surf
->u
.gfx9
.stencil_offset
= align(surf
->surf_size
, out
.baseAlign
);
1150 surf
->surf_size
= surf
->u
.gfx9
.stencil_offset
+ out
.surfSize
;
1154 surf
->u
.gfx9
.surf
.swizzle_mode
= in
->swizzleMode
;
1155 surf
->u
.gfx9
.surf
.epitch
= out
.epitchIsHeight
? out
.mipChainHeight
- 1 :
1156 out
.mipChainPitch
- 1;
1158 /* CMASK fast clear uses these even if FMASK isn't allocated.
1159 * FMASK only supports the Z swizzle modes, whose numbers are multiples of 4.
1161 surf
->u
.gfx9
.fmask
.swizzle_mode
= surf
->u
.gfx9
.surf
.swizzle_mode
& ~0x3;
1162 surf
->u
.gfx9
.fmask
.epitch
= surf
->u
.gfx9
.surf
.epitch
;
1164 surf
->u
.gfx9
.surf_slice_size
= out
.sliceSize
;
1165 surf
->u
.gfx9
.surf_pitch
= out
.pitch
;
1166 if (!compressed
&& surf
->blk_w
> 1 && out
.pitch
== out
.pixelPitch
&&
1167 surf
->u
.gfx9
.surf
.swizzle_mode
== ADDR_SW_LINEAR
) {
1168 /* Adjust surf_pitch to be in elements units,
1170 surf
->u
.gfx9
.surf_pitch
=
1171 align(surf
->u
.gfx9
.surf_pitch
/ surf
->blk_w
, 256 / surf
->bpe
);
1172 surf
->u
.gfx9
.surf
.epitch
= MAX2(surf
->u
.gfx9
.surf
.epitch
,
1173 surf
->u
.gfx9
.surf_pitch
* surf
->blk_w
- 1);
1175 surf
->u
.gfx9
.surf_height
= out
.height
;
1176 surf
->surf_size
= out
.surfSize
;
1177 surf
->surf_alignment
= out
.baseAlign
;
1179 if (in
->swizzleMode
== ADDR_SW_LINEAR
) {
1180 for (unsigned i
= 0; i
< in
->numMipLevels
; i
++) {
1181 surf
->u
.gfx9
.offset
[i
] = mip_info
[i
].offset
;
1182 surf
->u
.gfx9
.pitch
[i
] = mip_info
[i
].pitch
;
1186 if (in
->flags
.depth
) {
1187 assert(in
->swizzleMode
!= ADDR_SW_LINEAR
);
1189 if (surf
->flags
& RADEON_SURF_NO_HTILE
)
1193 ADDR2_COMPUTE_HTILE_INFO_INPUT hin
= {0};
1194 ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout
= {0};
1196 hin
.size
= sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT
);
1197 hout
.size
= sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT
);
1199 assert(in
->flags
.metaPipeUnaligned
== 0);
1200 assert(in
->flags
.metaRbUnaligned
== 0);
1202 hin
.hTileFlags
.pipeAligned
= 1;
1203 hin
.hTileFlags
.rbAligned
= 1;
1204 hin
.depthFlags
= in
->flags
;
1205 hin
.swizzleMode
= in
->swizzleMode
;
1206 hin
.unalignedWidth
= in
->width
;
1207 hin
.unalignedHeight
= in
->height
;
1208 hin
.numSlices
= in
->numSlices
;
1209 hin
.numMipLevels
= in
->numMipLevels
;
1210 hin
.firstMipIdInTail
= out
.firstMipIdInTail
;
1212 ret
= Addr2ComputeHtileInfo(addrlib
, &hin
, &hout
);
1216 surf
->htile_size
= hout
.htileBytes
;
1217 surf
->htile_slice_size
= hout
.sliceSize
;
1218 surf
->htile_alignment
= hout
.baseAlign
;
1223 /* Compute tile swizzle for the color surface.
1224 * All *_X and *_T modes can use the swizzle.
1226 if (config
->info
.surf_index
&&
1227 in
->swizzleMode
>= ADDR_SW_64KB_Z_T
&&
1228 !out
.mipChainInTail
&&
1229 !(surf
->flags
& RADEON_SURF_SHAREABLE
) &&
1230 !in
->flags
.display
) {
1231 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin
= {0};
1232 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout
= {0};
1234 xin
.size
= sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT
);
1235 xout
.size
= sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT
);
1237 xin
.surfIndex
= p_atomic_inc_return(config
->info
.surf_index
) - 1;
1238 xin
.flags
= in
->flags
;
1239 xin
.swizzleMode
= in
->swizzleMode
;
1240 xin
.resourceType
= in
->resourceType
;
1241 xin
.format
= in
->format
;
1242 xin
.numSamples
= in
->numSamples
;
1243 xin
.numFrags
= in
->numFrags
;
1245 ret
= Addr2ComputePipeBankXor(addrlib
, &xin
, &xout
);
1249 assert(xout
.pipeBankXor
<=
1250 u_bit_consecutive(0, sizeof(surf
->tile_swizzle
) * 8));
1251 surf
->tile_swizzle
= xout
.pipeBankXor
;
1255 if (info
->has_graphics
&&
1256 !(surf
->flags
& RADEON_SURF_DISABLE_DCC
) &&
1258 is_dcc_supported_by_CB(info
, in
->swizzleMode
) &&
1259 (!in
->flags
.display
||
1260 is_dcc_supported_by_DCN(info
, config
, surf
,
1261 !in
->flags
.metaRbUnaligned
,
1262 !in
->flags
.metaPipeUnaligned
))) {
1263 ADDR2_COMPUTE_DCCINFO_INPUT din
= {0};
1264 ADDR2_COMPUTE_DCCINFO_OUTPUT dout
= {0};
1265 ADDR2_META_MIP_INFO meta_mip_info
[RADEON_SURF_MAX_LEVELS
] = {};
1267 din
.size
= sizeof(ADDR2_COMPUTE_DCCINFO_INPUT
);
1268 dout
.size
= sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT
);
1269 dout
.pMipInfo
= meta_mip_info
;
1271 din
.dccKeyFlags
.pipeAligned
= !in
->flags
.metaPipeUnaligned
;
1272 din
.dccKeyFlags
.rbAligned
= !in
->flags
.metaRbUnaligned
;
1273 din
.colorFlags
= in
->flags
;
1274 din
.resourceType
= in
->resourceType
;
1275 din
.swizzleMode
= in
->swizzleMode
;
1277 din
.unalignedWidth
= in
->width
;
1278 din
.unalignedHeight
= in
->height
;
1279 din
.numSlices
= in
->numSlices
;
1280 din
.numFrags
= in
->numFrags
;
1281 din
.numMipLevels
= in
->numMipLevels
;
1282 din
.dataSurfaceSize
= out
.surfSize
;
1283 din
.firstMipIdInTail
= out
.firstMipIdInTail
;
1285 ret
= Addr2ComputeDccInfo(addrlib
, &din
, &dout
);
1289 surf
->u
.gfx9
.dcc
.rb_aligned
= din
.dccKeyFlags
.rbAligned
;
1290 surf
->u
.gfx9
.dcc
.pipe_aligned
= din
.dccKeyFlags
.pipeAligned
;
1291 surf
->u
.gfx9
.dcc_block_width
= dout
.compressBlkWidth
;
1292 surf
->u
.gfx9
.dcc_block_height
= dout
.compressBlkHeight
;
1293 surf
->u
.gfx9
.dcc_block_depth
= dout
.compressBlkDepth
;
1294 surf
->dcc_size
= dout
.dccRamSize
;
1295 surf
->dcc_alignment
= dout
.dccRamBaseAlign
;
1296 surf
->num_dcc_levels
= in
->numMipLevels
;
1298 /* Disable DCC for levels that are in the mip tail.
1300 * There are two issues that this is intended to
1303 * 1. Multiple mip levels may share a cache line. This
1304 * can lead to corruption when switching between
1305 * rendering to different mip levels because the
1306 * RBs don't maintain coherency.
1308 * 2. Texturing with metadata after rendering sometimes
1309 * fails with corruption, probably for a similar
1312 * Working around these issues for all levels in the
1313 * mip tail may be overly conservative, but it's what
1316 * Alternative solutions that also work but are worse:
1317 * - Disable DCC entirely.
1318 * - Flush TC L2 after rendering.
1320 for (unsigned i
= 0; i
< in
->numMipLevels
; i
++) {
1321 if (meta_mip_info
[i
].inMiptail
) {
1322 surf
->num_dcc_levels
= i
;
1327 if (!surf
->num_dcc_levels
)
1330 surf
->u
.gfx9
.display_dcc_size
= surf
->dcc_size
;
1331 surf
->u
.gfx9
.display_dcc_alignment
= surf
->dcc_alignment
;
1332 surf
->u
.gfx9
.display_dcc_pitch_max
= dout
.pitch
- 1;
1334 /* Compute displayable DCC. */
1335 if (in
->flags
.display
&&
1336 surf
->num_dcc_levels
&&
1337 info
->use_display_dcc_with_retile_blit
) {
1338 /* Compute displayable DCC info. */
1339 din
.dccKeyFlags
.pipeAligned
= 0;
1340 din
.dccKeyFlags
.rbAligned
= 0;
1342 assert(din
.numSlices
== 1);
1343 assert(din
.numMipLevels
== 1);
1344 assert(din
.numFrags
== 1);
1345 assert(surf
->tile_swizzle
== 0);
1346 assert(surf
->u
.gfx9
.dcc
.pipe_aligned
||
1347 surf
->u
.gfx9
.dcc
.rb_aligned
);
1349 ret
= Addr2ComputeDccInfo(addrlib
, &din
, &dout
);
1353 surf
->u
.gfx9
.display_dcc_size
= dout
.dccRamSize
;
1354 surf
->u
.gfx9
.display_dcc_alignment
= dout
.dccRamBaseAlign
;
1355 surf
->u
.gfx9
.display_dcc_pitch_max
= dout
.pitch
- 1;
1356 assert(surf
->u
.gfx9
.display_dcc_size
<= surf
->dcc_size
);
1358 surf
->u
.gfx9
.dcc_retile_use_uint16
=
1359 surf
->u
.gfx9
.display_dcc_size
<= UINT16_MAX
+ 1 &&
1360 surf
->dcc_size
<= UINT16_MAX
+ 1;
1361 surf
->u
.gfx9
.dcc_retile_num_elements
=
1362 DIV_ROUND_UP(in
->width
, dout
.compressBlkWidth
) *
1363 DIV_ROUND_UP(in
->height
, dout
.compressBlkHeight
) * 2;
1364 /* Align the size to 4 (for the compute shader). */
1365 surf
->u
.gfx9
.dcc_retile_num_elements
=
1366 align(surf
->u
.gfx9
.dcc_retile_num_elements
, 4);
1368 if (!(surf
->flags
& RADEON_SURF_IMPORTED
)) {
1369 /* Compute address mapping from non-displayable to displayable DCC. */
1370 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin
= {};
1371 addrin
.size
= sizeof(addrin
);
1372 addrin
.colorFlags
.color
= 1;
1373 addrin
.swizzleMode
= din
.swizzleMode
;
1374 addrin
.resourceType
= din
.resourceType
;
1375 addrin
.bpp
= din
.bpp
;
1376 addrin
.unalignedWidth
= din
.unalignedWidth
;
1377 addrin
.unalignedHeight
= din
.unalignedHeight
;
1378 addrin
.numSlices
= 1;
1379 addrin
.numMipLevels
= 1;
1380 addrin
.numFrags
= 1;
1381 addrin
.pitch
= dout
.pitch
;
1382 addrin
.height
= dout
.height
;
1383 addrin
.compressBlkWidth
= dout
.compressBlkWidth
;
1384 addrin
.compressBlkHeight
= dout
.compressBlkHeight
;
1385 addrin
.compressBlkDepth
= dout
.compressBlkDepth
;
1386 addrin
.metaBlkWidth
= dout
.metaBlkWidth
;
1387 addrin
.metaBlkHeight
= dout
.metaBlkHeight
;
1388 addrin
.metaBlkDepth
= dout
.metaBlkDepth
;
1389 addrin
.dccRamSliceSize
= dout
.dccRamSliceSize
;
1391 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout
= {};
1392 addrout
.size
= sizeof(addrout
);
1394 surf
->u
.gfx9
.dcc_retile_map
=
1395 malloc(surf
->u
.gfx9
.dcc_retile_num_elements
* 4);
1396 if (!surf
->u
.gfx9
.dcc_retile_map
)
1397 return ADDR_OUTOFMEMORY
;
1401 for (unsigned y
= 0; y
< in
->height
; y
+= dout
.compressBlkHeight
) {
1404 for (unsigned x
= 0; x
< in
->width
; x
+= dout
.compressBlkWidth
) {
1407 /* Compute src DCC address */
1408 addrin
.dccKeyFlags
.pipeAligned
= surf
->u
.gfx9
.dcc
.pipe_aligned
;
1409 addrin
.dccKeyFlags
.rbAligned
= surf
->u
.gfx9
.dcc
.rb_aligned
;
1412 ret
= Addr2ComputeDccAddrFromCoord(addrlib
, &addrin
, &addrout
);
1416 surf
->u
.gfx9
.dcc_retile_map
[index
* 2] = addrout
.addr
;
1418 /* Compute dst DCC address */
1419 addrin
.dccKeyFlags
.pipeAligned
= 0;
1420 addrin
.dccKeyFlags
.rbAligned
= 0;
1423 ret
= Addr2ComputeDccAddrFromCoord(addrlib
, &addrin
, &addrout
);
1427 surf
->u
.gfx9
.dcc_retile_map
[index
* 2 + 1] = addrout
.addr
;
1429 assert(index
* 2 + 1 < surf
->u
.gfx9
.dcc_retile_num_elements
);
1433 /* Fill the remaining pairs with the last one (for the compute shader). */
1434 for (unsigned i
= index
* 2; i
< surf
->u
.gfx9
.dcc_retile_num_elements
; i
++)
1435 surf
->u
.gfx9
.dcc_retile_map
[i
] = surf
->u
.gfx9
.dcc_retile_map
[i
- 2];
1441 if (in
->numSamples
> 1 && info
->has_graphics
&&
1442 !(surf
->flags
& RADEON_SURF_NO_FMASK
)) {
1443 ADDR2_COMPUTE_FMASK_INFO_INPUT fin
= {0};
1444 ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout
= {0};
1446 fin
.size
= sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT
);
1447 fout
.size
= sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT
);
1449 ret
= gfx9_get_preferred_swizzle_mode(addrlib
, surf
, in
,
1450 true, &fin
.swizzleMode
);
1454 fin
.unalignedWidth
= in
->width
;
1455 fin
.unalignedHeight
= in
->height
;
1456 fin
.numSlices
= in
->numSlices
;
1457 fin
.numSamples
= in
->numSamples
;
1458 fin
.numFrags
= in
->numFrags
;
1460 ret
= Addr2ComputeFmaskInfo(addrlib
, &fin
, &fout
);
1464 surf
->u
.gfx9
.fmask
.swizzle_mode
= fin
.swizzleMode
;
1465 surf
->u
.gfx9
.fmask
.epitch
= fout
.pitch
- 1;
1466 surf
->fmask_size
= fout
.fmaskBytes
;
1467 surf
->fmask_alignment
= fout
.baseAlign
;
1469 /* Compute tile swizzle for the FMASK surface. */
1470 if (config
->info
.fmask_surf_index
&&
1471 fin
.swizzleMode
>= ADDR_SW_64KB_Z_T
&&
1472 !(surf
->flags
& RADEON_SURF_SHAREABLE
)) {
1473 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin
= {0};
1474 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout
= {0};
1476 xin
.size
= sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT
);
1477 xout
.size
= sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT
);
1479 /* This counter starts from 1 instead of 0. */
1480 xin
.surfIndex
= p_atomic_inc_return(config
->info
.fmask_surf_index
);
1481 xin
.flags
= in
->flags
;
1482 xin
.swizzleMode
= fin
.swizzleMode
;
1483 xin
.resourceType
= in
->resourceType
;
1484 xin
.format
= in
->format
;
1485 xin
.numSamples
= in
->numSamples
;
1486 xin
.numFrags
= in
->numFrags
;
1488 ret
= Addr2ComputePipeBankXor(addrlib
, &xin
, &xout
);
1492 assert(xout
.pipeBankXor
<=
1493 u_bit_consecutive(0, sizeof(surf
->fmask_tile_swizzle
) * 8));
1494 surf
->fmask_tile_swizzle
= xout
.pipeBankXor
;
1498 /* CMASK -- on GFX10 only for FMASK */
1499 if (in
->swizzleMode
!= ADDR_SW_LINEAR
&&
1500 in
->resourceType
== ADDR_RSRC_TEX_2D
&&
1501 ((info
->chip_class
<= GFX9
&&
1502 in
->numSamples
== 1 &&
1503 in
->flags
.metaPipeUnaligned
== 0 &&
1504 in
->flags
.metaRbUnaligned
== 0) ||
1505 (surf
->fmask_size
&& in
->numSamples
>= 2))) {
1506 ADDR2_COMPUTE_CMASK_INFO_INPUT cin
= {0};
1507 ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout
= {0};
1509 cin
.size
= sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT
);
1510 cout
.size
= sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT
);
1512 assert(in
->flags
.metaPipeUnaligned
== 0);
1513 assert(in
->flags
.metaRbUnaligned
== 0);
1515 cin
.cMaskFlags
.pipeAligned
= 1;
1516 cin
.cMaskFlags
.rbAligned
= 1;
1517 cin
.colorFlags
= in
->flags
;
1518 cin
.resourceType
= in
->resourceType
;
1519 cin
.unalignedWidth
= in
->width
;
1520 cin
.unalignedHeight
= in
->height
;
1521 cin
.numSlices
= in
->numSlices
;
1523 if (in
->numSamples
> 1)
1524 cin
.swizzleMode
= surf
->u
.gfx9
.fmask
.swizzle_mode
;
1526 cin
.swizzleMode
= in
->swizzleMode
;
1528 ret
= Addr2ComputeCmaskInfo(addrlib
, &cin
, &cout
);
1532 surf
->cmask_size
= cout
.cmaskBytes
;
1533 surf
->cmask_alignment
= cout
.baseAlign
;
1540 static int gfx9_compute_surface(ADDR_HANDLE addrlib
,
1541 const struct radeon_info
*info
,
1542 const struct ac_surf_config
*config
,
1543 enum radeon_surf_mode mode
,
1544 struct radeon_surf
*surf
)
1547 ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn
= {0};
1550 AddrSurfInfoIn
.size
= sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT
);
1552 compressed
= surf
->blk_w
== 4 && surf
->blk_h
== 4;
1554 /* The format must be set correctly for the allocation of compressed
1555 * textures to work. In other cases, setting the bpp is sufficient. */
1557 switch (surf
->bpe
) {
1559 AddrSurfInfoIn
.format
= ADDR_FMT_BC1
;
1562 AddrSurfInfoIn
.format
= ADDR_FMT_BC3
;
1568 switch (surf
->bpe
) {
1570 assert(!(surf
->flags
& RADEON_SURF_ZBUFFER
));
1571 AddrSurfInfoIn
.format
= ADDR_FMT_8
;
1574 assert(surf
->flags
& RADEON_SURF_ZBUFFER
||
1575 !(surf
->flags
& RADEON_SURF_SBUFFER
));
1576 AddrSurfInfoIn
.format
= ADDR_FMT_16
;
1579 assert(surf
->flags
& RADEON_SURF_ZBUFFER
||
1580 !(surf
->flags
& RADEON_SURF_SBUFFER
));
1581 AddrSurfInfoIn
.format
= ADDR_FMT_32
;
1584 assert(!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
));
1585 AddrSurfInfoIn
.format
= ADDR_FMT_32_32
;
1588 assert(!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
));
1589 AddrSurfInfoIn
.format
= ADDR_FMT_32_32_32
;
1592 assert(!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
));
1593 AddrSurfInfoIn
.format
= ADDR_FMT_32_32_32_32
;
1598 AddrSurfInfoIn
.bpp
= surf
->bpe
* 8;
1601 bool is_color_surface
= !(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
);
1602 AddrSurfInfoIn
.flags
.color
= is_color_surface
&&
1603 !(surf
->flags
& RADEON_SURF_NO_RENDER_TARGET
);
1604 AddrSurfInfoIn
.flags
.depth
= (surf
->flags
& RADEON_SURF_ZBUFFER
) != 0;
1605 AddrSurfInfoIn
.flags
.display
= get_display_flag(config
, surf
);
1606 /* flags.texture currently refers to TC-compatible HTILE */
1607 AddrSurfInfoIn
.flags
.texture
= is_color_surface
||
1608 surf
->flags
& RADEON_SURF_TC_COMPATIBLE_HTILE
;
1609 AddrSurfInfoIn
.flags
.opt4space
= 1;
1611 AddrSurfInfoIn
.numMipLevels
= config
->info
.levels
;
1612 AddrSurfInfoIn
.numSamples
= MAX2(1, config
->info
.samples
);
1613 AddrSurfInfoIn
.numFrags
= AddrSurfInfoIn
.numSamples
;
1615 if (!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
))
1616 AddrSurfInfoIn
.numFrags
= MAX2(1, config
->info
.storage_samples
);
1618 /* GFX9 doesn't support 1D depth textures, so allocate all 1D textures
1619 * as 2D to avoid having shader variants for 1D vs 2D, so all shaders
1620 * must sample 1D textures as 2D. */
1622 AddrSurfInfoIn
.resourceType
= ADDR_RSRC_TEX_3D
;
1623 else if (info
->chip_class
!= GFX9
&& config
->is_1d
)
1624 AddrSurfInfoIn
.resourceType
= ADDR_RSRC_TEX_1D
;
1626 AddrSurfInfoIn
.resourceType
= ADDR_RSRC_TEX_2D
;
1628 AddrSurfInfoIn
.width
= config
->info
.width
;
1629 AddrSurfInfoIn
.height
= config
->info
.height
;
1632 AddrSurfInfoIn
.numSlices
= config
->info
.depth
;
1633 else if (config
->is_cube
)
1634 AddrSurfInfoIn
.numSlices
= 6;
1636 AddrSurfInfoIn
.numSlices
= config
->info
.array_size
;
1638 /* This is propagated to DCC. It must be 0 for HTILE and CMASK. */
1639 AddrSurfInfoIn
.flags
.metaPipeUnaligned
= 0;
1640 AddrSurfInfoIn
.flags
.metaRbUnaligned
= 0;
1642 /* Optimal values for the L2 cache. */
1643 if (info
->chip_class
== GFX9
) {
1644 surf
->u
.gfx9
.dcc
.independent_64B_blocks
= 1;
1645 surf
->u
.gfx9
.dcc
.independent_128B_blocks
= 0;
1646 surf
->u
.gfx9
.dcc
.max_compressed_block_size
= V_028C78_MAX_BLOCK_SIZE_64B
;
1647 } else if (info
->chip_class
>= GFX10
) {
1648 surf
->u
.gfx9
.dcc
.independent_64B_blocks
= 0;
1649 surf
->u
.gfx9
.dcc
.independent_128B_blocks
= 1;
1650 surf
->u
.gfx9
.dcc
.max_compressed_block_size
= V_028C78_MAX_BLOCK_SIZE_128B
;
1653 if (AddrSurfInfoIn
.flags
.display
) {
1654 /* The display hardware can only read DCC with RB_ALIGNED=0 and
1655 * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED.
1657 * The CB block requires RB_ALIGNED=1 except 1 RB chips.
1658 * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes
1659 * after rendering, so PIPE_ALIGNED=1 is recommended.
1661 if (info
->use_display_dcc_unaligned
) {
1662 AddrSurfInfoIn
.flags
.metaPipeUnaligned
= 1;
1663 AddrSurfInfoIn
.flags
.metaRbUnaligned
= 1;
1666 /* Adjust DCC settings to meet DCN requirements. */
1667 if (info
->use_display_dcc_unaligned
||
1668 info
->use_display_dcc_with_retile_blit
) {
1669 /* Only Navi12/14 support independent 64B blocks in L2,
1670 * but without DCC image stores.
1672 if (info
->family
== CHIP_NAVI12
||
1673 info
->family
== CHIP_NAVI14
) {
1674 surf
->u
.gfx9
.dcc
.independent_64B_blocks
= 1;
1675 surf
->u
.gfx9
.dcc
.independent_128B_blocks
= 0;
1676 surf
->u
.gfx9
.dcc
.max_compressed_block_size
= V_028C78_MAX_BLOCK_SIZE_64B
;
1679 if (info
->chip_class
>= GFX10_3
) {
1680 surf
->u
.gfx9
.dcc
.independent_64B_blocks
= 1;
1681 surf
->u
.gfx9
.dcc
.independent_128B_blocks
= 1;
1682 surf
->u
.gfx9
.dcc
.max_compressed_block_size
= V_028C78_MAX_BLOCK_SIZE_64B
;
1688 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
1689 assert(config
->info
.samples
<= 1);
1690 assert(!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
));
1691 AddrSurfInfoIn
.swizzleMode
= ADDR_SW_LINEAR
;
1694 case RADEON_SURF_MODE_1D
:
1695 case RADEON_SURF_MODE_2D
:
1696 if (surf
->flags
& RADEON_SURF_IMPORTED
||
1697 (info
->chip_class
>= GFX10
&&
1698 surf
->flags
& RADEON_SURF_FORCE_SWIZZLE_MODE
)) {
1699 AddrSurfInfoIn
.swizzleMode
= surf
->u
.gfx9
.surf
.swizzle_mode
;
1703 r
= gfx9_get_preferred_swizzle_mode(addrlib
, surf
, &AddrSurfInfoIn
,
1704 false, &AddrSurfInfoIn
.swizzleMode
);
1713 surf
->u
.gfx9
.resource_type
= AddrSurfInfoIn
.resourceType
;
1714 surf
->has_stencil
= !!(surf
->flags
& RADEON_SURF_SBUFFER
);
1716 surf
->num_dcc_levels
= 0;
1717 surf
->surf_size
= 0;
1718 surf
->fmask_size
= 0;
1720 surf
->htile_size
= 0;
1721 surf
->htile_slice_size
= 0;
1722 surf
->u
.gfx9
.surf_offset
= 0;
1723 surf
->u
.gfx9
.stencil_offset
= 0;
1724 surf
->cmask_size
= 0;
1725 surf
->u
.gfx9
.dcc_retile_use_uint16
= false;
1726 surf
->u
.gfx9
.dcc_retile_num_elements
= 0;
1727 surf
->u
.gfx9
.dcc_retile_map
= NULL
;
1729 /* Calculate texture layout information. */
1730 r
= gfx9_compute_miptree(addrlib
, info
, config
, surf
, compressed
,
1735 /* Calculate texture layout information for stencil. */
1736 if (surf
->flags
& RADEON_SURF_SBUFFER
) {
1737 AddrSurfInfoIn
.flags
.stencil
= 1;
1738 AddrSurfInfoIn
.bpp
= 8;
1739 AddrSurfInfoIn
.format
= ADDR_FMT_8
;
1741 if (!AddrSurfInfoIn
.flags
.depth
) {
1742 r
= gfx9_get_preferred_swizzle_mode(addrlib
, surf
, &AddrSurfInfoIn
,
1743 false, &AddrSurfInfoIn
.swizzleMode
);
1747 AddrSurfInfoIn
.flags
.depth
= 0;
1749 r
= gfx9_compute_miptree(addrlib
, info
, config
, surf
, compressed
,
1755 surf
->is_linear
= surf
->u
.gfx9
.surf
.swizzle_mode
== ADDR_SW_LINEAR
;
1757 /* Query whether the surface is displayable. */
1758 /* This is only useful for surfaces that are allocated without SCANOUT. */
1759 bool displayable
= false;
1760 if (!config
->is_3d
&& !config
->is_cube
) {
1761 r
= Addr2IsValidDisplaySwizzleMode(addrlib
, surf
->u
.gfx9
.surf
.swizzle_mode
,
1762 surf
->bpe
* 8, &displayable
);
1766 /* Display needs unaligned DCC. */
1767 if (surf
->num_dcc_levels
&&
1768 !is_dcc_supported_by_DCN(info
, config
, surf
,
1769 surf
->u
.gfx9
.dcc
.rb_aligned
,
1770 surf
->u
.gfx9
.dcc
.pipe_aligned
))
1771 displayable
= false;
1773 surf
->is_displayable
= displayable
;
1775 /* Validate that we allocated a displayable surface if requested. */
1776 assert(!AddrSurfInfoIn
.flags
.display
|| surf
->is_displayable
);
1778 /* Validate that DCC is set up correctly. */
1779 if (surf
->num_dcc_levels
) {
1780 assert(is_dcc_supported_by_L2(info
, surf
));
1781 if (AddrSurfInfoIn
.flags
.color
)
1782 assert(is_dcc_supported_by_CB(info
, surf
->u
.gfx9
.surf
.swizzle_mode
));
1783 if (AddrSurfInfoIn
.flags
.display
) {
1784 assert(is_dcc_supported_by_DCN(info
, config
, surf
,
1785 surf
->u
.gfx9
.dcc
.rb_aligned
,
1786 surf
->u
.gfx9
.dcc
.pipe_aligned
));
1790 if (info
->has_graphics
&&
1793 config
->info
.levels
== 1 &&
1794 AddrSurfInfoIn
.flags
.color
&&
1796 surf
->surf_alignment
>= 64 * 1024 && /* 64KB tiling */
1797 !(surf
->flags
& (RADEON_SURF_DISABLE_DCC
|
1798 RADEON_SURF_FORCE_SWIZZLE_MODE
|
1799 RADEON_SURF_FORCE_MICRO_TILE_MODE
))) {
1800 /* Validate that DCC is enabled if DCN can do it. */
1801 if ((info
->use_display_dcc_unaligned
||
1802 info
->use_display_dcc_with_retile_blit
) &&
1803 AddrSurfInfoIn
.flags
.display
&&
1805 assert(surf
->num_dcc_levels
);
1808 /* Validate that non-scanout DCC is always enabled. */
1809 if (!AddrSurfInfoIn
.flags
.display
)
1810 assert(surf
->num_dcc_levels
);
1813 if (!surf
->htile_size
) {
1814 /* Unset this if HTILE is not present. */
1815 surf
->flags
&= ~RADEON_SURF_TC_COMPATIBLE_HTILE
;
1818 switch (surf
->u
.gfx9
.surf
.swizzle_mode
) {
1820 case ADDR_SW_256B_S
:
1822 case ADDR_SW_64KB_S
:
1823 case ADDR_SW_64KB_S_T
:
1824 case ADDR_SW_4KB_S_X
:
1825 case ADDR_SW_64KB_S_X
:
1826 surf
->micro_tile_mode
= RADEON_MICRO_MODE_STANDARD
;
1830 case ADDR_SW_LINEAR
:
1831 case ADDR_SW_256B_D
:
1833 case ADDR_SW_64KB_D
:
1834 case ADDR_SW_64KB_D_T
:
1835 case ADDR_SW_4KB_D_X
:
1836 case ADDR_SW_64KB_D_X
:
1837 surf
->micro_tile_mode
= RADEON_MICRO_MODE_DISPLAY
;
1840 /* R = rotated (gfx9), render target (gfx10). */
1841 case ADDR_SW_256B_R
:
1843 case ADDR_SW_64KB_R
:
1844 case ADDR_SW_64KB_R_T
:
1845 case ADDR_SW_4KB_R_X
:
1846 case ADDR_SW_64KB_R_X
:
1847 case ADDR_SW_VAR_R_X
:
1848 /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
1849 * used at the same time. We currently do not use rotated
1852 assert(info
->chip_class
>= GFX10
||
1853 !"rotate micro tile mode is unsupported");
1854 surf
->micro_tile_mode
= RADEON_MICRO_MODE_RENDER
;
1859 case ADDR_SW_64KB_Z
:
1860 case ADDR_SW_64KB_Z_T
:
1861 case ADDR_SW_4KB_Z_X
:
1862 case ADDR_SW_64KB_Z_X
:
1863 case ADDR_SW_VAR_Z_X
:
1864 surf
->micro_tile_mode
= RADEON_MICRO_MODE_DEPTH
;
1874 free(surf
->u
.gfx9
.dcc_retile_map
);
1875 surf
->u
.gfx9
.dcc_retile_map
= NULL
;
1879 int ac_compute_surface(ADDR_HANDLE addrlib
, const struct radeon_info
*info
,
1880 const struct ac_surf_config
*config
,
1881 enum radeon_surf_mode mode
,
1882 struct radeon_surf
*surf
)
1886 r
= surf_config_sanity(config
, surf
->flags
);
1890 if (info
->chip_class
>= GFX9
)
1891 r
= gfx9_compute_surface(addrlib
, info
, config
, mode
, surf
);
1893 r
= gfx6_compute_surface(addrlib
, info
, config
, mode
, surf
);
1898 /* Determine the memory layout of multiple allocations in one buffer. */
1899 surf
->total_size
= surf
->surf_size
;
1900 surf
->alignment
= surf
->surf_alignment
;
1902 if (surf
->htile_size
) {
1903 surf
->htile_offset
= align64(surf
->total_size
, surf
->htile_alignment
);
1904 surf
->total_size
= surf
->htile_offset
+ surf
->htile_size
;
1905 surf
->alignment
= MAX2(surf
->alignment
, surf
->htile_alignment
);
1908 if (surf
->fmask_size
) {
1909 assert(config
->info
.samples
>= 2);
1910 surf
->fmask_offset
= align64(surf
->total_size
, surf
->fmask_alignment
);
1911 surf
->total_size
= surf
->fmask_offset
+ surf
->fmask_size
;
1912 surf
->alignment
= MAX2(surf
->alignment
, surf
->fmask_alignment
);
1915 /* Single-sample CMASK is in a separate buffer. */
1916 if (surf
->cmask_size
&& config
->info
.samples
>= 2) {
1917 surf
->cmask_offset
= align64(surf
->total_size
, surf
->cmask_alignment
);
1918 surf
->total_size
= surf
->cmask_offset
+ surf
->cmask_size
;
1919 surf
->alignment
= MAX2(surf
->alignment
, surf
->cmask_alignment
);
1922 if (surf
->is_displayable
)
1923 surf
->flags
|= RADEON_SURF_SCANOUT
;
1925 if (surf
->dcc_size
&&
1926 /* dcc_size is computed on GFX9+ only if it's displayable. */
1927 (info
->chip_class
>= GFX9
|| !get_display_flag(config
, surf
))) {
1928 /* It's better when displayable DCC is immediately after
1929 * the image due to hw-specific reasons.
1931 if (info
->chip_class
>= GFX9
&&
1932 surf
->u
.gfx9
.dcc_retile_num_elements
) {
1933 /* Add space for the displayable DCC buffer. */
1934 surf
->display_dcc_offset
=
1935 align64(surf
->total_size
, surf
->u
.gfx9
.display_dcc_alignment
);
1936 surf
->total_size
= surf
->display_dcc_offset
+
1937 surf
->u
.gfx9
.display_dcc_size
;
1939 /* Add space for the DCC retile buffer. (16-bit or 32-bit elements) */
1940 surf
->dcc_retile_map_offset
=
1941 align64(surf
->total_size
, info
->tcc_cache_line_size
);
1943 if (surf
->u
.gfx9
.dcc_retile_use_uint16
) {
1944 surf
->total_size
= surf
->dcc_retile_map_offset
+
1945 surf
->u
.gfx9
.dcc_retile_num_elements
* 2;
1947 surf
->total_size
= surf
->dcc_retile_map_offset
+
1948 surf
->u
.gfx9
.dcc_retile_num_elements
* 4;
1952 surf
->dcc_offset
= align64(surf
->total_size
, surf
->dcc_alignment
);
1953 surf
->total_size
= surf
->dcc_offset
+ surf
->dcc_size
;
1954 surf
->alignment
= MAX2(surf
->alignment
, surf
->dcc_alignment
);
1960 /* This is meant to be used for disabling DCC. */
1961 void ac_surface_zero_dcc_fields(struct radeon_surf
*surf
)
1963 surf
->dcc_offset
= 0;
1964 surf
->display_dcc_offset
= 0;
1965 surf
->dcc_retile_map_offset
= 0;
/* Decode a tile-split field value into the tile split size.
 *
 * Encodings 0..6 map to 64, 128, 256, 512, 1024, 2048 and 4096
 * (i.e. 64 << n). Any other value decodes to 1024.
 *
 * NOTE(review): the "default:" label and the final return were missing from
 * the extracted listing; the default is placed in front of "case 4" based on
 * the one-line gap there. Confirm against the upstream file.
 */
static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0: tile_split = 64; break;
   case 1: tile_split = 128; break;
   case 2: tile_split = 256; break;
   case 3: tile_split = 512; break;
   default:
   case 4: tile_split = 1024; break;
   case 5: tile_split = 2048; break;
   case 6: tile_split = 4096; break;
   }
   return tile_split;
}
/* Encode a tile split size into its field value (inverse of eg_tile_split).
 *
 * 64, 128, 256, 512, 1024, 2048 and 4096 map to 0..6. Any other size is
 * encoded as 4, the encoding of 1024.
 *
 * NOTE(review): only the 1024/2048/4096 cases survived in the extracted
 * listing; the cases for 64..512 and the "default:" label are restored as
 * the inverse of eg_tile_split. Confirm against the upstream file.
 */
static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64: return 0;
   case 128: return 1;
   case 256: return 2;
   case 512: return 3;
   default:
   case 1024: return 4;
   case 2048: return 5;
   case 4096: return 6;
   }
}
/* Shift and mask of the DCC_MAX_COMPRESSED_BLOCK_SIZE field in the amdgpu
 * tiling flags. Presumably consumed by AMDGPU_TILING_GET/AMDGPU_TILING_SET
 * through token pasting -- TODO confirm against the macro definitions.
 */
#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK 0x3
2000 /* This should be called before ac_compute_surface. */
2001 void ac_surface_set_bo_metadata(const struct radeon_info
*info
,
2002 struct radeon_surf
*surf
, uint64_t tiling_flags
,
2003 enum radeon_surf_mode
*mode
)
2007 if (info
->chip_class
>= GFX9
) {
2008 surf
->u
.gfx9
.surf
.swizzle_mode
= AMDGPU_TILING_GET(tiling_flags
, SWIZZLE_MODE
);
2009 surf
->u
.gfx9
.dcc
.independent_64B_blocks
= AMDGPU_TILING_GET(tiling_flags
, DCC_INDEPENDENT_64B
);
2010 surf
->u
.gfx9
.dcc
.independent_128B_blocks
= AMDGPU_TILING_GET(tiling_flags
, DCC_INDEPENDENT_128B
);
2011 surf
->u
.gfx9
.dcc
.max_compressed_block_size
= AMDGPU_TILING_GET(tiling_flags
, DCC_MAX_COMPRESSED_BLOCK_SIZE
);
2012 surf
->u
.gfx9
.display_dcc_pitch_max
= AMDGPU_TILING_GET(tiling_flags
, DCC_PITCH_MAX
);
2013 scanout
= AMDGPU_TILING_GET(tiling_flags
, SCANOUT
);
2014 *mode
= surf
->u
.gfx9
.surf
.swizzle_mode
> 0 ? RADEON_SURF_MODE_2D
: RADEON_SURF_MODE_LINEAR_ALIGNED
;
2016 surf
->u
.legacy
.pipe_config
= AMDGPU_TILING_GET(tiling_flags
, PIPE_CONFIG
);
2017 surf
->u
.legacy
.bankw
= 1 << AMDGPU_TILING_GET(tiling_flags
, BANK_WIDTH
);
2018 surf
->u
.legacy
.bankh
= 1 << AMDGPU_TILING_GET(tiling_flags
, BANK_HEIGHT
);
2019 surf
->u
.legacy
.tile_split
= eg_tile_split(AMDGPU_TILING_GET(tiling_flags
, TILE_SPLIT
));
2020 surf
->u
.legacy
.mtilea
= 1 << AMDGPU_TILING_GET(tiling_flags
, MACRO_TILE_ASPECT
);
2021 surf
->u
.legacy
.num_banks
= 2 << AMDGPU_TILING_GET(tiling_flags
, NUM_BANKS
);
2022 scanout
= AMDGPU_TILING_GET(tiling_flags
, MICRO_TILE_MODE
) == 0; /* DISPLAY */
2024 if (AMDGPU_TILING_GET(tiling_flags
, ARRAY_MODE
) == 4) /* 2D_TILED_THIN1 */
2025 *mode
= RADEON_SURF_MODE_2D
;
2026 else if (AMDGPU_TILING_GET(tiling_flags
, ARRAY_MODE
) == 2) /* 1D_TILED_THIN1 */
2027 *mode
= RADEON_SURF_MODE_1D
;
2029 *mode
= RADEON_SURF_MODE_LINEAR_ALIGNED
;
2033 surf
->flags
|= RADEON_SURF_SCANOUT
;
2035 surf
->flags
&= ~RADEON_SURF_SCANOUT
;
2038 void ac_surface_get_bo_metadata(const struct radeon_info
*info
,
2039 struct radeon_surf
*surf
, uint64_t *tiling_flags
)
2043 if (info
->chip_class
>= GFX9
) {
2044 uint64_t dcc_offset
= 0;
2046 if (surf
->dcc_offset
) {
2047 dcc_offset
= surf
->display_dcc_offset
? surf
->display_dcc_offset
2049 assert((dcc_offset
>> 8) != 0 && (dcc_offset
>> 8) < (1 << 24));
2052 *tiling_flags
|= AMDGPU_TILING_SET(SWIZZLE_MODE
, surf
->u
.gfx9
.surf
.swizzle_mode
);
2053 *tiling_flags
|= AMDGPU_TILING_SET(DCC_OFFSET_256B
, dcc_offset
>> 8);
2054 *tiling_flags
|= AMDGPU_TILING_SET(DCC_PITCH_MAX
, surf
->u
.gfx9
.display_dcc_pitch_max
);
2055 *tiling_flags
|= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B
, surf
->u
.gfx9
.dcc
.independent_64B_blocks
);
2056 *tiling_flags
|= AMDGPU_TILING_SET(DCC_INDEPENDENT_128B
, surf
->u
.gfx9
.dcc
.independent_128B_blocks
);
2057 *tiling_flags
|= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE
, surf
->u
.gfx9
.dcc
.max_compressed_block_size
);
2058 *tiling_flags
|= AMDGPU_TILING_SET(SCANOUT
, (surf
->flags
& RADEON_SURF_SCANOUT
) != 0);
2060 if (surf
->u
.legacy
.level
[0].mode
>= RADEON_SURF_MODE_2D
)
2061 *tiling_flags
|= AMDGPU_TILING_SET(ARRAY_MODE
, 4); /* 2D_TILED_THIN1 */
2062 else if (surf
->u
.legacy
.level
[0].mode
>= RADEON_SURF_MODE_1D
)
2063 *tiling_flags
|= AMDGPU_TILING_SET(ARRAY_MODE
, 2); /* 1D_TILED_THIN1 */
2065 *tiling_flags
|= AMDGPU_TILING_SET(ARRAY_MODE
, 1); /* LINEAR_ALIGNED */
2067 *tiling_flags
|= AMDGPU_TILING_SET(PIPE_CONFIG
, surf
->u
.legacy
.pipe_config
);
2068 *tiling_flags
|= AMDGPU_TILING_SET(BANK_WIDTH
, util_logbase2(surf
->u
.legacy
.bankw
));
2069 *tiling_flags
|= AMDGPU_TILING_SET(BANK_HEIGHT
, util_logbase2(surf
->u
.legacy
.bankh
));
2070 if (surf
->u
.legacy
.tile_split
)
2071 *tiling_flags
|= AMDGPU_TILING_SET(TILE_SPLIT
, eg_tile_split_rev(surf
->u
.legacy
.tile_split
));
2072 *tiling_flags
|= AMDGPU_TILING_SET(MACRO_TILE_ASPECT
, util_logbase2(surf
->u
.legacy
.mtilea
));
2073 *tiling_flags
|= AMDGPU_TILING_SET(NUM_BANKS
, util_logbase2(surf
->u
.legacy
.num_banks
)-1);
2075 if (surf
->flags
& RADEON_SURF_SCANOUT
)
2076 *tiling_flags
|= AMDGPU_TILING_SET(MICRO_TILE_MODE
, 0); /* DISPLAY_MICRO_TILING */
2078 *tiling_flags
|= AMDGPU_TILING_SET(MICRO_TILE_MODE
, 1); /* THIN_MICRO_TILING */
2082 static uint32_t ac_get_umd_metadata_word1(const struct radeon_info
*info
)
2084 return (ATI_VENDOR_ID
<< 16) | info
->pci_id
;
2087 /* This should be called after ac_compute_surface. */
2088 bool ac_surface_set_umd_metadata(const struct radeon_info
*info
,
2089 struct radeon_surf
*surf
,
2090 unsigned num_storage_samples
,
2091 unsigned num_mipmap_levels
,
2092 unsigned size_metadata
,
2093 uint32_t metadata
[64])
2095 uint32_t *desc
= &metadata
[2];
2098 if (info
->chip_class
>= GFX9
)
2099 offset
= surf
->u
.gfx9
.surf_offset
;
2101 offset
= surf
->u
.legacy
.level
[0].offset
;
2103 if (offset
|| /* Non-zero planes ignore metadata. */
2104 size_metadata
< 10 * 4 || /* at least 2(header) + 8(desc) dwords */
2105 metadata
[0] == 0 || /* invalid version number */
2106 metadata
[1] != ac_get_umd_metadata_word1(info
)) /* invalid PCI ID */ {
2107 /* Disable DCC because it might not be enabled. */
2108 ac_surface_zero_dcc_fields(surf
);
2110 /* Don't report an error if the texture comes from an incompatible driver,
2111 * but this might not work.
2116 /* Validate that sample counts and the number of mipmap levels match. */
2117 unsigned desc_last_level
= G_008F1C_LAST_LEVEL(desc
[3]);
2118 unsigned type
= G_008F1C_TYPE(desc
[3]);
2120 if (type
== V_008F1C_SQ_RSRC_IMG_2D_MSAA
|| type
== V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
) {
2121 unsigned log_samples
= util_logbase2(MAX2(1, num_storage_samples
));
2123 if (desc_last_level
!= log_samples
) {
2125 "amdgpu: invalid MSAA texture import, "
2126 "metadata has log2(samples) = %u, the caller set %u\n",
2127 desc_last_level
, log_samples
);
2131 if (desc_last_level
!= num_mipmap_levels
- 1) {
2133 "amdgpu: invalid mipmapped texture import, "
2134 "metadata has last_level = %u, the caller set %u\n",
2135 desc_last_level
, num_mipmap_levels
- 1);
2140 if (info
->chip_class
>= GFX8
&& G_008F28_COMPRESSION_EN(desc
[6])) {
2141 /* Read DCC information. */
2142 switch (info
->chip_class
) {
2144 surf
->dcc_offset
= (uint64_t)desc
[7] << 8;
2149 ((uint64_t)desc
[7] << 8) | ((uint64_t)G_008F24_META_DATA_ADDRESS(desc
[5]) << 40);
2150 surf
->u
.gfx9
.dcc
.pipe_aligned
= G_008F24_META_PIPE_ALIGNED(desc
[5]);
2151 surf
->u
.gfx9
.dcc
.rb_aligned
= G_008F24_META_RB_ALIGNED(desc
[5]);
2153 /* If DCC is unaligned, this can only be a displayable image. */
2154 if (!surf
->u
.gfx9
.dcc
.pipe_aligned
&& !surf
->u
.gfx9
.dcc
.rb_aligned
)
2155 assert(surf
->is_displayable
);
2161 ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc
[6]) << 8) | ((uint64_t)desc
[7] << 16);
2162 surf
->u
.gfx9
.dcc
.pipe_aligned
= G_00A018_META_PIPE_ALIGNED(desc
[6]);
2170 /* Disable DCC. dcc_offset is always set by texture_from_handle
2171 * and must be cleared here.
2173 ac_surface_zero_dcc_fields(surf
);
2179 void ac_surface_get_umd_metadata(const struct radeon_info
*info
,
2180 struct radeon_surf
*surf
,
2181 unsigned num_mipmap_levels
,
2183 unsigned *size_metadata
, uint32_t metadata
[64])
2185 /* Clear the base address and set the relative DCC offset. */
2187 desc
[1] &= C_008F14_BASE_ADDRESS_HI
;
2189 switch (info
->chip_class
) {
2194 desc
[7] = surf
->dcc_offset
>> 8;
2197 desc
[7] = surf
->dcc_offset
>> 8;
2198 desc
[5] &= C_008F24_META_DATA_ADDRESS
;
2199 desc
[5] |= S_008F24_META_DATA_ADDRESS(surf
->dcc_offset
>> 40);
2203 desc
[6] &= C_00A018_META_DATA_ADDRESS_LO
;
2204 desc
[6] |= S_00A018_META_DATA_ADDRESS_LO(surf
->dcc_offset
>> 8);
2205 desc
[7] = surf
->dcc_offset
>> 16;
2211 /* Metadata image format format version 1:
2212 * [0] = 1 (metadata format identifier)
2213 * [1] = (VENDOR_ID << 16) | PCI_ID
2214 * [2:9] = image descriptor for the whole resource
2215 * [2] is always 0, because the base address is cleared
2216 * [9] is the DCC offset bits [39:8] from the beginning of
2218 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
2221 metadata
[0] = 1; /* metadata image format version 1 */
2223 /* Tiling modes are ambiguous without a PCI ID. */
2224 metadata
[1] = ac_get_umd_metadata_word1(info
);
2226 /* Dwords [2:9] contain the image descriptor. */
2227 memcpy(&metadata
[2], desc
, 8 * 4);
2228 *size_metadata
= 10 * 4;
2230 /* Dwords [10:..] contain the mipmap level offsets. */
2231 if (info
->chip_class
<= GFX8
) {
2232 for (unsigned i
= 0; i
< num_mipmap_levels
; i
++)
2233 metadata
[10 + i
] = surf
->u
.legacy
.level
[i
].offset
>> 8;
2235 *size_metadata
+= num_mipmap_levels
* 4;
2239 void ac_surface_override_offset_stride(const struct radeon_info
*info
,
2240 struct radeon_surf
*surf
,
2241 unsigned num_mipmap_levels
,
2242 uint64_t offset
, unsigned pitch
)
2244 if (info
->chip_class
>= GFX9
) {
2246 surf
->u
.gfx9
.surf_pitch
= pitch
;
2247 if (num_mipmap_levels
== 1)
2248 surf
->u
.gfx9
.surf
.epitch
= pitch
- 1;
2249 surf
->u
.gfx9
.surf_slice_size
=
2250 (uint64_t)pitch
* surf
->u
.gfx9
.surf_height
* surf
->bpe
;
2252 surf
->u
.gfx9
.surf_offset
= offset
;
2253 if (surf
->u
.gfx9
.stencil_offset
)
2254 surf
->u
.gfx9
.stencil_offset
+= offset
;
2257 surf
->u
.legacy
.level
[0].nblk_x
= pitch
;
2258 surf
->u
.legacy
.level
[0].slice_size_dw
=
2259 ((uint64_t)pitch
* surf
->u
.legacy
.level
[0].nblk_y
* surf
->bpe
) / 4;
2263 for (unsigned i
= 0; i
< ARRAY_SIZE(surf
->u
.legacy
.level
); ++i
)
2264 surf
->u
.legacy
.level
[i
].offset
+= offset
;
2268 if (surf
->htile_offset
)
2269 surf
->htile_offset
+= offset
;
2270 if (surf
->fmask_offset
)
2271 surf
->fmask_offset
+= offset
;
2272 if (surf
->cmask_offset
)
2273 surf
->cmask_offset
+= offset
;
2274 if (surf
->dcc_offset
)
2275 surf
->dcc_offset
+= offset
;
2276 if (surf
->display_dcc_offset
)
2277 surf
->display_dcc_offset
+= offset
;
2278 if (surf
->dcc_retile_map_offset
)
2279 surf
->dcc_retile_map_offset
+= offset
;