/*
 * Copyright © 2011 Red Hat All Rights Reserved.
 * Copyright © 2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
28 #include "ac_surface.h"
29 #include "amd_family.h"
30 #include "addrlib/amdgpu_asic_addr.h"
31 #include "ac_gpu_info.h"
32 #include "util/macros.h"
33 #include "util/u_atomic.h"
34 #include "util/u_math.h"
40 #include <amdgpu_drm.h>
42 #include "addrlib/addrinterface.h"
44 #ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
45 #define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
48 #ifndef CIASICIDGFXENGINE_ARCTICISLAND
49 #define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D
/**
 * Select the first of two values.
 *
 * Callers in this file pass a single AMDGPU_*_RANGE macro as the whole
 * argument list, so that macro presumably expands to a "first, last" pair of
 * revision ids and this helper picks the first one.
 * NOTE(review): the body was absent from the extracted listing; confirm
 * against the range macros in addrlib/amdgpu_asic_addr.h.
 *
 * \param x  first value of the pair (returned)
 * \param y  second value of the pair (ignored)
 */
static unsigned get_first(unsigned x, unsigned y)
{
	(void)y;
	return x;
}
/*
 * addrlib_family_rev_id: store an addrlib family id and a revision id through
 * the two output pointers, based on the radeon family passed in.
 *
 * Every visible assignment pair writes a FAMILY_* constant to *addrlib_family
 * and get_first(AMDGPU_<chip>_RANGE) to *addrlib_revid. Pairs visible here:
 *   TAHITI, PITCAIRN, CAPEVERDE, OLAND, HAINAN          -> FAMILY_SI
 *   BONAIRE, HAWAII                                     -> FAMILY_CI
 *   SPECTRE, KALINDI, GODAVARI                          -> FAMILY_KV
 *   TONGA, ICELAND, FIJI, POLARIS10/11/12, VEGAM        -> FAMILY_VI
 *   CARRIZO, STONEY                                     -> FAMILY_CZ
 *   VEGA10, VEGA12, VEGA20                              -> FAMILY_AI
 *   RAVEN                                               -> FAMILY_RV
 * The trailing fprintf reports "amdgpu: Unknown family." on stderr.
 *
 * NOTE(review): the selector (presumably a switch on `family` with one case
 * label and break per pair, and the fprintf in a default branch) and all
 * braces are missing from this listing, so which chip enum maps to which pair
 * cannot be read from here. Restore those lines from upstream before building.
 */
57 static void addrlib_family_rev_id(enum radeon_family family
,
58 unsigned *addrlib_family
,
59 unsigned *addrlib_revid
)
63 *addrlib_family
= FAMILY_SI
;
64 *addrlib_revid
= get_first(AMDGPU_TAHITI_RANGE
);
67 *addrlib_family
= FAMILY_SI
;
68 *addrlib_revid
= get_first(AMDGPU_PITCAIRN_RANGE
);
71 *addrlib_family
= FAMILY_SI
;
72 *addrlib_revid
= get_first(AMDGPU_CAPEVERDE_RANGE
);
75 *addrlib_family
= FAMILY_SI
;
76 *addrlib_revid
= get_first(AMDGPU_OLAND_RANGE
);
79 *addrlib_family
= FAMILY_SI
;
80 *addrlib_revid
= get_first(AMDGPU_HAINAN_RANGE
);
83 *addrlib_family
= FAMILY_CI
;
84 *addrlib_revid
= get_first(AMDGPU_BONAIRE_RANGE
);
87 *addrlib_family
= FAMILY_KV
;
88 *addrlib_revid
= get_first(AMDGPU_SPECTRE_RANGE
);
91 *addrlib_family
= FAMILY_KV
;
92 *addrlib_revid
= get_first(AMDGPU_KALINDI_RANGE
);
95 *addrlib_family
= FAMILY_CI
;
96 *addrlib_revid
= get_first(AMDGPU_HAWAII_RANGE
);
99 *addrlib_family
= FAMILY_KV
;
100 *addrlib_revid
= get_first(AMDGPU_GODAVARI_RANGE
);
103 *addrlib_family
= FAMILY_VI
;
104 *addrlib_revid
= get_first(AMDGPU_TONGA_RANGE
);
107 *addrlib_family
= FAMILY_VI
;
108 *addrlib_revid
= get_first(AMDGPU_ICELAND_RANGE
);
111 *addrlib_family
= FAMILY_CZ
;
112 *addrlib_revid
= get_first(AMDGPU_CARRIZO_RANGE
);
115 *addrlib_family
= FAMILY_CZ
;
116 *addrlib_revid
= get_first(AMDGPU_STONEY_RANGE
);
119 *addrlib_family
= FAMILY_VI
;
120 *addrlib_revid
= get_first(AMDGPU_FIJI_RANGE
);
123 *addrlib_family
= FAMILY_VI
;
124 *addrlib_revid
= get_first(AMDGPU_POLARIS10_RANGE
);
127 *addrlib_family
= FAMILY_VI
;
128 *addrlib_revid
= get_first(AMDGPU_POLARIS11_RANGE
);
131 *addrlib_family
= FAMILY_VI
;
132 *addrlib_revid
= get_first(AMDGPU_POLARIS12_RANGE
);
135 *addrlib_family
= FAMILY_VI
;
136 *addrlib_revid
= get_first(AMDGPU_VEGAM_RANGE
);
139 *addrlib_family
= FAMILY_AI
;
140 *addrlib_revid
= get_first(AMDGPU_VEGA10_RANGE
);
143 *addrlib_family
= FAMILY_AI
;
144 *addrlib_revid
= get_first(AMDGPU_VEGA12_RANGE
);
147 *addrlib_family
= FAMILY_AI
;
148 *addrlib_revid
= get_first(AMDGPU_VEGA20_RANGE
);
151 *addrlib_family
= FAMILY_RV
;
152 *addrlib_revid
= get_first(AMDGPU_RAVEN_RANGE
);
155 fprintf(stderr
, "amdgpu: Unknown family.\n");
159 static void *ADDR_API
allocSysMem(const ADDR_ALLOCSYSMEM_INPUT
* pInput
)
161 return malloc(pInput
->sizeInBytes
);
164 static ADDR_E_RETURNCODE ADDR_API
freeSysMem(const ADDR_FREESYSMEM_INPUT
* pInput
)
166 free(pInput
->pVirtAddr
);
170 ADDR_HANDLE
amdgpu_addr_create(const struct radeon_info
*info
,
171 const struct amdgpu_gpu_info
*amdinfo
,
172 uint64_t *max_alignment
)
174 ADDR_CREATE_INPUT addrCreateInput
= {0};
175 ADDR_CREATE_OUTPUT addrCreateOutput
= {0};
176 ADDR_REGISTER_VALUE regValue
= {0};
177 ADDR_CREATE_FLAGS createFlags
= {{0}};
178 ADDR_GET_MAX_ALINGMENTS_OUTPUT addrGetMaxAlignmentsOutput
= {0};
179 ADDR_E_RETURNCODE addrRet
;
181 addrCreateInput
.size
= sizeof(ADDR_CREATE_INPUT
);
182 addrCreateOutput
.size
= sizeof(ADDR_CREATE_OUTPUT
);
184 regValue
.gbAddrConfig
= amdinfo
->gb_addr_cfg
;
185 createFlags
.value
= 0;
187 addrlib_family_rev_id(info
->family
, &addrCreateInput
.chipFamily
, &addrCreateInput
.chipRevision
);
188 if (addrCreateInput
.chipFamily
== FAMILY_UNKNOWN
)
191 if (addrCreateInput
.chipFamily
>= FAMILY_AI
) {
192 addrCreateInput
.chipEngine
= CIASICIDGFXENGINE_ARCTICISLAND
;
193 regValue
.blockVarSizeLog2
= 0;
195 regValue
.noOfBanks
= amdinfo
->mc_arb_ramcfg
& 0x3;
196 regValue
.noOfRanks
= (amdinfo
->mc_arb_ramcfg
& 0x4) >> 2;
198 regValue
.backendDisables
= amdinfo
->enabled_rb_pipes_mask
;
199 regValue
.pTileConfig
= amdinfo
->gb_tile_mode
;
200 regValue
.noOfEntries
= ARRAY_SIZE(amdinfo
->gb_tile_mode
);
201 if (addrCreateInput
.chipFamily
== FAMILY_SI
) {
202 regValue
.pMacroTileConfig
= NULL
;
203 regValue
.noOfMacroEntries
= 0;
205 regValue
.pMacroTileConfig
= amdinfo
->gb_macro_tile_mode
;
206 regValue
.noOfMacroEntries
= ARRAY_SIZE(amdinfo
->gb_macro_tile_mode
);
209 createFlags
.useTileIndex
= 1;
210 createFlags
.useHtileSliceAlign
= 1;
212 addrCreateInput
.chipEngine
= CIASICIDGFXENGINE_SOUTHERNISLAND
;
215 addrCreateInput
.callbacks
.allocSysMem
= allocSysMem
;
216 addrCreateInput
.callbacks
.freeSysMem
= freeSysMem
;
217 addrCreateInput
.callbacks
.debugPrint
= 0;
218 addrCreateInput
.createFlags
= createFlags
;
219 addrCreateInput
.regValue
= regValue
;
221 addrRet
= AddrCreate(&addrCreateInput
, &addrCreateOutput
);
222 if (addrRet
!= ADDR_OK
)
226 addrRet
= AddrGetMaxAlignments(addrCreateOutput
.hLib
, &addrGetMaxAlignmentsOutput
);
227 if (addrRet
== ADDR_OK
){
228 *max_alignment
= addrGetMaxAlignmentsOutput
.baseAlign
;
231 return addrCreateOutput
.hLib
;
/*
 * surf_config_sanity: validate an ac_surf_config before a layout is computed.
 *
 * Checks visible in this listing:
 *  - RADEON_SURF_FMASK must not be set in `flags` (asserted, then tested
 *    again with a plain if);
 *  - info.width, height, depth, array_size and levels must all be non-zero;
 *  - info.samples is switched on, with an extra test on
 *    RADEON_SURF_Z_OR_SBUFFER;
 *  - for surfaces that are not Z/stencil, info.storage_samples is switched on;
 *  - a 3D surface must not have array_size > 1, and a cube must not have
 *    depth > 1.
 *
 * NOTE(review): the second parameter (presumably `unsigned flags`), the case
 * labels of both switches and every return statement are missing from this
 * listing, so the accepted sample counts and the error value returned cannot
 * be stated from here.
 */
234 static int surf_config_sanity(const struct ac_surf_config
*config
,
237 /* FMASK is allocated together with the color surface and can't be
238 * allocated separately.
240 assert(!(flags
& RADEON_SURF_FMASK
));
241 if (flags
& RADEON_SURF_FMASK
)
244 /* all dimension must be at least 1 ! */
245 if (!config
->info
.width
|| !config
->info
.height
|| !config
->info
.depth
||
246 !config
->info
.array_size
|| !config
->info
.levels
)
249 switch (config
->info
.samples
) {
257 if (flags
& RADEON_SURF_Z_OR_SBUFFER
)
264 if (!(flags
& RADEON_SURF_Z_OR_SBUFFER
)) {
265 switch (config
->info
.storage_samples
) {
277 if (config
->is_3d
&& config
->info
.array_size
> 1)
279 if (config
->is_cube
&& config
->info
.depth
> 1)
/*
 * gfx6_compute_level: compute the layout of one mip level of a legacy
 * (pre-GFX9) surface and accumulate the result into `surf`.
 *
 * Visible steps:
 *  - fill AddrSurfInfoIn with the level index and the minified width/height;
 *    for single-level LINEAR_ALIGNED surfaces with a power-of-two bpp the
 *    width is aligned to 256 / (bpp / 8) elements;
 *  - choose numSlices from the minified depth, 6 for cubes, or array_size;
 *  - set basePitch from level 0 of the stencil or color level array, scaled
 *    by surf->blk_w for compressed formats;
 *  - call AddrComputeSurfaceInfo and store offset (surf_size aligned to
 *    baseAlign), slice_size_dw, nblk_x, nblk_y and the tiling mode/index in
 *    surf->u.legacy.stencil_level[level] or level[level], depending on
 *    is_stencil, then advance surf->surf_size;
 *  - when dccCompatible is set and this is level 0 or the previous level was
 *    sub-level compressible, call AddrComputeDccInfo and update dcc_offset,
 *    num_dcc_levels, dcc_size, dcc_alignment and dcc_fast_clear_size;
 *  - for depth surfaces in 2D mode, call AddrComputeHtileInfo and store
 *    htile_size, htile_slice_size and htile_alignment.
 *
 * AddrDccOut carries state between calls: its subLvlCompressible and
 * dccRamSizeAligned fields from the previous level are read before being
 * overwritten.
 *
 * NOTE(review): several conditions (e.g. the is_3d / level > 0 / is_stencil /
 * compressed tests), the remaining arguments of the Addr* calls, the default
 * switch case and all return statements are missing from this listing.
 */
285 static int gfx6_compute_level(ADDR_HANDLE addrlib
,
286 const struct ac_surf_config
*config
,
287 struct radeon_surf
*surf
, bool is_stencil
,
288 unsigned level
, bool compressed
,
289 ADDR_COMPUTE_SURFACE_INFO_INPUT
*AddrSurfInfoIn
,
290 ADDR_COMPUTE_SURFACE_INFO_OUTPUT
*AddrSurfInfoOut
,
291 ADDR_COMPUTE_DCCINFO_INPUT
*AddrDccIn
,
292 ADDR_COMPUTE_DCCINFO_OUTPUT
*AddrDccOut
,
293 ADDR_COMPUTE_HTILE_INFO_INPUT
*AddrHtileIn
,
294 ADDR_COMPUTE_HTILE_INFO_OUTPUT
*AddrHtileOut
)
296 struct legacy_surf_level
*surf_level
;
297 ADDR_E_RETURNCODE ret
;
299 AddrSurfInfoIn
->mipLevel
= level
;
300 AddrSurfInfoIn
->width
= u_minify(config
->info
.width
, level
);
301 AddrSurfInfoIn
->height
= u_minify(config
->info
.height
, level
);
303 /* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics,
304 * because GFX9 needs linear alignment of 256 bytes.
306 if (config
->info
.levels
== 1 &&
307 AddrSurfInfoIn
->tileMode
== ADDR_TM_LINEAR_ALIGNED
&&
308 AddrSurfInfoIn
->bpp
&&
309 util_is_power_of_two_or_zero(AddrSurfInfoIn
->bpp
)) {
310 unsigned alignment
= 256 / (AddrSurfInfoIn
->bpp
/ 8);
312 AddrSurfInfoIn
->width
= align(AddrSurfInfoIn
->width
, alignment
);
316 AddrSurfInfoIn
->numSlices
= u_minify(config
->info
.depth
, level
);
317 else if (config
->is_cube
)
318 AddrSurfInfoIn
->numSlices
= 6;
320 AddrSurfInfoIn
->numSlices
= config
->info
.array_size
;
323 /* Set the base level pitch. This is needed for calculation
324 * of non-zero levels. */
326 AddrSurfInfoIn
->basePitch
= surf
->u
.legacy
.stencil_level
[0].nblk_x
;
328 AddrSurfInfoIn
->basePitch
= surf
->u
.legacy
.level
[0].nblk_x
;
330 /* Convert blocks to pixels for compressed formats. */
332 AddrSurfInfoIn
->basePitch
*= surf
->blk_w
;
335 ret
= AddrComputeSurfaceInfo(addrlib
,
338 if (ret
!= ADDR_OK
) {
342 surf_level
= is_stencil
? &surf
->u
.legacy
.stencil_level
[level
] : &surf
->u
.legacy
.level
[level
];
343 surf_level
->offset
= align64(surf
->surf_size
, AddrSurfInfoOut
->baseAlign
);
344 surf_level
->slice_size_dw
= AddrSurfInfoOut
->sliceSize
/ 4;
345 surf_level
->nblk_x
= AddrSurfInfoOut
->pitch
;
346 surf_level
->nblk_y
= AddrSurfInfoOut
->height
;
348 switch (AddrSurfInfoOut
->tileMode
) {
349 case ADDR_TM_LINEAR_ALIGNED
:
350 surf_level
->mode
= RADEON_SURF_MODE_LINEAR_ALIGNED
;
352 case ADDR_TM_1D_TILED_THIN1
:
353 surf_level
->mode
= RADEON_SURF_MODE_1D
;
355 case ADDR_TM_2D_TILED_THIN1
:
356 surf_level
->mode
= RADEON_SURF_MODE_2D
;
363 surf
->u
.legacy
.stencil_tiling_index
[level
] = AddrSurfInfoOut
->tileIndex
;
365 surf
->u
.legacy
.tiling_index
[level
] = AddrSurfInfoOut
->tileIndex
;
367 surf
->surf_size
= surf_level
->offset
+ AddrSurfInfoOut
->surfSize
;
369 /* Clear DCC fields at the beginning. */
370 surf_level
->dcc_offset
= 0;
372 /* The previous level's flag tells us if we can use DCC for this level. */
373 if (AddrSurfInfoIn
->flags
.dccCompatible
&&
374 (level
== 0 || AddrDccOut
->subLvlCompressible
)) {
375 bool prev_level_clearable
= level
== 0 ||
376 AddrDccOut
->dccRamSizeAligned
;
378 AddrDccIn
->colorSurfSize
= AddrSurfInfoOut
->surfSize
;
379 AddrDccIn
->tileMode
= AddrSurfInfoOut
->tileMode
;
380 AddrDccIn
->tileInfo
= *AddrSurfInfoOut
->pTileInfo
;
381 AddrDccIn
->tileIndex
= AddrSurfInfoOut
->tileIndex
;
382 AddrDccIn
->macroModeIndex
= AddrSurfInfoOut
->macroModeIndex
;
384 ret
= AddrComputeDccInfo(addrlib
,
388 if (ret
== ADDR_OK
) {
389 surf_level
->dcc_offset
= surf
->dcc_size
;
390 surf
->num_dcc_levels
= level
+ 1;
391 surf
->dcc_size
= surf_level
->dcc_offset
+ AddrDccOut
->dccRamSize
;
392 surf
->dcc_alignment
= MAX2(surf
->dcc_alignment
, AddrDccOut
->dccRamBaseAlign
);
394 /* If the DCC size of a subresource (1 mip level or 1 slice)
395 * is not aligned, the DCC memory layout is not contiguous for
396 * that subresource, which means we can't use fast clear.
398 * We only do fast clears for whole mipmap levels. If we did
399 * per-slice fast clears, the same restriction would apply.
400 * (i.e. only compute the slice size and see if it's aligned)
402 * The last level can be non-contiguous and still be clearable
403 * if it's interleaved with the next level that doesn't exist.
405 if (AddrDccOut
->dccRamSizeAligned
||
406 (prev_level_clearable
&& level
== config
->info
.levels
- 1))
407 surf_level
->dcc_fast_clear_size
= AddrDccOut
->dccFastClearSize
;
409 surf_level
->dcc_fast_clear_size
= 0;
413 /* TC-compatible HTILE. */
415 AddrSurfInfoIn
->flags
.depth
&&
416 surf_level
->mode
== RADEON_SURF_MODE_2D
&&
418 AddrHtileIn
->flags
.tcCompatible
= AddrSurfInfoIn
->flags
.tcCompatible
;
419 AddrHtileIn
->pitch
= AddrSurfInfoOut
->pitch
;
420 AddrHtileIn
->height
= AddrSurfInfoOut
->height
;
421 AddrHtileIn
->numSlices
= AddrSurfInfoOut
->depth
;
422 AddrHtileIn
->blockWidth
= ADDR_HTILE_BLOCKSIZE_8
;
423 AddrHtileIn
->blockHeight
= ADDR_HTILE_BLOCKSIZE_8
;
424 AddrHtileIn
->pTileInfo
= AddrSurfInfoOut
->pTileInfo
;
425 AddrHtileIn
->tileIndex
= AddrSurfInfoOut
->tileIndex
;
426 AddrHtileIn
->macroModeIndex
= AddrSurfInfoOut
->macroModeIndex
;
428 ret
= AddrComputeHtileInfo(addrlib
,
432 if (ret
== ADDR_OK
) {
433 surf
->htile_size
= AddrHtileOut
->htileBytes
;
434 surf
->htile_slice_size
= AddrHtileOut
->sliceSize
;
435 surf
->htile_alignment
= AddrHtileOut
->baseAlign
;
/* Bit-field helpers for a tile-mode register word:
 * MICRO_TILE_MODE is the 2-bit field at bits 1:0,
 * MICRO_TILE_MODE_NEW is the 3-bit field at bits 24:22.
 */
#define G_009910_MICRO_TILE_MODE(x)		(((x) >> 0) & 0x03)
#define V_009910_ADDR_SURF_THICK_MICRO_TILING	0x03
#define G_009910_MICRO_TILE_MODE_NEW(x)		(((x) >> 22) & 0x07)
446 static void gfx6_set_micro_tile_mode(struct radeon_surf
*surf
,
447 const struct radeon_info
*info
)
449 uint32_t tile_mode
= info
->si_tile_mode_array
[surf
->u
.legacy
.tiling_index
[0]];
451 if (info
->chip_class
>= CIK
)
452 surf
->micro_tile_mode
= G_009910_MICRO_TILE_MODE_NEW(tile_mode
);
454 surf
->micro_tile_mode
= G_009910_MICRO_TILE_MODE(tile_mode
);
/*
 * cik_get_macro_tile_index: derive a macro tile index from the surface.
 *
 * tileb starts as 8 * 8 * surf->bpe and is clamped to
 * surf->u.legacy.tile_split; `index` then counts loop iterations while
 * tileb > 64.
 *
 * NOTE(review): the loop body and the return statement are missing from this
 * listing. Presumably tileb is halved on each iteration and `index` is
 * returned -- confirm against upstream.
 */
457 static unsigned cik_get_macro_tile_index(struct radeon_surf
*surf
)
459 unsigned index
, tileb
;
461 tileb
= 8 * 8 * surf
->bpe
;
462 tileb
= MIN2(surf
->u
.legacy
.tile_split
, tileb
);
464 for (index
= 0; tileb
> 64; index
++)
/*
 * get_display_flag: decide whether a surface should be flagged as displayable.
 *
 * The visible tests only apply when RADEON_SURF_SCANOUT is set,
 * info.samples <= 1, blk_w <= 2 and blk_h == 1. Inside that guard there is a
 * special case for blk_w == 2 && blk_h == 1, followed by a format test on
 * bytes-per-element and channel count:
 *   bpe 4..8 with 4 channels (RGBA8 or RGBA16F),
 *   bpe 2 with >= 3 channels (R5G6B5 or R5G5B5A1),
 *   bpe 1 with 1 channel.
 *
 * NOTE(review): every return statement is missing from this listing;
 * presumably the matching cases return true and everything else returns
 * false -- confirm against upstream.
 */
471 static bool get_display_flag(const struct ac_surf_config
*config
,
472 const struct radeon_surf
*surf
)
474 unsigned num_channels
= config
->info
.num_channels
;
475 unsigned bpe
= surf
->bpe
;
477 if (surf
->flags
& RADEON_SURF_SCANOUT
&&
478 config
->info
.samples
<= 1 &&
479 surf
->blk_w
<= 2 && surf
->blk_h
== 1) {
481 if (surf
->blk_w
== 2 && surf
->blk_h
== 1)
484 if (/* RGBA8 or RGBA16F */
485 (bpe
>= 4 && bpe
<= 8 && num_channels
== 4) ||
486 /* R5G6B5 or R5G5B5A1 */
487 (bpe
== 2 && num_channels
>= 3) ||
489 (bpe
== 1 && num_channels
== 1))
496 * This must be called after the first level is computed.
498 * Copy surface-global settings like pipe/bank config from level 0 surface
499 * computation, and compute tile swizzle.
501 static int gfx6_surface_settings(ADDR_HANDLE addrlib
,
502 const struct radeon_info
*info
,
503 const struct ac_surf_config
*config
,
504 ADDR_COMPUTE_SURFACE_INFO_OUTPUT
* csio
,
505 struct radeon_surf
*surf
)
507 surf
->surf_alignment
= csio
->baseAlign
;
508 surf
->u
.legacy
.pipe_config
= csio
->pTileInfo
->pipeConfig
- 1;
509 gfx6_set_micro_tile_mode(surf
, info
);
511 /* For 2D modes only. */
512 if (csio
->tileMode
>= ADDR_TM_2D_TILED_THIN1
) {
513 surf
->u
.legacy
.bankw
= csio
->pTileInfo
->bankWidth
;
514 surf
->u
.legacy
.bankh
= csio
->pTileInfo
->bankHeight
;
515 surf
->u
.legacy
.mtilea
= csio
->pTileInfo
->macroAspectRatio
;
516 surf
->u
.legacy
.tile_split
= csio
->pTileInfo
->tileSplitBytes
;
517 surf
->u
.legacy
.num_banks
= csio
->pTileInfo
->banks
;
518 surf
->u
.legacy
.macro_tile_index
= csio
->macroModeIndex
;
520 surf
->u
.legacy
.macro_tile_index
= 0;
523 /* Compute tile swizzle. */
524 /* TODO: fix tile swizzle with mipmapping for SI */
525 if ((info
->chip_class
>= CIK
|| config
->info
.levels
== 1) &&
526 config
->info
.surf_index
&&
527 surf
->u
.legacy
.level
[0].mode
== RADEON_SURF_MODE_2D
&&
528 !(surf
->flags
& (RADEON_SURF_Z_OR_SBUFFER
| RADEON_SURF_SHAREABLE
)) &&
529 !get_display_flag(config
, surf
)) {
530 ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn
= {0};
531 ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut
= {0};
533 AddrBaseSwizzleIn
.size
= sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT
);
534 AddrBaseSwizzleOut
.size
= sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT
);
536 AddrBaseSwizzleIn
.surfIndex
= p_atomic_inc_return(config
->info
.surf_index
) - 1;
537 AddrBaseSwizzleIn
.tileIndex
= csio
->tileIndex
;
538 AddrBaseSwizzleIn
.macroModeIndex
= csio
->macroModeIndex
;
539 AddrBaseSwizzleIn
.pTileInfo
= csio
->pTileInfo
;
540 AddrBaseSwizzleIn
.tileMode
= csio
->tileMode
;
542 int r
= AddrComputeBaseSwizzle(addrlib
, &AddrBaseSwizzleIn
,
543 &AddrBaseSwizzleOut
);
547 assert(AddrBaseSwizzleOut
.tileSwizzle
<=
548 u_bit_consecutive(0, sizeof(surf
->tile_swizzle
) * 8));
549 surf
->tile_swizzle
= AddrBaseSwizzleOut
.tileSwizzle
;
/*
 * ac_compute_cmask: compute CMASK size and alignment for a legacy color
 * surface and store them in `surf`.
 *
 * Visible steps:
 *  - Z/stencil surfaces are tested for first, and chip_class <= VI is
 *    asserted;
 *  - base_align = num_tile_pipes * pipe_interleave_bytes;
 *  - width and height are aligned to cl_width * 8 and cl_height * 8;
 *    slice_elements = width * height / 64 and, since each CMASK element is a
 *    nibble, slice_bytes = slice_elements / 2;
 *  - cmask_slice_tile_max = width * height / (128 * 128), decremented by one
 *    when non-zero;
 *  - cmask_alignment = MAX2(256, base_align) and
 *    cmask_size = align(slice_bytes, base_align) * num_layers, where
 *    num_layers comes from info.depth or info.array_size.
 *
 * NOTE(review): the selector that assigns cl_width / cl_height (only a
 * "case 16: Hawaii" label survives), the declaration of num_layers, its cube
 * branch and the early return for Z/stencil surfaces are missing from this
 * listing.
 */
554 void ac_compute_cmask(const struct radeon_info
*info
,
555 const struct ac_surf_config
*config
,
556 struct radeon_surf
*surf
)
558 unsigned pipe_interleave_bytes
= info
->pipe_interleave_bytes
;
559 unsigned num_pipes
= info
->num_tile_pipes
;
560 unsigned cl_width
, cl_height
;
562 if (surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
)
565 assert(info
->chip_class
<= VI
);
580 case 16: /* Hawaii */
589 unsigned base_align
= num_pipes
* pipe_interleave_bytes
;
591 unsigned width
= align(config
->info
.width
, cl_width
*8);
592 unsigned height
= align(config
->info
.height
, cl_height
*8);
593 unsigned slice_elements
= (width
* height
) / (8*8);
595 /* Each element of CMASK is a nibble. */
596 unsigned slice_bytes
= slice_elements
/ 2;
598 surf
->u
.legacy
.cmask_slice_tile_max
= (width
* height
) / (128*128);
599 if (surf
->u
.legacy
.cmask_slice_tile_max
)
600 surf
->u
.legacy
.cmask_slice_tile_max
-= 1;
604 num_layers
= config
->info
.depth
;
605 else if (config
->is_cube
)
608 num_layers
= config
->info
.array_size
;
610 surf
->cmask_alignment
= MAX2(256, base_align
);
611 surf
->cmask_size
= align(slice_bytes
, base_align
) * num_layers
;
/*
 * gfx6_compute_surface: compute the complete legacy (pre-GFX9) layout of a
 * surface -- all mip levels, optional stencil, DCC, HTILE, FMASK and CMASK.
 *
 * Outline of what is visible in this listing:
 *  1. Zero-initialise the addrlib input/output structs and set their sizes.
 *  2. Adjust the requested mode: MSAA forces 2D tiling, and Z/stencil
 *     surfaces are raised to at least 1D tiling.
 *  3. Fill AddrSurfInfoIn: tile mode, format/bpp, sample and fragment counts,
 *     tile type and the flag bits (color, depth, cube, display, pow2Pad,
 *     tcCompatible, opt4Space, dccCompatible, noStencil, compressZ).
 *  4. For depth+stencil surfaces that are mipmapped or on Stoney, request a
 *     matching stencil tile config (matchStencilTileCfg).
 *  5. If the caller supplied bankw/bankh/mtilea/tile_split, pass them through
 *     AddrTileInfoIn and pick tileIndex by hand (SI: by bpe and tile type;
 *     otherwise 10 or 14 plus cik_get_macro_tile_index()).
 *  6. Reset the DCC/HTILE fields of `surf`, then call gfx6_compute_level()
 *     for every level and gfx6_surface_settings() for the surface.
 *  7. For stencil, repeat the per-level computation with bpp = 8 and record
 *     stencil_adjusted when the stencil pitch differs from the depth pitch.
 *  8. For multisampled color surfaces, compute FMASK via
 *     AddrComputeFmaskInfo() and, unless shareable, an FMASK tile swizzle.
 *  9. For mipmapped surfaces, recompute dcc_size as
 *     align64(surf_size >> 8, dcc_alignment * 4) and, with TC-compatible
 *     HTILE, recompute htile_size from the total pixel count.
 * 10. Derive is_linear / is_displayable, reject the rotated micro tile mode,
 *     and finish with ac_compute_cmask().
 *
 * NOTE(review): many lines are missing from this listing (declarations of
 * level, r and compressed, the switch on `mode`, several conditions,
 * else-branches, error checks and all returns), and some block comments have
 * lost their closing delimiter. Treat the outline above as a reading aid and
 * restore the function from upstream before building.
 */
615 * Fill in the tiling information in \p surf based on the given surface config.
617 * The following fields of \p surf must be initialized by the caller:
618 * blk_w, blk_h, bpe, flags.
620 static int gfx6_compute_surface(ADDR_HANDLE addrlib
,
621 const struct radeon_info
*info
,
622 const struct ac_surf_config
*config
,
623 enum radeon_surf_mode mode
,
624 struct radeon_surf
*surf
)
628 ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn
= {0};
629 ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut
= {0};
630 ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn
= {0};
631 ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut
= {0};
632 ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn
= {0};
633 ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut
= {0};
634 ADDR_TILEINFO AddrTileInfoIn
= {0};
635 ADDR_TILEINFO AddrTileInfoOut
= {0};
638 AddrSurfInfoIn
.size
= sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT
);
639 AddrSurfInfoOut
.size
= sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT
);
640 AddrDccIn
.size
= sizeof(ADDR_COMPUTE_DCCINFO_INPUT
);
641 AddrDccOut
.size
= sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT
);
642 AddrHtileIn
.size
= sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT
);
643 AddrHtileOut
.size
= sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT
);
644 AddrSurfInfoOut
.pTileInfo
= &AddrTileInfoOut
;
646 compressed
= surf
->blk_w
== 4 && surf
->blk_h
== 4;
648 /* MSAA requires 2D tiling. */
649 if (config
->info
.samples
> 1)
650 mode
= RADEON_SURF_MODE_2D
;
652 /* DB doesn't support linear layouts. */
653 if (surf
->flags
& (RADEON_SURF_Z_OR_SBUFFER
) &&
654 mode
< RADEON_SURF_MODE_1D
)
655 mode
= RADEON_SURF_MODE_1D
;
657 /* Set the requested tiling mode. */
659 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
660 AddrSurfInfoIn
.tileMode
= ADDR_TM_LINEAR_ALIGNED
;
662 case RADEON_SURF_MODE_1D
:
663 AddrSurfInfoIn
.tileMode
= ADDR_TM_1D_TILED_THIN1
;
665 case RADEON_SURF_MODE_2D
:
666 AddrSurfInfoIn
.tileMode
= ADDR_TM_2D_TILED_THIN1
;
672 /* The format must be set correctly for the allocation of compressed
673 * textures to work. In other cases, setting the bpp is sufficient.
678 AddrSurfInfoIn
.format
= ADDR_FMT_BC1
;
681 AddrSurfInfoIn
.format
= ADDR_FMT_BC3
;
688 AddrDccIn
.bpp
= AddrSurfInfoIn
.bpp
= surf
->bpe
* 8;
691 AddrDccIn
.numSamples
= AddrSurfInfoIn
.numSamples
=
692 MAX2(1, config
->info
.samples
);
693 AddrSurfInfoIn
.tileIndex
= -1;
695 if (!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
)) {
696 AddrDccIn
.numSamples
= AddrSurfInfoIn
.numFrags
=
697 MAX2(1, config
->info
.storage_samples
);
700 /* Set the micro tile type. */
701 if (surf
->flags
& RADEON_SURF_SCANOUT
)
702 AddrSurfInfoIn
.tileType
= ADDR_DISPLAYABLE
;
703 else if (surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
)
704 AddrSurfInfoIn
.tileType
= ADDR_DEPTH_SAMPLE_ORDER
;
706 AddrSurfInfoIn
.tileType
= ADDR_NON_DISPLAYABLE
;
708 AddrSurfInfoIn
.flags
.color
= !(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
);
709 AddrSurfInfoIn
.flags
.depth
= (surf
->flags
& RADEON_SURF_ZBUFFER
) != 0;
710 AddrSurfInfoIn
.flags
.cube
= config
->is_cube
;
711 AddrSurfInfoIn
.flags
.display
= get_display_flag(config
, surf
);
712 AddrSurfInfoIn
.flags
.pow2Pad
= config
->info
.levels
> 1;
713 AddrSurfInfoIn
.flags
.tcCompatible
= (surf
->flags
& RADEON_SURF_TC_COMPATIBLE_HTILE
) != 0;
715 /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
716 * requested, because TC-compatible HTILE requires 2D tiling.
718 AddrSurfInfoIn
.flags
.opt4Space
= !AddrSurfInfoIn
.flags
.tcCompatible
&&
719 !AddrSurfInfoIn
.flags
.fmask
&&
720 config
->info
.samples
<= 1 &&
721 (surf
->flags
& RADEON_SURF_OPTIMIZE_FOR_SPACE
);
724 * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
726 * - Mipmapped array textures have low performance (discovered by a closed
729 AddrSurfInfoIn
.flags
.dccCompatible
=
730 info
->chip_class
>= VI
&&
731 !(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
) &&
732 !(surf
->flags
& RADEON_SURF_DISABLE_DCC
) &&
734 ((config
->info
.array_size
== 1 && config
->info
.depth
== 1) ||
735 config
->info
.levels
== 1);
737 AddrSurfInfoIn
.flags
.noStencil
= (surf
->flags
& RADEON_SURF_SBUFFER
) == 0;
738 AddrSurfInfoIn
.flags
.compressZ
= !!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
);
740 /* On CI/VI, the DB uses the same pitch and tile mode (except tilesplit)
741 * for Z and stencil. This can cause a number of problems which we work
744 * - a depth part that is incompatible with mipmapped texturing
745 * - at least on Stoney, entirely incompatible Z/S aspects (e.g.
746 * incorrect tiling applied to the stencil part, stencil buffer
747 * memory accesses that go out of bounds) even without mipmapping
749 * Some piglit tests that are prone to different types of related
751 * ./bin/ext_framebuffer_multisample-upsample 2 stencil
752 * ./bin/framebuffer-blit-levels {draw,read} stencil
753 * ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample}
754 * ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw}
755 * ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8
757 int stencil_tile_idx
= -1;
759 if (AddrSurfInfoIn
.flags
.depth
&& !AddrSurfInfoIn
.flags
.noStencil
&&
760 (config
->info
.levels
> 1 || info
->family
== CHIP_STONEY
)) {
761 /* Compute stencilTileIdx that is compatible with the (depth)
762 * tileIdx. This degrades the depth surface if necessary to
763 * ensure that a matching stencilTileIdx exists. */
764 AddrSurfInfoIn
.flags
.matchStencilTileCfg
= 1;
766 /* Keep the depth mip-tail compatible with texturing. */
767 AddrSurfInfoIn
.flags
.noStencil
= 1;
770 /* Set preferred macrotile parameters. This is usually required
771 * for shared resources. This is for 2D tiling only. */
772 if (AddrSurfInfoIn
.tileMode
>= ADDR_TM_2D_TILED_THIN1
&&
773 surf
->u
.legacy
.bankw
&& surf
->u
.legacy
.bankh
&&
774 surf
->u
.legacy
.mtilea
&& surf
->u
.legacy
.tile_split
) {
775 /* If any of these parameters are incorrect, the calculation
777 AddrTileInfoIn
.banks
= surf
->u
.legacy
.num_banks
;
778 AddrTileInfoIn
.bankWidth
= surf
->u
.legacy
.bankw
;
779 AddrTileInfoIn
.bankHeight
= surf
->u
.legacy
.bankh
;
780 AddrTileInfoIn
.macroAspectRatio
= surf
->u
.legacy
.mtilea
;
781 AddrTileInfoIn
.tileSplitBytes
= surf
->u
.legacy
.tile_split
;
782 AddrTileInfoIn
.pipeConfig
= surf
->u
.legacy
.pipe_config
+ 1; /* +1 compared to GB_TILE_MODE */
783 AddrSurfInfoIn
.flags
.opt4Space
= 0;
784 AddrSurfInfoIn
.pTileInfo
= &AddrTileInfoIn
;
786 /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
787 * the tile index, because we are expected to know it if
788 * we know the other parameters.
790 * This is something that can easily be fixed in Addrlib.
791 * For now, just figure it out here.
792 * Note that only 2D_TILE_THIN1 is handled here.
794 assert(!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
));
795 assert(AddrSurfInfoIn
.tileMode
== ADDR_TM_2D_TILED_THIN1
);
797 if (info
->chip_class
== SI
) {
798 if (AddrSurfInfoIn
.tileType
== ADDR_DISPLAYABLE
) {
800 AddrSurfInfoIn
.tileIndex
= 11; /* 16bpp */
802 AddrSurfInfoIn
.tileIndex
= 12; /* 32bpp */
805 AddrSurfInfoIn
.tileIndex
= 14; /* 8bpp */
806 else if (surf
->bpe
== 2)
807 AddrSurfInfoIn
.tileIndex
= 15; /* 16bpp */
808 else if (surf
->bpe
== 4)
809 AddrSurfInfoIn
.tileIndex
= 16; /* 32bpp */
811 AddrSurfInfoIn
.tileIndex
= 17; /* 64bpp (and 128bpp) */
815 if (AddrSurfInfoIn
.tileType
== ADDR_DISPLAYABLE
)
816 AddrSurfInfoIn
.tileIndex
= 10; /* 2D displayable */
818 AddrSurfInfoIn
.tileIndex
= 14; /* 2D non-displayable */
820 /* Addrlib doesn't set this if tileIndex is forced like above. */
821 AddrSurfInfoOut
.macroModeIndex
= cik_get_macro_tile_index(surf
);
825 surf
->has_stencil
= !!(surf
->flags
& RADEON_SURF_SBUFFER
);
826 surf
->num_dcc_levels
= 0;
829 surf
->dcc_alignment
= 1;
830 surf
->htile_size
= 0;
831 surf
->htile_slice_size
= 0;
832 surf
->htile_alignment
= 1;
834 const bool only_stencil
= (surf
->flags
& RADEON_SURF_SBUFFER
) &&
835 !(surf
->flags
& RADEON_SURF_ZBUFFER
);
837 /* Calculate texture layout information. */
839 for (level
= 0; level
< config
->info
.levels
; level
++) {
840 r
= gfx6_compute_level(addrlib
, config
, surf
, false, level
, compressed
,
841 &AddrSurfInfoIn
, &AddrSurfInfoOut
,
842 &AddrDccIn
, &AddrDccOut
, &AddrHtileIn
, &AddrHtileOut
);
849 /* Check that we actually got a TC-compatible HTILE if
850 * we requested it (only for level 0, since we're not
851 * supporting HTILE on higher mip levels anyway). */
852 assert(AddrSurfInfoOut
.tcCompatible
||
853 !AddrSurfInfoIn
.flags
.tcCompatible
||
854 AddrSurfInfoIn
.flags
.matchStencilTileCfg
);
856 if (AddrSurfInfoIn
.flags
.matchStencilTileCfg
) {
857 if (!AddrSurfInfoOut
.tcCompatible
) {
858 AddrSurfInfoIn
.flags
.tcCompatible
= 0;
859 surf
->flags
&= ~RADEON_SURF_TC_COMPATIBLE_HTILE
;
862 AddrSurfInfoIn
.flags
.matchStencilTileCfg
= 0;
863 AddrSurfInfoIn
.tileIndex
= AddrSurfInfoOut
.tileIndex
;
864 stencil_tile_idx
= AddrSurfInfoOut
.stencilTileIdx
;
866 assert(stencil_tile_idx
>= 0);
869 r
= gfx6_surface_settings(addrlib
, info
, config
,
870 &AddrSurfInfoOut
, surf
);
876 /* Calculate texture layout information for stencil. */
877 if (surf
->flags
& RADEON_SURF_SBUFFER
) {
878 AddrSurfInfoIn
.tileIndex
= stencil_tile_idx
;
879 AddrSurfInfoIn
.bpp
= 8;
880 AddrSurfInfoIn
.flags
.depth
= 0;
881 AddrSurfInfoIn
.flags
.stencil
= 1;
882 AddrSurfInfoIn
.flags
.tcCompatible
= 0;
883 /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
884 AddrTileInfoIn
.tileSplitBytes
= surf
->u
.legacy
.stencil_tile_split
;
886 for (level
= 0; level
< config
->info
.levels
; level
++) {
887 r
= gfx6_compute_level(addrlib
, config
, surf
, true, level
, compressed
,
888 &AddrSurfInfoIn
, &AddrSurfInfoOut
,
889 &AddrDccIn
, &AddrDccOut
,
894 /* DB uses the depth pitch for both stencil and depth. */
896 if (surf
->u
.legacy
.stencil_level
[level
].nblk_x
!=
897 surf
->u
.legacy
.level
[level
].nblk_x
)
898 surf
->u
.legacy
.stencil_adjusted
= true;
900 surf
->u
.legacy
.level
[level
].nblk_x
=
901 surf
->u
.legacy
.stencil_level
[level
].nblk_x
;
906 r
= gfx6_surface_settings(addrlib
, info
, config
,
907 &AddrSurfInfoOut
, surf
);
912 /* For 2D modes only. */
913 if (AddrSurfInfoOut
.tileMode
>= ADDR_TM_2D_TILED_THIN1
) {
914 surf
->u
.legacy
.stencil_tile_split
=
915 AddrSurfInfoOut
.pTileInfo
->tileSplitBytes
;
922 if (config
->info
.samples
>= 2 && AddrSurfInfoIn
.flags
.color
) {
923 ADDR_COMPUTE_FMASK_INFO_INPUT fin
= {0};
924 ADDR_COMPUTE_FMASK_INFO_OUTPUT fout
= {0};
925 ADDR_TILEINFO fmask_tile_info
= {};
927 fin
.size
= sizeof(fin
);
928 fout
.size
= sizeof(fout
);
930 fin
.tileMode
= AddrSurfInfoOut
.tileMode
;
931 fin
.pitch
= AddrSurfInfoOut
.pitch
;
932 fin
.height
= config
->info
.height
;
933 fin
.numSlices
= AddrSurfInfoIn
.numSlices
;
934 fin
.numSamples
= AddrSurfInfoIn
.numSamples
;
935 fin
.numFrags
= AddrSurfInfoIn
.numFrags
;
937 fout
.pTileInfo
= &fmask_tile_info
;
939 r
= AddrComputeFmaskInfo(addrlib
, &fin
, &fout
);
943 surf
->fmask_size
= fout
.fmaskBytes
;
944 surf
->fmask_alignment
= fout
.baseAlign
;
945 surf
->fmask_tile_swizzle
= 0;
947 surf
->u
.legacy
.fmask
.slice_tile_max
=
948 (fout
.pitch
* fout
.height
) / 64;
949 if (surf
->u
.legacy
.fmask
.slice_tile_max
)
950 surf
->u
.legacy
.fmask
.slice_tile_max
-= 1;
952 surf
->u
.legacy
.fmask
.tiling_index
= fout
.tileIndex
;
953 surf
->u
.legacy
.fmask
.bankh
= fout
.pTileInfo
->bankHeight
;
954 surf
->u
.legacy
.fmask
.pitch_in_pixels
= fout
.pitch
;
956 /* Compute tile swizzle for FMASK. */
957 if (config
->info
.fmask_surf_index
&&
958 !(surf
->flags
& RADEON_SURF_SHAREABLE
)) {
959 ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin
= {0};
960 ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout
= {0};
962 xin
.size
= sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT
);
963 xout
.size
= sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT
);
965 /* This counter starts from 1 instead of 0. */
966 xin
.surfIndex
= p_atomic_inc_return(config
->info
.fmask_surf_index
);
967 xin
.tileIndex
= fout
.tileIndex
;
968 xin
.macroModeIndex
= fout
.macroModeIndex
;
969 xin
.pTileInfo
= fout
.pTileInfo
;
970 xin
.tileMode
= fin
.tileMode
;
972 int r
= AddrComputeBaseSwizzle(addrlib
, &xin
, &xout
);
976 assert(xout
.tileSwizzle
<=
977 u_bit_consecutive(0, sizeof(surf
->tile_swizzle
) * 8));
978 surf
->fmask_tile_swizzle
= xout
.tileSwizzle
;
982 /* Recalculate the whole DCC miptree size including disabled levels.
983 * This is what addrlib does, but calling addrlib would be a lot more
986 if (surf
->dcc_size
&& config
->info
.levels
> 1) {
987 /* The smallest miplevels that are never compressed by DCC
988 * still read the DCC buffer via TC if the base level uses DCC,
989 * and for some reason the DCC buffer needs to be larger if
990 * the miptree uses non-zero tile_swizzle. Otherwise there are
993 * "dcc_alignment * 4" was determined by trial and error.
995 surf
->dcc_size
= align64(surf
->surf_size
>> 8,
996 surf
->dcc_alignment
* 4);
999 /* Make sure HTILE covers the whole miptree, because the shader reads
1000 * TC-compatible HTILE even for levels where it's disabled by DB.
1002 if (surf
->htile_size
&& config
->info
.levels
> 1 &&
1003 surf
->flags
& RADEON_SURF_TC_COMPATIBLE_HTILE
) {
1004 /* MSAA can't occur with levels > 1, so ignore the sample count. */
1005 const unsigned total_pixels
= surf
->surf_size
/ surf
->bpe
;
1006 const unsigned htile_block_size
= 8 * 8;
1007 const unsigned htile_element_size
= 4;
1009 surf
->htile_size
= (total_pixels
/ htile_block_size
) *
1011 surf
->htile_size
= align(surf
->htile_size
, surf
->htile_alignment
);
1014 surf
->is_linear
= surf
->u
.legacy
.level
[0].mode
== RADEON_SURF_MODE_LINEAR_ALIGNED
;
1015 surf
->is_displayable
= surf
->is_linear
||
1016 surf
->micro_tile_mode
== RADEON_MICRO_MODE_DISPLAY
||
1017 surf
->micro_tile_mode
== RADEON_MICRO_MODE_ROTATED
;
1019 /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
1020 * used at the same time. This case is not currently expected to occur
1021 * because we don't use rotated. Enforce this restriction on all chips
1022 * to facilitate testing.
1024 if (surf
->micro_tile_mode
== RADEON_MICRO_MODE_ROTATED
) {
1025 assert(!"rotate micro tile mode is unsupported");
1029 ac_compute_cmask(info
, config
, surf
);
1033 /* This is only called when expecting a tiled layout. */
/*
 * gfx9_get_preferred_swizzle_mode: ask addrlib which swizzle mode it prefers
 * for a GFX9 surface and store it in *swizzle_mode.
 *
 * The query input `sin` is built from the surface input `in` (flags, resource
 * type, format, dimensions, mip/sample/fragment counts) with
 * resourceLoction = ADDR_RSRC_LOC_INVIS. The 256B ("micro"), variable-sized
 * and linear block types are forbidden. The preferred swizzle set is D with S
 * as fallback for scanout surfaces, Z for depth, stencil or FMASK, and S is
 * also set on the remaining visible path. The display and color flags are
 * cleared and the fmask flag set before Addr2GetPreferredSurfaceSetting() is
 * called.
 *
 * NOTE(review): the return type line, the final else, the condition guarding
 * the FMASK flag override (presumably `if (is_fmask)`), the error check on
 * `ret` and the return statements are missing from this listing.
 */
1035 gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib
,
1036 ADDR2_COMPUTE_SURFACE_INFO_INPUT
*in
,
1037 bool is_fmask
, unsigned flags
,
1038 AddrSwizzleMode
*swizzle_mode
)
1040 ADDR_E_RETURNCODE ret
;
1041 ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin
= {0};
1042 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout
= {0};
1044 sin
.size
= sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT
);
1045 sout
.size
= sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT
);
1047 sin
.flags
= in
->flags
;
1048 sin
.resourceType
= in
->resourceType
;
1049 sin
.format
= in
->format
;
1050 sin
.resourceLoction
= ADDR_RSRC_LOC_INVIS
;
1051 /* TODO: We could allow some of these: */
1052 sin
.forbiddenBlock
.micro
= 1; /* don't allow the 256B swizzle modes */
1053 sin
.forbiddenBlock
.var
= 1; /* don't allow the variable-sized swizzle modes */
1054 sin
.forbiddenBlock
.linear
= 1; /* don't allow linear swizzle modes */
1056 sin
.width
= in
->width
;
1057 sin
.height
= in
->height
;
1058 sin
.numSlices
= in
->numSlices
;
1059 sin
.numMipLevels
= in
->numMipLevels
;
1060 sin
.numSamples
= in
->numSamples
;
1061 sin
.numFrags
= in
->numFrags
;
1063 if (flags
& RADEON_SURF_SCANOUT
) {
1064 sin
.preferredSwSet
.sw_D
= 1;
1065 /* Raven only allows S for displayable surfaces with < 64 bpp, so
1066 * allow it as fallback */
1067 sin
.preferredSwSet
.sw_S
= 1;
1068 } else if (in
->flags
.depth
|| in
->flags
.stencil
|| is_fmask
)
1069 sin
.preferredSwSet
.sw_Z
= 1;
1071 sin
.preferredSwSet
.sw_S
= 1;
1074 sin
.flags
.display
= 0;
1075 sin
.flags
.color
= 0;
1076 sin
.flags
.fmask
= 1;
1079 ret
= Addr2GetPreferredSurfaceSetting(addrlib
, &sin
, &sout
);
1083 *swizzle_mode
= sout
.swizzleMode
;
1087 static int gfx9_compute_miptree(ADDR_HANDLE addrlib
,
1088 const struct ac_surf_config
*config
,
1089 struct radeon_surf
*surf
, bool compressed
,
1090 ADDR2_COMPUTE_SURFACE_INFO_INPUT
*in
)
1092 ADDR2_MIP_INFO mip_info
[RADEON_SURF_MAX_LEVELS
] = {};
1093 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out
= {0};
1094 ADDR_E_RETURNCODE ret
;
1096 out
.size
= sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
);
1097 out
.pMipInfo
= mip_info
;
1099 ret
= Addr2ComputeSurfaceInfo(addrlib
, in
, &out
);
1103 if (in
->flags
.stencil
) {
1104 surf
->u
.gfx9
.stencil
.swizzle_mode
= in
->swizzleMode
;
1105 surf
->u
.gfx9
.stencil
.epitch
= out
.epitchIsHeight
? out
.mipChainHeight
- 1 :
1106 out
.mipChainPitch
- 1;
1107 surf
->surf_alignment
= MAX2(surf
->surf_alignment
, out
.baseAlign
);
1108 surf
->u
.gfx9
.stencil_offset
= align(surf
->surf_size
, out
.baseAlign
);
1109 surf
->surf_size
= surf
->u
.gfx9
.stencil_offset
+ out
.surfSize
;
1113 surf
->u
.gfx9
.surf
.swizzle_mode
= in
->swizzleMode
;
1114 surf
->u
.gfx9
.surf
.epitch
= out
.epitchIsHeight
? out
.mipChainHeight
- 1 :
1115 out
.mipChainPitch
- 1;
1117 /* CMASK fast clear uses these even if FMASK isn't allocated.
1118 * FMASK only supports the Z swizzle modes, whose numbers are multiples of 4.
1120 surf
->u
.gfx9
.fmask
.swizzle_mode
= surf
->u
.gfx9
.surf
.swizzle_mode
& ~0x3;
1121 surf
->u
.gfx9
.fmask
.epitch
= surf
->u
.gfx9
.surf
.epitch
;
1123 surf
->u
.gfx9
.surf_slice_size
= out
.sliceSize
;
1124 surf
->u
.gfx9
.surf_pitch
= out
.pitch
;
1125 surf
->u
.gfx9
.surf_height
= out
.height
;
1126 surf
->surf_size
= out
.surfSize
;
1127 surf
->surf_alignment
= out
.baseAlign
;
1129 if (in
->swizzleMode
== ADDR_SW_LINEAR
) {
1130 for (unsigned i
= 0; i
< in
->numMipLevels
; i
++)
1131 surf
->u
.gfx9
.offset
[i
] = mip_info
[i
].offset
;
1134 if (in
->flags
.depth
) {
1135 assert(in
->swizzleMode
!= ADDR_SW_LINEAR
);
1138 ADDR2_COMPUTE_HTILE_INFO_INPUT hin
= {0};
1139 ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout
= {0};
1141 hin
.size
= sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT
);
1142 hout
.size
= sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT
);
1144 hin
.hTileFlags
.pipeAligned
= !in
->flags
.metaPipeUnaligned
;
1145 hin
.hTileFlags
.rbAligned
= !in
->flags
.metaRbUnaligned
;
1146 hin
.depthFlags
= in
->flags
;
1147 hin
.swizzleMode
= in
->swizzleMode
;
1148 hin
.unalignedWidth
= in
->width
;
1149 hin
.unalignedHeight
= in
->height
;
1150 hin
.numSlices
= in
->numSlices
;
1151 hin
.numMipLevels
= in
->numMipLevels
;
1153 ret
= Addr2ComputeHtileInfo(addrlib
, &hin
, &hout
);
1157 surf
->u
.gfx9
.htile
.rb_aligned
= hin
.hTileFlags
.rbAligned
;
1158 surf
->u
.gfx9
.htile
.pipe_aligned
= hin
.hTileFlags
.pipeAligned
;
1159 surf
->htile_size
= hout
.htileBytes
;
1160 surf
->htile_slice_size
= hout
.sliceSize
;
1161 surf
->htile_alignment
= hout
.baseAlign
;
1163 /* Compute tile swizzle for the color surface.
1164 * All *_X and *_T modes can use the swizzle.
1166 if (config
->info
.surf_index
&&
1167 in
->swizzleMode
>= ADDR_SW_64KB_Z_T
&&
1168 !out
.mipChainInTail
&&
1169 !(surf
->flags
& RADEON_SURF_SHAREABLE
) &&
1170 !in
->flags
.display
) {
1171 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin
= {0};
1172 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout
= {0};
1174 xin
.size
= sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT
);
1175 xout
.size
= sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT
);
1177 xin
.surfIndex
= p_atomic_inc_return(config
->info
.surf_index
) - 1;
1178 xin
.flags
= in
->flags
;
1179 xin
.swizzleMode
= in
->swizzleMode
;
1180 xin
.resourceType
= in
->resourceType
;
1181 xin
.format
= in
->format
;
1182 xin
.numSamples
= in
->numSamples
;
1183 xin
.numFrags
= in
->numFrags
;
1185 ret
= Addr2ComputePipeBankXor(addrlib
, &xin
, &xout
);
1189 assert(xout
.pipeBankXor
<=
1190 u_bit_consecutive(0, sizeof(surf
->tile_swizzle
) * 8));
1191 surf
->tile_swizzle
= xout
.pipeBankXor
;
1195 if (!(surf
->flags
& RADEON_SURF_DISABLE_DCC
) &&
1197 in
->swizzleMode
!= ADDR_SW_LINEAR
) {
1198 ADDR2_COMPUTE_DCCINFO_INPUT din
= {0};
1199 ADDR2_COMPUTE_DCCINFO_OUTPUT dout
= {0};
1200 ADDR2_META_MIP_INFO meta_mip_info
[RADEON_SURF_MAX_LEVELS
] = {};
1202 din
.size
= sizeof(ADDR2_COMPUTE_DCCINFO_INPUT
);
1203 dout
.size
= sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT
);
1204 dout
.pMipInfo
= meta_mip_info
;
1206 din
.dccKeyFlags
.pipeAligned
= !in
->flags
.metaPipeUnaligned
;
1207 din
.dccKeyFlags
.rbAligned
= !in
->flags
.metaRbUnaligned
;
1208 din
.colorFlags
= in
->flags
;
1209 din
.resourceType
= in
->resourceType
;
1210 din
.swizzleMode
= in
->swizzleMode
;
1212 din
.unalignedWidth
= in
->width
;
1213 din
.unalignedHeight
= in
->height
;
1214 din
.numSlices
= in
->numSlices
;
1215 din
.numFrags
= in
->numFrags
;
1216 din
.numMipLevels
= in
->numMipLevels
;
1217 din
.dataSurfaceSize
= out
.surfSize
;
1219 ret
= Addr2ComputeDccInfo(addrlib
, &din
, &dout
);
1223 surf
->u
.gfx9
.dcc
.rb_aligned
= din
.dccKeyFlags
.rbAligned
;
1224 surf
->u
.gfx9
.dcc
.pipe_aligned
= din
.dccKeyFlags
.pipeAligned
;
1225 surf
->u
.gfx9
.dcc_pitch_max
= dout
.pitch
- 1;
1226 surf
->dcc_size
= dout
.dccRamSize
;
1227 surf
->dcc_alignment
= dout
.dccRamBaseAlign
;
1228 surf
->num_dcc_levels
= in
->numMipLevels
;
1230 /* Disable DCC for levels that are in the mip tail.
1232 * There are two issues that this is intended to
1235 * 1. Multiple mip levels may share a cache line. This
1236 * can lead to corruption when switching between
1237 * rendering to different mip levels because the
1238 * RBs don't maintain coherency.
1240 * 2. Texturing with metadata after rendering sometimes
1241 * fails with corruption, probably for a similar
1244 * Working around these issues for all levels in the
1245 * mip tail may be overly conservative, but it's what
1248 * Alternative solutions that also work but are worse:
1249 * - Disable DCC entirely.
1250 * - Flush TC L2 after rendering.
1252 for (unsigned i
= 0; i
< in
->numMipLevels
; i
++) {
1253 if (meta_mip_info
[i
].inMiptail
) {
1254 surf
->num_dcc_levels
= i
;
1259 if (!surf
->num_dcc_levels
)
1264 if (in
->numSamples
> 1) {
1265 ADDR2_COMPUTE_FMASK_INFO_INPUT fin
= {0};
1266 ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout
= {0};
1268 fin
.size
= sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT
);
1269 fout
.size
= sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT
);
1271 ret
= gfx9_get_preferred_swizzle_mode(addrlib
, in
,
1277 fin
.unalignedWidth
= in
->width
;
1278 fin
.unalignedHeight
= in
->height
;
1279 fin
.numSlices
= in
->numSlices
;
1280 fin
.numSamples
= in
->numSamples
;
1281 fin
.numFrags
= in
->numFrags
;
1283 ret
= Addr2ComputeFmaskInfo(addrlib
, &fin
, &fout
);
1287 surf
->u
.gfx9
.fmask
.swizzle_mode
= fin
.swizzleMode
;
1288 surf
->u
.gfx9
.fmask
.epitch
= fout
.pitch
- 1;
1289 surf
->fmask_size
= fout
.fmaskBytes
;
1290 surf
->fmask_alignment
= fout
.baseAlign
;
1292 /* Compute tile swizzle for the FMASK surface. */
1293 if (config
->info
.fmask_surf_index
&&
1294 fin
.swizzleMode
>= ADDR_SW_64KB_Z_T
&&
1295 !(surf
->flags
& RADEON_SURF_SHAREABLE
)) {
1296 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin
= {0};
1297 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout
= {0};
1299 xin
.size
= sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT
);
1300 xout
.size
= sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT
);
1302 /* This counter starts from 1 instead of 0. */
1303 xin
.surfIndex
= p_atomic_inc_return(config
->info
.fmask_surf_index
);
1304 xin
.flags
= in
->flags
;
1305 xin
.swizzleMode
= fin
.swizzleMode
;
1306 xin
.resourceType
= in
->resourceType
;
1307 xin
.format
= in
->format
;
1308 xin
.numSamples
= in
->numSamples
;
1309 xin
.numFrags
= in
->numFrags
;
1311 ret
= Addr2ComputePipeBankXor(addrlib
, &xin
, &xout
);
1315 assert(xout
.pipeBankXor
<=
1316 u_bit_consecutive(0, sizeof(surf
->fmask_tile_swizzle
) * 8));
1317 surf
->fmask_tile_swizzle
= xout
.pipeBankXor
;
1322 if (in
->swizzleMode
!= ADDR_SW_LINEAR
) {
1323 ADDR2_COMPUTE_CMASK_INFO_INPUT cin
= {0};
1324 ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout
= {0};
1326 cin
.size
= sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT
);
1327 cout
.size
= sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT
);
1329 if (in
->numSamples
> 1) {
1330 /* FMASK is always aligned. */
1331 cin
.cMaskFlags
.pipeAligned
= 1;
1332 cin
.cMaskFlags
.rbAligned
= 1;
1334 cin
.cMaskFlags
.pipeAligned
= !in
->flags
.metaPipeUnaligned
;
1335 cin
.cMaskFlags
.rbAligned
= !in
->flags
.metaRbUnaligned
;
1337 cin
.colorFlags
= in
->flags
;
1338 cin
.resourceType
= in
->resourceType
;
1339 cin
.unalignedWidth
= in
->width
;
1340 cin
.unalignedHeight
= in
->height
;
1341 cin
.numSlices
= in
->numSlices
;
1343 if (in
->numSamples
> 1)
1344 cin
.swizzleMode
= surf
->u
.gfx9
.fmask
.swizzle_mode
;
1346 cin
.swizzleMode
= in
->swizzleMode
;
1348 ret
= Addr2ComputeCmaskInfo(addrlib
, &cin
, &cout
);
1352 surf
->u
.gfx9
.cmask
.rb_aligned
= cin
.cMaskFlags
.rbAligned
;
1353 surf
->u
.gfx9
.cmask
.pipe_aligned
= cin
.cMaskFlags
.pipeAligned
;
1354 surf
->cmask_size
= cout
.cmaskBytes
;
1355 surf
->cmask_alignment
= cout
.baseAlign
;
1362 static int gfx9_compute_surface(ADDR_HANDLE addrlib
,
1363 const struct radeon_info
*info
,
1364 const struct ac_surf_config
*config
,
1365 enum radeon_surf_mode mode
,
1366 struct radeon_surf
*surf
)
1369 ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn
= {0};
1372 AddrSurfInfoIn
.size
= sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT
);
1374 compressed
= surf
->blk_w
== 4 && surf
->blk_h
== 4;
1376 /* The format must be set correctly for the allocation of compressed
1377 * textures to work. In other cases, setting the bpp is sufficient. */
1379 switch (surf
->bpe
) {
1381 AddrSurfInfoIn
.format
= ADDR_FMT_BC1
;
1384 AddrSurfInfoIn
.format
= ADDR_FMT_BC3
;
1390 switch (surf
->bpe
) {
1392 assert(!(surf
->flags
& RADEON_SURF_ZBUFFER
));
1393 AddrSurfInfoIn
.format
= ADDR_FMT_8
;
1396 assert(surf
->flags
& RADEON_SURF_ZBUFFER
||
1397 !(surf
->flags
& RADEON_SURF_SBUFFER
));
1398 AddrSurfInfoIn
.format
= ADDR_FMT_16
;
1401 assert(surf
->flags
& RADEON_SURF_ZBUFFER
||
1402 !(surf
->flags
& RADEON_SURF_SBUFFER
));
1403 AddrSurfInfoIn
.format
= ADDR_FMT_32
;
1406 assert(!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
));
1407 AddrSurfInfoIn
.format
= ADDR_FMT_32_32
;
1410 assert(!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
));
1411 AddrSurfInfoIn
.format
= ADDR_FMT_32_32_32
;
1414 assert(!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
));
1415 AddrSurfInfoIn
.format
= ADDR_FMT_32_32_32_32
;
1420 AddrSurfInfoIn
.bpp
= surf
->bpe
* 8;
1423 AddrSurfInfoIn
.flags
.color
= !(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
);
1424 AddrSurfInfoIn
.flags
.depth
= (surf
->flags
& RADEON_SURF_ZBUFFER
) != 0;
1425 AddrSurfInfoIn
.flags
.display
= get_display_flag(config
, surf
);
1426 /* flags.texture currently refers to TC-compatible HTILE */
1427 AddrSurfInfoIn
.flags
.texture
= AddrSurfInfoIn
.flags
.color
||
1428 surf
->flags
& RADEON_SURF_TC_COMPATIBLE_HTILE
;
1429 AddrSurfInfoIn
.flags
.opt4space
= 1;
1431 AddrSurfInfoIn
.numMipLevels
= config
->info
.levels
;
1432 AddrSurfInfoIn
.numSamples
= MAX2(1, config
->info
.samples
);
1433 AddrSurfInfoIn
.numFrags
= AddrSurfInfoIn
.numSamples
;
1435 if (!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
))
1436 AddrSurfInfoIn
.numFrags
= MAX2(1, config
->info
.storage_samples
);
1438 /* GFX9 doesn't support 1D depth textures, so allocate all 1D textures
1439 * as 2D to avoid having shader variants for 1D vs 2D, so all shaders
1440 * must sample 1D textures as 2D. */
1442 AddrSurfInfoIn
.resourceType
= ADDR_RSRC_TEX_3D
;
1444 AddrSurfInfoIn
.resourceType
= ADDR_RSRC_TEX_2D
;
1446 AddrSurfInfoIn
.width
= config
->info
.width
;
1447 AddrSurfInfoIn
.height
= config
->info
.height
;
1450 AddrSurfInfoIn
.numSlices
= config
->info
.depth
;
1451 else if (config
->is_cube
)
1452 AddrSurfInfoIn
.numSlices
= 6;
1454 AddrSurfInfoIn
.numSlices
= config
->info
.array_size
;
1456 /* This is propagated to HTILE/DCC/CMASK. */
1457 AddrSurfInfoIn
.flags
.metaPipeUnaligned
= 0;
1458 AddrSurfInfoIn
.flags
.metaRbUnaligned
= 0;
1461 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
1462 assert(config
->info
.samples
<= 1);
1463 assert(!(surf
->flags
& RADEON_SURF_Z_OR_SBUFFER
));
1464 AddrSurfInfoIn
.swizzleMode
= ADDR_SW_LINEAR
;
1467 case RADEON_SURF_MODE_1D
:
1468 case RADEON_SURF_MODE_2D
:
1469 if (surf
->flags
& RADEON_SURF_IMPORTED
) {
1470 AddrSurfInfoIn
.swizzleMode
= surf
->u
.gfx9
.surf
.swizzle_mode
;
1474 r
= gfx9_get_preferred_swizzle_mode(addrlib
, &AddrSurfInfoIn
,
1476 &AddrSurfInfoIn
.swizzleMode
);
1485 surf
->u
.gfx9
.resource_type
= AddrSurfInfoIn
.resourceType
;
1486 surf
->has_stencil
= !!(surf
->flags
& RADEON_SURF_SBUFFER
);
1488 surf
->num_dcc_levels
= 0;
1489 surf
->surf_size
= 0;
1490 surf
->fmask_size
= 0;
1492 surf
->htile_size
= 0;
1493 surf
->htile_slice_size
= 0;
1494 surf
->u
.gfx9
.surf_offset
= 0;
1495 surf
->u
.gfx9
.stencil_offset
= 0;
1496 surf
->cmask_size
= 0;
1498 /* Calculate texture layout information. */
1499 r
= gfx9_compute_miptree(addrlib
, config
, surf
, compressed
,
1504 /* Calculate texture layout information for stencil. */
1505 if (surf
->flags
& RADEON_SURF_SBUFFER
) {
1506 AddrSurfInfoIn
.flags
.stencil
= 1;
1507 AddrSurfInfoIn
.bpp
= 8;
1508 AddrSurfInfoIn
.format
= ADDR_FMT_8
;
1510 if (!AddrSurfInfoIn
.flags
.depth
) {
1511 r
= gfx9_get_preferred_swizzle_mode(addrlib
, &AddrSurfInfoIn
,
1513 &AddrSurfInfoIn
.swizzleMode
);
1517 AddrSurfInfoIn
.flags
.depth
= 0;
1519 r
= gfx9_compute_miptree(addrlib
, config
, surf
, compressed
,
1525 surf
->is_linear
= surf
->u
.gfx9
.surf
.swizzle_mode
== ADDR_SW_LINEAR
;
1527 /* Query whether the surface is displayable. */
1528 bool displayable
= false;
1529 r
= Addr2IsValidDisplaySwizzleMode(addrlib
, surf
->u
.gfx9
.surf
.swizzle_mode
,
1530 surf
->bpe
* 8, &displayable
);
1533 surf
->is_displayable
= displayable
;
1535 switch (surf
->u
.gfx9
.surf
.swizzle_mode
) {
1537 case ADDR_SW_256B_S
:
1539 case ADDR_SW_64KB_S
:
1541 case ADDR_SW_64KB_S_T
:
1542 case ADDR_SW_4KB_S_X
:
1543 case ADDR_SW_64KB_S_X
:
1544 case ADDR_SW_VAR_S_X
:
1545 surf
->micro_tile_mode
= RADEON_MICRO_MODE_THIN
;
1549 case ADDR_SW_LINEAR
:
1550 case ADDR_SW_256B_D
:
1552 case ADDR_SW_64KB_D
:
1554 case ADDR_SW_64KB_D_T
:
1555 case ADDR_SW_4KB_D_X
:
1556 case ADDR_SW_64KB_D_X
:
1557 case ADDR_SW_VAR_D_X
:
1558 surf
->micro_tile_mode
= RADEON_MICRO_MODE_DISPLAY
;
1562 case ADDR_SW_256B_R
:
1564 case ADDR_SW_64KB_R
:
1566 case ADDR_SW_64KB_R_T
:
1567 case ADDR_SW_4KB_R_X
:
1568 case ADDR_SW_64KB_R_X
:
1569 case ADDR_SW_VAR_R_X
:
1570 /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
1571 * used at the same time. This case is not currently expected to occur
1572 * because we don't use rotated. Enforce this restriction on all chips
1573 * to facilitate testing.
1575 assert(!"rotate micro tile mode is unsupported");
1580 case ADDR_SW_64KB_Z
:
1582 case ADDR_SW_64KB_Z_T
:
1583 case ADDR_SW_4KB_Z_X
:
1584 case ADDR_SW_64KB_Z_X
:
1585 case ADDR_SW_VAR_Z_X
:
1586 surf
->micro_tile_mode
= RADEON_MICRO_MODE_DEPTH
;
1593 /* Temporary workaround to prevent VM faults and hangs. */
1594 if (info
->family
== CHIP_VEGA12
)
1595 surf
->fmask_size
*= 8;
1600 int ac_compute_surface(ADDR_HANDLE addrlib
, const struct radeon_info
*info
,
1601 const struct ac_surf_config
*config
,
1602 enum radeon_surf_mode mode
,
1603 struct radeon_surf
*surf
)
1607 r
= surf_config_sanity(config
, surf
->flags
);
1611 if (info
->chip_class
>= GFX9
)
1612 return gfx9_compute_surface(addrlib
, info
, config
, mode
, surf
);
1614 return gfx6_compute_surface(addrlib
, info
, config
, mode
, surf
);