radeonsi: add support for Raven2 (v2)
[mesa.git] / src / amd / addrlib / gfx9 / gfx9addrlib.cpp
1 /*
2 * Copyright © 2017 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37
38 #include "amdgpu_asic_addr.h"
39
40 #include "util/macros.h"
41
42 ////////////////////////////////////////////////////////////////////////////////////////////////////
43 ////////////////////////////////////////////////////////////////////////////////////////////////////
44
45 namespace Addr
46 {
47
48 /**
49 ************************************************************************************************************************
50 * Gfx9HwlInit
51 *
52 * @brief
53 * Creates a Gfx9Lib object.
54 *
55 * @return
56 * Returns a Gfx9Lib object pointer.
57 ************************************************************************************************************************
58 */
59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
60 {
61 return V2::Gfx9Lib::CreateObj(pClient);
62 }
63
64 namespace V2
65 {
66
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 // Static Const Member
69 ////////////////////////////////////////////////////////////////////////////////////////////////////
70
71 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
72 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
73 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
74 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
75 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
76 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R
77
78 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
79 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
80 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
81 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R
82
83 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
84 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
85 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
86 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_R
87
88 {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z
89 {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S
90 {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D
91 {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R
92
93 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T
94 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
95 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
96 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T
97
98 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x
99 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x
100 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x
101 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x
102
103 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
104 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
105 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
106 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X
107
108 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X
109 {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X
110 {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X
111 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X
112 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
113 };
114
115 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
116 8, 6, 5, 4, 3, 2, 1, 0};
117
118 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
119
120 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
121
122 /**
123 ************************************************************************************************************************
124 * Gfx9Lib::Gfx9Lib
125 *
126 * @brief
127 * Constructor
128 *
129 ************************************************************************************************************************
130 */
131 Gfx9Lib::Gfx9Lib(const Client* pClient)
132 :
133 Lib(pClient),
134 m_numEquations(0)
135 {
136 m_class = AI_ADDRLIB;
137 memset(&m_settings, 0, sizeof(m_settings));
138 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
139 }
140
141 /**
142 ************************************************************************************************************************
143 * Gfx9Lib::~Gfx9Lib
144 *
145 * @brief
146 * Destructor
147 ************************************************************************************************************************
148 */
149 Gfx9Lib::~Gfx9Lib()
150 {
151 }
152
153 /**
154 ************************************************************************************************************************
155 * Gfx9Lib::HwlComputeHtileInfo
156 *
157 * @brief
158 * Interface function stub of AddrComputeHtileInfo
159 *
160 * @return
161 * ADDR_E_RETURNCODE
162 ************************************************************************************************************************
163 */
164 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
165 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
166 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
167 ) const
168 {
169 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
170 pIn->swizzleMode);
171
172 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
173
174 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
175
176 if ((numPipeTotal == 1) && (numRbTotal == 1))
177 {
178 numCompressBlkPerMetaBlkLog2 = 10;
179 }
180 else
181 {
182 if (m_settings.applyAliasFix)
183 {
184 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
185 }
186 else
187 {
188 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
189 }
190 }
191
192 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
193
194 Dim3d metaBlkDim = {8, 8, 1};
195 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
196 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
197 UINT_32 heightAmp = totalAmpBits - widthAmp;
198 metaBlkDim.w <<= widthAmp;
199 metaBlkDim.h <<= heightAmp;
200
201 #if DEBUG
202 Dim3d metaBlkDimDbg = {8, 8, 1};
203 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
204 {
205 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
206 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
207 {
208 metaBlkDimDbg.h <<= 1;
209 }
210 else
211 {
212 metaBlkDimDbg.w <<= 1;
213 }
214 }
215 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
216 #endif
217
218 UINT_32 numMetaBlkX;
219 UINT_32 numMetaBlkY;
220 UINT_32 numMetaBlkZ;
221
222 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
223 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
224 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
225
226 const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
227 UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
228
229 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
230 {
231 align *= (numPipeTotal >> 1);
232 }
233
234 align = Max(align, metaBlkSize);
235
236 if (m_settings.metaBaseAlignFix)
237 {
238 align = Max(align, GetBlockSize(pIn->swizzleMode));
239 }
240
241 if (m_settings.htileAlignFix)
242 {
243 const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
244 const INT_32 htileCachelineSizeLog2 = 11;
245 const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
246
247 INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
248
249 align <<= rbMaskPadding;
250 }
251
252 pOut->pitch = numMetaBlkX * metaBlkDim.w;
253 pOut->height = numMetaBlkY * metaBlkDim.h;
254 pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
255
256 pOut->metaBlkWidth = metaBlkDim.w;
257 pOut->metaBlkHeight = metaBlkDim.h;
258 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
259
260 pOut->baseAlign = align;
261 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
262
263 return ADDR_OK;
264 }
265
266 /**
267 ************************************************************************************************************************
268 * Gfx9Lib::HwlComputeCmaskInfo
269 *
270 * @brief
271 * Interface function stub of AddrComputeCmaskInfo
272 *
273 * @return
274 * ADDR_E_RETURNCODE
275 ************************************************************************************************************************
276 */
277 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
278 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
279 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
280 ) const
281 {
282 // TODO: Clarify with AddrLib team
283 // ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
284
285 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
286 pIn->swizzleMode);
287
288 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
289
290 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
291
292 if ((numPipeTotal == 1) && (numRbTotal == 1))
293 {
294 numCompressBlkPerMetaBlkLog2 = 13;
295 }
296 else
297 {
298 if (m_settings.applyAliasFix)
299 {
300 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
301 }
302 else
303 {
304 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
305 }
306
307 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
308 }
309
310 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
311
312 Dim2d metaBlkDim = {8, 8};
313 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
314 UINT_32 heightAmp = totalAmpBits >> 1;
315 UINT_32 widthAmp = totalAmpBits - heightAmp;
316 metaBlkDim.w <<= widthAmp;
317 metaBlkDim.h <<= heightAmp;
318
319 #if DEBUG
320 Dim2d metaBlkDimDbg = {8, 8};
321 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
322 {
323 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
324 {
325 metaBlkDimDbg.h <<= 1;
326 }
327 else
328 {
329 metaBlkDimDbg.w <<= 1;
330 }
331 }
332 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
333 #endif
334
335 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
336 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
337 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
338
339 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
340
341 if (m_settings.metaBaseAlignFix)
342 {
343 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
344 }
345
346 pOut->pitch = numMetaBlkX * metaBlkDim.w;
347 pOut->height = numMetaBlkY * metaBlkDim.h;
348 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
349 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
350 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
351
352 pOut->metaBlkWidth = metaBlkDim.w;
353 pOut->metaBlkHeight = metaBlkDim.h;
354
355 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
356
357 return ADDR_OK;
358 }
359
360 /**
361 ************************************************************************************************************************
362 * Gfx9Lib::GetMetaMipInfo
363 *
364 * @brief
365 * Get meta mip info
366 *
367 * @return
368 * N/A
369 ************************************************************************************************************************
370 */
371 VOID Gfx9Lib::GetMetaMipInfo(
372 UINT_32 numMipLevels, ///< [in] number of mip levels
373 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
374 BOOL_32 dataThick, ///< [in] data surface is thick
375 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
376 UINT_32 mip0Width, ///< [in] mip0 width
377 UINT_32 mip0Height, ///< [in] mip0 height
378 UINT_32 mip0Depth, ///< [in] mip0 depth
379 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
380 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
381 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
382 const
383 {
384 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
385 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
386 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
387 UINT_32 tailWidth = pMetaBlkDim->w;
388 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
389 UINT_32 tailDepth = pMetaBlkDim->d;
390 BOOL_32 inTail = FALSE;
391 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
392
393 if (numMipLevels > 1)
394 {
395 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
396 {
397 // Z major
398 major = ADDR_MAJOR_Z;
399 }
400 else if (numMetaBlkX >= numMetaBlkY)
401 {
402 // X major
403 major = ADDR_MAJOR_X;
404 }
405 else
406 {
407 // Y major
408 major = ADDR_MAJOR_Y;
409 }
410
411 inTail = ((mip0Width <= tailWidth) &&
412 (mip0Height <= tailHeight) &&
413 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
414
415 if (inTail == FALSE)
416 {
417 UINT_32 orderLimit;
418 UINT_32 *pMipDim;
419 UINT_32 *pOrderDim;
420
421 if (major == ADDR_MAJOR_Z)
422 {
423 // Z major
424 pMipDim = &numMetaBlkY;
425 pOrderDim = &numMetaBlkZ;
426 orderLimit = 4;
427 }
428 else if (major == ADDR_MAJOR_X)
429 {
430 // X major
431 pMipDim = &numMetaBlkY;
432 pOrderDim = &numMetaBlkX;
433 orderLimit = 4;
434 }
435 else
436 {
437 // Y major
438 pMipDim = &numMetaBlkX;
439 pOrderDim = &numMetaBlkY;
440 orderLimit = 2;
441 }
442
443 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
444 {
445 *pMipDim += 2;
446 }
447 else
448 {
449 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
450 }
451 }
452 }
453
454 if (pInfo != NULL)
455 {
456 UINT_32 mipWidth = mip0Width;
457 UINT_32 mipHeight = mip0Height;
458 UINT_32 mipDepth = mip0Depth;
459 Dim3d mipCoord = {0};
460
461 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
462 {
463 if (inTail)
464 {
465 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
466 pMetaBlkDim);
467 break;
468 }
469 else
470 {
471 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
472 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
473 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
474
475 pInfo[mip].inMiptail = FALSE;
476 pInfo[mip].startX = mipCoord.w;
477 pInfo[mip].startY = mipCoord.h;
478 pInfo[mip].startZ = mipCoord.d;
479 pInfo[mip].width = mipWidth;
480 pInfo[mip].height = mipHeight;
481 pInfo[mip].depth = dataThick ? mipDepth : 1;
482
483 if ((mip >= 3) || (mip & 1))
484 {
485 switch (major)
486 {
487 case ADDR_MAJOR_X:
488 mipCoord.w += mipWidth;
489 break;
490 case ADDR_MAJOR_Y:
491 mipCoord.h += mipHeight;
492 break;
493 case ADDR_MAJOR_Z:
494 mipCoord.d += mipDepth;
495 break;
496 default:
497 break;
498 }
499 }
500 else
501 {
502 switch (major)
503 {
504 case ADDR_MAJOR_X:
505 mipCoord.h += mipHeight;
506 break;
507 case ADDR_MAJOR_Y:
508 mipCoord.w += mipWidth;
509 break;
510 case ADDR_MAJOR_Z:
511 mipCoord.h += mipHeight;
512 break;
513 default:
514 break;
515 }
516 }
517
518 mipWidth = Max(mipWidth >> 1, 1u);
519 mipHeight = Max(mipHeight >> 1, 1u);
520 mipDepth = Max(mipDepth >> 1, 1u);
521
522 inTail = ((mipWidth <= tailWidth) &&
523 (mipHeight <= tailHeight) &&
524 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
525 }
526 }
527 }
528
529 *pNumMetaBlkX = numMetaBlkX;
530 *pNumMetaBlkY = numMetaBlkY;
531 *pNumMetaBlkZ = numMetaBlkZ;
532 }
533
534 /**
535 ************************************************************************************************************************
536 * Gfx9Lib::HwlComputeDccInfo
537 *
538 * @brief
539 * Interface function to compute DCC key info
540 *
541 * @return
542 * ADDR_E_RETURNCODE
543 ************************************************************************************************************************
544 */
545 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
546 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
547 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
548 ) const
549 {
550 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
551 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
552 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
553
554 if (dataLinear)
555 {
556 metaLinear = TRUE;
557 }
558 else if (metaLinear == TRUE)
559 {
560 pipeAligned = FALSE;
561 }
562
563 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
564
565 if (metaLinear)
566 {
567 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
568 ADDR_ASSERT_ALWAYS();
569
570 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
571 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
572 }
573 else
574 {
575 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
576
577 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
578
579 UINT_32 numFrags = Max(pIn->numFrags, 1u);
580 UINT_32 numSlices = Max(pIn->numSlices, 1u);
581
582 minMetaBlkSize /= numFrags;
583
584 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
585
586 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
587
588 if ((numPipeTotal > 1) || (numRbTotal > 1))
589 {
590 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
591
592 numCompressBlkPerMetaBlk =
593 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
594
595 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
596 {
597 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
598 }
599 }
600
601 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
602 Dim3d metaBlkDim = compressBlkDim;
603
604 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
605 {
606 if ((metaBlkDim.h < metaBlkDim.w) ||
607 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
608 {
609 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
610 {
611 metaBlkDim.h <<= 1;
612 }
613 else
614 {
615 metaBlkDim.d <<= 1;
616 }
617 }
618 else
619 {
620 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
621 {
622 metaBlkDim.w <<= 1;
623 }
624 else
625 {
626 metaBlkDim.d <<= 1;
627 }
628 }
629 }
630
631 UINT_32 numMetaBlkX;
632 UINT_32 numMetaBlkY;
633 UINT_32 numMetaBlkZ;
634
635 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
636 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
637 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
638
639 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
640
641 if (numFrags > m_maxCompFrag)
642 {
643 sizeAlign *= (numFrags / m_maxCompFrag);
644 }
645
646 if (m_settings.metaBaseAlignFix)
647 {
648 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
649 }
650
651 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
652 numCompressBlkPerMetaBlk * numFrags;
653 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
654 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
655
656 pOut->pitch = numMetaBlkX * metaBlkDim.w;
657 pOut->height = numMetaBlkY * metaBlkDim.h;
658 pOut->depth = numMetaBlkZ * metaBlkDim.d;
659
660 pOut->compressBlkWidth = compressBlkDim.w;
661 pOut->compressBlkHeight = compressBlkDim.h;
662 pOut->compressBlkDepth = compressBlkDim.d;
663
664 pOut->metaBlkWidth = metaBlkDim.w;
665 pOut->metaBlkHeight = metaBlkDim.h;
666 pOut->metaBlkDepth = metaBlkDim.d;
667
668 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
669 pOut->fastClearSizePerSlice =
670 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
671 }
672
673 return ADDR_OK;
674 }
675
676 /**
677 ************************************************************************************************************************
678 * Gfx9Lib::HwlComputeMaxBaseAlignments
679 *
680 * @brief
681 * Gets maximum alignments
682 * @return
683 * maximum alignments
684 ************************************************************************************************************************
685 */
686 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
687 {
688 return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB);
689 }
690
691 /**
692 ************************************************************************************************************************
693 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
694 *
695 * @brief
696 * Gets maximum alignments for metadata
697 * @return
698 * maximum alignments for metadata
699 ************************************************************************************************************************
700 */
701 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
702 {
703 // Max base alignment for Htile
704 const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
705 const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
706
707 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
708 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
709 ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
710 const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
711
712 UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
713
714 if (maxNumPipeTotal > 2)
715 {
716 maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
717 }
718
719 maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
720
721 if (m_settings.metaBaseAlignFix)
722 {
723 maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB));
724 }
725
726 if (m_settings.htileAlignFix)
727 {
728 maxBaseAlignHtile *= maxNumPipeTotal;
729 }
730
731 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
732
733 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
734 UINT_32 maxBaseAlignDcc3D = 65536;
735
736 if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
737 {
738 maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
739 }
740
741 // Max base alignment for Msaa Dcc
742 UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
743
744 if (m_settings.metaBaseAlignFix)
745 {
746 maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB));
747 }
748
749 return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
750 }
751
752 /**
753 ************************************************************************************************************************
754 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
755 *
756 * @brief
757 * Interface function stub of AddrComputeCmaskAddrFromCoord
758 *
759 * @return
760 * ADDR_E_RETURNCODE
761 ************************************************************************************************************************
762 */
763 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
764 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
765 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
766 {
767 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
768 input.size = sizeof(input);
769 input.cMaskFlags = pIn->cMaskFlags;
770 input.colorFlags = pIn->colorFlags;
771 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
772 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
773 input.numSlices = Max(pIn->numSlices, 1u);
774 input.swizzleMode = pIn->swizzleMode;
775 input.resourceType = pIn->resourceType;
776
777 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
778 output.size = sizeof(output);
779
780 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
781
782 if (returnCode == ADDR_OK)
783 {
784 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
785 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
786 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
787 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
788
789 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
790 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
791 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
792
793 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
794
795 UINT_32 xb = pIn->x / output.metaBlkWidth;
796 UINT_32 yb = pIn->y / output.metaBlkHeight;
797 UINT_32 zb = pIn->slice;
798
799 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
800 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
801 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
802
803 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
804
805 pOut->addr = address >> 1;
806 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
807
808
809 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
810 pIn->swizzleMode);
811
812 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
813
814 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
815 }
816
817 return returnCode;
818 }
819
820 /**
821 ************************************************************************************************************************
822 * Gfx9Lib::HwlComputeHtileAddrFromCoord
823 *
824 * @brief
825 * Interface function stub of AddrComputeHtileAddrFromCoord
826 *
827 * @return
828 * ADDR_E_RETURNCODE
829 ************************************************************************************************************************
830 */
831 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
832 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
833 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
834 {
835 ADDR_E_RETURNCODE returnCode = ADDR_OK;
836
837 if (pIn->numMipLevels > 1)
838 {
839 returnCode = ADDR_NOTIMPLEMENTED;
840 }
841 else
842 {
843 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
844 input.size = sizeof(input);
845 input.hTileFlags = pIn->hTileFlags;
846 input.depthFlags = pIn->depthflags;
847 input.swizzleMode = pIn->swizzleMode;
848 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
849 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
850 input.numSlices = Max(pIn->numSlices, 1u);
851 input.numMipLevels = Max(pIn->numMipLevels, 1u);
852
853 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
854 output.size = sizeof(output);
855
856 returnCode = ComputeHtileInfo(&input, &output);
857
858 if (returnCode == ADDR_OK)
859 {
860 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
861 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
862 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
863 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
864
865 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
866 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
867 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
868
869 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
870
871 UINT_32 xb = pIn->x / output.metaBlkWidth;
872 UINT_32 yb = pIn->y / output.metaBlkHeight;
873 UINT_32 zb = pIn->slice;
874
875 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
876 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
877 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
878
879 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
880
881 pOut->addr = address >> 1;
882
883 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
884 pIn->swizzleMode);
885
886 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
887
888 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
889 }
890 }
891
892 return returnCode;
893 }
894
895 /**
896 ************************************************************************************************************************
897 * Gfx9Lib::HwlComputeHtileCoordFromAddr
898 *
899 * @brief
900 * Interface function stub of AddrComputeHtileCoordFromAddr
901 *
902 * @return
903 * ADDR_E_RETURNCODE
904 ************************************************************************************************************************
905 */
906 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
907 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
908 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
909 {
910 ADDR_E_RETURNCODE returnCode = ADDR_OK;
911
912 if (pIn->numMipLevels > 1)
913 {
914 returnCode = ADDR_NOTIMPLEMENTED;
915 }
916 else
917 {
918 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
919 input.size = sizeof(input);
920 input.hTileFlags = pIn->hTileFlags;
921 input.swizzleMode = pIn->swizzleMode;
922 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
923 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
924 input.numSlices = Max(pIn->numSlices, 1u);
925 input.numMipLevels = Max(pIn->numMipLevels, 1u);
926
927 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
928 output.size = sizeof(output);
929
930 returnCode = ComputeHtileInfo(&input, &output);
931
932 if (returnCode == ADDR_OK)
933 {
934 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
935 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
936 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
937 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
938
939 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
940 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
941 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
942
943 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
944
945 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
946 pIn->swizzleMode);
947
948 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
949
950 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
951
952 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
953 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
954
955 UINT_32 x, y, z, s, m;
956 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
957
958 pOut->slice = m / sliceSizeInBlock;
959 pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
960 pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x;
961 }
962 }
963
964 return returnCode;
965 }
966
967 /**
968 ************************************************************************************************************************
969 * Gfx9Lib::HwlComputeDccAddrFromCoord
970 *
971 * @brief
972 * Interface function stub of AddrComputeDccAddrFromCoord
973 *
974 * @return
975 * ADDR_E_RETURNCODE
976 ************************************************************************************************************************
977 */
978 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
979 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
980 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
981 {
982 ADDR_E_RETURNCODE returnCode = ADDR_OK;
983
984 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
985 {
986 returnCode = ADDR_NOTIMPLEMENTED;
987 }
988 else
989 {
990 ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
991 input.size = sizeof(input);
992 input.dccKeyFlags = pIn->dccKeyFlags;
993 input.colorFlags = pIn->colorFlags;
994 input.swizzleMode = pIn->swizzleMode;
995 input.resourceType = pIn->resourceType;
996 input.bpp = pIn->bpp;
997 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
998 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
999 input.numSlices = Max(pIn->numSlices, 1u);
1000 input.numFrags = Max(pIn->numFrags, 1u);
1001 input.numMipLevels = Max(pIn->numMipLevels, 1u);
1002
1003 ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
1004 output.size = sizeof(output);
1005
1006 returnCode = ComputeDccInfo(&input, &output);
1007
1008 if (returnCode == ADDR_OK)
1009 {
1010 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1011 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
1012 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
1013 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1014 UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth);
1015 UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth);
1016 UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
1017 UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
1018
1019 MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1020 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1021 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1022 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1023
1024 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1025
1026 UINT_32 xb = pIn->x / output.metaBlkWidth;
1027 UINT_32 yb = pIn->y / output.metaBlkHeight;
1028 UINT_32 zb = pIn->slice / output.metaBlkDepth;
1029
1030 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
1031 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1032 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1033
1034 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
1035
1036 pOut->addr = address >> 1;
1037
1038 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1039 pIn->swizzleMode);
1040
1041 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1042
1043 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1044 }
1045 }
1046
1047 return returnCode;
1048 }
1049
1050 /**
1051 ************************************************************************************************************************
1052 * Gfx9Lib::HwlInitGlobalParams
1053 *
1054 * @brief
1055 * Initializes global parameters
1056 *
1057 * @return
1058 * TRUE if all settings are valid
1059 *
1060 ************************************************************************************************************************
1061 */
1062 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1063 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1064 {
1065 BOOL_32 valid = TRUE;
1066
1067 if (m_settings.isArcticIsland)
1068 {
1069 GB_ADDR_CONFIG gbAddrConfig;
1070
1071 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1072
1073 // These values are copied from CModel code
1074 switch (gbAddrConfig.bits.NUM_PIPES)
1075 {
1076 case ADDR_CONFIG_1_PIPE:
1077 m_pipes = 1;
1078 m_pipesLog2 = 0;
1079 break;
1080 case ADDR_CONFIG_2_PIPE:
1081 m_pipes = 2;
1082 m_pipesLog2 = 1;
1083 break;
1084 case ADDR_CONFIG_4_PIPE:
1085 m_pipes = 4;
1086 m_pipesLog2 = 2;
1087 break;
1088 case ADDR_CONFIG_8_PIPE:
1089 m_pipes = 8;
1090 m_pipesLog2 = 3;
1091 break;
1092 case ADDR_CONFIG_16_PIPE:
1093 m_pipes = 16;
1094 m_pipesLog2 = 4;
1095 break;
1096 case ADDR_CONFIG_32_PIPE:
1097 m_pipes = 32;
1098 m_pipesLog2 = 5;
1099 break;
1100 default:
1101 ADDR_ASSERT_ALWAYS();
1102 break;
1103 }
1104
1105 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1106 {
1107 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1108 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1109 m_pipeInterleaveLog2 = 8;
1110 break;
1111 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1112 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1113 m_pipeInterleaveLog2 = 9;
1114 break;
1115 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1116 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1117 m_pipeInterleaveLog2 = 10;
1118 break;
1119 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1120 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1121 m_pipeInterleaveLog2 = 11;
1122 break;
1123 default:
1124 ADDR_ASSERT_ALWAYS();
1125 break;
1126 }
1127
1128 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1129 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1130 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1131
1132 switch (gbAddrConfig.bits.NUM_BANKS)
1133 {
1134 case ADDR_CONFIG_1_BANK:
1135 m_banks = 1;
1136 m_banksLog2 = 0;
1137 break;
1138 case ADDR_CONFIG_2_BANK:
1139 m_banks = 2;
1140 m_banksLog2 = 1;
1141 break;
1142 case ADDR_CONFIG_4_BANK:
1143 m_banks = 4;
1144 m_banksLog2 = 2;
1145 break;
1146 case ADDR_CONFIG_8_BANK:
1147 m_banks = 8;
1148 m_banksLog2 = 3;
1149 break;
1150 case ADDR_CONFIG_16_BANK:
1151 m_banks = 16;
1152 m_banksLog2 = 4;
1153 break;
1154 default:
1155 ADDR_ASSERT_ALWAYS();
1156 break;
1157 }
1158
1159 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1160 {
1161 case ADDR_CONFIG_1_SHADER_ENGINE:
1162 m_se = 1;
1163 m_seLog2 = 0;
1164 break;
1165 case ADDR_CONFIG_2_SHADER_ENGINE:
1166 m_se = 2;
1167 m_seLog2 = 1;
1168 break;
1169 case ADDR_CONFIG_4_SHADER_ENGINE:
1170 m_se = 4;
1171 m_seLog2 = 2;
1172 break;
1173 case ADDR_CONFIG_8_SHADER_ENGINE:
1174 m_se = 8;
1175 m_seLog2 = 3;
1176 break;
1177 default:
1178 ADDR_ASSERT_ALWAYS();
1179 break;
1180 }
1181
1182 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1183 {
1184 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1185 m_rbPerSe = 1;
1186 m_rbPerSeLog2 = 0;
1187 break;
1188 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1189 m_rbPerSe = 2;
1190 m_rbPerSeLog2 = 1;
1191 break;
1192 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1193 m_rbPerSe = 4;
1194 m_rbPerSeLog2 = 2;
1195 break;
1196 default:
1197 ADDR_ASSERT_ALWAYS();
1198 break;
1199 }
1200
1201 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1202 {
1203 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1204 m_maxCompFrag = 1;
1205 m_maxCompFragLog2 = 0;
1206 break;
1207 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1208 m_maxCompFrag = 2;
1209 m_maxCompFragLog2 = 1;
1210 break;
1211 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1212 m_maxCompFrag = 4;
1213 m_maxCompFragLog2 = 2;
1214 break;
1215 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1216 m_maxCompFrag = 8;
1217 m_maxCompFragLog2 = 3;
1218 break;
1219 default:
1220 ADDR_ASSERT_ALWAYS();
1221 break;
1222 }
1223
1224 m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1225 ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1226 ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1227 m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1228
1229 if ((m_rbPerSeLog2 == 1) &&
1230 (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1231 ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1232 {
1233 ADDR_ASSERT(m_settings.isVega10 == FALSE);
1234 ADDR_ASSERT(m_settings.isRaven == FALSE);
1235 ADDR_ASSERT(m_settings.isVega20 == FALSE);
1236
1237 if (m_settings.isVega12)
1238 {
1239 m_settings.htileCacheRbConflict = 1;
1240 }
1241 }
1242 }
1243 else
1244 {
1245 valid = FALSE;
1246 ADDR_NOT_IMPLEMENTED();
1247 }
1248
1249 if (valid)
1250 {
1251 InitEquationTable();
1252 }
1253
1254 return valid;
1255 }
1256
1257 /**
1258 ************************************************************************************************************************
1259 * Gfx9Lib::HwlConvertChipFamily
1260 *
1261 * @brief
1262 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1263 * @return
1264 * ChipFamily
1265 ************************************************************************************************************************
1266 */
1267 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1268 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1269 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1270 {
1271 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1272
1273 switch (uChipFamily)
1274 {
1275 case FAMILY_AI:
1276 m_settings.isArcticIsland = 1;
1277 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1278 m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1279 m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1280 m_settings.isDce12 = 1;
1281
1282 if (m_settings.isVega10 == 0)
1283 {
1284 m_settings.htileAlignFix = 1;
1285 m_settings.applyAliasFix = 1;
1286 }
1287
1288 m_settings.metaBaseAlignFix = 1;
1289
1290 m_settings.depthPipeXorDisable = 1;
1291 break;
1292 case FAMILY_RV:
1293 m_settings.isArcticIsland = 1;
1294 m_settings.isRaven = ASICREV_IS_RAVEN(uChipRevision) || ASICREV_IS_RAVEN2(uChipRevision);
1295
1296 if (m_settings.isRaven)
1297 {
1298 m_settings.isDcn1 = 1;
1299 }
1300
1301 m_settings.metaBaseAlignFix = 1;
1302
1303 if (ASICREV_IS_RAVEN(uChipRevision))
1304 {
1305 m_settings.depthPipeXorDisable = 1;
1306 }
1307 break;
1308
1309 default:
1310 ADDR_ASSERT(!"This should be a Fusion");
1311 break;
1312 }
1313
1314 return family;
1315 }
1316
1317 /**
1318 ************************************************************************************************************************
1319 * Gfx9Lib::InitRbEquation
1320 *
1321 * @brief
1322 * Init RB equation
1323 * @return
1324 * N/A
1325 ************************************************************************************************************************
1326 */
1327 VOID Gfx9Lib::GetRbEquation(
1328 CoordEq* pRbEq, ///< [out] rb equation
1329 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1330 UINT_32 numSeLog2) ///< [in] number of shader engine
1331 const
1332 {
1333 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1334 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1335 Coordinate cx('x', rbRegion);
1336 Coordinate cy('y', rbRegion);
1337
1338 UINT_32 start = 0;
1339 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1340
1341 // Clear the rb equation
1342 pRbEq->resize(0);
1343 pRbEq->resize(numRbTotalLog2);
1344
1345 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1346 {
1347 // Special case when more than 1 SE, and 2 RB per SE
1348 (*pRbEq)[0].add(cx);
1349 (*pRbEq)[0].add(cy);
1350 cx++;
1351 cy++;
1352
1353 if (m_settings.applyAliasFix == false)
1354 {
1355 (*pRbEq)[0].add(cy);
1356 }
1357
1358 (*pRbEq)[0].add(cy);
1359 start++;
1360 }
1361
1362 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1363
1364 for (UINT_32 i = 0; i < numBits; i++)
1365 {
1366 UINT_32 idx =
1367 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1368
1369 if ((i % 2) == 1)
1370 {
1371 (*pRbEq)[idx].add(cx);
1372 cx++;
1373 }
1374 else
1375 {
1376 (*pRbEq)[idx].add(cy);
1377 cy++;
1378 }
1379 }
1380 }
1381
1382 /**
1383 ************************************************************************************************************************
1384 * Gfx9Lib::GetDataEquation
1385 *
1386 * @brief
1387 * Get data equation for fmask and Z
1388 * @return
1389 * N/A
1390 ************************************************************************************************************************
1391 */
1392 VOID Gfx9Lib::GetDataEquation(
1393 CoordEq* pDataEq, ///< [out] data surface equation
1394 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1395 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1396 AddrResourceType resourceType, ///< [in] data surface resource type
1397 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1398 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1399 const
1400 {
1401 Coordinate cx('x', 0);
1402 Coordinate cy('y', 0);
1403 Coordinate cz('z', 0);
1404 Coordinate cs('s', 0);
1405
1406 // Clear the equation
1407 pDataEq->resize(0);
1408 pDataEq->resize(27);
1409
1410 if (dataSurfaceType == Gfx9DataColor)
1411 {
1412 if (IsLinear(swizzleMode))
1413 {
1414 Coordinate cm('m', 0);
1415
1416 pDataEq->resize(49);
1417
1418 for (UINT_32 i = 0; i < 49; i++)
1419 {
1420 (*pDataEq)[i].add(cm);
1421 cm++;
1422 }
1423 }
1424 else if (IsThick(resourceType, swizzleMode))
1425 {
1426 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1427 UINT_32 i;
1428 if (IsStandardSwizzle(resourceType, swizzleMode))
1429 {
1430 // Standard 3d swizzle
1431 // Fill in bottom x bits
1432 for (i = elementBytesLog2; i < 4; i++)
1433 {
1434 (*pDataEq)[i].add(cx);
1435 cx++;
1436 }
1437 // Fill in 2 bits of y and then z
1438 for (i = 4; i < 6; i++)
1439 {
1440 (*pDataEq)[i].add(cy);
1441 cy++;
1442 }
1443 for (i = 6; i < 8; i++)
1444 {
1445 (*pDataEq)[i].add(cz);
1446 cz++;
1447 }
1448 if (elementBytesLog2 < 2)
1449 {
1450 // fill in z & y bit
1451 (*pDataEq)[8].add(cz);
1452 (*pDataEq)[9].add(cy);
1453 cz++;
1454 cy++;
1455 }
1456 else if (elementBytesLog2 == 2)
1457 {
1458 // fill in y and x bit
1459 (*pDataEq)[8].add(cy);
1460 (*pDataEq)[9].add(cx);
1461 cy++;
1462 cx++;
1463 }
1464 else
1465 {
1466 // fill in 2 x bits
1467 (*pDataEq)[8].add(cx);
1468 cx++;
1469 (*pDataEq)[9].add(cx);
1470 cx++;
1471 }
1472 }
1473 else
1474 {
1475 // Z 3d swizzle
1476 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1477 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1478 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1479 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1480 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1481 {
1482 (*pDataEq)[i].add(cz);
1483 cz++;
1484 }
1485 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1486 {
1487 // add an x and z
1488 (*pDataEq)[6].add(cx);
1489 (*pDataEq)[7].add(cz);
1490 cx++;
1491 cz++;
1492 }
1493 else if (elementBytesLog2 == 2)
1494 {
1495 // add a y and z
1496 (*pDataEq)[6].add(cy);
1497 (*pDataEq)[7].add(cz);
1498 cy++;
1499 cz++;
1500 }
1501 // add y and x
1502 (*pDataEq)[8].add(cy);
1503 (*pDataEq)[9].add(cx);
1504 cy++;
1505 cx++;
1506 }
1507 // Fill in bit 10 and up
1508 pDataEq->mort3d( cz, cy, cx, 10 );
1509 }
1510 else if (IsThin(resourceType, swizzleMode))
1511 {
1512 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1513 // Color 2D
1514 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1515 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1516 UINT_32 i;
1517 // Fill in bottom x bits
1518 for (i = elementBytesLog2; i < 4; i++)
1519 {
1520 (*pDataEq)[i].add(cx);
1521 cx++;
1522 }
1523 // Fill in bottom y bits
1524 for (i = 4; i < 4 + microYBits; i++)
1525 {
1526 (*pDataEq)[i].add(cy);
1527 cy++;
1528 }
1529 // Fill in last of the micro_x bits
1530 for (i = 4 + microYBits; i < 8; i++)
1531 {
1532 (*pDataEq)[i].add(cx);
1533 cx++;
1534 }
1535 // Fill in x/y bits below sample split
1536 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1537 // Fill in sample bits
1538 for (i = 0; i < numSamplesLog2; i++)
1539 {
1540 cs.set('s', i);
1541 (*pDataEq)[tileSplitStart + i].add(cs);
1542 }
1543 // Fill in x/y bits above sample split
1544 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1545 {
1546 pDataEq->mort2d(cx, cy, blockSizeLog2);
1547 }
1548 else
1549 {
1550 pDataEq->mort2d(cy, cx, blockSizeLog2);
1551 }
1552 }
1553 else
1554 {
1555 ADDR_ASSERT_ALWAYS();
1556 }
1557 }
1558 else
1559 {
1560 // Fmask or depth
1561 UINT_32 sampleStart = elementBytesLog2;
1562 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1563 UINT_32 ymajStart = 6 + numSamplesLog2;
1564
1565 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1566 {
1567 cs.set('s', s);
1568 (*pDataEq)[sampleStart + s].add(cs);
1569 }
1570
1571 // Put in the x-major order pixel bits
1572 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1573 // Put in the y-major order pixel bits
1574 pDataEq->mort2d(cy, cx, ymajStart);
1575 }
1576 }
1577
1578 /**
1579 ************************************************************************************************************************
1580 * Gfx9Lib::GetPipeEquation
1581 *
1582 * @brief
1583 * Get pipe equation
1584 * @return
1585 * N/A
1586 ************************************************************************************************************************
1587 */
1588 VOID Gfx9Lib::GetPipeEquation(
1589 CoordEq* pPipeEq, ///< [out] pipe equation
1590 CoordEq* pDataEq, ///< [in] data equation
1591 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1592 UINT_32 numPipeLog2, ///< [in] number of pipes
1593 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1594 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1595 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1596 AddrResourceType resourceType ///< [in] data surface resource type
1597 ) const
1598 {
1599 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1600 CoordEq dataEq;
1601
1602 pDataEq->copy(dataEq);
1603
1604 if (dataSurfaceType == Gfx9DataColor)
1605 {
1606 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1607 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1608 }
1609
1610 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1611
1612 // This section should only apply to z/stencil, maybe fmask
1613 // If the pipe bit is below the comp block size,
1614 // then keep moving up the address until we find a bit that is above
1615 UINT_32 pipeStart = 0;
1616
1617 if (dataSurfaceType != Gfx9DataColor)
1618 {
1619 Coordinate tileMin('x', 3);
1620
1621 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1622 {
1623 pipeStart++;
1624 }
1625
1626 // if pipe is 0, then the first pipe bit is above the comp block size,
1627 // so we don't need to do anything
1628 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1629 // we will get the same pipe equation
1630 if (pipeStart != 0)
1631 {
1632 for (UINT_32 i = 0; i < numPipeLog2; i++)
1633 {
1634 // Copy the jth bit above pipe interleave to the current pipe equation bit
1635 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1636 }
1637 }
1638 }
1639
1640 if (IsPrt(swizzleMode))
1641 {
1642 // Clear out bits above the block size if prt's are enabled
1643 dataEq.resize(blockSizeLog2);
1644 dataEq.resize(48);
1645 }
1646
1647 if (IsXor(swizzleMode))
1648 {
1649 CoordEq xorMask;
1650
1651 if (IsThick(resourceType, swizzleMode))
1652 {
1653 CoordEq xorMask2;
1654
1655 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1656
1657 xorMask.resize(numPipeLog2);
1658
1659 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1660 {
1661 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1662 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1663 }
1664 }
1665 else
1666 {
1667 // Xor in the bits above the pipe+gpu bits
1668 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1669
1670 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1671 {
1672 Coordinate co;
1673 CoordEq xorMask2;
1674 // if 1xaa and not prt, then xor in the z bits
1675 xorMask2.resize(0);
1676 xorMask2.resize(numPipeLog2);
1677 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1678 {
1679 co.set('z', numPipeLog2 - 1 - pipeIdx);
1680 xorMask2[pipeIdx].add(co);
1681 }
1682
1683 pPipeEq->xorin(xorMask2);
1684 }
1685 }
1686
1687 xorMask.reverse();
1688 pPipeEq->xorin(xorMask);
1689 }
1690 }
1691 /**
1692 ************************************************************************************************************************
1693 * Gfx9Lib::GetMetaEquation
1694 *
1695 * @brief
1696 * Get meta equation for cmask/htile/DCC
1697 * @return
1698 * Pointer to a calculated meta equation
1699 ************************************************************************************************************************
1700 */
1701 const CoordEq* Gfx9Lib::GetMetaEquation(
1702 const MetaEqParams& metaEqParams)
1703 {
1704 UINT_32 cachedMetaEqIndex;
1705
1706 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1707 {
1708 if (memcmp(&metaEqParams,
1709 &m_cachedMetaEqKey[cachedMetaEqIndex],
1710 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1711 {
1712 break;
1713 }
1714 }
1715
1716 CoordEq* pMetaEq = NULL;
1717
1718 if (cachedMetaEqIndex < MaxCachedMetaEq)
1719 {
1720 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1721 }
1722 else
1723 {
1724 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1725
1726 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1727
1728 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1729
1730 GenMetaEquation(pMetaEq,
1731 metaEqParams.maxMip,
1732 metaEqParams.elementBytesLog2,
1733 metaEqParams.numSamplesLog2,
1734 metaEqParams.metaFlag,
1735 metaEqParams.dataSurfaceType,
1736 metaEqParams.swizzleMode,
1737 metaEqParams.resourceType,
1738 metaEqParams.metaBlkWidthLog2,
1739 metaEqParams.metaBlkHeightLog2,
1740 metaEqParams.metaBlkDepthLog2,
1741 metaEqParams.compBlkWidthLog2,
1742 metaEqParams.compBlkHeightLog2,
1743 metaEqParams.compBlkDepthLog2);
1744 }
1745
1746 return pMetaEq;
1747 }
1748
1749 /**
1750 ************************************************************************************************************************
1751 * Gfx9Lib::GenMetaEquation
1752 *
1753 * @brief
1754 * Generate the meta equation for cmask/htile/DCC into pMetaEq. The data and pipe equations are derived
1754 * from the surface parameters, the RB equation from the RB/SE counts (when metaFlag.rbAligned is set),
1754 * and the result is assembled as a 49-bit nibble address equation.
1755 * @return
1756 * N/A
1757 ************************************************************************************************************************
1758 */
1759 VOID Gfx9Lib::GenMetaEquation(
1760 CoordEq* pMetaEq, ///< [out] meta equation
1761 UINT_32 maxMip, ///< [in] max mip Id
1762 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1763 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1764 ADDR2_META_FLAGS metaFlag, ///< [in] meta flag
1765 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1766 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1767 AddrResourceType resourceType, ///< [in] data surface resource type
1768 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1769 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1770 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1771 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1772 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1773 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1774 const
1775 {
1776 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1777 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1778
1779 // Get the correct data address and rb equation
1780 CoordEq dataEq;
1781 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1782 elementBytesLog2, numSamplesLog2);
1783
1784 // Get pipe and rb equations
1785 CoordEq pipeEquation;
1786 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1787 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
// From here on the pipe bit count is whatever size the generated pipe equation reports.
1788 numPipeTotalLog2 = pipeEquation.getsize();
1789
1790 if (metaFlag.linear)
1791 {
1792 // Linear metadata support was removed for GFX9! No one can use this feature.
// The assert below therefore always fires on this path; the remaining code in this branch is kept as-is.
1793 ADDR_ASSERT_ALWAYS();
1794
1795 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1796
1797 dataEq.copy(*pMetaEq);
1798
1799 if (IsLinear(swizzleMode))
1800 {
1801 if (metaFlag.pipeAligned)
1802 {
1803 // Remove the pipe bits
1804 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1805 pMetaEq->shift(-shift, pipeInterleaveLog2);
1806 }
1807 // Divide by comp block size, which for linear (which is always color) is 256 B
1808 pMetaEq->shift(-8);
1809
1810 if (metaFlag.pipeAligned)
1811 {
1812 // Put pipe bits back in
1813 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1814
1815 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1816 {
1817 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1818 }
1819 }
1820 }
1821
1822 pMetaEq->shift(1);
1823 }
1824 else
1825 {
// For color surfaces the compressible fragment count is capped at m_maxCompFragLog2;
// any sample bits beyond that cap are counted as uncompressed fragments.
1826 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1827 UINT_32 compFragLog2 =
1828 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1829 maxCompFragLog2 : numSamplesLog2;
1830
1831 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1832
1833 // Make sure the metaaddr is cleared
1834 pMetaEq->resize(0);
1835 pMetaEq->resize(27);
1836
// Seed the meta equation with a morton interleave of the coordinates (3d for thick, 2d otherwise).
// The x/y order passed to the interleave is swapped when maxMip > 0.
1837 if (IsThick(resourceType, swizzleMode))
1838 {
1839 Coordinate cx('x', 0);
1840 Coordinate cy('y', 0);
1841 Coordinate cz('z', 0);
1842
1843 if (maxMip > 0)
1844 {
1845 pMetaEq->mort3d(cy, cx, cz);
1846 }
1847 else
1848 {
1849 pMetaEq->mort3d(cx, cy, cz);
1850 }
1851 }
1852 else
1853 {
1854 Coordinate cx('x', 0);
1855 Coordinate cy('y', 0);
1856 Coordinate cs;
1857
1858 if (maxMip > 0)
1859 {
1860 pMetaEq->mort2d(cy, cx, compFragLog2);
1861 }
1862 else
1863 {
1864 pMetaEq->mort2d(cx, cy, compFragLog2);
1865 }
1866
1867 //------------------------------------------------------------------------------------------------------------------------
1868 // Put the compressible fragments at the lsb
1869 // the uncompressible frags will be at the msb of the micro address
1870 //------------------------------------------------------------------------------------------------------------------------
1871 for (UINT_32 s = 0; s < compFragLog2; s++)
1872 {
1873 cs.set('s', s);
1874 (*pMetaEq)[s].add(cs);
1875 }
1876 }
1877
1878 // Keep a copy of the pipe equations
// (pipeEquation is filtered and reduced below; the unmodified copy is what gets written into the result)
1879 CoordEq origPipeEquation;
1880 pipeEquation.copy(origPipeEquation);
1881
1882 Coordinate co;
1883 // filter out everything under the compressed block size
1884 co.set('x', compBlkWidthLog2);
1885 pMetaEq->Filter('<', co, 0, 'x');
1886 co.set('y', compBlkHeightLog2);
1887 pMetaEq->Filter('<', co, 0, 'y');
1888 co.set('z', compBlkDepthLog2);
1889 pMetaEq->Filter('<', co, 0, 'z');
1890
1891 // For non-color, filter out sample bits
1892 if (dataSurfaceType != Gfx9DataColor)
1893 {
1894 co.set('x', 0);
1895 pMetaEq->Filter('<', co, 0, 's');
1896 }
1897
1898 // filter out everything above the metablock size
1899 co.set('x', metaBlkWidthLog2 - 1);
1900 pMetaEq->Filter('>', co, 0, 'x');
1901 co.set('y', metaBlkHeightLog2 - 1);
1902 pMetaEq->Filter('>', co, 0, 'y');
1903 co.set('z', metaBlkDepthLog2 - 1);
1904 pMetaEq->Filter('>', co, 0, 'z');
1905
1906 // filter out everything above the metablock size for the channel bits
1907 co.set('x', metaBlkWidthLog2 - 1);
1908 pipeEquation.Filter('>', co, 0, 'x');
1909 co.set('y', metaBlkHeightLog2 - 1);
1910 pipeEquation.Filter('>', co, 0, 'y');
1911 co.set('z', metaBlkDepthLog2 - 1);
1912 pipeEquation.Filter('>', co, 0, 'z');
1913
1914 // Make sure we still have the same number of channel bits
1915 if (pipeEquation.getsize() != numPipeTotalLog2)
1916 {
1917 ADDR_ASSERT_ALWAYS();
1918 }
1919
1920 // Loop through all channel and rb bits,
1921 // and make sure these components exist in the metadata address
1922 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1923 {
1924 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1925 {
1926 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1927 {
1928 ADDR_ASSERT_ALWAYS();
1929 }
1930 }
1931 }
1932
// When the metadata is not RB aligned both counts are 0, so the RB equation is empty
// and every RB loop below runs zero iterations.
1933 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1934 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1935 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1936 CoordEq origRbEquation;
1937
1938 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1939
// Working copy that is reduced below; origRbEquation stays intact for the final write-back.
1940 CoordEq rbEquation = origRbEquation;
1941
// Same existence check as for the pipe bits, now for every RB equation term.
1942 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1943 {
1944 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1945 {
1946 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1947 {
1948 ADDR_ASSERT_ALWAYS();
1949 }
1950 }
1951 }
1952
// With the alias fix, 'co' becomes the reference coordinate for the z filter applied to each pipe bit below.
// NOTE(review): presumably Filter('>', z[-1]) strips every z term before the comparison - confirm against
// CoordTerm::Filter.
1953 if (m_settings.applyAliasFix)
1954 {
1955 co.set('z', -1);
1956 }
1957
1958 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1959 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1960 {
1961 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1962 {
1963 BOOL_32 isRbEquationInPipeEquation = FALSE;
1964
1965 if (m_settings.applyAliasFix)
1966 {
1967 CoordTerm filteredPipeEq;
1968 filteredPipeEq = pipeEquation[j];
1969
1970 filteredPipeEq.Filter('>', co, 0, 'z');
1971
1972 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1973 }
1974 else
1975 {
1976 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1977 }
1978
1979 if (isRbEquationInPipeEquation)
1980 {
1981 rbEquation[i].Clear();
1982 }
1983 }
1984 }
1985
// Tracks, per RB bit, whether leftover pipe terms were appended to it in the loop below.
// NOTE(review): the array is sized by the maximum RB count but is only indexed by RB bit index
// (< numRbTotalLog2) - confirm the sizing is intentional.
1986 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
1987
1988 // Loop through each bit of the channel, get the smallest coordinate,
1989 // and remove it from the metaaddr, and rb_equation
1990 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1991 {
1992 pipeEquation[i].getsmallest(co);
1993
// Removing the coordinate is expected to shrink the meta equation by exactly one bit.
1994 UINT_32 old_size = pMetaEq->getsize();
1995 pMetaEq->Filter('=', co);
1996 UINT_32 new_size = pMetaEq->getsize();
1997 if (new_size != old_size-1)
1998 {
1999 ADDR_ASSERT_ALWAYS();
2000 }
2001 pipeEquation.remove(co);
2002 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2003 {
2004 if (rbEquation[j].remove(co))
2005 {
2006 // if we actually removed something from this bit, then add the remaining
2007 // channel bits, as these can be removed for this bit
2008 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2009 {
2010 if (pipeEquation[i][k] != co)
2011 {
2012 rbEquation[j].add(pipeEquation[i][k]);
2013 rbAppendedWithPipeBits[j] = true;
2014 }
2015 }
2016 }
2017 }
2018 }
2019
2020 // Loop through the rb bits and see what remain;
2021 // filter out the smallest coordinate if it remains
2022 UINT_32 rbBitsLeft = 0;
2023 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2024 {
2025 BOOL_32 isRbEqAppended = FALSE;
2026
// With the alias fix, an RB bit that had pipe terms appended needs more than one term left to count
// as remaining; otherwise any non-empty RB bit counts.
2027 if (m_settings.applyAliasFix)
2028 {
2029 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2030 }
2031 else
2032 {
2033 isRbEqAppended = (rbEquation[i].getsize() > 0);
2034 }
2035
2036 if (isRbEqAppended)
2037 {
2038 rbBitsLeft++;
2039 rbEquation[i].getsmallest(co);
2040 UINT_32 old_size = pMetaEq->getsize();
2041 pMetaEq->Filter('=', co);
2042 UINT_32 new_size = pMetaEq->getsize();
// Unlike the pipe-bit loop above, a size mismatch here is deliberately not asserted on.
2043 if (new_size != old_size - 1)
2044 {
2045 // assert warning
2046 }
2047 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2048 {
2049 if (rbEquation[j].remove(co))
2050 {
2051 // if we actually removed something from this bit, then add the remaining
2052 // rb bits, as these can be removed for this bit
2053 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2054 {
2055 if (rbEquation[i][k] != co)
2056 {
2057 rbEquation[j].add(rbEquation[i][k]);
2058 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2059 }
2060 }
2061 }
2062 }
2063 }
2064 }
2065
2066 // capture the size of the metaaddr
2067 UINT_32 metaSize = pMetaEq->getsize();
2068 // resize to 49 bits...make this a nibble address
2069 pMetaEq->resize(49);
2070 // Concatenate the macro address above the current address
2071 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2072 {
2073 co.set('m', j);
2074 (*pMetaEq)[i].add(co);
2075 }
2076
2077 // Multiply by meta element size (in nibbles)
// i.e. shift by 1 bit for color and by 3 bits for depth/stencil; other surface types are not shifted here.
2078 if (dataSurfaceType == Gfx9DataColor)
2079 {
2080 pMetaEq->shift(1);
2081 }
2082 else if (dataSurfaceType == Gfx9DataDepthStencil)
2083 {
2084 pMetaEq->shift(3);
2085 }
2086
2087 //------------------------------------------------------------------------------------------
2088 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2089 // Shift up from pipe interleave number of channel
2090 // and rb bits left, and uncompressed fragments
2091 //------------------------------------------------------------------------------------------
2092
2093 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2094
2095 // Put in the channel bits
2096 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2097 {
2098 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2099 }
2100
2101 // Put in remaining rb bits
// The selection test repeats the one used when counting rbBitsLeft, so exactly rbBitsLeft bits are written.
// When numRbTotalLog2 is 0, rbBitsLeft is 0 as well, so the modulo in the increment is never evaluated.
2102 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2103 {
2104 BOOL_32 isRbEqAppended = FALSE;
2105
2106 if (m_settings.applyAliasFix)
2107 {
2108 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2109 }
2110 else
2111 {
2112 isRbEqAppended = (rbEquation[i].getsize() > 0);
2113 }
2114
2115 if (isRbEqAppended)
2116 {
2117 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2118 // Mark any rb bit we add in to the rb mask
2119 j++;
2120 }
2121 }
2122
2123 //------------------------------------------------------------------------------------------
2124 // Put in the uncompressed fragment bits
2125 //------------------------------------------------------------------------------------------
2126 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2127 {
2128 co.set('s', compFragLog2 + i);
2129 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2130 }
2131 }
2132 }
2133
2134 /**
2135 ************************************************************************************************************************
2136 * Gfx9Lib::IsEquationSupported
2137 *
2138 * @brief
2139 * Check if equation is supported for given swizzle mode and resource type.
2140 *
2141 * @return
2142 * TRUE if supported
2143 ************************************************************************************************************************
2144 */
2145 BOOL_32 Gfx9Lib::IsEquationSupported(
2146 AddrResourceType rsrcType,
2147 AddrSwizzleMode swMode,
2148 UINT_32 elementBytesLog2) const
2149 {
2150 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2151 (IsLinear(swMode) == FALSE) &&
2152 (((IsTex2d(rsrcType) == TRUE) &&
2153 ((elementBytesLog2 < 4) ||
2154 ((IsRotateSwizzle(swMode) == FALSE) &&
2155 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2156 ((IsTex3d(rsrcType) == TRUE) &&
2157 (IsRotateSwizzle(swMode) == FALSE) &&
2158 (IsBlock256b(swMode) == FALSE)));
2159
2160 return supported;
2161 }
2162
2163 /**
2164 ************************************************************************************************************************
2165 * Gfx9Lib::InitEquationTable
2166 *
2167 * @brief
2168 * Initialize Equation table.
2169 *
2170 * @return
2171 * N/A
2172 ************************************************************************************************************************
2173 */
2174 VOID Gfx9Lib::InitEquationTable()
2175 {
2176 memset(m_equationTable, 0, sizeof(m_equationTable));
2177
2178 // Loop all possible resource type (2D/3D)
2179 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2180 {
2181 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2182
2183 // Loop all possible swizzle mode
2184 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
2185 {
2186 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2187
2188 // Loop all possible bpp
2189 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2190 {
2191 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2192
2193 // Check if the input is supported
2194 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2195 {
2196 ADDR_EQUATION equation;
2197 ADDR_E_RETURNCODE retCode;
2198
2199 memset(&equation, 0, sizeof(ADDR_EQUATION));
2200
2201 // Generate the equation
2202 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2203 {
2204 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2205 }
2206 else if (IsThin(rsrcType, swMode))
2207 {
2208 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2209 }
2210 else
2211 {
2212 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2213 }
2214
2215 // Only fill the equation into the table if the return code is ADDR_OK,
2216 // otherwise if the return code is not ADDR_OK, it indicates this is not
2217 // a valid input, we do nothing but just fill invalid equation index
2218 // into the lookup table.
2219 if (retCode == ADDR_OK)
2220 {
2221 equationIndex = m_numEquations;
2222 ADDR_ASSERT(equationIndex < EquationTableSize);
2223
2224 m_equationTable[equationIndex] = equation;
2225
2226 m_numEquations++;
2227 }
2228 else
2229 {
2230 ADDR_ASSERT_ALWAYS();
2231 }
2232 }
2233
2234 // Fill the index into the lookup table, if the combination is not supported
2235 // fill the invalid equation index
2236 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2237 }
2238 }
2239 }
2240 }
2241
2242 /**
2243 ************************************************************************************************************************
2244 * Gfx9Lib::HwlGetEquationIndex
2245 *
2246 * @brief
2247 * Interface function stub of GetEquationIndex
2248 *
2249 * @return
2250 * ADDR_E_RETURNCODE
2251 ************************************************************************************************************************
2252 */
2253 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2254 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2255 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2256 ) const
2257 {
2258 AddrResourceType rsrcType = pIn->resourceType;
2259 AddrSwizzleMode swMode = pIn->swizzleMode;
2260 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2261 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2262
2263 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2264 {
2265 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2266 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2267
2268 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2269 }
2270
2271 if (pOut->pMipInfo != NULL)
2272 {
2273 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2274 {
2275 pOut->pMipInfo[i].equationIndex = index;
2276 }
2277 }
2278
2279 return index;
2280 }
2281
2282 /**
2283 ************************************************************************************************************************
2284 * Gfx9Lib::HwlComputeBlock256Equation
2285 *
2286 * @brief
2287 * Interface function stub of ComputeBlock256Equation
2288 *
2289 * @return
2290 * ADDR_E_RETURNCODE
2291 ************************************************************************************************************************
2292 */
2293 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2294 AddrResourceType rsrcType,
2295 AddrSwizzleMode swMode,
2296 UINT_32 elementBytesLog2,
2297 ADDR_EQUATION* pEquation) const
2298 {
2299 ADDR_E_RETURNCODE ret = ADDR_OK;
2300
2301 pEquation->numBits = 8;
2302
2303 UINT_32 i = 0;
2304 for (; i < elementBytesLog2; i++)
2305 {
2306 InitChannel(1, 0 , i, &pEquation->addr[i]);
2307 }
2308
2309 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2310
2311 const UINT_32 maxBitsUsed = 4;
2312 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2313 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2314
2315 for (i = 0; i < maxBitsUsed; i++)
2316 {
2317 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2318 InitChannel(1, 1, i, &y[i]);
2319 }
2320
2321 if (IsStandardSwizzle(rsrcType, swMode))
2322 {
2323 switch (elementBytesLog2)
2324 {
2325 case 0:
2326 pixelBit[0] = x[0];
2327 pixelBit[1] = x[1];
2328 pixelBit[2] = x[2];
2329 pixelBit[3] = x[3];
2330 pixelBit[4] = y[0];
2331 pixelBit[5] = y[1];
2332 pixelBit[6] = y[2];
2333 pixelBit[7] = y[3];
2334 break;
2335 case 1:
2336 pixelBit[0] = x[0];
2337 pixelBit[1] = x[1];
2338 pixelBit[2] = x[2];
2339 pixelBit[3] = y[0];
2340 pixelBit[4] = y[1];
2341 pixelBit[5] = y[2];
2342 pixelBit[6] = x[3];
2343 break;
2344 case 2:
2345 pixelBit[0] = x[0];
2346 pixelBit[1] = x[1];
2347 pixelBit[2] = y[0];
2348 pixelBit[3] = y[1];
2349 pixelBit[4] = y[2];
2350 pixelBit[5] = x[2];
2351 break;
2352 case 3:
2353 pixelBit[0] = x[0];
2354 pixelBit[1] = y[0];
2355 pixelBit[2] = y[1];
2356 pixelBit[3] = x[1];
2357 pixelBit[4] = x[2];
2358 break;
2359 case 4:
2360 pixelBit[0] = y[0];
2361 pixelBit[1] = y[1];
2362 pixelBit[2] = x[0];
2363 pixelBit[3] = x[1];
2364 break;
2365 default:
2366 ADDR_ASSERT_ALWAYS();
2367 ret = ADDR_INVALIDPARAMS;
2368 break;
2369 }
2370 }
2371 else if (IsDisplaySwizzle(rsrcType, swMode))
2372 {
2373 switch (elementBytesLog2)
2374 {
2375 case 0:
2376 pixelBit[0] = x[0];
2377 pixelBit[1] = x[1];
2378 pixelBit[2] = x[2];
2379 pixelBit[3] = y[1];
2380 pixelBit[4] = y[0];
2381 pixelBit[5] = y[2];
2382 pixelBit[6] = x[3];
2383 pixelBit[7] = y[3];
2384 break;
2385 case 1:
2386 pixelBit[0] = x[0];
2387 pixelBit[1] = x[1];
2388 pixelBit[2] = x[2];
2389 pixelBit[3] = y[0];
2390 pixelBit[4] = y[1];
2391 pixelBit[5] = y[2];
2392 pixelBit[6] = x[3];
2393 break;
2394 case 2:
2395 pixelBit[0] = x[0];
2396 pixelBit[1] = x[1];
2397 pixelBit[2] = y[0];
2398 pixelBit[3] = x[2];
2399 pixelBit[4] = y[1];
2400 pixelBit[5] = y[2];
2401 break;
2402 case 3:
2403 pixelBit[0] = x[0];
2404 pixelBit[1] = y[0];
2405 pixelBit[2] = x[1];
2406 pixelBit[3] = x[2];
2407 pixelBit[4] = y[1];
2408 break;
2409 case 4:
2410 pixelBit[0] = x[0];
2411 pixelBit[1] = y[0];
2412 pixelBit[2] = x[1];
2413 pixelBit[3] = y[1];
2414 break;
2415 default:
2416 ADDR_ASSERT_ALWAYS();
2417 ret = ADDR_INVALIDPARAMS;
2418 break;
2419 }
2420 }
2421 else if (IsRotateSwizzle(swMode))
2422 {
2423 switch (elementBytesLog2)
2424 {
2425 case 0:
2426 pixelBit[0] = y[0];
2427 pixelBit[1] = y[1];
2428 pixelBit[2] = y[2];
2429 pixelBit[3] = x[1];
2430 pixelBit[4] = x[0];
2431 pixelBit[5] = x[2];
2432 pixelBit[6] = x[3];
2433 pixelBit[7] = y[3];
2434 break;
2435 case 1:
2436 pixelBit[0] = y[0];
2437 pixelBit[1] = y[1];
2438 pixelBit[2] = y[2];
2439 pixelBit[3] = x[0];
2440 pixelBit[4] = x[1];
2441 pixelBit[5] = x[2];
2442 pixelBit[6] = x[3];
2443 break;
2444 case 2:
2445 pixelBit[0] = y[0];
2446 pixelBit[1] = y[1];
2447 pixelBit[2] = x[0];
2448 pixelBit[3] = y[2];
2449 pixelBit[4] = x[1];
2450 pixelBit[5] = x[2];
2451 break;
2452 case 3:
2453 pixelBit[0] = y[0];
2454 pixelBit[1] = x[0];
2455 pixelBit[2] = y[1];
2456 pixelBit[3] = x[1];
2457 pixelBit[4] = x[2];
2458 break;
2459 default:
2460 ADDR_ASSERT_ALWAYS();
2461 case 4:
2462 ret = ADDR_INVALIDPARAMS;
2463 break;
2464 }
2465 }
2466 else
2467 {
2468 ADDR_ASSERT_ALWAYS();
2469 ret = ADDR_INVALIDPARAMS;
2470 }
2471
2472 // Post validation
2473 if (ret == ADDR_OK)
2474 {
2475 MAYBE_UNUSED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2476 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2477 (microBlockDim.w * (1 << elementBytesLog2)));
2478 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2479 }
2480
2481 return ret;
2482 }
2483
2484 /**
2485 ************************************************************************************************************************
2486 * Gfx9Lib::HwlComputeThinEquation
2487 *
2488 * @brief
2489 * Interface function stub of ComputeThinEquation
2490 *
2491 * @return
2492 * ADDR_E_RETURNCODE
2493 ************************************************************************************************************************
2494 */
2495 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2496 AddrResourceType rsrcType,
2497 AddrSwizzleMode swMode,
2498 UINT_32 elementBytesLog2,
2499 ADDR_EQUATION* pEquation) const
2500 {
2501 ADDR_E_RETURNCODE ret = ADDR_OK;
2502
2503 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2504
2505 UINT_32 maxXorBits = blockSizeLog2;
2506 if (IsNonPrtXor(swMode))
2507 {
2508 // For non-prt-xor, maybe need to initialize some more bits for xor
2509 // The highest xor bit used in equation will be max the following 3 items:
2510 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2511 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2512 // 3. blockSizeLog2
2513
2514 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2515 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2516 GetPipeXorBits(blockSizeLog2) +
2517 2 * GetBankXorBits(blockSizeLog2));
2518 }
2519
2520 const UINT_32 maxBitsUsed = 14;
2521 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2522 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2523 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2524
2525 const UINT_32 extraXorBits = 16;
2526 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2527 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2528
2529 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2530 {
2531 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2532 InitChannel(1, 1, i, &y[i]);
2533 }
2534
2535 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2536
2537 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2538 {
2539 InitChannel(1, 0 , i, &pixelBit[i]);
2540 }
2541
2542 UINT_32 xIdx = 0;
2543 UINT_32 yIdx = 0;
2544 UINT_32 lowBits = 0;
2545
2546 if (IsZOrderSwizzle(swMode))
2547 {
2548 if (elementBytesLog2 <= 3)
2549 {
2550 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2551 {
2552 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2553 }
2554
2555 lowBits = 6;
2556 }
2557 else
2558 {
2559 ret = ADDR_INVALIDPARAMS;
2560 }
2561 }
2562 else
2563 {
2564 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2565
2566 if (ret == ADDR_OK)
2567 {
2568 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2569 xIdx = Log2(microBlockDim.w);
2570 yIdx = Log2(microBlockDim.h);
2571 lowBits = 8;
2572 }
2573 }
2574
2575 if (ret == ADDR_OK)
2576 {
2577 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2578 {
2579 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2580 }
2581
2582 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2583 {
2584 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2585 }
2586
2587 if (IsXor(swMode))
2588 {
2589 // Fill XOR bits
2590 UINT_32 pipeStart = m_pipeInterleaveLog2;
2591 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2592
2593 UINT_32 bankStart = pipeStart + pipeXorBits;
2594 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2595
2596 for (UINT_32 i = 0; i < pipeXorBits; i++)
2597 {
2598 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2599 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2600 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2601
2602 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2603 }
2604
2605 for (UINT_32 i = 0; i < bankXorBits; i++)
2606 {
2607 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2608 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2609 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2610
2611 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2612 }
2613
2614 if (IsPrt(swMode) == FALSE)
2615 {
2616 for (UINT_32 i = 0; i < pipeXorBits; i++)
2617 {
2618 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2619 }
2620
2621 for (UINT_32 i = 0; i < bankXorBits; i++)
2622 {
2623 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2624 }
2625 }
2626 }
2627
2628 pEquation->numBits = blockSizeLog2;
2629 }
2630
2631 return ret;
2632 }
2633
2634 /**
2635 ************************************************************************************************************************
2636 * Gfx9Lib::HwlComputeThickEquation
2637 *
2638 * @brief
2639 * Interface function stub of ComputeThickEquation
2640 *
2641 * @return
2642 * ADDR_E_RETURNCODE
2643 ************************************************************************************************************************
2644 */
2645 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2646 AddrResourceType rsrcType,
2647 AddrSwizzleMode swMode,
2648 UINT_32 elementBytesLog2,
2649 ADDR_EQUATION* pEquation) const
2650 {
2651 ADDR_E_RETURNCODE ret = ADDR_OK;
2652
2653 ADDR_ASSERT(IsTex3d(rsrcType));
2654
2655 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2656
2657 UINT_32 maxXorBits = blockSizeLog2;
2658 if (IsNonPrtXor(swMode))
2659 {
2660 // For non-prt-xor, maybe need to initialize some more bits for xor
2661 // The highest xor bit used in equation will be max the following 3:
2662 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2663 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2664 // 3. blockSizeLog2
2665
2666 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2667 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2668 GetPipeXorBits(blockSizeLog2) +
2669 3 * GetBankXorBits(blockSizeLog2));
2670 }
2671
2672 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2673 {
2674 InitChannel(1, 0 , i, &pEquation->addr[i]);
2675 }
2676
2677 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2678
2679 const UINT_32 maxBitsUsed = 12;
2680 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2681 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2682 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2683 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2684
2685 const UINT_32 extraXorBits = 24;
2686 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2687 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2688
2689 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2690 {
2691 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2692 InitChannel(1, 1, i, &y[i]);
2693 InitChannel(1, 2, i, &z[i]);
2694 }
2695
2696 if (IsZOrderSwizzle(swMode))
2697 {
2698 switch (elementBytesLog2)
2699 {
2700 case 0:
2701 pixelBit[0] = x[0];
2702 pixelBit[1] = y[0];
2703 pixelBit[2] = x[1];
2704 pixelBit[3] = y[1];
2705 pixelBit[4] = z[0];
2706 pixelBit[5] = z[1];
2707 pixelBit[6] = x[2];
2708 pixelBit[7] = z[2];
2709 pixelBit[8] = y[2];
2710 pixelBit[9] = x[3];
2711 break;
2712 case 1:
2713 pixelBit[0] = x[0];
2714 pixelBit[1] = y[0];
2715 pixelBit[2] = x[1];
2716 pixelBit[3] = y[1];
2717 pixelBit[4] = z[0];
2718 pixelBit[5] = z[1];
2719 pixelBit[6] = z[2];
2720 pixelBit[7] = y[2];
2721 pixelBit[8] = x[2];
2722 break;
2723 case 2:
2724 pixelBit[0] = x[0];
2725 pixelBit[1] = y[0];
2726 pixelBit[2] = x[1];
2727 pixelBit[3] = z[0];
2728 pixelBit[4] = y[1];
2729 pixelBit[5] = z[1];
2730 pixelBit[6] = y[2];
2731 pixelBit[7] = x[2];
2732 break;
2733 case 3:
2734 pixelBit[0] = x[0];
2735 pixelBit[1] = y[0];
2736 pixelBit[2] = z[0];
2737 pixelBit[3] = x[1];
2738 pixelBit[4] = z[1];
2739 pixelBit[5] = y[1];
2740 pixelBit[6] = x[2];
2741 break;
2742 case 4:
2743 pixelBit[0] = x[0];
2744 pixelBit[1] = y[0];
2745 pixelBit[2] = z[0];
2746 pixelBit[3] = z[1];
2747 pixelBit[4] = y[1];
2748 pixelBit[5] = x[1];
2749 break;
2750 default:
2751 ADDR_ASSERT_ALWAYS();
2752 ret = ADDR_INVALIDPARAMS;
2753 break;
2754 }
2755 }
2756 else if (IsStandardSwizzle(rsrcType, swMode))
2757 {
2758 switch (elementBytesLog2)
2759 {
2760 case 0:
2761 pixelBit[0] = x[0];
2762 pixelBit[1] = x[1];
2763 pixelBit[2] = x[2];
2764 pixelBit[3] = x[3];
2765 pixelBit[4] = y[0];
2766 pixelBit[5] = y[1];
2767 pixelBit[6] = z[0];
2768 pixelBit[7] = z[1];
2769 pixelBit[8] = z[2];
2770 pixelBit[9] = y[2];
2771 break;
2772 case 1:
2773 pixelBit[0] = x[0];
2774 pixelBit[1] = x[1];
2775 pixelBit[2] = x[2];
2776 pixelBit[3] = y[0];
2777 pixelBit[4] = y[1];
2778 pixelBit[5] = z[0];
2779 pixelBit[6] = z[1];
2780 pixelBit[7] = z[2];
2781 pixelBit[8] = y[2];
2782 break;
2783 case 2:
2784 pixelBit[0] = x[0];
2785 pixelBit[1] = x[1];
2786 pixelBit[2] = y[0];
2787 pixelBit[3] = y[1];
2788 pixelBit[4] = z[0];
2789 pixelBit[5] = z[1];
2790 pixelBit[6] = y[2];
2791 pixelBit[7] = x[2];
2792 break;
2793 case 3:
2794 pixelBit[0] = x[0];
2795 pixelBit[1] = y[0];
2796 pixelBit[2] = y[1];
2797 pixelBit[3] = z[0];
2798 pixelBit[4] = z[1];
2799 pixelBit[5] = x[1];
2800 pixelBit[6] = x[2];
2801 break;
2802 case 4:
2803 pixelBit[0] = y[0];
2804 pixelBit[1] = y[1];
2805 pixelBit[2] = z[0];
2806 pixelBit[3] = z[1];
2807 pixelBit[4] = x[0];
2808 pixelBit[5] = x[1];
2809 break;
2810 default:
2811 ADDR_ASSERT_ALWAYS();
2812 ret = ADDR_INVALIDPARAMS;
2813 break;
2814 }
2815 }
2816 else
2817 {
2818 ADDR_ASSERT_ALWAYS();
2819 ret = ADDR_INVALIDPARAMS;
2820 }
2821
2822 if (ret == ADDR_OK)
2823 {
2824 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2825 UINT_32 xIdx = Log2(microBlockDim.w);
2826 UINT_32 yIdx = Log2(microBlockDim.h);
2827 UINT_32 zIdx = Log2(microBlockDim.d);
2828
2829 pixelBit = pEquation->addr;
2830
2831 const UINT_32 lowBits = 10;
2832 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2833 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2834
2835 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2836 {
2837 if ((i % 3) == 0)
2838 {
2839 pixelBit[i] = x[xIdx++];
2840 }
2841 else if ((i % 3) == 1)
2842 {
2843 pixelBit[i] = z[zIdx++];
2844 }
2845 else
2846 {
2847 pixelBit[i] = y[yIdx++];
2848 }
2849 }
2850
2851 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2852 {
2853 if ((i % 3) == 0)
2854 {
2855 xorExtra[i - blockSizeLog2] = x[xIdx++];
2856 }
2857 else if ((i % 3) == 1)
2858 {
2859 xorExtra[i - blockSizeLog2] = z[zIdx++];
2860 }
2861 else
2862 {
2863 xorExtra[i - blockSizeLog2] = y[yIdx++];
2864 }
2865 }
2866
2867 if (IsXor(swMode))
2868 {
2869 // Fill XOR bits
2870 UINT_32 pipeStart = m_pipeInterleaveLog2;
2871 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2872 for (UINT_32 i = 0; i < pipeXorBits; i++)
2873 {
2874 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2875 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2876 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2877
2878 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2879
2880 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2881 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2882 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2883
2884 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2885 }
2886
2887 UINT_32 bankStart = pipeStart + pipeXorBits;
2888 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2889 for (UINT_32 i = 0; i < bankXorBits; i++)
2890 {
2891 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2892 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2893 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2894
2895 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2896
2897 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2898 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2899 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2900
2901 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2902 }
2903 }
2904
2905 pEquation->numBits = blockSizeLog2;
2906 }
2907
2908 return ret;
2909 }
2910
2911 /**
2912 ************************************************************************************************************************
2913 * Gfx9Lib::IsValidDisplaySwizzleMode
2914 *
2915 * @brief
2916 * Check if a swizzle mode is supported by display engine
2917 *
2918 * @return
2919 * TRUE is swizzle mode is supported by display engine
2920 ************************************************************************************************************************
2921 */
2922 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2923 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2924 {
2925 BOOL_32 support = FALSE;
2926
2927 const AddrResourceType resourceType = pIn->resourceType;
2928 (void)resourceType;
2929 const AddrSwizzleMode swizzleMode = pIn->swizzleMode;
2930
2931 if (m_settings.isDce12)
2932 {
2933 switch (swizzleMode)
2934 {
2935 case ADDR_SW_256B_D:
2936 case ADDR_SW_256B_R:
2937 support = (pIn->bpp == 32);
2938 break;
2939
2940 case ADDR_SW_LINEAR:
2941 case ADDR_SW_4KB_D:
2942 case ADDR_SW_4KB_R:
2943 case ADDR_SW_64KB_D:
2944 case ADDR_SW_64KB_R:
2945 case ADDR_SW_VAR_D:
2946 case ADDR_SW_VAR_R:
2947 case ADDR_SW_4KB_D_X:
2948 case ADDR_SW_4KB_R_X:
2949 case ADDR_SW_64KB_D_X:
2950 case ADDR_SW_64KB_R_X:
2951 case ADDR_SW_VAR_D_X:
2952 case ADDR_SW_VAR_R_X:
2953 support = (pIn->bpp <= 64);
2954 break;
2955
2956 default:
2957 break;
2958 }
2959 }
2960 else if (m_settings.isDcn1)
2961 {
2962 switch (swizzleMode)
2963 {
2964 case ADDR_SW_4KB_D:
2965 case ADDR_SW_64KB_D:
2966 case ADDR_SW_VAR_D:
2967 case ADDR_SW_64KB_D_T:
2968 case ADDR_SW_4KB_D_X:
2969 case ADDR_SW_64KB_D_X:
2970 case ADDR_SW_VAR_D_X:
2971 support = (pIn->bpp == 64);
2972 break;
2973
2974 case ADDR_SW_LINEAR:
2975 case ADDR_SW_4KB_S:
2976 case ADDR_SW_64KB_S:
2977 case ADDR_SW_VAR_S:
2978 case ADDR_SW_64KB_S_T:
2979 case ADDR_SW_4KB_S_X:
2980 case ADDR_SW_64KB_S_X:
2981 case ADDR_SW_VAR_S_X:
2982 support = (pIn->bpp <= 64);
2983 break;
2984
2985 default:
2986 break;
2987 }
2988 }
2989 else
2990 {
2991 ADDR_NOT_IMPLEMENTED();
2992 }
2993
2994 return support;
2995 }
2996
2997 /**
2998 ************************************************************************************************************************
2999 * Gfx9Lib::HwlComputePipeBankXor
3000 *
3001 * @brief
3002 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3003 *
3004 * @return
3005 * PipeBankXor value
3006 ************************************************************************************************************************
3007 */
3008 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3009 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3010 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
3011 {
3012 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3013 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3014 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3015
3016 UINT_32 pipeXor = 0;
3017 UINT_32 bankXor = 0;
3018
3019 const UINT_32 bankMask = (1 << bankBits) - 1;
3020 const UINT_32 index = pIn->surfIndex & bankMask;
3021
3022 const UINT_32 bpp = pIn->flags.fmask ?
3023 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3024 if (bankBits == 4)
3025 {
3026 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3027 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3028
3029 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3030 }
3031 else if (bankBits > 0)
3032 {
3033 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3034 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3035 bankXor = (index * bankIncrease) & bankMask;
3036 }
3037
3038 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3039
3040 return ADDR_OK;
3041 }
3042
3043 /**
3044 ************************************************************************************************************************
3045 * Gfx9Lib::HwlComputeSlicePipeBankXor
3046 *
3047 * @brief
3048 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3049 *
3050 * @return
3051 * PipeBankXor value
3052 ************************************************************************************************************************
3053 */
3054 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3055 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3056 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
3057 {
3058 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3059 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3060 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3061
3062 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3063 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3064
3065 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3066
3067 return ADDR_OK;
3068 }
3069
3070 /**
3071 ************************************************************************************************************************
3072 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3073 *
3074 * @brief
3075 * Compute sub resource offset to support swizzle pattern
3076 *
3077 * @return
3078 * Offset
3079 ************************************************************************************************************************
3080 */
3081 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3082 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3083 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
3084 {
3085 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3086
3087 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3088 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3089 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3090 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3091 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3092 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3093
3094 pOut->offset = pIn->slice * pIn->sliceSize +
3095 pIn->macroBlockOffset +
3096 (pIn->mipTailOffset ^ pipeBankXor) -
3097 static_cast<UINT_64>(pipeBankXor);
3098 return ADDR_OK;
3099 }
3100
3101 /**
3102 ************************************************************************************************************************
3103 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3104 *
3105 * @brief
3106 * Compute surface info sanity check
3107 *
3108 * @return
3109 * Offset
3110 ************************************************************************************************************************
3111 */
3112 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3113 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3114 {
3115 BOOL_32 invalid = FALSE;
3116
3117 if ((pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3118 {
3119 invalid = TRUE;
3120 }
3121 else if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) ||
3122 (pIn->resourceType >= ADDR_RSRC_MAX_TYPE))
3123 {
3124 invalid = TRUE;
3125 }
3126
3127 BOOL_32 mipmap = (pIn->numMipLevels > 1);
3128 BOOL_32 msaa = (pIn->numFrags > 1);
3129
3130 ADDR2_SURFACE_FLAGS flags = pIn->flags;
3131 BOOL_32 zbuffer = (flags.depth || flags.stencil);
3132 BOOL_32 color = flags.color;
3133 BOOL_32 display = flags.display || flags.rotated;
3134
3135 AddrResourceType rsrcType = pIn->resourceType;
3136 BOOL_32 tex3d = IsTex3d(rsrcType);
3137 AddrSwizzleMode swizzle = pIn->swizzleMode;
3138 BOOL_32 linear = IsLinear(swizzle);
3139 BOOL_32 blk256B = IsBlock256b(swizzle);
3140 BOOL_32 blkVar = IsBlockVariable(swizzle);
3141 BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
3142 BOOL_32 prt = flags.prt;
3143 BOOL_32 stereo = flags.qbStereo;
3144
3145 if (invalid == FALSE)
3146 {
3147 if ((pIn->numFrags > 1) &&
3148 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3149 {
3150 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3151 invalid = TRUE;
3152 }
3153 }
3154
3155 if (invalid == FALSE)
3156 {
3157 switch (rsrcType)
3158 {
3159 case ADDR_RSRC_TEX_1D:
3160 invalid = msaa || zbuffer || display || (linear == FALSE) || stereo;
3161 break;
3162 case ADDR_RSRC_TEX_2D:
3163 invalid = (msaa && mipmap) || (stereo && msaa) || (stereo && mipmap);
3164 break;
3165 case ADDR_RSRC_TEX_3D:
3166 invalid = msaa || zbuffer || display || stereo;
3167 break;
3168 default:
3169 invalid = TRUE;
3170 break;
3171 }
3172 }
3173
3174 if (invalid == FALSE)
3175 {
3176 if (display)
3177 {
3178 invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE);
3179 }
3180 }
3181
3182 if (invalid == FALSE)
3183 {
3184 if (linear)
3185 {
3186 invalid = ((ADDR_RSRC_TEX_1D != rsrcType) && prt) ||
3187 zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0);
3188 }
3189 else
3190 {
3191 if (blk256B || blkVar || isNonPrtXor)
3192 {
3193 invalid = prt;
3194 if (blk256B)
3195 {
3196 invalid = invalid || zbuffer || tex3d || mipmap || msaa;
3197 }
3198 }
3199
3200 if (invalid == FALSE)
3201 {
3202 if (IsZOrderSwizzle(swizzle))
3203 {
3204 invalid = color && msaa;
3205 }
3206 else if (IsStandardSwizzle(rsrcType, swizzle))
3207 {
3208 invalid = zbuffer;
3209 }
3210 else if (IsDisplaySwizzle(rsrcType, swizzle))
3211 {
3212 invalid = zbuffer;
3213 }
3214 else if (IsRotateSwizzle(swizzle))
3215 {
3216 invalid = zbuffer || (pIn->bpp > 64) || tex3d;
3217 }
3218 else
3219 {
3220 ADDR_ASSERT(!"invalid swizzle mode");
3221 invalid = TRUE;
3222 }
3223 }
3224 }
3225 }
3226
3227 ADDR_ASSERT(invalid == FALSE);
3228
3229 return invalid ? ADDR_INVALIDPARAMS : ADDR_OK;
3230 }
3231
3232 /**
3233 ************************************************************************************************************************
3234 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3235 *
3236 * @brief
3237 * Internal function to get suggested surface information for cliet to use
3238 *
3239 * @return
3240 * ADDR_E_RETURNCODE
3241 ************************************************************************************************************************
3242 */
3243 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3244 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3245 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
3246 {
3247 // Macro define resource block type
3248 enum AddrBlockType
3249 {
3250 AddrBlockMicro = 0, // Resource uses 256B block
3251 AddrBlock4KB = 1, // Resource uses 4KB block
3252 AddrBlock64KB = 2, // Resource uses 64KB block
3253 AddrBlockVar = 3, // Resource uses var blcok
3254 AddrBlockLinear = 4, // Resource uses linear swizzle mode
3255
3256 AddrBlockMaxTiledType = AddrBlock64KB + 1,
3257 };
3258
3259 enum AddrBlockSet
3260 {
3261 AddrBlockSetMicro = 1 << AddrBlockMicro,
3262 AddrBlockSetMacro4KB = 1 << AddrBlock4KB,
3263 AddrBlockSetMacro64KB = 1 << AddrBlock64KB,
3264 AddrBlockSetVar = 1 << AddrBlockVar,
3265 AddrBlockSetLinear = 1 << AddrBlockLinear,
3266
3267 AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB,
3268 };
3269
3270 enum AddrSwSet
3271 {
3272 AddrSwSetZ = 1 << ADDR_SW_Z,
3273 AddrSwSetS = 1 << ADDR_SW_S,
3274 AddrSwSetD = 1 << ADDR_SW_D,
3275 AddrSwSetR = 1 << ADDR_SW_R,
3276
3277 AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR,
3278 };
3279
3280 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3281 ElemLib* pElemLib = GetElemLib();
3282
3283 // Set format to INVALID will skip this conversion
3284 UINT_32 expandX = 1;
3285 UINT_32 expandY = 1;
3286 UINT_32 bpp = pIn->bpp;
3287 UINT_32 width = pIn->width;
3288 UINT_32 height = pIn->height;
3289
3290 if (pIn->format != ADDR_FMT_INVALID)
3291 {
3292 // Don't care for this case
3293 ElemMode elemMode = ADDR_UNCOMPRESSED;
3294
3295 // Get compression/expansion factors and element mode which indicates compression/expansion
3296 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3297 &elemMode,
3298 &expandX,
3299 &expandY);
3300
3301 UINT_32 basePitch = 0;
3302 GetElemLib()->AdjustSurfaceInfo(elemMode,
3303 expandX,
3304 expandY,
3305 &bpp,
3306 &basePitch,
3307 &width,
3308 &height);
3309 }
3310
3311 UINT_32 numSamples = Max(pIn->numSamples, 1u);
3312 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3313 UINT_32 slice = Max(pIn->numSlices, 1u);
3314 UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3315 UINT_32 minSizeAlign = NextPow2(pIn->minSizeAlign);
3316
3317 if (pIn->flags.fmask)
3318 {
3319 bpp = GetFmaskBpp(numSamples, numFrags);
3320 numFrags = 1;
3321 numSamples = 1;
3322 pOut->resourceType = ADDR_RSRC_TEX_2D;
3323 }
3324 else
3325 {
3326 // The output may get changed for volume(3D) texture resource in future
3327 pOut->resourceType = pIn->resourceType;
3328 }
3329
3330 if (bpp < 8)
3331 {
3332 ADDR_ASSERT_ALWAYS();
3333
3334 returnCode = ADDR_INVALIDPARAMS;
3335 }
3336 else if (IsTex1d(pOut->resourceType))
3337 {
3338 pOut->swizzleMode = ADDR_SW_LINEAR;
3339 pOut->validBlockSet.value = AddrBlockSetLinear;
3340 pOut->canXor = FALSE;
3341 }
3342 else
3343 {
3344 ADDR2_BLOCK_SET blockSet;
3345 blockSet.value = 0;
3346
3347 ADDR2_SWTYPE_SET addrPreferredSwSet, addrValidSwSet, clientPreferredSwSet;
3348 addrPreferredSwSet.value = AddrSwSetS;
3349 addrValidSwSet = addrPreferredSwSet;
3350 clientPreferredSwSet = pIn->preferredSwSet;
3351
3352 if (clientPreferredSwSet.value == 0)
3353 {
3354 clientPreferredSwSet.value = AddrSwSetAll;
3355 }
3356
3357 // prt Xor and non-xor will have less height align requirement for stereo surface
3358 BOOL_32 prtXor = (pIn->flags.prt || pIn->flags.qbStereo) && (pIn->noXor == FALSE);
3359 BOOL_32 displayResource = FALSE;
3360
3361 pOut->canXor = (pIn->flags.prt == FALSE) && (pIn->noXor == FALSE);
3362
3363 // Filter out improper swType and blockSet by HW restriction
3364 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3365 {
3366 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3367 blockSet.value = AddrBlockSetMacro;
3368 addrPreferredSwSet.value = AddrSwSetZ;
3369 addrValidSwSet.value = AddrSwSetZ;
3370
3371 if (pIn->flags.noMetadata == FALSE)
3372 {
3373 if (pIn->flags.depth &&
3374 pIn->flags.texture &&
3375 (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3376 {
3377 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3378 // equation from wrong address within memory range a tile covered and use the
3379 // garbage data for compressed Z reading which finally leads to corruption.
3380 pOut->canXor = FALSE;
3381 prtXor = FALSE;
3382 }
3383
3384 if (m_settings.htileCacheRbConflict &&
3385 (pIn->flags.depth || pIn->flags.stencil) &&
3386 (slice > 1) &&
3387 (pIn->flags.metaRbUnaligned == FALSE) &&
3388 (pIn->flags.metaPipeUnaligned == FALSE))
3389 {
3390 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3391 pOut->canXor = FALSE;
3392 }
3393 }
3394 }
3395 else if (ElemLib::IsBlockCompressed(pIn->format))
3396 {
3397 // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes.
3398 // Not sure under what circumstances "_D" would be appropriate as these formats
3399 // are not displayable.
3400 blockSet.value = AddrBlockSetMacro;
3401
3402 // This isn't to be used as texture and caller doesn't allow macro tiled.
3403 if ((pIn->flags.texture == FALSE) &&
3404 (pIn->forbiddenBlock.macro4KB && pIn->forbiddenBlock.macro64KB))
3405 {
3406 blockSet.value |= AddrBlockSetLinear;
3407 }
3408
3409 addrPreferredSwSet.value = AddrSwSetD;
3410 addrValidSwSet.value = AddrSwSetS | AddrSwSetD;
3411 }
3412 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3413 {
3414 // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes.
3415 // Its notclear under what circumstances the D or R modes would be appropriate
3416 // since these formats are not displayable.
3417 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3418
3419 addrPreferredSwSet.value = AddrSwSetS;
3420 addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR;
3421 }
3422 else if (IsTex3d(pOut->resourceType))
3423 {
3424 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3425
3426 if (pIn->flags.prt)
3427 {
3428 // PRT cannot use SW_D which gives an unexpected block dimension
3429 addrPreferredSwSet.value = AddrSwSetZ;
3430 addrValidSwSet.value = AddrSwSetZ | AddrSwSetS;
3431 }
3432 else if ((numMipLevels > 1) && (slice >= width) && (slice >= height))
3433 {
3434 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3435 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3436 addrPreferredSwSet.value = AddrSwSetZ;
3437 addrValidSwSet.value = AddrSwSetZ | AddrSwSetS;
3438 }
3439 else if (pIn->flags.color)
3440 {
3441 addrPreferredSwSet.value = AddrSwSetD;
3442 addrValidSwSet.value = AddrSwSetZ | AddrSwSetS | AddrSwSetD;
3443 }
3444 else
3445 {
3446 addrPreferredSwSet.value = AddrSwSetZ;
3447 addrValidSwSet.value = AddrSwSetZ | AddrSwSetD;
3448 if (bpp != 128)
3449 {
3450 addrValidSwSet.value |= AddrSwSetS;
3451 }
3452 }
3453 }
3454 else
3455 {
3456 addrPreferredSwSet.value = ((pIn->flags.display == TRUE) ||
3457 (pIn->flags.overlay == TRUE) ||
3458 (pIn->bpp == 128)) ? AddrSwSetD : AddrSwSetS;
3459
3460 addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR;
3461
3462 if (numMipLevels > 1)
3463 {
3464 ADDR_ASSERT(numFrags == 1);
3465 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3466 }
3467 else if ((numFrags > 1) || (numSamples > 1))
3468 {
3469 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3470 blockSet.value = AddrBlockSetMacro;
3471 }
3472 else
3473 {
3474 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3475 blockSet.value = AddrBlockSetLinear | AddrBlockSetMicro | AddrBlockSetMacro;
3476
3477 displayResource = pIn->flags.rotated || pIn->flags.display;
3478
3479 if (displayResource)
3480 {
3481 addrPreferredSwSet.value = pIn->flags.rotated ? AddrSwSetR : AddrSwSetD;
3482
3483 if (pIn->bpp > 64)
3484 {
3485 blockSet.value = 0;
3486 }
3487 else if (m_settings.isDce12)
3488 {
3489 if (pIn->bpp != 32)
3490 {
3491 blockSet.micro = FALSE;
3492 }
3493
3494 // DCE12 does not support display surface to be _T swizzle mode
3495 prtXor = FALSE;
3496
3497 addrValidSwSet.value = AddrSwSetD | AddrSwSetR;
3498 }
3499 else if (m_settings.isDcn1)
3500 {
3501 // _R is not supported by Dcn1
3502 if (pIn->bpp == 64)
3503 {
3504 addrPreferredSwSet.value = AddrSwSetD;
3505 addrValidSwSet.value = AddrSwSetS | AddrSwSetD;
3506 }
3507 else
3508 {
3509 addrPreferredSwSet.value = AddrSwSetS;
3510 addrValidSwSet.value = AddrSwSetS;
3511 }
3512
3513 blockSet.micro = FALSE;
3514 }
3515 else
3516 {
3517 ADDR_NOT_IMPLEMENTED();
3518 returnCode = ADDR_NOTSUPPORTED;
3519 }
3520 }
3521 }
3522 }
3523
3524 ADDR_ASSERT((addrValidSwSet.value & addrPreferredSwSet.value) == addrPreferredSwSet.value);
3525
3526 pOut->clientPreferredSwSet = clientPreferredSwSet;
3527
3528 // Clamp client preferred set to valid set
3529 clientPreferredSwSet.value &= addrValidSwSet.value;
3530
3531 pOut->validSwTypeSet = addrValidSwSet;
3532
3533 if (clientPreferredSwSet.value == 0)
3534 {
3535 // Client asks for an invalid swizzle type...
3536 ADDR_ASSERT_ALWAYS();
3537 returnCode = ADDR_INVALIDPARAMS;
3538 }
3539 else
3540 {
3541 if (IsPow2(clientPreferredSwSet.value))
3542 {
3543 // Only one swizzle type left, use it directly
3544 addrPreferredSwSet.value = clientPreferredSwSet.value;
3545 }
3546 else if ((clientPreferredSwSet.value & addrPreferredSwSet.value) == 0)
3547 {
3548 // Client wants 2 or more a valid swizzle type but none of them is addrlib preferred
3549 if (clientPreferredSwSet.sw_D)
3550 {
3551 addrPreferredSwSet.value = AddrSwSetD;
3552 }
3553 else if (clientPreferredSwSet.sw_Z)
3554 {
3555 addrPreferredSwSet.value = AddrSwSetZ;
3556 }
3557 else if (clientPreferredSwSet.sw_R)
3558 {
3559 addrPreferredSwSet.value = AddrSwSetR;
3560 }
3561 else
3562 {
3563 ADDR_ASSERT(clientPreferredSwSet.sw_S);
3564 addrPreferredSwSet.value = AddrSwSetS;
3565 }
3566 }
3567
3568 if ((numFrags > 1) &&
3569 (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
3570 {
3571 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3572 blockSet.macro4KB = FALSE;
3573 }
3574
3575 if (pIn->flags.prt)
3576 {
3577 blockSet.value &= AddrBlockSetMacro64KB;
3578 }
3579
3580 // Apply customized forbidden setting
3581 blockSet.value &= ~pIn->forbiddenBlock.value;
3582
3583 if (pIn->maxAlign > 0)
3584 {
3585 if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
3586 {
3587 blockSet.macro64KB = FALSE;
3588 }
3589
3590 if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
3591 {
3592 blockSet.macro4KB = FALSE;
3593 }
3594
3595 if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
3596 {
3597 blockSet.micro = FALSE;
3598 }
3599 }
3600
3601 Dim3d blkAlign[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3602 Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3603 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3604
3605 if (blockSet.micro)
3606 {
3607 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w,
3608 &blkAlign[AddrBlockMicro].h,
3609 &blkAlign[AddrBlockMicro].d,
3610 bpp,
3611 numFrags,
3612 pOut->resourceType,
3613 ADDR_SW_256B);
3614
3615 if (returnCode == ADDR_OK)
3616 {
3617 if (displayResource)
3618 {
3619 blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32);
3620 }
3621 else if ((blkAlign[AddrBlockMicro].w >= width) && (blkAlign[AddrBlockMicro].h >= height) &&
3622 (minSizeAlign <= GetBlockSize(ADDR_SW_256B)))
3623 {
3624 // If one 256B block can contain the surface, don't bother bigger block type
3625 blockSet.macro4KB = FALSE;
3626 blockSet.macro64KB = FALSE;
3627 blockSet.var = FALSE;
3628 }
3629
3630 padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height,
3631 slice, &paddedDim[AddrBlockMicro]);
3632 }
3633 }
3634
3635 if ((returnCode == ADDR_OK) && blockSet.macro4KB)
3636 {
3637 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w,
3638 &blkAlign[AddrBlock4KB].h,
3639 &blkAlign[AddrBlock4KB].d,
3640 bpp,
3641 numFrags,
3642 pOut->resourceType,
3643 ADDR_SW_4KB);
3644
3645 if (returnCode == ADDR_OK)
3646 {
3647 if (displayResource)
3648 {
3649 blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32);
3650 }
3651
3652 padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height,
3653 slice, &paddedDim[AddrBlock4KB]);
3654
3655 ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]);
3656 }
3657 }
3658
3659 if ((returnCode == ADDR_OK) && blockSet.macro64KB)
3660 {
3661 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w,
3662 &blkAlign[AddrBlock64KB].h,
3663 &blkAlign[AddrBlock64KB].d,
3664 bpp,
3665 numFrags,
3666 pOut->resourceType,
3667 ADDR_SW_64KB);
3668
3669 if (returnCode == ADDR_OK)
3670 {
3671 if (displayResource)
3672 {
3673 blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32);
3674 }
3675
3676 padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height,
3677 slice, &paddedDim[AddrBlock64KB]);
3678
3679 ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]);
3680 ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]);
3681 }
3682 }
3683
3684 if (returnCode == ADDR_OK)
3685 {
3686 UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u);
3687
3688 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3689 {
3690 padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement);
3691 }
3692
3693 // Use minimum block type which meets all conditions above if flag minimizeAlign was set
3694 if (pIn->flags.minimizeAlign)
3695 {
3696 // If padded size of 64KB block is larger than padded size of 256B block or 4KB
3697 // block, filter out 64KB block from candidate list
3698 if (blockSet.macro64KB &&
3699 ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) ||
3700 (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB]))))
3701 {
3702 blockSet.macro64KB = FALSE;
3703 }
3704
3705 // If padded size of 4KB block is larger than padded size of 256B block,
3706 // filter out 4KB block from candidate list
3707 if (blockSet.macro4KB &&
3708 blockSet.micro &&
3709 (padSize[AddrBlockMicro] < padSize[AddrBlock4KB]))
3710 {
3711 blockSet.macro4KB = FALSE;
3712 }
3713 }
3714 // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint
3715 else if (pIn->flags.opt4space)
3716 {
3717 UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] :
3718 (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]);
3719
3720 threshold += threshold >> 1;
3721
3722 if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold))
3723 {
3724 blockSet.macro64KB = FALSE;
3725 }
3726
3727 if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold))
3728 {
3729 blockSet.macro4KB = FALSE;
3730 }
3731 }
3732 else
3733 {
3734 if (blockSet.macro64KB &&
3735 (padSize[AddrBlock64KB] >= static_cast<UINT_64>(width) * height * slice * 2) &&
3736 ((blockSet.value & ~AddrBlockSetMacro64KB) != 0))
3737 {
3738 // If 64KB block waste more than half memory on padding, filter it out from
3739 // candidate list when it is not the only choice left
3740 blockSet.macro64KB = FALSE;
3741 }
3742 }
3743
3744 if (blockSet.value == 0)
3745 {
3746 // Bad things happen, client will not get any useful information from AddrLib.
3747 // Maybe we should fill in some output earlier instead of outputing nothing?
3748 ADDR_ASSERT_ALWAYS();
3749 returnCode = ADDR_INVALIDPARAMS;
3750 }
3751 else
3752 {
3753 pOut->validBlockSet = blockSet;
3754 pOut->canXor = pOut->canXor &&
3755 (blockSet.macro4KB || blockSet.macro64KB || blockSet.var);
3756
3757 if (blockSet.macro64KB || blockSet.macro4KB)
3758 {
3759 if (addrPreferredSwSet.value == AddrSwSetZ)
3760 {
3761 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z;
3762 }
3763 else if (addrPreferredSwSet.value == AddrSwSetS)
3764 {
3765 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S;
3766 }
3767 else if (addrPreferredSwSet.value == AddrSwSetD)
3768 {
3769 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D;
3770 }
3771 else
3772 {
3773 ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR);
3774 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R;
3775 }
3776
3777 if (prtXor && blockSet.macro64KB)
3778 {
3779 // Client wants PRTXOR, give back _T swizzle mode if 64KB is available
3780 const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z;
3781 pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + prtGap);
3782 }
3783 else if (pOut->canXor)
3784 {
3785 // Client wants XOR and this is allowed, return XOR version swizzle mode
3786 const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z;
3787 pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + xorGap);
3788 }
3789 }
3790 else if (blockSet.micro)
3791 {
3792 if (addrPreferredSwSet.value == AddrSwSetS)
3793 {
3794 pOut->swizzleMode = ADDR_SW_256B_S;
3795 }
3796 else if (addrPreferredSwSet.value == AddrSwSetD)
3797 {
3798 pOut->swizzleMode = ADDR_SW_256B_D;
3799 }
3800 else
3801 {
3802 ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR);
3803 pOut->swizzleMode = ADDR_SW_256B_R;
3804 }
3805 }
3806 else if (blockSet.linear)
3807 {
3808 // Fall into this branch doesn't mean linear is suitable, only no other choices!
3809 pOut->swizzleMode = ADDR_SW_LINEAR;
3810 }
3811 else
3812 {
3813 ADDR_ASSERT(blockSet.var);
3814
3815 // Designer consider VAR swizzle mode is usless for most cases
3816 ADDR_UNHANDLED_CASE();
3817
3818 returnCode = ADDR_NOTSUPPORTED;
3819 }
3820
3821 #if DEBUG
3822 // Post sanity check, at least AddrLib should accept the output generated by its own
3823 if (pOut->swizzleMode != ADDR_SW_LINEAR)
3824 {
3825 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3826 localIn.flags = pIn->flags;
3827 localIn.swizzleMode = pOut->swizzleMode;
3828 localIn.resourceType = pOut->resourceType;
3829 localIn.format = pIn->format;
3830 localIn.bpp = bpp;
3831 localIn.width = width;
3832 localIn.height = height;
3833 localIn.numSlices = slice;
3834 localIn.numMipLevels = numMipLevels;
3835 localIn.numSamples = numSamples;
3836 localIn.numFrags = numFrags;
3837
3838 HwlComputeSurfaceInfoSanityCheck(&localIn);
3839
3840 }
3841 #endif
3842 }
3843 }
3844 }
3845 }
3846
3847 return returnCode;
3848 }
3849
3850 /**
3851 ************************************************************************************************************************
3852 * Gfx9Lib::ComputeStereoInfo
3853 *
3854 * @brief
3855 * Compute height alignment and right eye pipeBankXor for stereo surface
3856 *
3857 * @return
3858 * Error code
3859 *
3860 ************************************************************************************************************************
3861 */
3862 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3863 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3864 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
3865 UINT_32* pHeightAlign
3866 ) const
3867 {
3868 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3869
3870 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3871
3872 if (eqIndex < m_numEquations)
3873 {
3874 if (IsXor(pIn->swizzleMode))
3875 {
3876 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3877 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
3878 const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
3879 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
3880 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3881 MAYBE_UNUSED const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
3882
3883 ADDR_ASSERT(maxYCoordBlock256 ==
3884 GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
3885
3886 const UINT_32 maxYCoordInBaseEquation =
3887 (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256;
3888
3889 ADDR_ASSERT(maxYCoordInBaseEquation ==
3890 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3891
3892 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3893
3894 ADDR_ASSERT(maxYCoordInPipeXor ==
3895 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3896
3897 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3898 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3899
3900 ADDR_ASSERT(maxYCoordInBankXor ==
3901 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3902
3903 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3904
3905 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3906 {
3907 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3908
3909 if (pOut->pStereoInfo != NULL)
3910 {
3911 pOut->pStereoInfo->rightSwizzle = 0;
3912
3913 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3914 {
3915 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3916 {
3917 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3918 }
3919
3920 if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3921 {
3922 pOut->pStereoInfo->rightSwizzle |=
3923 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3924 }
3925
3926 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3927 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3928 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3929 }
3930 }
3931 }
3932 }
3933 }
3934 else
3935 {
3936 ADDR_ASSERT_ALWAYS();
3937 returnCode = ADDR_ERROR;
3938 }
3939
3940 return returnCode;
3941 }
3942
3943 /**
3944 ************************************************************************************************************************
3945 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3946 *
3947 * @brief
3948 * Internal function to calculate alignment for tiled surface
3949 *
3950 * @return
3951 * ADDR_E_RETURNCODE
3952 ************************************************************************************************************************
3953 */
3954 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3955 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3956 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3957 ) const
3958 {
3959 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3960 &pOut->blockHeight,
3961 &pOut->blockSlices,
3962 pIn->bpp,
3963 pIn->numFrags,
3964 pIn->resourceType,
3965 pIn->swizzleMode);
3966
3967 if (returnCode == ADDR_OK)
3968 {
3969 UINT_32 pitchAlignInElement = pOut->blockWidth;
3970
3971 if ((IsTex2d(pIn->resourceType) == TRUE) &&
3972 (pIn->flags.display || pIn->flags.rotated) &&
3973 (pIn->numMipLevels <= 1) &&
3974 (pIn->numSamples <= 1) &&
3975 (pIn->numFrags <= 1))
3976 {
3977 // Display engine needs pitch align to be at least 32 pixels.
3978 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3979 }
3980
3981 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3982
3983 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3984 {
3985 if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3986 {
3987 returnCode = ADDR_INVALIDPARAMS;
3988 }
3989 else if (pIn->pitchInElement < pOut->pitch)
3990 {
3991 returnCode = ADDR_INVALIDPARAMS;
3992 }
3993 else
3994 {
3995 pOut->pitch = pIn->pitchInElement;
3996 }
3997 }
3998
3999 UINT_32 heightAlign = 0;
4000
4001 if (pIn->flags.qbStereo)
4002 {
4003 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
4004 }
4005
4006 if (returnCode == ADDR_OK)
4007 {
4008 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
4009
4010 if (heightAlign > 1)
4011 {
4012 pOut->height = PowTwoAlign(pOut->height, heightAlign);
4013 }
4014
4015 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
4016
4017 pOut->epitchIsHeight = FALSE;
4018 pOut->mipChainInTail = FALSE;
4019 pOut->firstMipIdInTail = pIn->numMipLevels;
4020
4021 pOut->mipChainPitch = pOut->pitch;
4022 pOut->mipChainHeight = pOut->height;
4023 pOut->mipChainSlice = pOut->numSlices;
4024
4025 if (pIn->numMipLevels > 1)
4026 {
4027 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
4028 pIn->swizzleMode,
4029 pIn->bpp,
4030 pIn->width,
4031 pIn->height,
4032 pIn->numSlices,
4033 pOut->blockWidth,
4034 pOut->blockHeight,
4035 pOut->blockSlices,
4036 pIn->numMipLevels,
4037 pOut->pMipInfo);
4038
4039 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4040
4041 if (endingMipId == 0)
4042 {
4043 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4044 pIn->swizzleMode,
4045 pOut->blockWidth,
4046 pOut->blockHeight,
4047 pOut->blockSlices);
4048
4049 pOut->epitchIsHeight = TRUE;
4050 pOut->pitch = tailMaxDim.w;
4051 pOut->height = tailMaxDim.h;
4052 pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4053 tailMaxDim.d : pIn->numSlices;
4054 pOut->mipChainInTail = TRUE;
4055 }
4056 else
4057 {
4058 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
4059 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4060
4061 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4062 pIn->swizzleMode,
4063 mip0WidthInBlk,
4064 mip0HeightInBlk,
4065 pOut->numSlices / pOut->blockSlices);
4066 if (majorMode == ADDR_MAJOR_Y)
4067 {
4068 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4069
4070 if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4071 {
4072 mip1WidthInBlk++;
4073 }
4074
4075 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4076
4077 pOut->epitchIsHeight = FALSE;
4078 }
4079 else
4080 {
4081 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4082
4083 if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4084 {
4085 mip1HeightInBlk++;
4086 }
4087
4088 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4089
4090 pOut->epitchIsHeight = TRUE;
4091 }
4092 }
4093
4094 if (pOut->pMipInfo != NULL)
4095 {
4096 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4097
4098 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4099 {
4100 Dim3d mipStartPos = {0};
4101 UINT_32 mipTailOffsetInBytes = 0;
4102
4103 mipStartPos = GetMipStartPos(pIn->resourceType,
4104 pIn->swizzleMode,
4105 pOut->pitch,
4106 pOut->height,
4107 pOut->numSlices,
4108 pOut->blockWidth,
4109 pOut->blockHeight,
4110 pOut->blockSlices,
4111 i,
4112 elementBytesLog2,
4113 &mipTailOffsetInBytes);
4114
4115 UINT_32 pitchInBlock =
4116 pOut->mipChainPitch / pOut->blockWidth;
4117 UINT_32 sliceInBlock =
4118 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4119 UINT_64 blockIndex =
4120 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4121 UINT_64 macroBlockOffset =
4122 blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4123
4124 pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4125 pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
4126 }
4127 }
4128 }
4129 else if (pOut->pMipInfo != NULL)
4130 {
4131 pOut->pMipInfo[0].pitch = pOut->pitch;
4132 pOut->pMipInfo[0].height = pOut->height;
4133 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4134 pOut->pMipInfo[0].offset = 0;
4135 }
4136
4137 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4138 (pIn->bpp >> 3) * pIn->numFrags;
4139 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
4140 pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4141
4142 if (pIn->flags.prt)
4143 {
4144 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4145 }
4146 }
4147 }
4148
4149 return returnCode;
4150 }
4151
4152 /**
4153 ************************************************************************************************************************
4154 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4155 *
4156 * @brief
4157 * Internal function to calculate alignment for linear surface
4158 *
4159 * @return
4160 * ADDR_E_RETURNCODE
4161 ************************************************************************************************************************
4162 */
4163 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4164 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4165 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4166 ) const
4167 {
4168 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4169 UINT_32 pitch = 0;
4170 UINT_32 actualHeight = 0;
4171 UINT_32 elementBytes = pIn->bpp >> 3;
4172 const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
4173
4174 if (IsTex1d(pIn->resourceType))
4175 {
4176 if (pIn->height > 1)
4177 {
4178 returnCode = ADDR_INVALIDPARAMS;
4179 }
4180 else
4181 {
4182 const UINT_32 pitchAlignInElement = alignment / elementBytes;
4183
4184 pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4185 actualHeight = pIn->numMipLevels;
4186
4187 if (pIn->flags.prt == FALSE)
4188 {
4189 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4190 &pitch, &actualHeight);
4191 }
4192
4193 if (returnCode == ADDR_OK)
4194 {
4195 if (pOut->pMipInfo != NULL)
4196 {
4197 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4198 {
4199 pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4200 pOut->pMipInfo[i].pitch = pitch;
4201 pOut->pMipInfo[i].height = 1;
4202 pOut->pMipInfo[i].depth = 1;
4203 }
4204 }
4205 }
4206 }
4207 }
4208 else
4209 {
4210 returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4211 }
4212
4213 if ((pitch == 0) || (actualHeight == 0))
4214 {
4215 returnCode = ADDR_INVALIDPARAMS;
4216 }
4217
4218 if (returnCode == ADDR_OK)
4219 {
4220 pOut->pitch = pitch;
4221 pOut->height = pIn->height;
4222 pOut->numSlices = pIn->numSlices;
4223 pOut->mipChainPitch = pitch;
4224 pOut->mipChainHeight = actualHeight;
4225 pOut->mipChainSlice = pOut->numSlices;
4226 pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4227 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4228 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
4229 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4230 pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4231 pOut->blockHeight = 1;
4232 pOut->blockSlices = 1;
4233 }
4234
4235 // Post calculation validate
4236 ADDR_ASSERT(pOut->sliceSize > 0);
4237
4238 return returnCode;
4239 }
4240
4241 /**
4242 ************************************************************************************************************************
4243 * Gfx9Lib::GetMipChainInfo
4244 *
4245 * @brief
4246 * Internal function to get out information about mip chain
4247 *
4248 * @return
4249 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4250 ************************************************************************************************************************
4251 */
4252 UINT_32 Gfx9Lib::GetMipChainInfo(
4253 AddrResourceType resourceType,
4254 AddrSwizzleMode swizzleMode,
4255 UINT_32 bpp,
4256 UINT_32 mip0Width,
4257 UINT_32 mip0Height,
4258 UINT_32 mip0Depth,
4259 UINT_32 blockWidth,
4260 UINT_32 blockHeight,
4261 UINT_32 blockDepth,
4262 UINT_32 numMipLevel,
4263 ADDR2_MIP_INFO* pMipInfo) const
4264 {
4265 const Dim3d tailMaxDim =
4266 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4267
4268 UINT_32 mipPitch = mip0Width;
4269 UINT_32 mipHeight = mip0Height;
4270 UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
4271 UINT_32 offset = 0;
4272 UINT_32 firstMipIdInTail = numMipLevel;
4273 BOOL_32 inTail = FALSE;
4274 BOOL_32 finalDim = FALSE;
4275 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
4276 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
4277
4278 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4279 {
4280 if (inTail)
4281 {
4282 if (finalDim == FALSE)
4283 {
4284 UINT_32 mipSize;
4285
4286 if (is3dThick)
4287 {
4288 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4289 }
4290 else
4291 {
4292 mipSize = mipPitch * mipHeight * (bpp >> 3);
4293 }
4294
4295 if (mipSize <= 256)
4296 {
4297 UINT_32 index = Log2(bpp >> 3);
4298
4299 if (is3dThick)
4300 {
4301 mipPitch = Block256_3dZ[index].w;
4302 mipHeight = Block256_3dZ[index].h;
4303 mipDepth = Block256_3dZ[index].d;
4304 }
4305 else
4306 {
4307 mipPitch = Block256_2d[index].w;
4308 mipHeight = Block256_2d[index].h;
4309 }
4310
4311 finalDim = TRUE;
4312 }
4313 }
4314 }
4315 else
4316 {
4317 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4318 mipPitch, mipHeight, mipDepth);
4319
4320 if (inTail)
4321 {
4322 firstMipIdInTail = mipId;
4323 mipPitch = tailMaxDim.w;
4324 mipHeight = tailMaxDim.h;
4325
4326 if (is3dThick)
4327 {
4328 mipDepth = tailMaxDim.d;
4329 }
4330 }
4331 else
4332 {
4333 mipPitch = PowTwoAlign(mipPitch, blockWidth);
4334 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4335
4336 if (is3dThick)
4337 {
4338 mipDepth = PowTwoAlign(mipDepth, blockDepth);
4339 }
4340 }
4341 }
4342
4343 if (pMipInfo != NULL)
4344 {
4345 pMipInfo[mipId].pitch = mipPitch;
4346 pMipInfo[mipId].height = mipHeight;
4347 pMipInfo[mipId].depth = mipDepth;
4348 pMipInfo[mipId].offset = offset;
4349 }
4350
4351 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4352
4353 if (finalDim)
4354 {
4355 if (is3dThin)
4356 {
4357 mipDepth = Max(mipDepth >> 1, 1u);
4358 }
4359 }
4360 else
4361 {
4362 mipPitch = Max(mipPitch >> 1, 1u);
4363 mipHeight = Max(mipHeight >> 1, 1u);
4364
4365 if (is3dThick || is3dThin)
4366 {
4367 mipDepth = Max(mipDepth >> 1, 1u);
4368 }
4369 }
4370 }
4371
4372 return firstMipIdInTail;
4373 }
4374
4375 /**
4376 ************************************************************************************************************************
4377 * Gfx9Lib::GetMetaMiptailInfo
4378 *
4379 * @brief
4380 * Get mip tail coordinate information.
4381 *
4382 * @return
4383 * N/A
4384 ************************************************************************************************************************
4385 */
4386 VOID Gfx9Lib::GetMetaMiptailInfo(
4387 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord
4388 Dim3d mipCoord, ///< [in] mip tail base coord
4389 UINT_32 numMipInTail, ///< [in] number of mips in tail
4390 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
4391 ) const
4392 {
4393 BOOL_32 isThick = (pMetaBlkDim->d > 1);
4394 UINT_32 mipWidth = pMetaBlkDim->w;
4395 UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4396 UINT_32 mipDepth = pMetaBlkDim->d;
4397 UINT_32 minInc;
4398
4399 if (isThick)
4400 {
4401 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4402 }
4403 else if (pMetaBlkDim->h >= 1024)
4404 {
4405 minInc = 256;
4406 }
4407 else if (pMetaBlkDim->h == 512)
4408 {
4409 minInc = 128;
4410 }
4411 else
4412 {
4413 minInc = 64;
4414 }
4415
4416 UINT_32 blk32MipId = 0xFFFFFFFF;
4417
4418 for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4419 {
4420 pInfo[mip].inMiptail = TRUE;
4421 pInfo[mip].startX = mipCoord.w;
4422 pInfo[mip].startY = mipCoord.h;
4423 pInfo[mip].startZ = mipCoord.d;
4424 pInfo[mip].width = mipWidth;
4425 pInfo[mip].height = mipHeight;
4426 pInfo[mip].depth = mipDepth;
4427
4428 if (mipWidth <= 32)
4429 {
4430 if (blk32MipId == 0xFFFFFFFF)
4431 {
4432 blk32MipId = mip;
4433 }
4434
4435 mipCoord.w = pInfo[blk32MipId].startX;
4436 mipCoord.h = pInfo[blk32MipId].startY;
4437 mipCoord.d = pInfo[blk32MipId].startZ;
4438
4439 switch (mip - blk32MipId)
4440 {
4441 case 0:
4442 mipCoord.w += 32; // 16x16
4443 break;
4444 case 1:
4445 mipCoord.h += 32; // 8x8
4446 break;
4447 case 2:
4448 mipCoord.h += 32; // 4x4
4449 mipCoord.w += 16;
4450 break;
4451 case 3:
4452 mipCoord.h += 32; // 2x2
4453 mipCoord.w += 32;
4454 break;
4455 case 4:
4456 mipCoord.h += 32; // 1x1
4457 mipCoord.w += 48;
4458 break;
4459 // The following are for BC/ASTC formats
4460 case 5:
4461 mipCoord.h += 48; // 1/2 x 1/2
4462 break;
4463 case 6:
4464 mipCoord.h += 48; // 1/4 x 1/4
4465 mipCoord.w += 16;
4466 break;
4467 case 7:
4468 mipCoord.h += 48; // 1/8 x 1/8
4469 mipCoord.w += 32;
4470 break;
4471 case 8:
4472 mipCoord.h += 48; // 1/16 x 1/16
4473 mipCoord.w += 48;
4474 break;
4475 default:
4476 ADDR_ASSERT_ALWAYS();
4477 break;
4478 }
4479
4480 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4481 mipHeight = mipWidth;
4482
4483 if (isThick)
4484 {
4485 mipDepth = mipWidth;
4486 }
4487 }
4488 else
4489 {
4490 if (mipWidth <= minInc)
4491 {
4492 // if we're below the minimal increment...
4493 if (isThick)
4494 {
4495 // For 3d, just go in z direction
4496 mipCoord.d += mipDepth;
4497 }
4498 else
4499 {
4500 // For 2d, first go across, then down
4501 if ((mipWidth * 2) == minInc)
4502 {
4503 // if we're 2 mips below, that's when we go back in x, and down in y
4504 mipCoord.w -= minInc;
4505 mipCoord.h += minInc;
4506 }
4507 else
4508 {
4509 // otherwise, just go across in x
4510 mipCoord.w += minInc;
4511 }
4512 }
4513 }
4514 else
4515 {
4516 // On even mip, go down, otherwise, go across
4517 if (mip & 1)
4518 {
4519 mipCoord.w += mipWidth;
4520 }
4521 else
4522 {
4523 mipCoord.h += mipHeight;
4524 }
4525 }
4526 // Divide the width by 2
4527 mipWidth >>= 1;
4528 // After the first mip in tail, the mip is always a square
4529 mipHeight = mipWidth;
4530 // ...or for 3d, a cube
4531 if (isThick)
4532 {
4533 mipDepth = mipWidth;
4534 }
4535 }
4536 }
4537 }
4538
4539 /**
4540 ************************************************************************************************************************
4541 * Gfx9Lib::GetMipStartPos
4542 *
4543 * @brief
4544 * Internal function to get out information about mip logical start position
4545 *
4546 * @return
4547 * logical start position in macro block width/height/depth of one mip level within one slice
4548 ************************************************************************************************************************
4549 */
4550 Dim3d Gfx9Lib::GetMipStartPos(
4551 AddrResourceType resourceType,
4552 AddrSwizzleMode swizzleMode,
4553 UINT_32 width,
4554 UINT_32 height,
4555 UINT_32 depth,
4556 UINT_32 blockWidth,
4557 UINT_32 blockHeight,
4558 UINT_32 blockDepth,
4559 UINT_32 mipId,
4560 UINT_32 log2ElementBytes,
4561 UINT_32* pMipTailBytesOffset) const
4562 {
4563 Dim3d mipStartPos = {0};
4564 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4565
4566 // Report mip in tail if Mip0 is already in mip tail
4567 BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4568 UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode);
4569 UINT_32 mipIndexInTail = mipId;
4570
4571 if (inMipTail == FALSE)
4572 {
4573 // Mip 0 dimension, unit in block
4574 UINT_32 mipWidthInBlk = width / blockWidth;
4575 UINT_32 mipHeightInBlk = height / blockHeight;
4576 UINT_32 mipDepthInBlk = depth / blockDepth;
4577 AddrMajorMode majorMode = GetMajorMode(resourceType,
4578 swizzleMode,
4579 mipWidthInBlk,
4580 mipHeightInBlk,
4581 mipDepthInBlk);
4582
4583 UINT_32 endingMip = mipId + 1;
4584
4585 for (UINT_32 i = 1; i <= mipId; i++)
4586 {
4587 if ((i == 1) || (i == 3))
4588 {
4589 if (majorMode == ADDR_MAJOR_Y)
4590 {
4591 mipStartPos.w += mipWidthInBlk;
4592 }
4593 else
4594 {
4595 mipStartPos.h += mipHeightInBlk;
4596 }
4597 }
4598 else
4599 {
4600 if (majorMode == ADDR_MAJOR_X)
4601 {
4602 mipStartPos.w += mipWidthInBlk;
4603 }
4604 else if (majorMode == ADDR_MAJOR_Y)
4605 {
4606 mipStartPos.h += mipHeightInBlk;
4607 }
4608 else
4609 {
4610 mipStartPos.d += mipDepthInBlk;
4611 }
4612 }
4613
4614 BOOL_32 inTail = FALSE;
4615
4616 if (IsThick(resourceType, swizzleMode))
4617 {
4618 UINT_32 dim = log2blkSize % 3;
4619
4620 if (dim == 0)
4621 {
4622 inTail =
4623 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4624 }
4625 else if (dim == 1)
4626 {
4627 inTail =
4628 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4629 }
4630 else
4631 {
4632 inTail =
4633 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4634 }
4635 }
4636 else
4637 {
4638 if (log2blkSize & 1)
4639 {
4640 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4641 }
4642 else
4643 {
4644 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4645 }
4646 }
4647
4648 if (inTail)
4649 {
4650 endingMip = i;
4651 break;
4652 }
4653
4654 mipWidthInBlk = RoundHalf(mipWidthInBlk);
4655 mipHeightInBlk = RoundHalf(mipHeightInBlk);
4656 mipDepthInBlk = RoundHalf(mipDepthInBlk);
4657 }
4658
4659 if (mipId >= endingMip)
4660 {
4661 inMipTail = TRUE;
4662 mipIndexInTail = mipId - endingMip;
4663 }
4664 }
4665
4666 if (inMipTail)
4667 {
4668 UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize;
4669 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4670 *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4671 }
4672
4673 return mipStartPos;
4674 }
4675
4676 /**
4677 ************************************************************************************************************************
4678 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4679 *
4680 * @brief
4681 * Internal function to calculate address from coord for tiled swizzle surface
4682 *
4683 * @return
4684 * ADDR_E_RETURNCODE
4685 ************************************************************************************************************************
4686 */
4687 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4688 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4689 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4690 ) const
4691 {
4692 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4693 localIn.swizzleMode = pIn->swizzleMode;
4694 localIn.flags = pIn->flags;
4695 localIn.resourceType = pIn->resourceType;
4696 localIn.bpp = pIn->bpp;
4697 localIn.width = Max(pIn->unalignedWidth, 1u);
4698 localIn.height = Max(pIn->unalignedHeight, 1u);
4699 localIn.numSlices = Max(pIn->numSlices, 1u);
4700 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4701 localIn.numSamples = Max(pIn->numSamples, 1u);
4702 localIn.numFrags = Max(pIn->numFrags, 1u);
4703 if (localIn.numMipLevels <= 1)
4704 {
4705 localIn.pitchInElement = pIn->pitchInElement;
4706 }
4707
4708 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4709 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4710
4711 BOOL_32 valid = (returnCode == ADDR_OK) &&
4712 (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4713 IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4714 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4715
4716 if (valid)
4717 {
4718 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
4719 Dim3d mipStartPos = {0};
4720 UINT_32 mipTailBytesOffset = 0;
4721
4722 if (pIn->numMipLevels > 1)
4723 {
4724 // Mip-map chain cannot be MSAA surface
4725 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4726
4727 mipStartPos = GetMipStartPos(pIn->resourceType,
4728 pIn->swizzleMode,
4729 localOut.pitch,
4730 localOut.height,
4731 localOut.numSlices,
4732 localOut.blockWidth,
4733 localOut.blockHeight,
4734 localOut.blockSlices,
4735 pIn->mipId,
4736 log2ElementBytes,
4737 &mipTailBytesOffset);
4738 }
4739
4740 UINT_32 interleaveOffset = 0;
4741 UINT_32 pipeBits = 0;
4742 UINT_32 pipeXor = 0;
4743 UINT_32 bankBits = 0;
4744 UINT_32 bankXor = 0;
4745
4746 if (IsThin(pIn->resourceType, pIn->swizzleMode))
4747 {
4748 UINT_32 blockOffset = 0;
4749 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4750
4751 if (IsZOrderSwizzle(pIn->swizzleMode))
4752 {
4753 // Morton generation
4754 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4755 {
4756 UINT_32 totalLowBits = 6 - log2ElementBytes;
4757 UINT_32 mortBits = totalLowBits / 2;
4758 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4759 // Are 9 bits enough?
4760 UINT_32 highBitsValue =
4761 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4762 blockOffset = lowBitsValue | highBitsValue;
4763 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4764 }
4765 else
4766 {
4767 blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4768 }
4769
4770 // Fill LSBs with sample bits
4771 if (pIn->numSamples > 1)
4772 {
4773 blockOffset *= pIn->numSamples;
4774 blockOffset |= pIn->sample;
4775 }
4776
4777 // Shift according to BytesPP
4778 blockOffset <<= log2ElementBytes;
4779 }
4780 else
4781 {
4782 // Micro block offset
4783 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4784 blockOffset = microBlockOffset;
4785
4786 // Micro block dimension
4787 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4788 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4789 // Morton generation, does 12 bit enough?
4790 blockOffset |=
4791 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4792
4793 // Sample bits start location
4794 UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples);
4795 // Join sample bits information to the highest Macro block bits
4796 if (IsNonPrtXor(pIn->swizzleMode))
4797 {
4798 // Non-prt-Xor : xor highest Macro block bits with sample bits
4799 blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4800 }
4801 else
4802 {
4803 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4804 // after this op, the blockOffset only contains log2 Macro block size bits
4805 blockOffset %= (1 << sampleStart);
4806 blockOffset |= (pIn->sample << sampleStart);
4807 ADDR_ASSERT((blockOffset >> log2blkSize) == 0);
4808 }
4809 }
4810
4811 if (IsXor(pIn->swizzleMode))
4812 {
4813 // Mask off bits above Macro block bits to keep page synonyms working for prt
4814 if (IsPrt(pIn->swizzleMode))
4815 {
4816 blockOffset &= ((1 << log2blkSize) - 1);
4817 }
4818
4819 // Preserve offset inside pipe interleave
4820 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4821 blockOffset >>= m_pipeInterleaveLog2;
4822
4823 // Pipe/Se xor bits
4824 pipeBits = GetPipeXorBits(log2blkSize);
4825 // Pipe xor
4826 pipeXor = FoldXor2d(blockOffset, pipeBits);
4827 blockOffset >>= pipeBits;
4828
4829 // Bank xor bits
4830 bankBits = GetBankXorBits(log2blkSize);
4831 // Bank Xor
4832 bankXor = FoldXor2d(blockOffset, bankBits);
4833 blockOffset >>= bankBits;
4834
4835 // Put all the part back together
4836 blockOffset <<= bankBits;
4837 blockOffset |= bankXor;
4838 blockOffset <<= pipeBits;
4839 blockOffset |= pipeXor;
4840 blockOffset <<= m_pipeInterleaveLog2;
4841 blockOffset |= interleaveOffset;
4842 }
4843
4844 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4845 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4846
4847 blockOffset |= mipTailBytesOffset;
4848
4849 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4850 {
4851 // Apply slice xor if not MSAA/PRT
4852 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4853 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4854 (m_pipeInterleaveLog2 + pipeBits));
4855 }
4856
4857 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4858 bankBits, pipeBits, &blockOffset);
4859
4860 blockOffset %= (1 << log2blkSize);
4861
4862 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4863 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4864 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4865 UINT_64 macroBlockIndex =
4866 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4867 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4868 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4869
4870 pOut->addr = blockOffset | (macroBlockIndex << log2blkSize);
4871 }
4872 else
4873 {
4874 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4875
4876 Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4877
4878 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4879 (pIn->y / microBlockDim.h),
4880 (pIn->slice / microBlockDim.d),
4881 8);
4882
4883 blockOffset <<= 10;
4884 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4885
4886 if (IsXor(pIn->swizzleMode))
4887 {
4888 // Mask off bits above Macro block bits to keep page synonyms working for prt
4889 if (IsPrt(pIn->swizzleMode))
4890 {
4891 blockOffset &= ((1 << log2blkSize) - 1);
4892 }
4893
4894 // Preserve offset inside pipe interleave
4895 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4896 blockOffset >>= m_pipeInterleaveLog2;
4897
4898 // Pipe/Se xor bits
4899 pipeBits = GetPipeXorBits(log2blkSize);
4900 // Pipe xor
4901 pipeXor = FoldXor3d(blockOffset, pipeBits);
4902 blockOffset >>= pipeBits;
4903
4904 // Bank xor bits
4905 bankBits = GetBankXorBits(log2blkSize);
4906 // Bank Xor
4907 bankXor = FoldXor3d(blockOffset, bankBits);
4908 blockOffset >>= bankBits;
4909
4910 // Put all the part back together
4911 blockOffset <<= bankBits;
4912 blockOffset |= bankXor;
4913 blockOffset <<= pipeBits;
4914 blockOffset |= pipeXor;
4915 blockOffset <<= m_pipeInterleaveLog2;
4916 blockOffset |= interleaveOffset;
4917 }
4918
4919 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4920 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4921 blockOffset |= mipTailBytesOffset;
4922
4923 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4924 bankBits, pipeBits, &blockOffset);
4925
4926 blockOffset %= (1 << log2blkSize);
4927
4928 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
4929 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4930 UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4931
4932 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4933 UINT_32 sliceSizeInBlock =
4934 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4935 UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4936
4937 pOut->addr = blockOffset | (blockIndex << log2blkSize);
4938 }
4939 }
4940 else
4941 {
4942 returnCode = ADDR_INVALIDPARAMS;
4943 }
4944
4945 return returnCode;
4946 }
4947
4948 /**
4949 ************************************************************************************************************************
4950 * Gfx9Lib::ComputeSurfaceLinearPadding
4951 *
4952 * @brief
4953 * Internal function to calculate padding for linear swizzle 2D/3D surface
4954 *
4955 * @return
4956 * ADDR_E_RETURNCODE
4957 ************************************************************************************************************************
4958 */
4959 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4960 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture
4961 UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
4962 UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
4963 ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
4964 ) const
4965 {
4966 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4967
4968 UINT_32 elementBytes = pIn->bpp >> 3;
4969 UINT_32 pitchAlignInElement = 0;
4970
4971 if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4972 {
4973 ADDR_ASSERT(pIn->numMipLevels <= 1);
4974 ADDR_ASSERT(pIn->numSlices <= 1);
4975 pitchAlignInElement = 1;
4976 }
4977 else
4978 {
4979 pitchAlignInElement = (256 / elementBytes);
4980 }
4981
4982 UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
4983 UINT_32 slice0PaddedHeight = pIn->height;
4984
4985 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4986 &mipChainWidth, &slice0PaddedHeight);
4987
4988 if (returnCode == ADDR_OK)
4989 {
4990 UINT_32 mipChainHeight = 0;
4991 UINT_32 mipHeight = pIn->height;
4992
4993 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4994 {
4995 if (pMipInfo != NULL)
4996 {
4997 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
4998 pMipInfo[i].pitch = mipChainWidth;
4999 pMipInfo[i].height = mipHeight;
5000 pMipInfo[i].depth = 1;
5001 }
5002
5003 mipChainHeight += mipHeight;
5004 mipHeight = RoundHalf(mipHeight);
5005 mipHeight = Max(mipHeight, 1u);
5006 }
5007
5008 *pMipmap0PaddedWidth = mipChainWidth;
5009 *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
5010 }
5011
5012 return returnCode;
5013 }
5014
5015 } // V2
5016 } // Addr