9b2537151ac1c11442a881c7621a0893bead886f
[mesa.git] / src / amd / addrlib / gfx9 / gfx9addrlib.cpp
1 /*
2 * Copyright © 2017 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37 #include "gfx9_enum.h"
38
39 #if BRAHMA_BUILD
40 #include "amdgpu_id.h"
41 #else
42 #include "ai_id.h"
43 #include "rv_id.h"
44 #endif
45
46 ////////////////////////////////////////////////////////////////////////////////////////////////////
47 ////////////////////////////////////////////////////////////////////////////////////////////////////
48
49 namespace Addr
50 {
51
52 /**
53 ************************************************************************************************************************
54 * Gfx9HwlInit
55 *
56 * @brief
57 * Creates a Gfx9Lib object.
58 *
59 * @return
60 * Returns a Gfx9Lib object pointer.
61 ************************************************************************************************************************
62 */
63 Addr::Lib* Gfx9HwlInit(const Client* pClient)
64 {
65 return V2::Gfx9Lib::CreateObj(pClient);
66 }
67
68 namespace V2
69 {
70
71 ////////////////////////////////////////////////////////////////////////////////////////////////////
72 // Static Const Member
73 ////////////////////////////////////////////////////////////////////////////////////////////////////
74
75 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
76 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
77 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
78 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
79 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
80 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R
81
82 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
83 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
84 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
85 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R
86
87 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
88 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
89 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
90 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_R
91
92 {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z
93 {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S
94 {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D
95 {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R
96
97 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T
98 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
99 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
100 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T
101
102 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x
103 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x
104 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x
105 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x
106
107 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
108 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
109 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
110 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X
111
112 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X
113 {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X
114 {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X
115 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X
116 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
117 };
118
119 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
120 8, 6, 5, 4, 3, 2, 1, 0};
121
122 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
123
124 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
125
126 /**
127 ************************************************************************************************************************
128 * Gfx9Lib::Gfx9Lib
129 *
130 * @brief
131 * Constructor
132 *
133 ************************************************************************************************************************
134 */
135 Gfx9Lib::Gfx9Lib(const Client* pClient)
136 :
137 Lib(pClient),
138 m_numEquations(0)
139 {
140 m_class = AI_ADDRLIB;
141 memset(&m_settings, 0, sizeof(m_settings));
142 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
143 }
144
145 /**
146 ************************************************************************************************************************
147 * Gfx9Lib::~Gfx9Lib
148 *
149 * @brief
150 * Destructor
151 ************************************************************************************************************************
152 */
153 Gfx9Lib::~Gfx9Lib()
154 {
155 }
156
157 /**
158 ************************************************************************************************************************
159 * Gfx9Lib::HwlComputeHtileInfo
160 *
161 * @brief
162 * Interface function stub of AddrComputeHtileInfo
163 *
164 * @return
165 * ADDR_E_RETURNCODE
166 ************************************************************************************************************************
167 */
168 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
169 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
170 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
171 ) const
172 {
173 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
174 pIn->swizzleMode);
175
176 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
177
178 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
179
180 if ((numPipeTotal == 1) && (numRbTotal == 1))
181 {
182 numCompressBlkPerMetaBlkLog2 = 10;
183 }
184 else
185 {
186 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
187 }
188
189 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
190
191 Dim3d metaBlkDim = {8, 8, 1};
192 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
193 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
194 UINT_32 heightAmp = totalAmpBits - widthAmp;
195 metaBlkDim.w <<= widthAmp;
196 metaBlkDim.h <<= heightAmp;
197
198 #if DEBUG
199 Dim3d metaBlkDimDbg = {8, 8, 1};
200 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
201 {
202 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
203 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
204 {
205 metaBlkDimDbg.h <<= 1;
206 }
207 else
208 {
209 metaBlkDimDbg.w <<= 1;
210 }
211 }
212 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
213 #endif
214
215 UINT_32 numMetaBlkX;
216 UINT_32 numMetaBlkY;
217 UINT_32 numMetaBlkZ;
218
219 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
220 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
221 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
222
223 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
224
225 pOut->pitch = numMetaBlkX * metaBlkDim.w;
226 pOut->height = numMetaBlkY * metaBlkDim.h;
227 pOut->sliceSize = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4;
228
229 pOut->metaBlkWidth = metaBlkDim.w;
230 pOut->metaBlkHeight = metaBlkDim.h;
231 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
232
233 pOut->baseAlign = Max(numCompressBlkPerMetaBlk * 4, sizeAlign);
234
235 if (m_settings.metaBaseAlignFix)
236 {
237 pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
238 }
239
240 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
241 {
242 UINT_32 additionalAlign = numPipeTotal * numCompressBlkPerMetaBlk * 2;
243
244 if (additionalAlign > sizeAlign)
245 {
246 sizeAlign = additionalAlign;
247 }
248 }
249
250 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
251
252 return ADDR_OK;
253 }
254
255 /**
256 ************************************************************************************************************************
257 * Gfx9Lib::HwlComputeCmaskInfo
258 *
259 * @brief
260 * Interface function stub of AddrComputeCmaskInfo
261 *
262 * @return
263 * ADDR_E_RETURNCODE
264 ************************************************************************************************************************
265 */
266 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
267 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
268 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
269 ) const
270 {
271 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
272
273 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
274 pIn->swizzleMode);
275
276 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
277
278 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
279
280 if ((numPipeTotal == 1) && (numRbTotal == 1))
281 {
282 numCompressBlkPerMetaBlkLog2 = 13;
283 }
284 else
285 {
286 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
287
288 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
289 }
290
291 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
292
293 Dim2d metaBlkDim = {8, 8};
294 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
295 UINT_32 heightAmp = totalAmpBits >> 1;
296 UINT_32 widthAmp = totalAmpBits - heightAmp;
297 metaBlkDim.w <<= widthAmp;
298 metaBlkDim.h <<= heightAmp;
299
300 #if DEBUG
301 Dim2d metaBlkDimDbg = {8, 8};
302 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
303 {
304 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
305 {
306 metaBlkDimDbg.h <<= 1;
307 }
308 else
309 {
310 metaBlkDimDbg.w <<= 1;
311 }
312 }
313 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
314 #endif
315
316 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
317 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
318 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
319
320 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
321
322 pOut->pitch = numMetaBlkX * metaBlkDim.w;
323 pOut->height = numMetaBlkY * metaBlkDim.h;
324 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
325 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
326 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
327
328 if (m_settings.metaBaseAlignFix)
329 {
330 pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
331 }
332
333 pOut->metaBlkWidth = metaBlkDim.w;
334 pOut->metaBlkHeight = metaBlkDim.h;
335
336 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
337
338 return ADDR_OK;
339 }
340
341 /**
342 ************************************************************************************************************************
343 * Gfx9Lib::GetMetaMipInfo
344 *
345 * @brief
346 * Get meta mip info
347 *
348 * @return
349 * N/A
350 ************************************************************************************************************************
351 */
352 VOID Gfx9Lib::GetMetaMipInfo(
353 UINT_32 numMipLevels, ///< [in] number of mip levels
354 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
355 BOOL_32 dataThick, ///< [in] data surface is thick
356 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
357 UINT_32 mip0Width, ///< [in] mip0 width
358 UINT_32 mip0Height, ///< [in] mip0 height
359 UINT_32 mip0Depth, ///< [in] mip0 depth
360 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
361 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
362 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
363 const
364 {
365 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
366 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
367 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
368 UINT_32 tailWidth = pMetaBlkDim->w;
369 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
370 UINT_32 tailDepth = pMetaBlkDim->d;
371 BOOL_32 inTail = FALSE;
372 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
373
374 if (numMipLevels > 1)
375 {
376 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
377 {
378 // Z major
379 major = ADDR_MAJOR_Z;
380 }
381 else if (numMetaBlkX >= numMetaBlkY)
382 {
383 // X major
384 major = ADDR_MAJOR_X;
385 }
386 else
387 {
388 // Y major
389 major = ADDR_MAJOR_Y;
390 }
391
392 inTail = ((mip0Width <= tailWidth) &&
393 (mip0Height <= tailHeight) &&
394 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
395
396 if (inTail == FALSE)
397 {
398 UINT_32 orderLimit;
399 UINT_32 *pMipDim;
400 UINT_32 *pOrderDim;
401
402 if (major == ADDR_MAJOR_Z)
403 {
404 // Z major
405 pMipDim = &numMetaBlkY;
406 pOrderDim = &numMetaBlkZ;
407 orderLimit = 4;
408 }
409 else if (major == ADDR_MAJOR_X)
410 {
411 // X major
412 pMipDim = &numMetaBlkY;
413 pOrderDim = &numMetaBlkX;
414 orderLimit = 4;
415 }
416 else
417 {
418 // Y major
419 pMipDim = &numMetaBlkX;
420 pOrderDim = &numMetaBlkY;
421 orderLimit = 2;
422 }
423
424 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
425 {
426 *pMipDim += 2;
427 }
428 else
429 {
430 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
431 }
432 }
433 }
434
435 if (pInfo != NULL)
436 {
437 UINT_32 mipWidth = mip0Width;
438 UINT_32 mipHeight = mip0Height;
439 UINT_32 mipDepth = mip0Depth;
440 Dim3d mipCoord = {0};
441
442 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
443 {
444 if (inTail)
445 {
446 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
447 pMetaBlkDim);
448 break;
449 }
450 else
451 {
452 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
453 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
454 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
455
456 pInfo[mip].inMiptail = FALSE;
457 pInfo[mip].startX = mipCoord.w;
458 pInfo[mip].startY = mipCoord.h;
459 pInfo[mip].startZ = mipCoord.d;
460 pInfo[mip].width = mipWidth;
461 pInfo[mip].height = mipHeight;
462 pInfo[mip].depth = dataThick ? mipDepth : 1;
463
464 if ((mip >= 3) || (mip & 1))
465 {
466 switch (major)
467 {
468 case ADDR_MAJOR_X:
469 mipCoord.w += mipWidth;
470 break;
471 case ADDR_MAJOR_Y:
472 mipCoord.h += mipHeight;
473 break;
474 case ADDR_MAJOR_Z:
475 mipCoord.d += mipDepth;
476 break;
477 default:
478 break;
479 }
480 }
481 else
482 {
483 switch (major)
484 {
485 case ADDR_MAJOR_X:
486 mipCoord.h += mipHeight;
487 break;
488 case ADDR_MAJOR_Y:
489 mipCoord.w += mipWidth;
490 break;
491 case ADDR_MAJOR_Z:
492 mipCoord.h += mipHeight;
493 break;
494 default:
495 break;
496 }
497 }
498
499 mipWidth = Max(mipWidth >> 1, 1u);
500 mipHeight = Max(mipHeight >> 1, 1u);
501 mipDepth = Max(mipDepth >> 1, 1u);
502
503 inTail = ((mipWidth <= tailWidth) &&
504 (mipHeight <= tailHeight) &&
505 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
506 }
507 }
508 }
509
510 *pNumMetaBlkX = numMetaBlkX;
511 *pNumMetaBlkY = numMetaBlkY;
512 *pNumMetaBlkZ = numMetaBlkZ;
513 }
514
515 /**
516 ************************************************************************************************************************
517 * Gfx9Lib::HwlComputeDccInfo
518 *
519 * @brief
520 * Interface function to compute DCC key info
521 *
522 * @return
523 * ADDR_E_RETURNCODE
524 ************************************************************************************************************************
525 */
526 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
527 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
528 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
529 ) const
530 {
531 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
532 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
533 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
534
535 if (dataLinear)
536 {
537 metaLinear = TRUE;
538 }
539 else if (metaLinear == TRUE)
540 {
541 pipeAligned = FALSE;
542 }
543
544 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
545
546 if (metaLinear)
547 {
548 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
549 ADDR_ASSERT_ALWAYS();
550
551 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
552 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
553 }
554 else
555 {
556 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
557
558 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
559
560 UINT_32 numFrags = Max(pIn->numFrags, 1u);
561 UINT_32 numSlices = Max(pIn->numSlices, 1u);
562
563 minMetaBlkSize /= numFrags;
564
565 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
566
567 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
568
569 if ((numPipeTotal > 1) || (numRbTotal > 1))
570 {
571 numCompressBlkPerMetaBlk =
572 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : 1024));
573
574 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
575 {
576 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
577 }
578 }
579
580 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
581 Dim3d metaBlkDim = compressBlkDim;
582
583 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
584 {
585 if ((metaBlkDim.h < metaBlkDim.w) ||
586 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
587 {
588 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
589 {
590 metaBlkDim.h <<= 1;
591 }
592 else
593 {
594 metaBlkDim.d <<= 1;
595 }
596 }
597 else
598 {
599 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
600 {
601 metaBlkDim.w <<= 1;
602 }
603 else
604 {
605 metaBlkDim.d <<= 1;
606 }
607 }
608 }
609
610 UINT_32 numMetaBlkX;
611 UINT_32 numMetaBlkY;
612 UINT_32 numMetaBlkZ;
613
614 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
615 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
616 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
617
618 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
619
620 if (numFrags > m_maxCompFrag)
621 {
622 sizeAlign *= (numFrags / m_maxCompFrag);
623 }
624
625 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
626 numCompressBlkPerMetaBlk * numFrags;
627 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
628 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
629
630 if (m_settings.metaBaseAlignFix)
631 {
632 pOut->dccRamBaseAlign = Max(pOut->dccRamBaseAlign, GetBlockSize(pIn->swizzleMode));
633 }
634
635 pOut->pitch = numMetaBlkX * metaBlkDim.w;
636 pOut->height = numMetaBlkY * metaBlkDim.h;
637 pOut->depth = numMetaBlkZ * metaBlkDim.d;
638
639 pOut->compressBlkWidth = compressBlkDim.w;
640 pOut->compressBlkHeight = compressBlkDim.h;
641 pOut->compressBlkDepth = compressBlkDim.d;
642
643 pOut->metaBlkWidth = metaBlkDim.w;
644 pOut->metaBlkHeight = metaBlkDim.h;
645 pOut->metaBlkDepth = metaBlkDim.d;
646
647 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
648 pOut->fastClearSizePerSlice =
649 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
650 }
651
652 return ADDR_OK;
653 }
654
655 /**
656 ************************************************************************************************************************
657 * Gfx9Lib::HwlGetMaxAlignments
658 *
659 * @brief
660 * Gets maximum alignments
661 * @return
662 * ADDR_E_RETURNCODE
663 ************************************************************************************************************************
664 */
665 ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments(
666 ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut ///< [out] output structure
667 ) const
668 {
669 pOut->baseAlign = HwlComputeSurfaceBaseAlign(ADDR_SW_64KB);
670
671 return ADDR_OK;
672 }
673
674 /**
675 ************************************************************************************************************************
676 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
677 *
678 * @brief
679 * Interface function stub of AddrComputeCmaskAddrFromCoord
680 *
681 * @return
682 * ADDR_E_RETURNCODE
683 ************************************************************************************************************************
684 */
685 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
686 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
687 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
688 ) const
689 {
690 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
691 input.size = sizeof(input);
692 input.cMaskFlags = pIn->cMaskFlags;
693 input.colorFlags = pIn->colorFlags;
694 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
695 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
696 input.numSlices = Max(pIn->numSlices, 1u);
697 input.swizzleMode = pIn->swizzleMode;
698 input.resourceType = pIn->resourceType;
699
700 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
701 output.size = sizeof(output);
702
703 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
704
705 if (returnCode == ADDR_OK)
706 {
707 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
708 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
709 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
710 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
711
712 CoordEq metaEq;
713
714 GetMetaEquation(&metaEq, 0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
715 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
716 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
717
718 UINT_32 xb = pIn->x / output.metaBlkWidth;
719 UINT_32 yb = pIn->y / output.metaBlkHeight;
720 UINT_32 zb = pIn->slice;
721
722 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
723 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
724 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
725
726 UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
727
728 pOut->addr = address >> 1;
729 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
730
731
732 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
733 pIn->swizzleMode);
734
735 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
736
737 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
738 }
739
740 return returnCode;
741 }
742
743 /**
744 ************************************************************************************************************************
745 * Gfx9Lib::HwlComputeHtileAddrFromCoord
746 *
747 * @brief
748 * Interface function stub of AddrComputeHtileAddrFromCoord
749 *
750 * @return
751 * ADDR_E_RETURNCODE
752 ************************************************************************************************************************
753 */
754 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
755 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
756 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
757 ) const
758 {
759 ADDR_E_RETURNCODE returnCode = ADDR_OK;
760
761 if (pIn->numMipLevels > 1)
762 {
763 returnCode = ADDR_NOTIMPLEMENTED;
764 }
765 else
766 {
767 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
768 input.size = sizeof(input);
769 input.hTileFlags = pIn->hTileFlags;
770 input.depthFlags = pIn->depthflags;
771 input.swizzleMode = pIn->swizzleMode;
772 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
773 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
774 input.numSlices = Max(pIn->numSlices, 1u);
775 input.numMipLevels = Max(pIn->numMipLevels, 1u);
776
777 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
778 output.size = sizeof(output);
779
780 returnCode = ComputeHtileInfo(&input, &output);
781
782 if (returnCode == ADDR_OK)
783 {
784 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
785 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
786 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
787 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
788
789 CoordEq metaEq;
790
791 GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
792 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
793 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
794
795 UINT_32 xb = pIn->x / output.metaBlkWidth;
796 UINT_32 yb = pIn->y / output.metaBlkHeight;
797 UINT_32 zb = pIn->slice;
798
799 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
800 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
801 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
802
803 UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
804
805 pOut->addr = address >> 1;
806
807 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
808 pIn->swizzleMode);
809
810 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
811
812 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
813 }
814 }
815
816 return returnCode;
817 }
818
819 /**
820 ************************************************************************************************************************
821 * Gfx9Lib::HwlComputeHtileCoordFromAddr
822 *
823 * @brief
824 * Interface function stub of AddrComputeHtileCoordFromAddr
825 *
826 * @return
827 * ADDR_E_RETURNCODE
828 ************************************************************************************************************************
829 */
830 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
831 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
832 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure
833 ) const
834 {
835 ADDR_E_RETURNCODE returnCode = ADDR_OK;
836
837 if (pIn->numMipLevels > 1)
838 {
839 returnCode = ADDR_NOTIMPLEMENTED;
840 }
841 else
842 {
843 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
844 input.size = sizeof(input);
845 input.hTileFlags = pIn->hTileFlags;
846 input.swizzleMode = pIn->swizzleMode;
847 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
848 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
849 input.numSlices = Max(pIn->numSlices, 1u);
850 input.numMipLevels = Max(pIn->numMipLevels, 1u);
851
852 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
853 output.size = sizeof(output);
854
855 returnCode = ComputeHtileInfo(&input, &output);
856
857 if (returnCode == ADDR_OK)
858 {
859 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
860 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
861 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
862 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
863
864 CoordEq metaEq;
865
866 GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
867 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
868 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
869
870 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
871 pIn->swizzleMode);
872
873 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
874
875 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
876
877 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
878 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
879
880 UINT_32 x, y, z, s, m;
881 metaEq.solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
882
883 pOut->slice = m / sliceSizeInBlock;
884 pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
885 pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x;
886 }
887 }
888
889 return returnCode;
890 }
891
892 /**
893 ************************************************************************************************************************
894 * Gfx9Lib::HwlComputeDccAddrFromCoord
895 *
896 * @brief
897 * Interface function stub of AddrComputeDccAddrFromCoord
898 *
899 * @return
900 * ADDR_E_RETURNCODE
901 ************************************************************************************************************************
902 */
903 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
904 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
905 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) const
906 {
907 ADDR_E_RETURNCODE returnCode = ADDR_OK;
908
909 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
910 {
911 returnCode = ADDR_NOTIMPLEMENTED;
912 }
913 else
914 {
915 ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
916 input.size = sizeof(input);
917 input.dccKeyFlags = pIn->dccKeyFlags;
918 input.colorFlags = pIn->colorFlags;
919 input.swizzleMode = pIn->swizzleMode;
920 input.resourceType = pIn->resourceType;
921 input.bpp = pIn->bpp;
922 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
923 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
924 input.numSlices = Max(pIn->numSlices, 1u);
925 input.numFrags = Max(pIn->numFrags, 1u);
926 input.numMipLevels = Max(pIn->numMipLevels, 1u);
927
928 ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
929 output.size = sizeof(output);
930
931 returnCode = ComputeDccInfo(&input, &output);
932
933 if (returnCode == ADDR_OK)
934 {
935 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
936 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
937 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
938 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
939 UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth);
940 UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth);
941 UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
942 UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
943
944 CoordEq metaEq;
945
946 GetMetaEquation(&metaEq, pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
947 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
948 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
949 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2);
950
951 UINT_32 xb = pIn->x / output.metaBlkWidth;
952 UINT_32 yb = pIn->y / output.metaBlkHeight;
953 UINT_32 zb = pIn->slice / output.metaBlkDepth;
954
955 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
956 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
957 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
958
959 UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
960
961 pOut->addr = address >> 1;
962
963 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
964 pIn->swizzleMode);
965
966 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
967
968 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
969 }
970 }
971
972 return returnCode;
973 }
974
975 /**
976 ************************************************************************************************************************
977 * Gfx9Lib::HwlInitGlobalParams
978 *
979 * @brief
980 * Initializes global parameters
981 *
982 * @return
983 * TRUE if all settings are valid
984 *
985 ************************************************************************************************************************
986 */
987 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
988 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
989 {
990 BOOL_32 valid = TRUE;
991
992 if (m_settings.isArcticIsland)
993 {
994 GB_ADDR_CONFIG gbAddrConfig;
995
996 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
997
998 // These values are copied from CModel code
999 switch (gbAddrConfig.bits.NUM_PIPES)
1000 {
1001 case ADDR_CONFIG_1_PIPE:
1002 m_pipes = 1;
1003 m_pipesLog2 = 0;
1004 break;
1005 case ADDR_CONFIG_2_PIPE:
1006 m_pipes = 2;
1007 m_pipesLog2 = 1;
1008 break;
1009 case ADDR_CONFIG_4_PIPE:
1010 m_pipes = 4;
1011 m_pipesLog2 = 2;
1012 break;
1013 case ADDR_CONFIG_8_PIPE:
1014 m_pipes = 8;
1015 m_pipesLog2 = 3;
1016 break;
1017 case ADDR_CONFIG_16_PIPE:
1018 m_pipes = 16;
1019 m_pipesLog2 = 4;
1020 break;
1021 case ADDR_CONFIG_32_PIPE:
1022 m_pipes = 32;
1023 m_pipesLog2 = 5;
1024 break;
1025 default:
1026 ADDR_ASSERT_ALWAYS();
1027 break;
1028 }
1029
1030 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1031 {
1032 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1033 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1034 m_pipeInterleaveLog2 = 8;
1035 break;
1036 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1037 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1038 m_pipeInterleaveLog2 = 9;
1039 break;
1040 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1041 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1042 m_pipeInterleaveLog2 = 10;
1043 break;
1044 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1045 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1046 m_pipeInterleaveLog2 = 11;
1047 break;
1048 default:
1049 ADDR_ASSERT_ALWAYS();
1050 break;
1051 }
1052
1053 switch (gbAddrConfig.bits.NUM_BANKS)
1054 {
1055 case ADDR_CONFIG_1_BANK:
1056 m_banks = 1;
1057 m_banksLog2 = 0;
1058 break;
1059 case ADDR_CONFIG_2_BANK:
1060 m_banks = 2;
1061 m_banksLog2 = 1;
1062 break;
1063 case ADDR_CONFIG_4_BANK:
1064 m_banks = 4;
1065 m_banksLog2 = 2;
1066 break;
1067 case ADDR_CONFIG_8_BANK:
1068 m_banks = 8;
1069 m_banksLog2 = 3;
1070 break;
1071 case ADDR_CONFIG_16_BANK:
1072 m_banks = 16;
1073 m_banksLog2 = 4;
1074 break;
1075 default:
1076 ADDR_ASSERT_ALWAYS();
1077 break;
1078 }
1079
1080 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1081 {
1082 case ADDR_CONFIG_1_SHADER_ENGINE:
1083 m_se = 1;
1084 m_seLog2 = 0;
1085 break;
1086 case ADDR_CONFIG_2_SHADER_ENGINE:
1087 m_se = 2;
1088 m_seLog2 = 1;
1089 break;
1090 case ADDR_CONFIG_4_SHADER_ENGINE:
1091 m_se = 4;
1092 m_seLog2 = 2;
1093 break;
1094 case ADDR_CONFIG_8_SHADER_ENGINE:
1095 m_se = 8;
1096 m_seLog2 = 3;
1097 break;
1098 default:
1099 ADDR_ASSERT_ALWAYS();
1100 break;
1101 }
1102
1103 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1104 {
1105 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1106 m_rbPerSe = 1;
1107 m_rbPerSeLog2 = 0;
1108 break;
1109 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1110 m_rbPerSe = 2;
1111 m_rbPerSeLog2 = 1;
1112 break;
1113 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1114 m_rbPerSe = 4;
1115 m_rbPerSeLog2 = 2;
1116 break;
1117 default:
1118 ADDR_ASSERT_ALWAYS();
1119 break;
1120 }
1121
1122 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1123 {
1124 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1125 m_maxCompFrag = 1;
1126 m_maxCompFragLog2 = 0;
1127 break;
1128 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1129 m_maxCompFrag = 2;
1130 m_maxCompFragLog2 = 1;
1131 break;
1132 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1133 m_maxCompFrag = 4;
1134 m_maxCompFragLog2 = 2;
1135 break;
1136 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1137 m_maxCompFrag = 8;
1138 m_maxCompFragLog2 = 3;
1139 break;
1140 default:
1141 ADDR_ASSERT_ALWAYS();
1142 break;
1143 }
1144
1145 m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1146 ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1147 ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1148 m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1149 }
1150 else
1151 {
1152 valid = FALSE;
1153 ADDR_NOT_IMPLEMENTED();
1154 }
1155
1156 if (valid)
1157 {
1158 InitEquationTable();
1159 }
1160
1161 return valid;
1162 }
1163
1164 /**
1165 ************************************************************************************************************************
1166 * Gfx9Lib::HwlConvertChipFamily
1167 *
1168 * @brief
1169 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1170 * @return
1171 * ChipFamily
1172 ************************************************************************************************************************
1173 */
1174 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1175 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1176 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1177 {
1178 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1179
1180 switch (uChipFamily)
1181 {
1182 case FAMILY_AI:
1183 m_settings.isArcticIsland = 1;
1184 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1185
1186 if (m_settings.isVega10)
1187 {
1188 m_settings.isDce12 = 1;
1189 }
1190
1191 m_settings.metaBaseAlignFix = 1;
1192
1193 m_settings.depthPipeXorDisable = 1;
1194 break;
1195
1196 case FAMILY_RV:
1197 m_settings.isArcticIsland = 1;
1198 m_settings.isRaven = ASICREV_IS_RAVEN(uChipRevision);
1199
1200 if (m_settings.isRaven)
1201 {
1202 m_settings.isDcn1 = 1;
1203 }
1204
1205 m_settings.metaBaseAlignFix = 1;
1206
1207 m_settings.depthPipeXorDisable = 1;
1208 break;
1209
1210 default:
1211 ADDR_ASSERT(!"This should be a Fusion");
1212 break;
1213 }
1214
1215 return family;
1216 }
1217
1218 /**
1219 ************************************************************************************************************************
1220 * Gfx9Lib::InitRbEquation
1221 *
1222 * @brief
1223 * Init RB equation
1224 * @return
1225 * N/A
1226 ************************************************************************************************************************
1227 */
1228 VOID Gfx9Lib::GetRbEquation(
1229 CoordEq* pRbEq, ///< [out] rb equation
1230 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1231 UINT_32 numSeLog2) ///< [in] number of shader engine
1232 {
1233 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1234 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1235 Coordinate cx('x', rbRegion);
1236 Coordinate cy('y', rbRegion);
1237
1238 UINT_32 start = 0;
1239 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1240
1241 // Clear the rb equation
1242 pRbEq->resize(0);
1243 pRbEq->resize(numRbTotalLog2);
1244
1245 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1246 {
1247 // Special case when more than 1 SE, and 2 RB per SE
1248 (*pRbEq)[0].add(cx);
1249 (*pRbEq)[0].add(cy);
1250 cx++;
1251 cy++;
1252 (*pRbEq)[0].add(cy);
1253 start++;
1254 }
1255
1256 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1257
1258 for (UINT_32 i = 0; i < numBits; i++)
1259 {
1260 UINT_32 idx =
1261 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1262
1263 if ((i % 2) == 1)
1264 {
1265 (*pRbEq)[idx].add(cx);
1266 cx++;
1267 }
1268 else
1269 {
1270 (*pRbEq)[idx].add(cy);
1271 cy++;
1272 }
1273 }
1274 }
1275
1276 /**
1277 ************************************************************************************************************************
1278 * Gfx9Lib::GetDataEquation
1279 *
1280 * @brief
1281 * Get data equation for fmask and Z
1282 * @return
1283 * N/A
1284 ************************************************************************************************************************
1285 */
1286 VOID Gfx9Lib::GetDataEquation(
1287 CoordEq* pDataEq, ///< [out] data surface equation
1288 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1289 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1290 AddrResourceType resourceType, ///< [in] data surface resource type
1291 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1292 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1293 const
1294 {
1295 Coordinate cx('x', 0);
1296 Coordinate cy('y', 0);
1297 Coordinate cz('z', 0);
1298 Coordinate cs('s', 0);
1299
1300 // Clear the equation
1301 pDataEq->resize(0);
1302 pDataEq->resize(27);
1303
1304 if (dataSurfaceType == Gfx9DataColor)
1305 {
1306 if (IsLinear(swizzleMode))
1307 {
1308 Coordinate cm('m', 0);
1309
1310 pDataEq->resize(49);
1311
1312 for (UINT_32 i = 0; i < 49; i++)
1313 {
1314 (*pDataEq)[i].add(cm);
1315 cm++;
1316 }
1317 }
1318 else if (IsThick(resourceType, swizzleMode))
1319 {
1320 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1321 UINT_32 i;
1322 if (IsStandardSwizzle(resourceType, swizzleMode))
1323 {
1324 // Standard 3d swizzle
1325 // Fill in bottom x bits
1326 for (i = elementBytesLog2; i < 4; i++)
1327 {
1328 (*pDataEq)[i].add(cx);
1329 cx++;
1330 }
1331 // Fill in 2 bits of y and then z
1332 for (i = 4; i < 6; i++)
1333 {
1334 (*pDataEq)[i].add(cy);
1335 cy++;
1336 }
1337 for (i = 6; i < 8; i++)
1338 {
1339 (*pDataEq)[i].add(cz);
1340 cz++;
1341 }
1342 if (elementBytesLog2 < 2)
1343 {
1344 // fill in z & y bit
1345 (*pDataEq)[8].add(cz);
1346 (*pDataEq)[9].add(cy);
1347 cz++;
1348 cy++;
1349 }
1350 else if (elementBytesLog2 == 2)
1351 {
1352 // fill in y and x bit
1353 (*pDataEq)[8].add(cy);
1354 (*pDataEq)[9].add(cx);
1355 cy++;
1356 cx++;
1357 }
1358 else
1359 {
1360 // fill in 2 x bits
1361 (*pDataEq)[8].add(cx);
1362 cx++;
1363 (*pDataEq)[9].add(cx);
1364 cx++;
1365 }
1366 }
1367 else
1368 {
1369 // Z 3d swizzle
1370 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1371 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1372 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1373 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1374 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1375 {
1376 (*pDataEq)[i].add(cz);
1377 cz++;
1378 }
1379 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1380 {
1381 // add an x and z
1382 (*pDataEq)[6].add(cx);
1383 (*pDataEq)[7].add(cz);
1384 cx++;
1385 cz++;
1386 }
1387 else if (elementBytesLog2 == 2)
1388 {
1389 // add a y and z
1390 (*pDataEq)[6].add(cy);
1391 (*pDataEq)[7].add(cz);
1392 cy++;
1393 cz++;
1394 }
1395 // add y and x
1396 (*pDataEq)[8].add(cy);
1397 (*pDataEq)[9].add(cx);
1398 cy++;
1399 cx++;
1400 }
1401 // Fill in bit 10 and up
1402 pDataEq->mort3d( cz, cy, cx, 10 );
1403 }
1404 else if (IsThin(resourceType, swizzleMode))
1405 {
1406 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1407 // Color 2D
1408 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1409 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1410 UINT_32 i;
1411 // Fill in bottom x bits
1412 for (i = elementBytesLog2; i < 4; i++)
1413 {
1414 (*pDataEq)[i].add(cx);
1415 cx++;
1416 }
1417 // Fill in bottom y bits
1418 for (i = 4; i < 4 + microYBits; i++)
1419 {
1420 (*pDataEq)[i].add(cy);
1421 cy++;
1422 }
1423 // Fill in last of the micro_x bits
1424 for (i = 4 + microYBits; i < 8; i++)
1425 {
1426 (*pDataEq)[i].add(cx);
1427 cx++;
1428 }
1429 // Fill in x/y bits below sample split
1430 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1431 // Fill in sample bits
1432 for (i = 0; i < numSamplesLog2; i++)
1433 {
1434 cs.set('s', i);
1435 (*pDataEq)[tileSplitStart + i].add(cs);
1436 }
1437 // Fill in x/y bits above sample split
1438 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1439 {
1440 pDataEq->mort2d(cx, cy, blockSizeLog2);
1441 }
1442 else
1443 {
1444 pDataEq->mort2d(cy, cx, blockSizeLog2);
1445 }
1446 }
1447 else
1448 {
1449 ADDR_ASSERT_ALWAYS();
1450 }
1451 }
1452 else
1453 {
1454 // Fmask or depth
1455 UINT_32 sampleStart = elementBytesLog2;
1456 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1457 UINT_32 ymajStart = 6 + numSamplesLog2;
1458
1459 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1460 {
1461 cs.set('s', s);
1462 (*pDataEq)[sampleStart + s].add(cs);
1463 }
1464
1465 // Put in the x-major order pixel bits
1466 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1467 // Put in the y-major order pixel bits
1468 pDataEq->mort2d(cy, cx, ymajStart);
1469 }
1470 }
1471
1472 /**
1473 ************************************************************************************************************************
1474 * Gfx9Lib::GetPipeEquation
1475 *
1476 * @brief
1477 * Get pipe equation
1478 * @return
1479 * N/A
1480 ************************************************************************************************************************
1481 */
1482 VOID Gfx9Lib::GetPipeEquation(
1483 CoordEq* pPipeEq, ///< [out] pipe equation
1484 CoordEq* pDataEq, ///< [in] data equation
1485 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1486 UINT_32 numPipeLog2, ///< [in] number of pipes
1487 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1488 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1489 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1490 AddrResourceType resourceType ///< [in] data surface resource type
1491 ) const
1492 {
1493 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1494 CoordEq dataEq;
1495
1496 pDataEq->copy(dataEq);
1497
1498 if (dataSurfaceType == Gfx9DataColor)
1499 {
1500 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1501 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1502 }
1503
1504 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1505
1506 // This section should only apply to z/stencil, maybe fmask
1507 // If the pipe bit is below the comp block size,
1508 // then keep moving up the address until we find a bit that is above
1509 UINT_32 pipeStart = 0;
1510
1511 if (dataSurfaceType != Gfx9DataColor)
1512 {
1513 Coordinate tileMin('x', 3);
1514
1515 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1516 {
1517 pipeStart++;
1518 }
1519
1520 // if pipe is 0, then the first pipe bit is above the comp block size,
1521 // so we don't need to do anything
1522 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1523 // we will get the same pipe equation
1524 if (pipeStart != 0)
1525 {
1526 for (UINT_32 i = 0; i < numPipeLog2; i++)
1527 {
1528 // Copy the jth bit above pipe interleave to the current pipe equation bit
1529 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1530 }
1531 }
1532 }
1533
1534 if (IsPrt(swizzleMode))
1535 {
1536 // Clear out bits above the block size if prt's are enabled
1537 dataEq.resize(blockSizeLog2);
1538 dataEq.resize(48);
1539 }
1540
1541 if (IsXor(swizzleMode))
1542 {
1543 CoordEq xorMask;
1544
1545 if (IsThick(resourceType, swizzleMode))
1546 {
1547 CoordEq xorMask2;
1548
1549 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1550
1551 xorMask.resize(numPipeLog2);
1552
1553 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1554 {
1555 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1556 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1557 }
1558 }
1559 else
1560 {
1561 // Xor in the bits above the pipe+gpu bits
1562 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1563
1564 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1565 {
1566 Coordinate co;
1567 CoordEq xorMask2;
1568 // if 1xaa and not prt, then xor in the z bits
1569 xorMask2.resize(0);
1570 xorMask2.resize(numPipeLog2);
1571 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1572 {
1573 co.set('z', numPipeLog2 - 1 - pipeIdx);
1574 xorMask2[pipeIdx].add(co);
1575 }
1576
1577 pPipeEq->xorin(xorMask2);
1578 }
1579 }
1580
1581 xorMask.reverse();
1582 pPipeEq->xorin(xorMask);
1583 }
1584 }
1585
1586 /**
1587 ************************************************************************************************************************
1588 * Gfx9Lib::GetMetaEquation
1589 *
1590 * @brief
1591 * Get meta equation for cmask/htile/DCC
1592 * @return
1593 * N/A
1594 ************************************************************************************************************************
1595 */
1596 VOID Gfx9Lib::GetMetaEquation(
1597 CoordEq* pMetaEq, ///< [out] meta equation
1598 UINT_32 maxMip, ///< [in] max mip Id
1599 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1600 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1601 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1602 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1603 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1604 AddrResourceType resourceType, ///< [in] data surface resource type
1605 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1606 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1607 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1608 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1609 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1610 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1611 const
1612 {
1613 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1614 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1615 //UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1616
1617 // Get the correct data address and rb equation
1618 CoordEq dataEq;
1619 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1620 elementBytesLog2, numSamplesLog2);
1621
1622 // Get pipe and rb equations
1623 CoordEq pipeEquation;
1624 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1625 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1626 numPipeTotalLog2 = pipeEquation.getsize();
1627
1628 if (metaFlag.linear)
1629 {
1630 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1631 ADDR_ASSERT_ALWAYS();
1632
1633 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1634
1635 dataEq.copy(*pMetaEq);
1636
1637 if (IsLinear(swizzleMode))
1638 {
1639 if (metaFlag.pipeAligned)
1640 {
1641 // Remove the pipe bits
1642 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1643 pMetaEq->shift(-shift, pipeInterleaveLog2);
1644 }
1645 // Divide by comp block size, which for linear (which is always color) is 256 B
1646 pMetaEq->shift(-8);
1647
1648 if (metaFlag.pipeAligned)
1649 {
1650 // Put pipe bits back in
1651 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1652
1653 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1654 {
1655 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1656 }
1657 }
1658 }
1659
1660 pMetaEq->shift(1);
1661 }
1662 else
1663 {
1664 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1665 UINT_32 compFragLog2 =
1666 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1667 maxCompFragLog2 : numSamplesLog2;
1668
1669 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1670
1671 // Make sure the metaaddr is cleared
1672 pMetaEq->resize(0);
1673 pMetaEq->resize(27);
1674
1675 if (IsThick(resourceType, swizzleMode))
1676 {
1677 Coordinate cx('x', 0);
1678 Coordinate cy('y', 0);
1679 Coordinate cz('z', 0);
1680
1681 if (maxMip > 0)
1682 {
1683 pMetaEq->mort3d(cy, cx, cz);
1684 }
1685 else
1686 {
1687 pMetaEq->mort3d(cx, cy, cz);
1688 }
1689 }
1690 else
1691 {
1692 Coordinate cx('x', 0);
1693 Coordinate cy('y', 0);
1694 Coordinate cs;
1695
1696 if (maxMip > 0)
1697 {
1698 pMetaEq->mort2d(cy, cx, compFragLog2);
1699 }
1700 else
1701 {
1702 pMetaEq->mort2d(cx, cy, compFragLog2);
1703 }
1704
1705 //------------------------------------------------------------------------------------------------------------------------
1706 // Put the compressible fragments at the lsb
1707 // the uncompressible frags will be at the msb of the micro address
1708 //------------------------------------------------------------------------------------------------------------------------
1709 for (UINT_32 s = 0; s < compFragLog2; s++)
1710 {
1711 cs.set('s', s);
1712 (*pMetaEq)[s].add(cs);
1713 }
1714 }
1715
1716 // Keep a copy of the pipe equations
1717 CoordEq origPipeEquation;
1718 pipeEquation.copy(origPipeEquation);
1719
1720 Coordinate co;
1721 // filter out everything under the compressed block size
1722 co.set('x', compBlkWidthLog2);
1723 pMetaEq->Filter('<', co, 0, 'x');
1724 co.set('y', compBlkHeightLog2);
1725 pMetaEq->Filter('<', co, 0, 'y');
1726 co.set('z', compBlkDepthLog2);
1727 pMetaEq->Filter('<', co, 0, 'z');
1728
1729 // For non-color, filter out sample bits
1730 if (dataSurfaceType != Gfx9DataColor)
1731 {
1732 co.set('x', 0);
1733 pMetaEq->Filter('<', co, 0, 's');
1734 }
1735
1736 // filter out everything above the metablock size
1737 co.set('x', metaBlkWidthLog2 - 1);
1738 pMetaEq->Filter('>', co, 0, 'x');
1739 co.set('y', metaBlkHeightLog2 - 1);
1740 pMetaEq->Filter('>', co, 0, 'y');
1741 co.set('z', metaBlkDepthLog2 - 1);
1742 pMetaEq->Filter('>', co, 0, 'z');
1743
1744 // filter out everything above the metablock size for the channel bits
1745 co.set('x', metaBlkWidthLog2 - 1);
1746 pipeEquation.Filter('>', co, 0, 'x');
1747 co.set('y', metaBlkHeightLog2 - 1);
1748 pipeEquation.Filter('>', co, 0, 'y');
1749 co.set('z', metaBlkDepthLog2 - 1);
1750 pipeEquation.Filter('>', co, 0, 'z');
1751
1752 // Make sure we still have the same number of channel bits
1753 if (pipeEquation.getsize() != numPipeTotalLog2)
1754 {
1755 ADDR_ASSERT_ALWAYS();
1756 }
1757
1758 // Loop through all channel and rb bits,
1759 // and make sure these components exist in the metadata address
1760 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1761 {
1762 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1763 {
1764 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1765 {
1766 ADDR_ASSERT_ALWAYS();
1767 }
1768 }
1769 }
1770
1771 UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1772 UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1773 CoordEq origRbEquation;
1774
1775 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1776
1777 CoordEq rbEquation = origRbEquation;
1778
1779 UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1780
1781 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1782 {
1783 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1784 {
1785 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1786 {
1787 ADDR_ASSERT_ALWAYS();
1788 }
1789 }
1790 }
1791
1792 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1793 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1794 {
1795 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1796 {
1797 if (rbEquation[i] == pipeEquation[j])
1798 {
1799 rbEquation[i].Clear();
1800 }
1801 }
1802 }
1803
1804 // Loop through each bit of the channel, get the smallest coordinate,
1805 // and remove it from the metaaddr, and rb_equation
1806 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1807 {
1808 pipeEquation[i].getsmallest(co);
1809
1810 UINT_32 old_size = pMetaEq->getsize();
1811 pMetaEq->Filter('=', co);
1812 UINT_32 new_size = pMetaEq->getsize();
1813 if (new_size != old_size-1)
1814 {
1815 ADDR_ASSERT_ALWAYS();
1816 }
1817 pipeEquation.remove(co);
1818 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
1819 {
1820 if (rbEquation[j].remove(co))
1821 {
1822 // if we actually removed something from this bit, then add the remaining
1823 // channel bits, as these can be removed for this bit
1824 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
1825 {
1826 if (pipeEquation[i][k] != co)
1827 {
1828 rbEquation[j].add(pipeEquation[i][k]);
1829 }
1830 }
1831 }
1832 }
1833 }
1834
1835 // Loop through the rb bits and see what remain;
1836 // filter out the smallest coordinate if it remains
1837 UINT_32 rbBitsLeft = 0;
1838 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1839 {
1840 if (rbEquation[i].getsize() > 0)
1841 {
1842 rbBitsLeft++;
1843 rbEquation[i].getsmallest(co);
1844 UINT_32 old_size = pMetaEq->getsize();
1845 pMetaEq->Filter('=', co);
1846 UINT_32 new_size = pMetaEq->getsize();
1847 if (new_size != old_size - 1)
1848 {
1849 // assert warning
1850 }
1851 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
1852 {
1853 if (rbEquation[j].remove(co))
1854 {
1855 // if we actually removed something from this bit, then add the remaining
1856 // rb bits, as these can be removed for this bit
1857 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
1858 {
1859 if (rbEquation[i][k] != co)
1860 {
1861 rbEquation[j].add(rbEquation[i][k]);
1862 }
1863 }
1864 }
1865 }
1866 }
1867 }
1868
1869 // capture the size of the metaaddr
1870 UINT_32 metaSize = pMetaEq->getsize();
1871 // resize to 49 bits...make this a nibble address
1872 pMetaEq->resize(49);
1873 // Concatenate the macro address above the current address
1874 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
1875 {
1876 co.set('m', j);
1877 (*pMetaEq)[i].add(co);
1878 }
1879
1880 // Multiply by meta element size (in nibbles)
1881 if (dataSurfaceType == Gfx9DataColor)
1882 {
1883 pMetaEq->shift(1);
1884 }
1885 else if (dataSurfaceType == Gfx9DataDepthStencil)
1886 {
1887 pMetaEq->shift(3);
1888 }
1889
1890 //------------------------------------------------------------------------------------------
1891 // Note the pipeInterleaveLog2+1 is because address is a nibble address
1892 // Shift up from pipe interleave number of channel
1893 // and rb bits left, and uncompressed fragments
1894 //------------------------------------------------------------------------------------------
1895
1896 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
1897
1898 // Put in the channel bits
1899 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1900 {
1901 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
1902 }
1903
1904 // Put in remaining rb bits
1905 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
1906 {
1907 if (rbEquation[i].getsize() > 0)
1908 {
1909 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
1910 // Mark any rb bit we add in to the rb mask
1911 j++;
1912 }
1913 }
1914
1915 //------------------------------------------------------------------------------------------
1916 // Put in the uncompressed fragment bits
1917 //------------------------------------------------------------------------------------------
1918 for (UINT_32 i = 0; i < uncompFragLog2; i++)
1919 {
1920 co.set('s', compFragLog2 + i);
1921 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
1922 }
1923 }
1924 }
1925
1926 /**
1927 ************************************************************************************************************************
1928 * Gfx9Lib::IsEquationSupported
1929 *
1930 * @brief
1931 * Check if equation is supported for given swizzle mode and resource type.
1932 *
1933 * @return
1934 * TRUE if supported
1935 ************************************************************************************************************************
1936 */
1937 BOOL_32 Gfx9Lib::IsEquationSupported(
1938 AddrResourceType rsrcType,
1939 AddrSwizzleMode swMode,
1940 UINT_32 elementBytesLog2) const
1941 {
1942 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
1943 (IsLinear(swMode) == FALSE) &&
1944 (((IsTex2d(rsrcType) == TRUE) &&
1945 ((elementBytesLog2 < 4) ||
1946 ((IsRotateSwizzle(swMode) == FALSE) &&
1947 (IsZOrderSwizzle(swMode) == FALSE)))) ||
1948 ((IsTex3d(rsrcType) == TRUE) &&
1949 (IsRotateSwizzle(swMode) == FALSE) &&
1950 (IsBlock256b(swMode) == FALSE)));
1951
1952 return supported;
1953 }
1954
1955 /**
1956 ************************************************************************************************************************
1957 * Gfx9Lib::InitEquationTable
1958 *
1959 * @brief
1960 * Initialize Equation table.
1961 *
1962 * @return
1963 * N/A
1964 ************************************************************************************************************************
1965 */
1966 VOID Gfx9Lib::InitEquationTable()
1967 {
1968 memset(m_equationTable, 0, sizeof(m_equationTable));
1969
1970 // Loop all possible resource type (2D/3D)
1971 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1972 {
1973 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1974
1975 // Loop all possible swizzle mode
1976 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
1977 {
1978 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1979
1980 // Loop all possible bpp
1981 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
1982 {
1983 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1984
1985 // Check if the input is supported
1986 if (IsEquationSupported(rsrcType, swMode, bppIdx))
1987 {
1988 ADDR_EQUATION equation;
1989 ADDR_E_RETURNCODE retCode;
1990
1991 memset(&equation, 0, sizeof(ADDR_EQUATION));
1992
1993 // Generate the equation
1994 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
1995 {
1996 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
1997 }
1998 else if (IsThin(rsrcType, swMode))
1999 {
2000 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2001 }
2002 else
2003 {
2004 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2005 }
2006
2007 // Only fill the equation into the table if the return code is ADDR_OK,
2008 // otherwise if the return code is not ADDR_OK, it indicates this is not
2009 // a valid input, we do nothing but just fill invalid equation index
2010 // into the lookup table.
2011 if (retCode == ADDR_OK)
2012 {
2013 equationIndex = m_numEquations;
2014 ADDR_ASSERT(equationIndex < EquationTableSize);
2015
2016 m_equationTable[equationIndex] = equation;
2017
2018 m_numEquations++;
2019 }
2020 else
2021 {
2022 ADDR_ASSERT_ALWAYS();
2023 }
2024 }
2025
2026 // Fill the index into the lookup table, if the combination is not supported
2027 // fill the invalid equation index
2028 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2029 }
2030 }
2031 }
2032 }
2033
2034 /**
2035 ************************************************************************************************************************
2036 * Gfx9Lib::HwlGetEquationIndex
2037 *
2038 * @brief
2039 * Interface function stub of GetEquationIndex
2040 *
2041 * @return
2042 * ADDR_E_RETURNCODE
2043 ************************************************************************************************************************
2044 */
2045 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2046 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2047 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2048 ) const
2049 {
2050 AddrResourceType rsrcType = pIn->resourceType;
2051 AddrSwizzleMode swMode = pIn->swizzleMode;
2052 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2053 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2054
2055 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2056 {
2057 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2058 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2059
2060 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2061 }
2062
2063 if (pOut->pMipInfo != NULL)
2064 {
2065 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2066 {
2067 pOut->pMipInfo[i].equationIndex = index;
2068 }
2069 }
2070
2071 return index;
2072 }
2073
2074 /**
2075 ************************************************************************************************************************
2076 * Gfx9Lib::HwlComputeBlock256Equation
2077 *
2078 * @brief
2079 * Interface function stub of ComputeBlock256Equation
2080 *
2081 * @return
2082 * ADDR_E_RETURNCODE
2083 ************************************************************************************************************************
2084 */
2085 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2086 AddrResourceType rsrcType,
2087 AddrSwizzleMode swMode,
2088 UINT_32 elementBytesLog2,
2089 ADDR_EQUATION* pEquation) const
2090 {
2091 ADDR_E_RETURNCODE ret = ADDR_OK;
2092
2093 pEquation->numBits = 8;
2094
2095 UINT_32 i = 0;
2096 for (; i < elementBytesLog2; i++)
2097 {
2098 InitChannel(1, 0 , i, &pEquation->addr[i]);
2099 }
2100
2101 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2102
2103 const UINT_32 maxBitsUsed = 4;
2104 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2105 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2106
2107 for (i = 0; i < maxBitsUsed; i++)
2108 {
2109 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2110 InitChannel(1, 1, i, &y[i]);
2111 }
2112
2113 if (IsStandardSwizzle(rsrcType, swMode))
2114 {
2115 switch (elementBytesLog2)
2116 {
2117 case 0:
2118 pixelBit[0] = x[0];
2119 pixelBit[1] = x[1];
2120 pixelBit[2] = x[2];
2121 pixelBit[3] = x[3];
2122 pixelBit[4] = y[0];
2123 pixelBit[5] = y[1];
2124 pixelBit[6] = y[2];
2125 pixelBit[7] = y[3];
2126 break;
2127 case 1:
2128 pixelBit[0] = x[0];
2129 pixelBit[1] = x[1];
2130 pixelBit[2] = x[2];
2131 pixelBit[3] = y[0];
2132 pixelBit[4] = y[1];
2133 pixelBit[5] = y[2];
2134 pixelBit[6] = x[3];
2135 break;
2136 case 2:
2137 pixelBit[0] = x[0];
2138 pixelBit[1] = x[1];
2139 pixelBit[2] = y[0];
2140 pixelBit[3] = y[1];
2141 pixelBit[4] = y[2];
2142 pixelBit[5] = x[2];
2143 break;
2144 case 3:
2145 pixelBit[0] = x[0];
2146 pixelBit[1] = y[0];
2147 pixelBit[2] = y[1];
2148 pixelBit[3] = x[1];
2149 pixelBit[4] = x[2];
2150 break;
2151 case 4:
2152 pixelBit[0] = y[0];
2153 pixelBit[1] = y[1];
2154 pixelBit[2] = x[0];
2155 pixelBit[3] = x[1];
2156 break;
2157 default:
2158 ADDR_ASSERT_ALWAYS();
2159 ret = ADDR_INVALIDPARAMS;
2160 break;
2161 }
2162 }
2163 else if (IsDisplaySwizzle(rsrcType, swMode))
2164 {
2165 switch (elementBytesLog2)
2166 {
2167 case 0:
2168 pixelBit[0] = x[0];
2169 pixelBit[1] = x[1];
2170 pixelBit[2] = x[2];
2171 pixelBit[3] = y[1];
2172 pixelBit[4] = y[0];
2173 pixelBit[5] = y[2];
2174 pixelBit[6] = x[3];
2175 pixelBit[7] = y[3];
2176 break;
2177 case 1:
2178 pixelBit[0] = x[0];
2179 pixelBit[1] = x[1];
2180 pixelBit[2] = x[2];
2181 pixelBit[3] = y[0];
2182 pixelBit[4] = y[1];
2183 pixelBit[5] = y[2];
2184 pixelBit[6] = x[3];
2185 break;
2186 case 2:
2187 pixelBit[0] = x[0];
2188 pixelBit[1] = x[1];
2189 pixelBit[2] = y[0];
2190 pixelBit[3] = x[2];
2191 pixelBit[4] = y[1];
2192 pixelBit[5] = y[2];
2193 break;
2194 case 3:
2195 pixelBit[0] = x[0];
2196 pixelBit[1] = y[0];
2197 pixelBit[2] = x[1];
2198 pixelBit[3] = x[2];
2199 pixelBit[4] = y[1];
2200 break;
2201 case 4:
2202 pixelBit[0] = x[0];
2203 pixelBit[1] = y[0];
2204 pixelBit[2] = x[1];
2205 pixelBit[3] = y[1];
2206 break;
2207 default:
2208 ADDR_ASSERT_ALWAYS();
2209 ret = ADDR_INVALIDPARAMS;
2210 break;
2211 }
2212 }
2213 else if (IsRotateSwizzle(swMode))
2214 {
2215 switch (elementBytesLog2)
2216 {
2217 case 0:
2218 pixelBit[0] = y[0];
2219 pixelBit[1] = y[1];
2220 pixelBit[2] = y[2];
2221 pixelBit[3] = x[1];
2222 pixelBit[4] = x[0];
2223 pixelBit[5] = x[2];
2224 pixelBit[6] = x[3];
2225 pixelBit[7] = y[3];
2226 break;
2227 case 1:
2228 pixelBit[0] = y[0];
2229 pixelBit[1] = y[1];
2230 pixelBit[2] = y[2];
2231 pixelBit[3] = x[0];
2232 pixelBit[4] = x[1];
2233 pixelBit[5] = x[2];
2234 pixelBit[6] = x[3];
2235 break;
2236 case 2:
2237 pixelBit[0] = y[0];
2238 pixelBit[1] = y[1];
2239 pixelBit[2] = x[0];
2240 pixelBit[3] = y[2];
2241 pixelBit[4] = x[1];
2242 pixelBit[5] = x[2];
2243 break;
2244 case 3:
2245 pixelBit[0] = y[0];
2246 pixelBit[1] = x[0];
2247 pixelBit[2] = y[1];
2248 pixelBit[3] = x[1];
2249 pixelBit[4] = x[2];
2250 break;
2251 default:
2252 ADDR_ASSERT_ALWAYS();
2253 case 4:
2254 ret = ADDR_INVALIDPARAMS;
2255 break;
2256 }
2257 }
2258 else
2259 {
2260 ADDR_ASSERT_ALWAYS();
2261 ret = ADDR_INVALIDPARAMS;
2262 }
2263
2264 // Post validation
2265 if (ret == ADDR_OK)
2266 {
2267 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2268 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2269 (microBlockDim.w * (1 << elementBytesLog2)));
2270 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2271 }
2272
2273 return ret;
2274 }
2275
2276 /**
2277 ************************************************************************************************************************
2278 * Gfx9Lib::HwlComputeThinEquation
2279 *
2280 * @brief
2281 * Interface function stub of ComputeThinEquation
2282 *
2283 * @return
2284 * ADDR_E_RETURNCODE
2285 ************************************************************************************************************************
2286 */
2287 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2288 AddrResourceType rsrcType,
2289 AddrSwizzleMode swMode,
2290 UINT_32 elementBytesLog2,
2291 ADDR_EQUATION* pEquation) const
2292 {
2293 ADDR_E_RETURNCODE ret = ADDR_OK;
2294
2295 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2296
2297 UINT_32 maxXorBits = blockSizeLog2;
2298 if (IsNonPrtXor(swMode))
2299 {
2300 // For non-prt-xor, maybe need to initialize some more bits for xor
2301 // The highest xor bit used in equation will be max the following 3 items:
2302 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2303 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2304 // 3. blockSizeLog2
2305
2306 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2307 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2308 GetPipeXorBits(blockSizeLog2) +
2309 2 * GetBankXorBits(blockSizeLog2));
2310 }
2311
2312 const UINT_32 maxBitsUsed = 14;
2313 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2314 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2315 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2316
2317 const UINT_32 extraXorBits = 16;
2318 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2319 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2320
2321 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2322 {
2323 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2324 InitChannel(1, 1, i, &y[i]);
2325 }
2326
2327 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2328
2329 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2330 {
2331 InitChannel(1, 0 , i, &pixelBit[i]);
2332 }
2333
2334 UINT_32 xIdx = 0;
2335 UINT_32 yIdx = 0;
2336 UINT_32 lowBits = 0;
2337
2338 if (IsZOrderSwizzle(swMode))
2339 {
2340 if (elementBytesLog2 <= 3)
2341 {
2342 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2343 {
2344 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2345 }
2346
2347 lowBits = 6;
2348 }
2349 else
2350 {
2351 ret = ADDR_INVALIDPARAMS;
2352 }
2353 }
2354 else
2355 {
2356 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2357
2358 if (ret == ADDR_OK)
2359 {
2360 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2361 xIdx = Log2(microBlockDim.w);
2362 yIdx = Log2(microBlockDim.h);
2363 lowBits = 8;
2364 }
2365 }
2366
2367 if (ret == ADDR_OK)
2368 {
2369 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2370 {
2371 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2372 }
2373
2374 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2375 {
2376 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2377 }
2378
2379 if (IsXor(swMode))
2380 {
2381 // Fill XOR bits
2382 UINT_32 pipeStart = m_pipeInterleaveLog2;
2383 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2384
2385 UINT_32 bankStart = pipeStart + pipeXorBits;
2386 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2387
2388 for (UINT_32 i = 0; i < pipeXorBits; i++)
2389 {
2390 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2391 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2392 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2393
2394 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2395 }
2396
2397 for (UINT_32 i = 0; i < bankXorBits; i++)
2398 {
2399 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2400 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2401 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2402
2403 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2404 }
2405
2406 if (IsPrt(swMode) == FALSE)
2407 {
2408 for (UINT_32 i = 0; i < pipeXorBits; i++)
2409 {
2410 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2411 }
2412
2413 for (UINT_32 i = 0; i < bankXorBits; i++)
2414 {
2415 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2416 }
2417 }
2418 }
2419
2420 pEquation->numBits = blockSizeLog2;
2421 }
2422
2423 return ret;
2424 }
2425
2426 /**
2427 ************************************************************************************************************************
2428 * Gfx9Lib::HwlComputeThickEquation
2429 *
2430 * @brief
2431 * Interface function stub of ComputeThickEquation
2432 *
2433 * @return
2434 * ADDR_E_RETURNCODE
2435 ************************************************************************************************************************
2436 */
2437 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2438 AddrResourceType rsrcType,
2439 AddrSwizzleMode swMode,
2440 UINT_32 elementBytesLog2,
2441 ADDR_EQUATION* pEquation) const
2442 {
2443 ADDR_E_RETURNCODE ret = ADDR_OK;
2444
2445 ADDR_ASSERT(IsTex3d(rsrcType));
2446
2447 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2448
2449 UINT_32 maxXorBits = blockSizeLog2;
2450 if (IsNonPrtXor(swMode))
2451 {
2452 // For non-prt-xor, maybe need to initialize some more bits for xor
2453 // The highest xor bit used in equation will be max the following 3:
2454 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2455 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2456 // 3. blockSizeLog2
2457
2458 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2459 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2460 GetPipeXorBits(blockSizeLog2) +
2461 3 * GetBankXorBits(blockSizeLog2));
2462 }
2463
2464 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2465 {
2466 InitChannel(1, 0 , i, &pEquation->addr[i]);
2467 }
2468
2469 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2470
2471 const UINT_32 maxBitsUsed = 12;
2472 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2473 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2474 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2475 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2476
2477 const UINT_32 extraXorBits = 24;
2478 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2479 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2480
2481 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2482 {
2483 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2484 InitChannel(1, 1, i, &y[i]);
2485 InitChannel(1, 2, i, &z[i]);
2486 }
2487
2488 if (IsZOrderSwizzle(swMode))
2489 {
2490 switch (elementBytesLog2)
2491 {
2492 case 0:
2493 pixelBit[0] = x[0];
2494 pixelBit[1] = y[0];
2495 pixelBit[2] = x[1];
2496 pixelBit[3] = y[1];
2497 pixelBit[4] = z[0];
2498 pixelBit[5] = z[1];
2499 pixelBit[6] = x[2];
2500 pixelBit[7] = z[2];
2501 pixelBit[8] = y[2];
2502 pixelBit[9] = x[3];
2503 break;
2504 case 1:
2505 pixelBit[0] = x[0];
2506 pixelBit[1] = y[0];
2507 pixelBit[2] = x[1];
2508 pixelBit[3] = y[1];
2509 pixelBit[4] = z[0];
2510 pixelBit[5] = z[1];
2511 pixelBit[6] = z[2];
2512 pixelBit[7] = y[2];
2513 pixelBit[8] = x[2];
2514 break;
2515 case 2:
2516 pixelBit[0] = x[0];
2517 pixelBit[1] = y[0];
2518 pixelBit[2] = x[1];
2519 pixelBit[3] = z[0];
2520 pixelBit[4] = y[1];
2521 pixelBit[5] = z[1];
2522 pixelBit[6] = y[2];
2523 pixelBit[7] = x[2];
2524 break;
2525 case 3:
2526 pixelBit[0] = x[0];
2527 pixelBit[1] = y[0];
2528 pixelBit[2] = z[0];
2529 pixelBit[3] = x[1];
2530 pixelBit[4] = z[1];
2531 pixelBit[5] = y[1];
2532 pixelBit[6] = x[2];
2533 break;
2534 case 4:
2535 pixelBit[0] = x[0];
2536 pixelBit[1] = y[0];
2537 pixelBit[2] = z[0];
2538 pixelBit[3] = z[1];
2539 pixelBit[4] = y[1];
2540 pixelBit[5] = x[1];
2541 break;
2542 default:
2543 ADDR_ASSERT_ALWAYS();
2544 ret = ADDR_INVALIDPARAMS;
2545 break;
2546 }
2547 }
2548 else if (IsStandardSwizzle(rsrcType, swMode))
2549 {
2550 switch (elementBytesLog2)
2551 {
2552 case 0:
2553 pixelBit[0] = x[0];
2554 pixelBit[1] = x[1];
2555 pixelBit[2] = x[2];
2556 pixelBit[3] = x[3];
2557 pixelBit[4] = y[0];
2558 pixelBit[5] = y[1];
2559 pixelBit[6] = z[0];
2560 pixelBit[7] = z[1];
2561 pixelBit[8] = z[2];
2562 pixelBit[9] = y[2];
2563 break;
2564 case 1:
2565 pixelBit[0] = x[0];
2566 pixelBit[1] = x[1];
2567 pixelBit[2] = x[2];
2568 pixelBit[3] = y[0];
2569 pixelBit[4] = y[1];
2570 pixelBit[5] = z[0];
2571 pixelBit[6] = z[1];
2572 pixelBit[7] = z[2];
2573 pixelBit[8] = y[2];
2574 break;
2575 case 2:
2576 pixelBit[0] = x[0];
2577 pixelBit[1] = x[1];
2578 pixelBit[2] = y[0];
2579 pixelBit[3] = y[1];
2580 pixelBit[4] = z[0];
2581 pixelBit[5] = z[1];
2582 pixelBit[6] = y[2];
2583 pixelBit[7] = x[2];
2584 break;
2585 case 3:
2586 pixelBit[0] = x[0];
2587 pixelBit[1] = y[0];
2588 pixelBit[2] = y[1];
2589 pixelBit[3] = z[0];
2590 pixelBit[4] = z[1];
2591 pixelBit[5] = x[1];
2592 pixelBit[6] = x[2];
2593 break;
2594 case 4:
2595 pixelBit[0] = y[0];
2596 pixelBit[1] = y[1];
2597 pixelBit[2] = z[0];
2598 pixelBit[3] = z[1];
2599 pixelBit[4] = x[0];
2600 pixelBit[5] = x[1];
2601 break;
2602 default:
2603 ADDR_ASSERT_ALWAYS();
2604 ret = ADDR_INVALIDPARAMS;
2605 break;
2606 }
2607 }
2608 else
2609 {
2610 ADDR_ASSERT_ALWAYS();
2611 ret = ADDR_INVALIDPARAMS;
2612 }
2613
2614 if (ret == ADDR_OK)
2615 {
2616 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2617 UINT_32 xIdx = Log2(microBlockDim.w);
2618 UINT_32 yIdx = Log2(microBlockDim.h);
2619 UINT_32 zIdx = Log2(microBlockDim.d);
2620
2621 pixelBit = pEquation->addr;
2622
2623 const UINT_32 lowBits = 10;
2624 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2625 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2626
2627 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2628 {
2629 if ((i % 3) == 0)
2630 {
2631 pixelBit[i] = x[xIdx++];
2632 }
2633 else if ((i % 3) == 1)
2634 {
2635 pixelBit[i] = z[zIdx++];
2636 }
2637 else
2638 {
2639 pixelBit[i] = y[yIdx++];
2640 }
2641 }
2642
2643 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2644 {
2645 if ((i % 3) == 0)
2646 {
2647 xorExtra[i - blockSizeLog2] = x[xIdx++];
2648 }
2649 else if ((i % 3) == 1)
2650 {
2651 xorExtra[i - blockSizeLog2] = z[zIdx++];
2652 }
2653 else
2654 {
2655 xorExtra[i - blockSizeLog2] = y[yIdx++];
2656 }
2657 }
2658
2659 if (IsXor(swMode))
2660 {
2661 // Fill XOR bits
2662 UINT_32 pipeStart = m_pipeInterleaveLog2;
2663 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2664 for (UINT_32 i = 0; i < pipeXorBits; i++)
2665 {
2666 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2667 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2668 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2669
2670 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2671
2672 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2673 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2674 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2675
2676 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2677 }
2678
2679 UINT_32 bankStart = pipeStart + pipeXorBits;
2680 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2681 for (UINT_32 i = 0; i < bankXorBits; i++)
2682 {
2683 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2684 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2685 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2686
2687 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2688
2689 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2690 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2691 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2692
2693 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2694 }
2695 }
2696
2697 pEquation->numBits = blockSizeLog2;
2698 }
2699
2700 return ret;
2701 }
2702
2703 /**
2704 ************************************************************************************************************************
2705 * Gfx9Lib::IsValidDisplaySwizzleMode
2706 *
2707 * @brief
2708 * Check if a swizzle mode is supported by display engine
2709 *
2710 * @return
2711 * TRUE is swizzle mode is supported by display engine
2712 ************************************************************************************************************************
2713 */
2714 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2715 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2716 {
2717 BOOL_32 support = FALSE;
2718
2719 //const AddrResourceType resourceType = pIn->resourceType;
2720 const AddrSwizzleMode swizzleMode = pIn->swizzleMode;
2721
2722 if (m_settings.isDce12)
2723 {
2724 switch (swizzleMode)
2725 {
2726 case ADDR_SW_256B_D:
2727 case ADDR_SW_256B_R:
2728 support = (pIn->bpp == 32);
2729 break;
2730
2731 case ADDR_SW_LINEAR:
2732 case ADDR_SW_4KB_D:
2733 case ADDR_SW_4KB_R:
2734 case ADDR_SW_64KB_D:
2735 case ADDR_SW_64KB_R:
2736 case ADDR_SW_VAR_D:
2737 case ADDR_SW_VAR_R:
2738 case ADDR_SW_4KB_D_X:
2739 case ADDR_SW_4KB_R_X:
2740 case ADDR_SW_64KB_D_X:
2741 case ADDR_SW_64KB_R_X:
2742 case ADDR_SW_VAR_D_X:
2743 case ADDR_SW_VAR_R_X:
2744 support = (pIn->bpp <= 64);
2745 break;
2746
2747 default:
2748 break;
2749 }
2750 }
2751 else if (m_settings.isDcn1)
2752 {
2753 switch (swizzleMode)
2754 {
2755 case ADDR_SW_4KB_D:
2756 case ADDR_SW_64KB_D:
2757 case ADDR_SW_VAR_D:
2758 case ADDR_SW_64KB_D_T:
2759 case ADDR_SW_4KB_D_X:
2760 case ADDR_SW_64KB_D_X:
2761 case ADDR_SW_VAR_D_X:
2762 support = (pIn->bpp == 64);
2763 break;
2764
2765 case ADDR_SW_LINEAR:
2766 case ADDR_SW_4KB_S:
2767 case ADDR_SW_64KB_S:
2768 case ADDR_SW_VAR_S:
2769 case ADDR_SW_64KB_S_T:
2770 case ADDR_SW_4KB_S_X:
2771 case ADDR_SW_64KB_S_X:
2772 case ADDR_SW_VAR_S_X:
2773 support = (pIn->bpp <= 64);
2774 break;
2775
2776 default:
2777 break;
2778 }
2779 }
2780 else
2781 {
2782 ADDR_NOT_IMPLEMENTED();
2783 }
2784
2785 return support;
2786 }
2787
2788 /**
2789 ************************************************************************************************************************
2790 * Gfx9Lib::HwlComputePipeBankXor
2791 *
2792 * @brief
2793 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2794 *
2795 * @return
2796 * PipeBankXor value
2797 ************************************************************************************************************************
2798 */
2799 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
2800 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
2801 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
2802 {
2803 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2804 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
2805 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
2806
2807 UINT_32 pipeXor = 0;
2808 UINT_32 bankXor = 0;
2809
2810 const UINT_32 bankMask = (1 << bankBits) - 1;
2811 const UINT_32 index = pIn->surfIndex & bankMask;
2812
2813 const UINT_32 bpp = pIn->flags.fmask ?
2814 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
2815 if (bankBits == 4)
2816 {
2817 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
2818 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
2819
2820 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
2821 }
2822 else if (bankBits > 0)
2823 {
2824 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
2825 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
2826 bankXor = (index * bankIncrease) & bankMask;
2827 }
2828
2829 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
2830
2831 return ADDR_OK;
2832 }
2833
2834 /**
2835 ************************************************************************************************************************
2836 * Gfx9Lib::HwlComputeSlicePipeBankXor
2837 *
2838 * @brief
2839 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2840 *
2841 * @return
2842 * PipeBankXor value
2843 ************************************************************************************************************************
2844 */
2845 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
2846 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
2847 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
2848 {
2849 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2850 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
2851 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
2852
2853 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
2854 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
2855
2856 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
2857
2858 return ADDR_OK;
2859 }
2860
2861 /**
2862 ************************************************************************************************************************
2863 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2864 *
2865 * @brief
2866 * Compute sub resource offset to support swizzle pattern
2867 *
2868 * @return
2869 * Offset
2870 ************************************************************************************************************************
2871 */
2872 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2873 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
2874 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
2875 {
2876 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2877
2878 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2879 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
2880 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
2881 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
2882 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
2883 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
2884
2885 pOut->offset = pIn->slice * pIn->sliceSize +
2886 pIn->macroBlockOffset +
2887 (pIn->mipTailOffset ^ pipeBankXor) -
2888 static_cast<UINT_64>(pipeBankXor);
2889 return ADDR_OK;
2890 }
2891
2892 /**
2893 ************************************************************************************************************************
2894 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
2895 *
2896 * @brief
2897 * Compute surface info sanity check
2898 *
2899 * @return
2900 * Offset
2901 ************************************************************************************************************************
2902 */
2903 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
2904 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2905 {
2906 BOOL_32 invalid = FALSE;
2907
2908 if ((pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2909 {
2910 invalid = TRUE;
2911 }
2912 else if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) ||
2913 (pIn->resourceType >= ADDR_RSRC_MAX_TYPE))
2914 {
2915 invalid = TRUE;
2916 }
2917
2918 BOOL_32 mipmap = (pIn->numMipLevels > 1);
2919 BOOL_32 msaa = (pIn->numFrags > 1);
2920
2921 ADDR2_SURFACE_FLAGS flags = pIn->flags;
2922 BOOL_32 zbuffer = (flags.depth || flags.stencil);
2923 BOOL_32 color = flags.color;
2924 BOOL_32 display = flags.display || flags.rotated;
2925
2926 AddrResourceType rsrcType = pIn->resourceType;
2927 BOOL_32 tex3d = IsTex3d(rsrcType);
2928 AddrSwizzleMode swizzle = pIn->swizzleMode;
2929 BOOL_32 linear = IsLinear(swizzle);
2930 BOOL_32 blk256B = IsBlock256b(swizzle);
2931 BOOL_32 blkVar = IsBlockVariable(swizzle);
2932 BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
2933 BOOL_32 prt = flags.prt;
2934 BOOL_32 stereo = flags.qbStereo;
2935
2936 if (invalid == FALSE)
2937 {
2938 if ((pIn->numFrags > 1) &&
2939 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2940 {
2941 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2942 invalid = TRUE;
2943 }
2944 }
2945
2946 if (invalid == FALSE)
2947 {
2948 switch (rsrcType)
2949 {
2950 case ADDR_RSRC_TEX_1D:
2951 invalid = msaa || zbuffer || display || (linear == FALSE) || stereo;
2952 break;
2953 case ADDR_RSRC_TEX_2D:
2954 invalid = (msaa && mipmap) || (stereo && msaa) || (stereo && mipmap);
2955 break;
2956 case ADDR_RSRC_TEX_3D:
2957 invalid = msaa || zbuffer || display || stereo;
2958 break;
2959 default:
2960 invalid = TRUE;
2961 break;
2962 }
2963 }
2964
2965 if (invalid == FALSE)
2966 {
2967 if (display)
2968 {
2969 invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE);
2970 }
2971 }
2972
2973 if (invalid == FALSE)
2974 {
2975 if (linear)
2976 {
2977 invalid = ((ADDR_RSRC_TEX_1D != rsrcType) && prt) ||
2978 zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0);
2979 }
2980 else
2981 {
2982 if (blk256B || blkVar || isNonPrtXor)
2983 {
2984 invalid = prt;
2985 if (blk256B)
2986 {
2987 invalid = invalid || zbuffer || tex3d || mipmap || msaa;
2988 }
2989 }
2990
2991 if (invalid == FALSE)
2992 {
2993 if (IsZOrderSwizzle(swizzle))
2994 {
2995 invalid = color && msaa;
2996 }
2997 else if (IsStandardSwizzle(rsrcType, swizzle))
2998 {
2999 invalid = zbuffer;
3000 }
3001 else if (IsDisplaySwizzle(rsrcType, swizzle))
3002 {
3003 invalid = zbuffer;
3004 }
3005 else if (IsRotateSwizzle(swizzle))
3006 {
3007 invalid = zbuffer || (pIn->bpp > 64) || tex3d;
3008 }
3009 else
3010 {
3011 ADDR_ASSERT(!"invalid swizzle mode");
3012 invalid = TRUE;
3013 }
3014 }
3015 }
3016 }
3017
3018 ADDR_ASSERT(invalid == FALSE);
3019
3020 return invalid ? ADDR_INVALIDPARAMS : ADDR_OK;
3021 }
3022
3023 /**
3024 ************************************************************************************************************************
3025 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3026 *
3027 * @brief
3028 * Internal function to get suggested surface information for client to use
3029 *
3030 * @return
3031 * ADDR_E_RETURNCODE
3032 ************************************************************************************************************************
3033 */
3034 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3035 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3036 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
3037 {
3038 // Macro define resource block type
3039 enum AddrBlockType
3040 {
3041 AddrBlockMicro = 0, // Resource uses 256B block
3042 AddrBlock4KB = 1, // Resource uses 4KB block
3043 AddrBlock64KB = 2, // Resource uses 64KB block
3044 AddrBlockVar = 3, // Resource uses var blcok
3045 AddrBlockLinear = 4, // Resource uses linear swizzle mode
3046
3047 AddrBlockMaxTiledType = AddrBlock64KB + 1,
3048 };
3049
3050 enum AddrBlockSet
3051 {
3052 AddrBlockSetMicro = 1 << AddrBlockMicro,
3053 AddrBlockSetMacro4KB = 1 << AddrBlock4KB,
3054 AddrBlockSetMacro64KB = 1 << AddrBlock64KB,
3055 AddrBlockSetVar = 1 << AddrBlockVar,
3056 AddrBlockSetLinear = 1 << AddrBlockLinear,
3057
3058 AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB,
3059 };
3060
3061 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3062 ElemLib* pElemLib = GetElemLib();
3063
3064 // Set format to INVALID will skip this conversion
3065 UINT_32 expandX = 1;
3066 UINT_32 expandY = 1;
3067 UINT_32 bpp = pIn->bpp;
3068 UINT_32 width = pIn->width;
3069 UINT_32 height = pIn->height;
3070
3071 if (pIn->format != ADDR_FMT_INVALID)
3072 {
3073 // Don't care for this case
3074 ElemMode elemMode = ADDR_UNCOMPRESSED;
3075
3076 // Get compression/expansion factors and element mode which indicates compression/expansion
3077 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3078 &elemMode,
3079 &expandX,
3080 &expandY);
3081
3082 UINT_32 basePitch = 0;
3083 GetElemLib()->AdjustSurfaceInfo(elemMode,
3084 expandX,
3085 expandY,
3086 &bpp,
3087 &basePitch,
3088 &width,
3089 &height);
3090 }
3091
3092 UINT_32 numSamples = Max(pIn->numSamples, 1u);
3093 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3094 UINT_32 slice = Max(pIn->numSlices, 1u);
3095 UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3096 UINT_32 minSizeAlign = NextPow2(pIn->minSizeAlign);
3097
3098 if (pIn->flags.fmask)
3099 {
3100 bpp = GetFmaskBpp(numSamples, numFrags);
3101 numFrags = 1;
3102 numSamples = 1;
3103 pOut->resourceType = ADDR_RSRC_TEX_2D;
3104 }
3105 else
3106 {
3107 // The output may get changed for volume(3D) texture resource in future
3108 pOut->resourceType = pIn->resourceType;
3109 }
3110
3111 ADDR_ASSERT(bpp >= 8u);
3112 UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u);
3113
3114 if (IsTex1d(pOut->resourceType))
3115 {
3116 pOut->swizzleMode = ADDR_SW_LINEAR;
3117 pOut->validBlockSet.value = AddrBlockSetLinear;
3118 pOut->canXor = FALSE;
3119 }
3120 else
3121 {
3122 ADDR2_BLOCK_SET blockSet;
3123 blockSet.value = 0;
3124
3125 AddrSwType swType = ADDR_SW_S;
3126
3127 // prt Xor and non-xor will have less height align requirement for stereo surface
3128 BOOL_32 prtXor = (pIn->flags.prt || pIn->flags.qbStereo) && (pIn->noXor == FALSE);
3129 BOOL_32 displayResource = FALSE;
3130
3131 pOut->canXor = (pIn->flags.prt == FALSE) && (pIn->noXor == FALSE);
3132
3133 // Filter out improper swType and blockSet by HW restriction
3134 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3135 {
3136 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3137 blockSet.value = AddrBlockSetMacro;
3138 swType = ADDR_SW_Z;
3139
3140 if (pIn->flags.depth && pIn->flags.texture)
3141 {
3142 if (((bpp == 16) && (numFrags >= 4)) ||
3143 ((bpp == 32) && (numFrags >= 2)))
3144 {
3145 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3146 // equation from wrong address within memory range a tile covered and use the
3147 // garbage data for compressed Z reading which finally leads to corruption.
3148 pOut->canXor = FALSE;
3149 prtXor = FALSE;
3150 }
3151 }
3152 }
3153 else if (ElemLib::IsBlockCompressed(pIn->format))
3154 {
3155 // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes. Not sure
3156 // under what circumstances "_D" would be appropriate as these formats are not
3157 // displayable.
3158 blockSet.value = AddrBlockSetMacro;
3159
3160 // This isn't to be used as texture and caller doesn't allow macro tiled.
3161 if ((pIn->flags.texture == FALSE) &&
3162 (pIn->forbiddenBlock.macro4KB && pIn->forbiddenBlock.macro64KB))
3163 {
3164 blockSet.value |= AddrBlockSetLinear;
3165 }
3166 swType = ADDR_SW_D;
3167 }
3168 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3169 {
3170 // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes. Its not
3171 // clear under what circumstances the D or R modes would be appropriate since
3172 // these formats are not displayable.
3173 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3174 swType = ADDR_SW_S;
3175 }
3176 else if (IsTex3d(pOut->resourceType))
3177 {
3178 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3179
3180 if (pIn->flags.prt)
3181 {
3182 // PRT cannot use SW_D which gives an unexpected block dimension
3183 swType = ADDR_SW_Z;
3184 }
3185 else if ((numMipLevels > 1) && (slice >= width) && (slice >= height))
3186 {
3187 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3188 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3189 swType = ADDR_SW_Z;
3190 }
3191 else if (pIn->flags.color)
3192 {
3193 swType = ADDR_SW_D;
3194 }
3195 else
3196 {
3197 swType = ADDR_SW_Z;
3198 }
3199 }
3200 else
3201 {
3202 swType = ((pIn->flags.display == TRUE) ||
3203 (pIn->flags.overlay == TRUE) ||
3204 (pIn->bpp == 128)) ? ADDR_SW_D : ADDR_SW_S;
3205
3206 if (numMipLevels > 1)
3207 {
3208 ADDR_ASSERT(numFrags == 1);
3209 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3210 }
3211 else if ((numFrags > 1) || (numSamples > 1))
3212 {
3213 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3214 blockSet.value = AddrBlockSetMacro;
3215 }
3216 else
3217 {
3218 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3219 blockSet.value = AddrBlockSetLinear | AddrBlockSetMicro | AddrBlockSetMacro;
3220
3221 displayResource = pIn->flags.rotated || pIn->flags.display;
3222
3223 if (displayResource)
3224 {
3225 swType = pIn->flags.rotated ? ADDR_SW_R : ADDR_SW_D;
3226
3227 if (pIn->bpp > 64)
3228 {
3229 blockSet.value = 0;
3230 }
3231 else if (m_settings.isDce12)
3232 {
3233 if (pIn->bpp != 32)
3234 {
3235 blockSet.micro = FALSE;
3236 }
3237
3238 // DCE12 does not support display surface to be _T swizzle mode
3239 prtXor = FALSE;
3240 }
3241 else if (m_settings.isDcn1)
3242 {
3243 // _R is not supported by Dcn1
3244 if (pIn->bpp == 64)
3245 {
3246 swType = ADDR_SW_D;
3247 }
3248 else
3249 {
3250 swType = ADDR_SW_S;
3251 }
3252
3253 blockSet.micro = FALSE;
3254 }
3255 else
3256 {
3257 ADDR_NOT_IMPLEMENTED();
3258 returnCode = ADDR_NOTSUPPORTED;
3259 }
3260 }
3261 }
3262 }
3263
3264 if ((numFrags > 1) &&
3265 (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
3266 {
3267 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3268 blockSet.macro4KB = FALSE;
3269 }
3270
3271 if (pIn->flags.prt)
3272 {
3273 blockSet.value &= AddrBlockSetMacro64KB;
3274 }
3275
3276 // Apply customized forbidden setting
3277 blockSet.value &= ~pIn->forbiddenBlock.value;
3278
3279 if (pIn->maxAlign > 0)
3280 {
3281 if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
3282 {
3283 blockSet.macro64KB = FALSE;
3284 }
3285
3286 if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
3287 {
3288 blockSet.macro4KB = FALSE;
3289 }
3290
3291 if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
3292 {
3293 blockSet.micro = FALSE;
3294 }
3295 }
3296
3297 Dim3d blkAlign[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3298 Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3299 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3300
3301 if (blockSet.micro)
3302 {
3303 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w,
3304 &blkAlign[AddrBlockMicro].h,
3305 &blkAlign[AddrBlockMicro].d,
3306 bpp,
3307 numFrags,
3308 pOut->resourceType,
3309 ADDR_SW_256B);
3310
3311 if (returnCode == ADDR_OK)
3312 {
3313 if (displayResource)
3314 {
3315 blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32);
3316 }
3317 else if ((blkAlign[AddrBlockMicro].w >= width) && (blkAlign[AddrBlockMicro].h >= height) &&
3318 (minSizeAlign <= GetBlockSize(ADDR_SW_256B)))
3319 {
3320 // If one 256B block can contain the surface, don't bother bigger block type
3321 blockSet.macro4KB = FALSE;
3322 blockSet.macro64KB = FALSE;
3323 blockSet.var = FALSE;
3324 }
3325
3326 padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height,
3327 slice, &paddedDim[AddrBlockMicro]);
3328 }
3329 }
3330
3331 if ((returnCode == ADDR_OK) && blockSet.macro4KB)
3332 {
3333 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w,
3334 &blkAlign[AddrBlock4KB].h,
3335 &blkAlign[AddrBlock4KB].d,
3336 bpp,
3337 numFrags,
3338 pOut->resourceType,
3339 ADDR_SW_4KB);
3340
3341 if (returnCode == ADDR_OK)
3342 {
3343 if (displayResource)
3344 {
3345 blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32);
3346 }
3347
3348 padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height,
3349 slice, &paddedDim[AddrBlock4KB]);
3350
3351 ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]);
3352 }
3353 }
3354
3355 if ((returnCode == ADDR_OK) && blockSet.macro64KB)
3356 {
3357 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w,
3358 &blkAlign[AddrBlock64KB].h,
3359 &blkAlign[AddrBlock64KB].d,
3360 bpp,
3361 numFrags,
3362 pOut->resourceType,
3363 ADDR_SW_64KB);
3364
3365 if (returnCode == ADDR_OK)
3366 {
3367 if (displayResource)
3368 {
3369 blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32);
3370 }
3371
3372 padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height,
3373 slice, &paddedDim[AddrBlock64KB]);
3374
3375 ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]);
3376 ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]);
3377 }
3378 }
3379
3380 if (returnCode == ADDR_OK)
3381 {
3382 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3383 {
3384 padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement);
3385 }
3386
3387 // Use minimum block type which meets all conditions above if flag minimizeAlign was set
3388 if (pIn->flags.minimizeAlign)
3389 {
3390 // If padded size of 64KB block is larger than padded size of 256B block or 4KB
3391 // block, filter out 64KB block from candidate list
3392 if (blockSet.macro64KB &&
3393 ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) ||
3394 (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB]))))
3395 {
3396 blockSet.macro64KB = FALSE;
3397 }
3398
3399 // If padded size of 4KB block is larger than padded size of 256B block,
3400 // filter out 4KB block from candidate list
3401 if (blockSet.macro4KB &&
3402 blockSet.micro &&
3403 (padSize[AddrBlockMicro] < padSize[AddrBlock4KB]))
3404 {
3405 blockSet.macro4KB = FALSE;
3406 }
3407 }
3408 // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint
3409 else if (pIn->flags.opt4space)
3410 {
3411 UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] :
3412 (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]);
3413
3414 threshold += threshold >> 1;
3415
3416 if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold))
3417 {
3418 blockSet.macro64KB = FALSE;
3419 }
3420
3421 if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold))
3422 {
3423 blockSet.macro4KB = FALSE;
3424 }
3425 }
3426 else
3427 {
3428 if (blockSet.macro64KB &&
3429 (padSize[AddrBlock64KB] >= static_cast<UINT_64>(width) * height * slice * 2) &&
3430 ((blockSet.value & ~AddrBlockSetMacro64KB) != 0))
3431 {
3432 // If 64KB block waste more than half memory on padding, filter it out from
3433 // candidate list when it is not the only choice left
3434 blockSet.macro64KB = FALSE;
3435 }
3436 }
3437
3438 if (blockSet.value == 0)
3439 {
3440 // Bad things happen, client will not get any useful information from AddrLib.
3441 // Maybe we should fill in some output earlier instead of outputing nothing?
3442 ADDR_ASSERT_ALWAYS();
3443 returnCode = ADDR_INVALIDPARAMS;
3444 }
3445 else
3446 {
3447 pOut->validBlockSet = blockSet;
3448 pOut->canXor = pOut->canXor &&
3449 (blockSet.macro4KB || blockSet.macro64KB || blockSet.var);
3450
3451 if (blockSet.macro64KB || blockSet.macro4KB)
3452 {
3453 if (swType == ADDR_SW_Z)
3454 {
3455 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z;
3456 }
3457 else if (swType == ADDR_SW_S)
3458 {
3459 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S;
3460 }
3461 else if (swType == ADDR_SW_D)
3462 {
3463 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D;
3464 }
3465 else
3466 {
3467 ADDR_ASSERT(swType == ADDR_SW_R);
3468 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R;
3469 }
3470
3471 if (prtXor && blockSet.macro64KB)
3472 {
3473 // Client wants PRTXOR, give back _T swizzle mode if 64KB is available
3474 const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z;
3475 pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + prtGap);
3476 }
3477 else if (pOut->canXor)
3478 {
3479 // Client wants XOR and this is allowed, return XOR version swizzle mode
3480 const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z;
3481 pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + xorGap);
3482 }
3483 }
3484 else if (blockSet.micro)
3485 {
3486 if (swType == ADDR_SW_S)
3487 {
3488 pOut->swizzleMode = ADDR_SW_256B_S;
3489 }
3490 else if (swType == ADDR_SW_D)
3491 {
3492 pOut->swizzleMode = ADDR_SW_256B_D;
3493 }
3494 else
3495 {
3496 ADDR_ASSERT(swType == ADDR_SW_R);
3497 pOut->swizzleMode = ADDR_SW_256B_R;
3498 }
3499 }
3500 else if (blockSet.linear)
3501 {
3502 // Fall into this branch doesn't mean linear is suitable, only no other choices!
3503 pOut->swizzleMode = ADDR_SW_LINEAR;
3504 }
3505 else
3506 {
3507 ADDR_ASSERT(blockSet.var);
3508
3509 // Designer consider VAR swizzle mode is usless for most cases
3510 ADDR_UNHANDLED_CASE();
3511
3512 returnCode = ADDR_NOTSUPPORTED;
3513 }
3514
3515 #if DEBUG
3516 // Post sanity check, at least AddrLib should accept the output generated by its own
3517 if (pOut->swizzleMode != ADDR_SW_LINEAR)
3518 {
3519 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3520 localIn.flags = pIn->flags;
3521 localIn.swizzleMode = pOut->swizzleMode;
3522 localIn.resourceType = pOut->resourceType;
3523 localIn.format = pIn->format;
3524 localIn.bpp = bpp;
3525 localIn.width = width;
3526 localIn.height = height;
3527 localIn.numSlices = slice;
3528 localIn.numMipLevels = numMipLevels;
3529 localIn.numSamples = numSamples;
3530 localIn.numFrags = numFrags;
3531
3532 HwlComputeSurfaceInfoSanityCheck(&localIn);
3533
3534 // TODO : check all valid block type available in validBlockSet?
3535 }
3536 #endif
3537 }
3538 }
3539 }
3540
3541 return returnCode;
3542 }
3543
3544 /**
3545 ************************************************************************************************************************
3546 * Gfx9Lib::ComputeStereoInfo
3547 *
3548 * @brief
3549 * Compute height alignment and right eye pipeBankXor for stereo surface
3550 *
3551 * @return
3552 * Error code
3553 *
3554 ************************************************************************************************************************
3555 */
3556 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3557 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3558 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
3559 UINT_32* pHeightAlign
3560 ) const
3561 {
3562 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3563
3564 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3565
3566 if (eqIndex < m_numEquations)
3567 {
3568 if (IsXor(pIn->swizzleMode))
3569 {
3570 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3571 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
3572 const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
3573 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
3574 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3575 const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
3576
3577 ADDR_ASSERT(maxYCoordBlock256 ==
3578 GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
3579
3580 const UINT_32 maxYCoordInBaseEquation =
3581 (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256;
3582
3583 ADDR_ASSERT(maxYCoordInBaseEquation ==
3584 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3585
3586 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3587
3588 ADDR_ASSERT(maxYCoordInPipeXor ==
3589 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3590
3591 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3592 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3593
3594 ADDR_ASSERT(maxYCoordInBankXor ==
3595 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3596
3597 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3598
3599 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3600 {
3601 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3602
3603 if (pOut->pStereoInfo != NULL)
3604 {
3605 pOut->pStereoInfo->rightSwizzle = 0;
3606
3607 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3608 {
3609 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3610 {
3611 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3612 }
3613
3614 if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3615 {
3616 pOut->pStereoInfo->rightSwizzle |=
3617 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3618 }
3619
3620 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3621 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3622 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3623 }
3624 }
3625 }
3626 }
3627 }
3628 else
3629 {
3630 ADDR_ASSERT_ALWAYS();
3631 returnCode = ADDR_ERROR;
3632 }
3633
3634 return returnCode;
3635 }
3636
3637 /**
3638 ************************************************************************************************************************
3639 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3640 *
3641 * @brief
3642 * Internal function to calculate alignment for tiled surface
3643 *
3644 * @return
3645 * ADDR_E_RETURNCODE
3646 ************************************************************************************************************************
3647 */
3648 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3649 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3650 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3651 ) const
3652 {
3653 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3654 &pOut->blockHeight,
3655 &pOut->blockSlices,
3656 pIn->bpp,
3657 pIn->numFrags,
3658 pIn->resourceType,
3659 pIn->swizzleMode);
3660
3661 if (returnCode == ADDR_OK)
3662 {
3663 UINT_32 pitchAlignInElement = pOut->blockWidth;
3664
3665 if ((IsTex2d(pIn->resourceType) == TRUE) &&
3666 (pIn->flags.display || pIn->flags.rotated) &&
3667 (pIn->numMipLevels <= 1) &&
3668 (pIn->numSamples <= 1) &&
3669 (pIn->numFrags <= 1))
3670 {
3671 // Display engine needs pitch align to be at least 32 pixels.
3672 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3673 }
3674
3675 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3676
3677 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3678 {
3679 if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3680 {
3681 returnCode = ADDR_INVALIDPARAMS;
3682 }
3683 else if (pIn->pitchInElement < pOut->pitch)
3684 {
3685 returnCode = ADDR_INVALIDPARAMS;
3686 }
3687 else
3688 {
3689 pOut->pitch = pIn->pitchInElement;
3690 }
3691 }
3692
3693 UINT_32 heightAlign = 0;
3694
3695 if (pIn->flags.qbStereo)
3696 {
3697 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3698 }
3699
3700 if (returnCode == ADDR_OK)
3701 {
3702 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3703
3704 if (heightAlign > 1)
3705 {
3706 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3707 }
3708
3709 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3710
3711 pOut->epitchIsHeight = FALSE;
3712 pOut->mipChainInTail = FALSE;
3713
3714 pOut->mipChainPitch = pOut->pitch;
3715 pOut->mipChainHeight = pOut->height;
3716 pOut->mipChainSlice = pOut->numSlices;
3717
3718 if (pIn->numMipLevels > 1)
3719 {
3720 UINT_32 numMipLevel;
3721 ADDR2_MIP_INFO *pMipInfo;
3722 ADDR2_MIP_INFO mipInfo[4];
3723
3724 if (pOut->pMipInfo != NULL)
3725 {
3726 pMipInfo = pOut->pMipInfo;
3727 numMipLevel = pIn->numMipLevels;
3728 }
3729 else
3730 {
3731 pMipInfo = mipInfo;
3732 numMipLevel = Min(pIn->numMipLevels, 4u);
3733 }
3734
3735 UINT_32 endingMip = GetMipChainInfo(pIn->resourceType,
3736 pIn->swizzleMode,
3737 pIn->bpp,
3738 pIn->width,
3739 pIn->height,
3740 pIn->numSlices,
3741 pOut->blockWidth,
3742 pOut->blockHeight,
3743 pOut->blockSlices,
3744 numMipLevel,
3745 pMipInfo);
3746
3747 if (endingMip == 0)
3748 {
3749 pOut->epitchIsHeight = TRUE;
3750 pOut->pitch = pMipInfo[0].pitch;
3751 pOut->height = pMipInfo[0].height;
3752 pOut->numSlices = pMipInfo[0].depth;
3753 pOut->mipChainInTail = TRUE;
3754 }
3755 else
3756 {
3757 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
3758 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
3759
3760 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
3761 pIn->swizzleMode,
3762 mip0WidthInBlk,
3763 mip0HeightInBlk,
3764 pOut->numSlices / pOut->blockSlices);
3765 if (majorMode == ADDR_MAJOR_Y)
3766 {
3767 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
3768
3769 if ((mip1WidthInBlk == 1) && (endingMip > 2))
3770 {
3771 mip1WidthInBlk++;
3772 }
3773
3774 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
3775
3776 pOut->epitchIsHeight = FALSE;
3777 }
3778 else
3779 {
3780 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
3781
3782 if ((mip1HeightInBlk == 1) && (endingMip > 2))
3783 {
3784 mip1HeightInBlk++;
3785 }
3786
3787 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
3788
3789 pOut->epitchIsHeight = TRUE;
3790 }
3791 }
3792
3793 if (pOut->pMipInfo != NULL)
3794 {
3795 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
3796
3797 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3798 {
3799 Dim3d mipStartPos = {0};
3800 UINT_32 mipTailOffsetInBytes = 0;
3801
3802 mipStartPos = GetMipStartPos(pIn->resourceType,
3803 pIn->swizzleMode,
3804 pOut->pitch,
3805 pOut->height,
3806 pOut->numSlices,
3807 pOut->blockWidth,
3808 pOut->blockHeight,
3809 pOut->blockSlices,
3810 i,
3811 elementBytesLog2,
3812 &mipTailOffsetInBytes);
3813
3814 UINT_32 pitchInBlock =
3815 pOut->mipChainPitch / pOut->blockWidth;
3816 UINT_32 sliceInBlock =
3817 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
3818 UINT_64 blockIndex =
3819 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
3820 UINT_64 macroBlockOffset =
3821 blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
3822
3823 pMipInfo[i].macroBlockOffset = macroBlockOffset;
3824 pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
3825 }
3826 }
3827 }
3828 else if (pOut->pMipInfo != NULL)
3829 {
3830 pOut->pMipInfo[0].pitch = pOut->pitch;
3831 pOut->pMipInfo[0].height = pOut->height;
3832 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3833 pOut->pMipInfo[0].offset = 0;
3834 }
3835
3836 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
3837 (pIn->bpp >> 3) * pIn->numFrags;
3838 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
3839 pOut->baseAlign = HwlComputeSurfaceBaseAlign(pIn->swizzleMode);
3840
3841 if (pIn->flags.prt)
3842 {
3843 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
3844 }
3845 }
3846 }
3847
3848 return returnCode;
3849 }
3850
3851 /**
3852 ************************************************************************************************************************
3853 * Gfx9Lib::GetMipChainInfo
3854 *
3855 * @brief
3856 * Internal function to get out information about mip chain
3857 *
3858 * @return
3859 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
3860 ************************************************************************************************************************
3861 */
3862 UINT_32 Gfx9Lib::GetMipChainInfo(
3863 AddrResourceType resourceType,
3864 AddrSwizzleMode swizzleMode,
3865 UINT_32 bpp,
3866 UINT_32 mip0Width,
3867 UINT_32 mip0Height,
3868 UINT_32 mip0Depth,
3869 UINT_32 blockWidth,
3870 UINT_32 blockHeight,
3871 UINT_32 blockDepth,
3872 UINT_32 numMipLevel,
3873 ADDR2_MIP_INFO* pMipInfo) const
3874 {
3875 const Dim3d tailMaxDim =
3876 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
3877
3878 UINT_32 mipPitch = mip0Width;
3879 UINT_32 mipHeight = mip0Height;
3880 UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
3881 UINT_32 offset = 0;
3882 UINT_32 endingMip = numMipLevel - 1;
3883 BOOL_32 inTail = FALSE;
3884 BOOL_32 finalDim = FALSE;
3885
3886 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
3887 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
3888
3889 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
3890 {
3891 if (inTail)
3892 {
3893 if (finalDim == FALSE)
3894 {
3895 UINT_32 mipSize;
3896
3897 if (is3dThick)
3898 {
3899 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
3900 }
3901 else
3902 {
3903 mipSize = mipPitch * mipHeight * (bpp >> 3);
3904 }
3905
3906 if (mipSize <= 256)
3907 {
3908 UINT_32 index = Log2(bpp >> 3);
3909
3910 if (is3dThick)
3911 {
3912 mipPitch = Block256_3dZ[index].w;
3913 mipHeight = Block256_3dZ[index].h;
3914 mipDepth = Block256_3dZ[index].d;
3915 }
3916 else
3917 {
3918 mipPitch = Block256_2d[index].w;
3919 mipHeight = Block256_2d[index].h;
3920 }
3921
3922 finalDim = TRUE;
3923 }
3924 }
3925 }
3926 else
3927 {
3928 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
3929 mipPitch, mipHeight, mipDepth);
3930
3931 if (inTail)
3932 {
3933 endingMip = mipId;
3934
3935 mipPitch = tailMaxDim.w;
3936 mipHeight = tailMaxDim.h;
3937
3938 if (is3dThick)
3939 {
3940 mipDepth = tailMaxDim.d;
3941 }
3942 }
3943 else
3944 {
3945 mipPitch = PowTwoAlign(mipPitch, blockWidth);
3946 mipHeight = PowTwoAlign(mipHeight, blockHeight);
3947
3948 if (is3dThick)
3949 {
3950 mipDepth = PowTwoAlign(mipDepth, blockDepth);
3951 }
3952 }
3953 }
3954
3955 pMipInfo[mipId].pitch = mipPitch;
3956 pMipInfo[mipId].height = mipHeight;
3957 pMipInfo[mipId].depth = mipDepth;
3958 pMipInfo[mipId].offset = offset;
3959 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
3960
3961 if (finalDim)
3962 {
3963 if (is3dThin)
3964 {
3965 mipDepth = Max(mipDepth >> 1, 1u);
3966 }
3967 }
3968 else
3969 {
3970 mipPitch = Max(mipPitch >> 1, 1u);
3971 mipHeight = Max(mipHeight >> 1, 1u);
3972
3973 if (is3dThick || is3dThin)
3974 {
3975 mipDepth = Max(mipDepth >> 1, 1u);
3976 }
3977 }
3978 }
3979
3980 return endingMip;
3981 }
3982
3983 /**
3984 ************************************************************************************************************************
3985 * Gfx9Lib::GetMetaMiptailInfo
3986 *
3987 * @brief
3988 * Get mip tail coordinate information.
3989 *
3990 * @return
3991 * N/A
3992 ************************************************************************************************************************
3993 */
3994 VOID Gfx9Lib::GetMetaMiptailInfo(
3995 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord
3996 Dim3d mipCoord, ///< [in] mip tail base coord
3997 UINT_32 numMipInTail, ///< [in] number of mips in tail
3998 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
3999 ) const
4000 {
4001 BOOL_32 isThick = (pMetaBlkDim->d > 1);
4002 UINT_32 mipWidth = pMetaBlkDim->w;
4003 UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4004 UINT_32 mipDepth = pMetaBlkDim->d;
4005 UINT_32 minInc;
4006
4007 if (isThick)
4008 {
4009 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4010 }
4011 else if (pMetaBlkDim->h >= 1024)
4012 {
4013 minInc = 256;
4014 }
4015 else if (pMetaBlkDim->h == 512)
4016 {
4017 minInc = 128;
4018 }
4019 else
4020 {
4021 minInc = 64;
4022 }
4023
4024 UINT_32 blk32MipId = 0xFFFFFFFF;
4025
4026 for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4027 {
4028 pInfo[mip].inMiptail = TRUE;
4029 pInfo[mip].startX = mipCoord.w;
4030 pInfo[mip].startY = mipCoord.h;
4031 pInfo[mip].startZ = mipCoord.d;
4032 pInfo[mip].width = mipWidth;
4033 pInfo[mip].height = mipHeight;
4034 pInfo[mip].depth = mipDepth;
4035
4036 if (mipWidth <= 32)
4037 {
4038 if (blk32MipId == 0xFFFFFFFF)
4039 {
4040 blk32MipId = mip;
4041 }
4042
4043 mipCoord.w = pInfo[blk32MipId].startX;
4044 mipCoord.h = pInfo[blk32MipId].startY;
4045 mipCoord.d = pInfo[blk32MipId].startZ;
4046
4047 switch (mip - blk32MipId)
4048 {
4049 case 0:
4050 mipCoord.w += 32; // 16x16
4051 break;
4052 case 1:
4053 mipCoord.h += 32; // 8x8
4054 break;
4055 case 2:
4056 mipCoord.h += 32; // 4x4
4057 mipCoord.w += 16;
4058 break;
4059 case 3:
4060 mipCoord.h += 32; // 2x2
4061 mipCoord.w += 32;
4062 break;
4063 case 4:
4064 mipCoord.h += 32; // 1x1
4065 mipCoord.w += 48;
4066 break;
4067 // The following are for BC/ASTC formats
4068 case 5:
4069 mipCoord.h += 48; // 1/2 x 1/2
4070 break;
4071 case 6:
4072 mipCoord.h += 48; // 1/4 x 1/4
4073 mipCoord.w += 16;
4074 break;
4075 case 7:
4076 mipCoord.h += 48; // 1/8 x 1/8
4077 mipCoord.w += 32;
4078 break;
4079 case 8:
4080 mipCoord.h += 48; // 1/16 x 1/16
4081 mipCoord.w += 48;
4082 break;
4083 default:
4084 ADDR_ASSERT_ALWAYS();
4085 break;
4086 }
4087
4088 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4089 mipHeight = mipWidth;
4090
4091 if (isThick)
4092 {
4093 mipDepth = mipWidth;
4094 }
4095 }
4096 else
4097 {
4098 if (mipWidth <= minInc)
4099 {
4100 // if we're below the minimal increment...
4101 if (isThick)
4102 {
4103 // For 3d, just go in z direction
4104 mipCoord.d += mipDepth;
4105 }
4106 else
4107 {
4108 // For 2d, first go across, then down
4109 if ((mipWidth * 2) == minInc)
4110 {
4111 // if we're 2 mips below, that's when we go back in x, and down in y
4112 mipCoord.w -= minInc;
4113 mipCoord.h += minInc;
4114 }
4115 else
4116 {
4117 // otherwise, just go across in x
4118 mipCoord.w += minInc;
4119 }
4120 }
4121 }
4122 else
4123 {
4124 // On even mip, go down, otherwise, go across
4125 if (mip & 1)
4126 {
4127 mipCoord.w += mipWidth;
4128 }
4129 else
4130 {
4131 mipCoord.h += mipHeight;
4132 }
4133 }
4134 // Divide the width by 2
4135 mipWidth >>= 1;
4136 // After the first mip in tail, the mip is always a square
4137 mipHeight = mipWidth;
4138 // ...or for 3d, a cube
4139 if (isThick)
4140 {
4141 mipDepth = mipWidth;
4142 }
4143 }
4144 }
4145 }
4146
4147 /**
4148 ************************************************************************************************************************
4149 * Gfx9Lib::GetMipStartPos
4150 *
4151 * @brief
4152 * Internal function to get out information about mip logical start position
4153 *
4154 * @return
4155 * Logical start position, in units of macro block width/height/depth, of one mip level within one slice
4156 ************************************************************************************************************************
4157 */
4158 Dim3d Gfx9Lib::GetMipStartPos(
4159 AddrResourceType resourceType,
4160 AddrSwizzleMode swizzleMode,
4161 UINT_32 width,
4162 UINT_32 height,
4163 UINT_32 depth,
4164 UINT_32 blockWidth,
4165 UINT_32 blockHeight,
4166 UINT_32 blockDepth,
4167 UINT_32 mipId,
4168 UINT_32 log2ElementBytes,
4169 UINT_32* pMipTailBytesOffset) const
4170 {
4171 Dim3d mipStartPos = {0};
4172 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4173
4174 // Report mip in tail if Mip0 is already in mip tail
4175 BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4176 UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode);
4177 UINT_32 mipIndexInTail = mipId;
4178
4179 if (inMipTail == FALSE)
4180 {
4181 // Mip 0 dimension, unit in block
4182 UINT_32 mipWidthInBlk = width / blockWidth;
4183 UINT_32 mipHeightInBlk = height / blockHeight;
4184 UINT_32 mipDepthInBlk = depth / blockDepth;
4185 AddrMajorMode majorMode = GetMajorMode(resourceType,
4186 swizzleMode,
4187 mipWidthInBlk,
4188 mipHeightInBlk,
4189 mipDepthInBlk);
4190
4191 UINT_32 endingMip = mipId + 1;
4192
4193 for (UINT_32 i = 1; i <= mipId; i++)
4194 {
4195 if ((i == 1) || (i == 3))
4196 {
4197 if (majorMode == ADDR_MAJOR_Y)
4198 {
4199 mipStartPos.w += mipWidthInBlk;
4200 }
4201 else
4202 {
4203 mipStartPos.h += mipHeightInBlk;
4204 }
4205 }
4206 else
4207 {
4208 if (majorMode == ADDR_MAJOR_X)
4209 {
4210 mipStartPos.w += mipWidthInBlk;
4211 }
4212 else if (majorMode == ADDR_MAJOR_Y)
4213 {
4214 mipStartPos.h += mipHeightInBlk;
4215 }
4216 else
4217 {
4218 mipStartPos.d += mipDepthInBlk;
4219 }
4220 }
4221
4222 BOOL_32 inTail = FALSE;
4223
4224 if (IsThick(resourceType, swizzleMode))
4225 {
4226 UINT_32 dim = log2blkSize % 3;
4227
4228 if (dim == 0)
4229 {
4230 inTail =
4231 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4232 }
4233 else if (dim == 1)
4234 {
4235 inTail =
4236 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4237 }
4238 else
4239 {
4240 inTail =
4241 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4242 }
4243 }
4244 else
4245 {
4246 if (log2blkSize & 1)
4247 {
4248 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4249 }
4250 else
4251 {
4252 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4253 }
4254 }
4255
4256 if (inTail)
4257 {
4258 endingMip = i;
4259 break;
4260 }
4261
4262 mipWidthInBlk = RoundHalf(mipWidthInBlk);
4263 mipHeightInBlk = RoundHalf(mipHeightInBlk);
4264 mipDepthInBlk = RoundHalf(mipDepthInBlk);
4265 }
4266
4267 if (mipId >= endingMip)
4268 {
4269 inMipTail = TRUE;
4270 mipIndexInTail = mipId - endingMip;
4271 }
4272 }
4273
4274 if (inMipTail)
4275 {
4276 UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize;
4277 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4278 *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4279 }
4280
4281 return mipStartPos;
4282 }
4283
4284 /**
4285 ************************************************************************************************************************
4286 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4287 *
4288 * @brief
4289 * Internal function to calculate address from coord for tiled swizzle surface
4290 *
4291 * @return
4292 * ADDR_E_RETURNCODE
4293 ************************************************************************************************************************
4294 */
4295 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4296 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4297 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4298 ) const
4299 {
4300 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4301 localIn.swizzleMode = pIn->swizzleMode;
4302 localIn.flags = pIn->flags;
4303 localIn.resourceType = pIn->resourceType;
4304 localIn.bpp = pIn->bpp;
4305 localIn.width = Max(pIn->unalignedWidth, 1u);
4306 localIn.height = Max(pIn->unalignedHeight, 1u);
4307 localIn.numSlices = Max(pIn->numSlices, 1u);
4308 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4309 localIn.numSamples = Max(pIn->numSamples, 1u);
4310 localIn.numFrags = Max(pIn->numFrags, 1u);
4311 if (localIn.numMipLevels <= 1)
4312 {
4313 localIn.pitchInElement = pIn->pitchInElement;
4314 }
4315
4316 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4317 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4318
4319 BOOL_32 valid = (returnCode == ADDR_OK) &&
4320 (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4321 IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4322 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4323
4324 if (valid)
4325 {
4326 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
4327 Dim3d mipStartPos = {0};
4328 UINT_32 mipTailBytesOffset = 0;
4329
4330 if (pIn->numMipLevels > 1)
4331 {
4332 // Mip-map chain cannot be MSAA surface
4333 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4334
4335 mipStartPos = GetMipStartPos(pIn->resourceType,
4336 pIn->swizzleMode,
4337 localOut.pitch,
4338 localOut.height,
4339 localOut.numSlices,
4340 localOut.blockWidth,
4341 localOut.blockHeight,
4342 localOut.blockSlices,
4343 pIn->mipId,
4344 log2ElementBytes,
4345 &mipTailBytesOffset);
4346 }
4347
4348 UINT_32 interleaveOffset = 0;
4349 UINT_32 pipeBits = 0;
4350 UINT_32 pipeXor = 0;
4351 UINT_32 bankBits = 0;
4352 UINT_32 bankXor = 0;
4353
4354 if (IsThin(pIn->resourceType, pIn->swizzleMode))
4355 {
4356 UINT_32 blockOffset = 0;
4357 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4358
4359 if (IsZOrderSwizzle(pIn->swizzleMode))
4360 {
4361 // Morton generation
4362 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4363 {
4364 UINT_32 totalLowBits = 6 - log2ElementBytes;
4365 UINT_32 mortBits = totalLowBits / 2;
4366 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4367 // Are 9 bits enough?
4368 UINT_32 highBitsValue =
4369 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4370 blockOffset = lowBitsValue | highBitsValue;
4371 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4372 }
4373 else
4374 {
4375 blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4376 }
4377
4378 // Fill LSBs with sample bits
4379 if (pIn->numSamples > 1)
4380 {
4381 blockOffset *= pIn->numSamples;
4382 blockOffset |= pIn->sample;
4383 }
4384
4385 // Shift according to BytesPP
4386 blockOffset <<= log2ElementBytes;
4387 }
4388 else
4389 {
4390 // Micro block offset
4391 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4392 blockOffset = microBlockOffset;
4393
4394 // Micro block dimension
4395 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4396 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4397 // Morton generation, does 12 bit enough?
4398 blockOffset |=
4399 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4400
4401 // Sample bits start location
4402 UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples);
4403 // Join sample bits information to the highest Macro block bits
4404 if (IsNonPrtXor(pIn->swizzleMode))
4405 {
4406 // Non-prt-Xor : xor highest Macro block bits with sample bits
4407 blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4408 }
4409 else
4410 {
4411 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4412 // after this op, the blockOffset only contains log2 Macro block size bits
4413 blockOffset %= (1 << sampleStart);
4414 blockOffset |= (pIn->sample << sampleStart);
4415 ADDR_ASSERT((blockOffset >> log2blkSize) == 0);
4416 }
4417 }
4418
4419 if (IsXor(pIn->swizzleMode))
4420 {
4421 // Mask off bits above Macro block bits to keep page synonyms working for prt
4422 if (IsPrt(pIn->swizzleMode))
4423 {
4424 blockOffset &= ((1 << log2blkSize) - 1);
4425 }
4426
4427 // Preserve offset inside pipe interleave
4428 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4429 blockOffset >>= m_pipeInterleaveLog2;
4430
4431 // Pipe/Se xor bits
4432 pipeBits = GetPipeXorBits(log2blkSize);
4433 // Pipe xor
4434 pipeXor = FoldXor2d(blockOffset, pipeBits);
4435 blockOffset >>= pipeBits;
4436
4437 // Bank xor bits
4438 bankBits = GetBankXorBits(log2blkSize);
4439 // Bank Xor
4440 bankXor = FoldXor2d(blockOffset, bankBits);
4441 blockOffset >>= bankBits;
4442
4443 // Put all the part back together
4444 blockOffset <<= bankBits;
4445 blockOffset |= bankXor;
4446 blockOffset <<= pipeBits;
4447 blockOffset |= pipeXor;
4448 blockOffset <<= m_pipeInterleaveLog2;
4449 blockOffset |= interleaveOffset;
4450 }
4451
4452 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4453 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4454
4455 blockOffset |= mipTailBytesOffset;
4456
4457 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4458 {
4459 // Apply slice xor if not MSAA/PRT
4460 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4461 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4462 (m_pipeInterleaveLog2 + pipeBits));
4463 }
4464
4465 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4466 bankBits, pipeBits, &blockOffset);
4467
4468 blockOffset %= (1 << log2blkSize);
4469
4470 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4471 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4472 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4473 UINT_32 macroBlockIndex =
4474 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4475 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4476 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4477
4478 UINT_64 macroBlockOffset = (static_cast<UINT_64>(macroBlockIndex) <<
4479 GetBlockSizeLog2(pIn->swizzleMode));
4480
4481 pOut->addr = blockOffset | macroBlockOffset;
4482 }
4483 else
4484 {
4485 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4486
4487 Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4488
4489 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4490 (pIn->y / microBlockDim.h),
4491 (pIn->slice / microBlockDim.d),
4492 8);
4493
4494 blockOffset <<= 10;
4495 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4496
4497 if (IsXor(pIn->swizzleMode))
4498 {
4499 // Mask off bits above Macro block bits to keep page synonyms working for prt
4500 if (IsPrt(pIn->swizzleMode))
4501 {
4502 blockOffset &= ((1 << log2blkSize) - 1);
4503 }
4504
4505 // Preserve offset inside pipe interleave
4506 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4507 blockOffset >>= m_pipeInterleaveLog2;
4508
4509 // Pipe/Se xor bits
4510 pipeBits = GetPipeXorBits(log2blkSize);
4511 // Pipe xor
4512 pipeXor = FoldXor3d(blockOffset, pipeBits);
4513 blockOffset >>= pipeBits;
4514
4515 // Bank xor bits
4516 bankBits = GetBankXorBits(log2blkSize);
4517 // Bank Xor
4518 bankXor = FoldXor3d(blockOffset, bankBits);
4519 blockOffset >>= bankBits;
4520
4521 // Put all the part back together
4522 blockOffset <<= bankBits;
4523 blockOffset |= bankXor;
4524 blockOffset <<= pipeBits;
4525 blockOffset |= pipeXor;
4526 blockOffset <<= m_pipeInterleaveLog2;
4527 blockOffset |= interleaveOffset;
4528 }
4529
4530 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4531 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4532 blockOffset |= mipTailBytesOffset;
4533
4534 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4535 bankBits, pipeBits, &blockOffset);
4536
4537 blockOffset %= (1 << log2blkSize);
4538
4539 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
4540 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4541 UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4542
4543 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4544 UINT_32 sliceSizeInBlock =
4545 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4546 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4547
4548 pOut->addr = blockOffset | (blockIndex << log2blkSize);
4549 }
4550 }
4551 else
4552 {
4553 returnCode = ADDR_INVALIDPARAMS;
4554 }
4555
4556 return returnCode;
4557 }
4558
4559 } // V2
4560 } // Addr