edb4c6e636ac1b825bcafb20e9873678df587964
[mesa.git] / src / amd / addrlib / gfx9 / gfx9addrlib.cpp
1 /*
2 * Copyright © 2017 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37 #include "gfx9_enum.h"
38
39 #if BRAHMA_BUILD
40 #include "amdgpu_id.h"
41 #else
42 #include "ai_id.h"
43 #include "rv_id.h"
44 #endif
45
46 ////////////////////////////////////////////////////////////////////////////////////////////////////
47 ////////////////////////////////////////////////////////////////////////////////////////////////////
48
49 namespace Addr
50 {
51
52 /**
53 ************************************************************************************************************************
54 * Gfx9HwlInit
55 *
56 * @brief
57 * Creates a Gfx9Lib object.
58 *
59 * @return
60 * Returns a Gfx9Lib object pointer.
61 ************************************************************************************************************************
62 */
63 Addr::Lib* Gfx9HwlInit(const Client* pClient)
64 {
65 return V2::Gfx9Lib::CreateObj(pClient);
66 }
67
68 namespace V2
69 {
70
71 ////////////////////////////////////////////////////////////////////////////////////////////////////
72 // Static Const Member
73 ////////////////////////////////////////////////////////////////////////////////////////////////////
74
75 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
76 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
77 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
78 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
79 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
80 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R
81
82 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
83 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
84 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
85 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R
86
87 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
88 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
89 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
90 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_R
91
92 {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z
93 {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S
94 {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D
95 {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R
96
97 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T
98 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
99 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
100 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T
101
102 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x
103 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x
104 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x
105 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x
106
107 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
108 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
109 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
110 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X
111
112 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X
113 {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X
114 {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X
115 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X
116 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
117 };
118
119 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
120 8, 6, 5, 4, 3, 2, 1, 0};
121
122 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
123
124 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
125
126 /**
127 ************************************************************************************************************************
128 * Gfx9Lib::Gfx9Lib
129 *
130 * @brief
131 * Constructor
132 *
133 ************************************************************************************************************************
134 */
135 Gfx9Lib::Gfx9Lib(const Client* pClient)
136 :
137 Lib(pClient),
138 m_numEquations(0)
139 {
140 m_class = AI_ADDRLIB;
141 memset(&m_settings, 0, sizeof(m_settings));
142 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
143 }
144
145 /**
146 ************************************************************************************************************************
147 * Gfx9Lib::~Gfx9Lib
148 *
149 * @brief
150 * Destructor
151 ************************************************************************************************************************
152 */
153 Gfx9Lib::~Gfx9Lib()
154 {
155 }
156
157 /**
158 ************************************************************************************************************************
159 * Gfx9Lib::HwlComputeHtileInfo
160 *
161 * @brief
162 * Interface function stub of AddrComputeHtileInfo
163 *
164 * @return
165 * ADDR_E_RETURNCODE
166 ************************************************************************************************************************
167 */
168 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
169 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
170 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
171 ) const
172 {
173 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
174 pIn->swizzleMode);
175
176 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
177
178 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
179
180 if ((numPipeTotal == 1) && (numRbTotal == 1))
181 {
182 numCompressBlkPerMetaBlkLog2 = 10;
183 }
184 else
185 {
186 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
187 }
188
189 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
190
191 Dim3d metaBlkDim = {8, 8, 1};
192 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
193 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
194 UINT_32 heightAmp = totalAmpBits - widthAmp;
195 metaBlkDim.w <<= widthAmp;
196 metaBlkDim.h <<= heightAmp;
197
198 #if DEBUG
199 Dim3d metaBlkDimDbg = {8, 8, 1};
200 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
201 {
202 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
203 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
204 {
205 metaBlkDimDbg.h <<= 1;
206 }
207 else
208 {
209 metaBlkDimDbg.w <<= 1;
210 }
211 }
212 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
213 #endif
214
215 UINT_32 numMetaBlkX;
216 UINT_32 numMetaBlkY;
217 UINT_32 numMetaBlkZ;
218
219 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
220 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
221 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
222
223 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
224
225 pOut->pitch = numMetaBlkX * metaBlkDim.w;
226 pOut->height = numMetaBlkY * metaBlkDim.h;
227 pOut->sliceSize = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4;
228
229 pOut->metaBlkWidth = metaBlkDim.w;
230 pOut->metaBlkHeight = metaBlkDim.h;
231 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
232
233 pOut->baseAlign = Max(numCompressBlkPerMetaBlk * 4, sizeAlign);
234
235 if (m_settings.metaBaseAlignFix)
236 {
237 pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
238 }
239
240 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
241 {
242 UINT_32 additionalAlign = numPipeTotal * numCompressBlkPerMetaBlk * 2;
243
244 if (additionalAlign > sizeAlign)
245 {
246 sizeAlign = additionalAlign;
247 }
248 }
249
250 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
251
252 return ADDR_OK;
253 }
254
255 /**
256 ************************************************************************************************************************
257 * Gfx9Lib::HwlComputeCmaskInfo
258 *
259 * @brief
260 * Interface function stub of AddrComputeCmaskInfo
261 *
262 * @return
263 * ADDR_E_RETURNCODE
264 ************************************************************************************************************************
265 */
266 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
267 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
268 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
269 ) const
270 {
271 // TODO: Clarify with AddrLib team
272 // ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
273
274 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
275 pIn->swizzleMode);
276
277 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
278
279 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
280
281 if ((numPipeTotal == 1) && (numRbTotal == 1))
282 {
283 numCompressBlkPerMetaBlkLog2 = 13;
284 }
285 else
286 {
287 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
288
289 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
290 }
291
292 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
293
294 Dim2d metaBlkDim = {8, 8};
295 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
296 UINT_32 heightAmp = totalAmpBits >> 1;
297 UINT_32 widthAmp = totalAmpBits - heightAmp;
298 metaBlkDim.w <<= widthAmp;
299 metaBlkDim.h <<= heightAmp;
300
301 #if DEBUG
302 Dim2d metaBlkDimDbg = {8, 8};
303 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
304 {
305 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
306 {
307 metaBlkDimDbg.h <<= 1;
308 }
309 else
310 {
311 metaBlkDimDbg.w <<= 1;
312 }
313 }
314 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
315 #endif
316
317 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
318 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
319 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
320
321 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
322
323 pOut->pitch = numMetaBlkX * metaBlkDim.w;
324 pOut->height = numMetaBlkY * metaBlkDim.h;
325 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
326 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
327 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
328
329 if (m_settings.metaBaseAlignFix)
330 {
331 pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
332 }
333
334 pOut->metaBlkWidth = metaBlkDim.w;
335 pOut->metaBlkHeight = metaBlkDim.h;
336
337 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
338
339 return ADDR_OK;
340 }
341
342 /**
343 ************************************************************************************************************************
344 * Gfx9Lib::GetMetaMipInfo
345 *
346 * @brief
347 * Get meta mip info
348 *
349 * @return
350 * N/A
351 ************************************************************************************************************************
352 */
353 VOID Gfx9Lib::GetMetaMipInfo(
354 UINT_32 numMipLevels, ///< [in] number of mip levels
355 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
356 BOOL_32 dataThick, ///< [in] data surface is thick
357 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
358 UINT_32 mip0Width, ///< [in] mip0 width
359 UINT_32 mip0Height, ///< [in] mip0 height
360 UINT_32 mip0Depth, ///< [in] mip0 depth
361 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
362 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
363 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
364 const
365 {
366 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
367 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
368 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
369 UINT_32 tailWidth = pMetaBlkDim->w;
370 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
371 UINT_32 tailDepth = pMetaBlkDim->d;
372 BOOL_32 inTail = FALSE;
373 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
374
375 if (numMipLevels > 1)
376 {
377 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
378 {
379 // Z major
380 major = ADDR_MAJOR_Z;
381 }
382 else if (numMetaBlkX >= numMetaBlkY)
383 {
384 // X major
385 major = ADDR_MAJOR_X;
386 }
387 else
388 {
389 // Y major
390 major = ADDR_MAJOR_Y;
391 }
392
393 inTail = ((mip0Width <= tailWidth) &&
394 (mip0Height <= tailHeight) &&
395 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
396
397 if (inTail == FALSE)
398 {
399 UINT_32 orderLimit;
400 UINT_32 *pMipDim;
401 UINT_32 *pOrderDim;
402
403 if (major == ADDR_MAJOR_Z)
404 {
405 // Z major
406 pMipDim = &numMetaBlkY;
407 pOrderDim = &numMetaBlkZ;
408 orderLimit = 4;
409 }
410 else if (major == ADDR_MAJOR_X)
411 {
412 // X major
413 pMipDim = &numMetaBlkY;
414 pOrderDim = &numMetaBlkX;
415 orderLimit = 4;
416 }
417 else
418 {
419 // Y major
420 pMipDim = &numMetaBlkX;
421 pOrderDim = &numMetaBlkY;
422 orderLimit = 2;
423 }
424
425 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
426 {
427 *pMipDim += 2;
428 }
429 else
430 {
431 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
432 }
433 }
434 }
435
436 if (pInfo != NULL)
437 {
438 UINT_32 mipWidth = mip0Width;
439 UINT_32 mipHeight = mip0Height;
440 UINT_32 mipDepth = mip0Depth;
441 Dim3d mipCoord = {0};
442
443 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
444 {
445 if (inTail)
446 {
447 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
448 pMetaBlkDim);
449 break;
450 }
451 else
452 {
453 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
454 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
455 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
456
457 pInfo[mip].inMiptail = FALSE;
458 pInfo[mip].startX = mipCoord.w;
459 pInfo[mip].startY = mipCoord.h;
460 pInfo[mip].startZ = mipCoord.d;
461 pInfo[mip].width = mipWidth;
462 pInfo[mip].height = mipHeight;
463 pInfo[mip].depth = dataThick ? mipDepth : 1;
464
465 if ((mip >= 3) || (mip & 1))
466 {
467 switch (major)
468 {
469 case ADDR_MAJOR_X:
470 mipCoord.w += mipWidth;
471 break;
472 case ADDR_MAJOR_Y:
473 mipCoord.h += mipHeight;
474 break;
475 case ADDR_MAJOR_Z:
476 mipCoord.d += mipDepth;
477 break;
478 default:
479 break;
480 }
481 }
482 else
483 {
484 switch (major)
485 {
486 case ADDR_MAJOR_X:
487 mipCoord.h += mipHeight;
488 break;
489 case ADDR_MAJOR_Y:
490 mipCoord.w += mipWidth;
491 break;
492 case ADDR_MAJOR_Z:
493 mipCoord.h += mipHeight;
494 break;
495 default:
496 break;
497 }
498 }
499
500 mipWidth = Max(mipWidth >> 1, 1u);
501 mipHeight = Max(mipHeight >> 1, 1u);
502 mipDepth = Max(mipDepth >> 1, 1u);
503
504 inTail = ((mipWidth <= tailWidth) &&
505 (mipHeight <= tailHeight) &&
506 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
507 }
508 }
509 }
510
511 *pNumMetaBlkX = numMetaBlkX;
512 *pNumMetaBlkY = numMetaBlkY;
513 *pNumMetaBlkZ = numMetaBlkZ;
514 }
515
516 /**
517 ************************************************************************************************************************
518 * Gfx9Lib::HwlComputeDccInfo
519 *
520 * @brief
521 * Interface function to compute DCC key info
522 *
523 * @return
524 * ADDR_E_RETURNCODE
525 ************************************************************************************************************************
526 */
527 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
528 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
529 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
530 ) const
531 {
532 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
533 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
534 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
535
536 if (dataLinear)
537 {
538 metaLinear = TRUE;
539 }
540 else if (metaLinear == TRUE)
541 {
542 pipeAligned = FALSE;
543 }
544
545 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
546
547 if (metaLinear)
548 {
549 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
550 ADDR_ASSERT_ALWAYS();
551
552 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
553 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
554 }
555 else
556 {
557 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
558
559 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
560
561 UINT_32 numFrags = Max(pIn->numFrags, 1u);
562 UINT_32 numSlices = Max(pIn->numSlices, 1u);
563
564 minMetaBlkSize /= numFrags;
565
566 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
567
568 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
569
570 if ((numPipeTotal > 1) || (numRbTotal > 1))
571 {
572 numCompressBlkPerMetaBlk =
573 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : 1024));
574
575 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
576 {
577 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
578 }
579 }
580
581 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
582 Dim3d metaBlkDim = compressBlkDim;
583
584 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
585 {
586 if ((metaBlkDim.h < metaBlkDim.w) ||
587 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
588 {
589 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
590 {
591 metaBlkDim.h <<= 1;
592 }
593 else
594 {
595 metaBlkDim.d <<= 1;
596 }
597 }
598 else
599 {
600 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
601 {
602 metaBlkDim.w <<= 1;
603 }
604 else
605 {
606 metaBlkDim.d <<= 1;
607 }
608 }
609 }
610
611 UINT_32 numMetaBlkX;
612 UINT_32 numMetaBlkY;
613 UINT_32 numMetaBlkZ;
614
615 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
616 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
617 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
618
619 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
620
621 if (numFrags > m_maxCompFrag)
622 {
623 sizeAlign *= (numFrags / m_maxCompFrag);
624 }
625
626 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
627 numCompressBlkPerMetaBlk * numFrags;
628 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
629 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
630
631 if (m_settings.metaBaseAlignFix)
632 {
633 pOut->dccRamBaseAlign = Max(pOut->dccRamBaseAlign, GetBlockSize(pIn->swizzleMode));
634 }
635
636 pOut->pitch = numMetaBlkX * metaBlkDim.w;
637 pOut->height = numMetaBlkY * metaBlkDim.h;
638 pOut->depth = numMetaBlkZ * metaBlkDim.d;
639
640 pOut->compressBlkWidth = compressBlkDim.w;
641 pOut->compressBlkHeight = compressBlkDim.h;
642 pOut->compressBlkDepth = compressBlkDim.d;
643
644 pOut->metaBlkWidth = metaBlkDim.w;
645 pOut->metaBlkHeight = metaBlkDim.h;
646 pOut->metaBlkDepth = metaBlkDim.d;
647
648 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
649 pOut->fastClearSizePerSlice =
650 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
651 }
652
653 return ADDR_OK;
654 }
655
656 /**
657 ************************************************************************************************************************
658 * Gfx9Lib::HwlGetMaxAlignments
659 *
660 * @brief
661 * Gets maximum alignments
662 * @return
663 * ADDR_E_RETURNCODE
664 ************************************************************************************************************************
665 */
666 ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments(
667 ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut ///< [out] output structure
668 ) const
669 {
670 pOut->baseAlign = HwlComputeSurfaceBaseAlign(ADDR_SW_64KB);
671
672 return ADDR_OK;
673 }
674
675 /**
676 ************************************************************************************************************************
677 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
678 *
679 * @brief
680 * Interface function stub of AddrComputeCmaskAddrFromCoord
681 *
682 * @return
683 * ADDR_E_RETURNCODE
684 ************************************************************************************************************************
685 */
686 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
687 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
688 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
689 ) const
690 {
691 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
692 input.size = sizeof(input);
693 input.cMaskFlags = pIn->cMaskFlags;
694 input.colorFlags = pIn->colorFlags;
695 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
696 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
697 input.numSlices = Max(pIn->numSlices, 1u);
698 input.swizzleMode = pIn->swizzleMode;
699 input.resourceType = pIn->resourceType;
700
701 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
702 output.size = sizeof(output);
703
704 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
705
706 if (returnCode == ADDR_OK)
707 {
708 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
709 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
710 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
711 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
712
713 CoordEq metaEq;
714
715 GetMetaEquation(&metaEq, 0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
716 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
717 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
718
719 UINT_32 xb = pIn->x / output.metaBlkWidth;
720 UINT_32 yb = pIn->y / output.metaBlkHeight;
721 UINT_32 zb = pIn->slice;
722
723 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
724 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
725 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
726
727 UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
728
729 pOut->addr = address >> 1;
730 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
731
732
733 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
734 pIn->swizzleMode);
735
736 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
737
738 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
739 }
740
741 return returnCode;
742 }
743
744 /**
745 ************************************************************************************************************************
746 * Gfx9Lib::HwlComputeHtileAddrFromCoord
747 *
748 * @brief
749 * Interface function stub of AddrComputeHtileAddrFromCoord
750 *
751 * @return
752 * ADDR_E_RETURNCODE
753 ************************************************************************************************************************
754 */
755 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
756 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
757 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
758 ) const
759 {
760 ADDR_E_RETURNCODE returnCode = ADDR_OK;
761
762 if (pIn->numMipLevels > 1)
763 {
764 returnCode = ADDR_NOTIMPLEMENTED;
765 }
766 else
767 {
768 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
769 input.size = sizeof(input);
770 input.hTileFlags = pIn->hTileFlags;
771 input.depthFlags = pIn->depthflags;
772 input.swizzleMode = pIn->swizzleMode;
773 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
774 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
775 input.numSlices = Max(pIn->numSlices, 1u);
776 input.numMipLevels = Max(pIn->numMipLevels, 1u);
777
778 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
779 output.size = sizeof(output);
780
781 returnCode = ComputeHtileInfo(&input, &output);
782
783 if (returnCode == ADDR_OK)
784 {
785 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
786 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
787 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
788 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
789
790 CoordEq metaEq;
791
792 GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
793 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
794 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
795
796 UINT_32 xb = pIn->x / output.metaBlkWidth;
797 UINT_32 yb = pIn->y / output.metaBlkHeight;
798 UINT_32 zb = pIn->slice;
799
800 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
801 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
802 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
803
804 UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
805
806 pOut->addr = address >> 1;
807
808 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
809 pIn->swizzleMode);
810
811 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
812
813 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
814 }
815 }
816
817 return returnCode;
818 }
819
820 /**
821 ************************************************************************************************************************
822 * Gfx9Lib::HwlComputeHtileCoordFromAddr
823 *
824 * @brief
825 * Interface function stub of AddrComputeHtileCoordFromAddr
826 *
827 * @return
828 * ADDR_E_RETURNCODE
829 ************************************************************************************************************************
830 */
831 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
832 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
833 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure
834 ) const
835 {
836 ADDR_E_RETURNCODE returnCode = ADDR_OK;
837
838 if (pIn->numMipLevels > 1)
839 {
840 returnCode = ADDR_NOTIMPLEMENTED;
841 }
842 else
843 {
844 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
845 input.size = sizeof(input);
846 input.hTileFlags = pIn->hTileFlags;
847 input.swizzleMode = pIn->swizzleMode;
848 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
849 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
850 input.numSlices = Max(pIn->numSlices, 1u);
851 input.numMipLevels = Max(pIn->numMipLevels, 1u);
852
853 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
854 output.size = sizeof(output);
855
856 returnCode = ComputeHtileInfo(&input, &output);
857
858 if (returnCode == ADDR_OK)
859 {
860 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
861 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
862 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
863 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
864
865 CoordEq metaEq;
866
867 GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
868 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
869 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
870
871 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
872 pIn->swizzleMode);
873
874 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
875
876 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
877
878 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
879 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
880
881 UINT_32 x, y, z, s, m;
882 metaEq.solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
883
884 pOut->slice = m / sliceSizeInBlock;
885 pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
886 pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x;
887 }
888 }
889
890 return returnCode;
891 }
892
893 /**
894 ************************************************************************************************************************
895 * Gfx9Lib::HwlComputeDccAddrFromCoord
896 *
897 * @brief
898 * Interface function stub of AddrComputeDccAddrFromCoord
899 *
900 * @return
901 * ADDR_E_RETURNCODE
902 ************************************************************************************************************************
903 */
904 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
905 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
906 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) const
907 {
908 ADDR_E_RETURNCODE returnCode = ADDR_OK;
909
910 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
911 {
912 returnCode = ADDR_NOTIMPLEMENTED;
913 }
914 else
915 {
916 ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
917 input.size = sizeof(input);
918 input.dccKeyFlags = pIn->dccKeyFlags;
919 input.colorFlags = pIn->colorFlags;
920 input.swizzleMode = pIn->swizzleMode;
921 input.resourceType = pIn->resourceType;
922 input.bpp = pIn->bpp;
923 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
924 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
925 input.numSlices = Max(pIn->numSlices, 1u);
926 input.numFrags = Max(pIn->numFrags, 1u);
927 input.numMipLevels = Max(pIn->numMipLevels, 1u);
928
929 ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
930 output.size = sizeof(output);
931
932 returnCode = ComputeDccInfo(&input, &output);
933
934 if (returnCode == ADDR_OK)
935 {
936 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
937 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
938 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
939 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
940 UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth);
941 UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth);
942 UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
943 UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
944
945 CoordEq metaEq;
946
947 GetMetaEquation(&metaEq, pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
948 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
949 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
950 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2);
951
952 UINT_32 xb = pIn->x / output.metaBlkWidth;
953 UINT_32 yb = pIn->y / output.metaBlkHeight;
954 UINT_32 zb = pIn->slice / output.metaBlkDepth;
955
956 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
957 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
958 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
959
960 UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
961
962 pOut->addr = address >> 1;
963
964 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
965 pIn->swizzleMode);
966
967 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
968
969 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
970 }
971 }
972
973 return returnCode;
974 }
975
976 /**
977 ************************************************************************************************************************
978 * Gfx9Lib::HwlInitGlobalParams
979 *
980 * @brief
981 * Initializes global parameters
982 *
983 * @return
984 * TRUE if all settings are valid
985 *
986 ************************************************************************************************************************
987 */
988 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
989 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
990 {
991 BOOL_32 valid = TRUE;
992
993 if (m_settings.isArcticIsland)
994 {
995 GB_ADDR_CONFIG gbAddrConfig;
996
997 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
998
999 // These values are copied from CModel code
1000 switch (gbAddrConfig.bits.NUM_PIPES)
1001 {
1002 case ADDR_CONFIG_1_PIPE:
1003 m_pipes = 1;
1004 m_pipesLog2 = 0;
1005 break;
1006 case ADDR_CONFIG_2_PIPE:
1007 m_pipes = 2;
1008 m_pipesLog2 = 1;
1009 break;
1010 case ADDR_CONFIG_4_PIPE:
1011 m_pipes = 4;
1012 m_pipesLog2 = 2;
1013 break;
1014 case ADDR_CONFIG_8_PIPE:
1015 m_pipes = 8;
1016 m_pipesLog2 = 3;
1017 break;
1018 case ADDR_CONFIG_16_PIPE:
1019 m_pipes = 16;
1020 m_pipesLog2 = 4;
1021 break;
1022 case ADDR_CONFIG_32_PIPE:
1023 m_pipes = 32;
1024 m_pipesLog2 = 5;
1025 break;
1026 default:
1027 ADDR_ASSERT_ALWAYS();
1028 break;
1029 }
1030
1031 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1032 {
1033 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1034 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1035 m_pipeInterleaveLog2 = 8;
1036 break;
1037 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1038 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1039 m_pipeInterleaveLog2 = 9;
1040 break;
1041 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1042 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1043 m_pipeInterleaveLog2 = 10;
1044 break;
1045 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1046 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1047 m_pipeInterleaveLog2 = 11;
1048 break;
1049 default:
1050 ADDR_ASSERT_ALWAYS();
1051 break;
1052 }
1053
1054 switch (gbAddrConfig.bits.NUM_BANKS)
1055 {
1056 case ADDR_CONFIG_1_BANK:
1057 m_banks = 1;
1058 m_banksLog2 = 0;
1059 break;
1060 case ADDR_CONFIG_2_BANK:
1061 m_banks = 2;
1062 m_banksLog2 = 1;
1063 break;
1064 case ADDR_CONFIG_4_BANK:
1065 m_banks = 4;
1066 m_banksLog2 = 2;
1067 break;
1068 case ADDR_CONFIG_8_BANK:
1069 m_banks = 8;
1070 m_banksLog2 = 3;
1071 break;
1072 case ADDR_CONFIG_16_BANK:
1073 m_banks = 16;
1074 m_banksLog2 = 4;
1075 break;
1076 default:
1077 ADDR_ASSERT_ALWAYS();
1078 break;
1079 }
1080
1081 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1082 {
1083 case ADDR_CONFIG_1_SHADER_ENGINE:
1084 m_se = 1;
1085 m_seLog2 = 0;
1086 break;
1087 case ADDR_CONFIG_2_SHADER_ENGINE:
1088 m_se = 2;
1089 m_seLog2 = 1;
1090 break;
1091 case ADDR_CONFIG_4_SHADER_ENGINE:
1092 m_se = 4;
1093 m_seLog2 = 2;
1094 break;
1095 case ADDR_CONFIG_8_SHADER_ENGINE:
1096 m_se = 8;
1097 m_seLog2 = 3;
1098 break;
1099 default:
1100 ADDR_ASSERT_ALWAYS();
1101 break;
1102 }
1103
1104 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1105 {
1106 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1107 m_rbPerSe = 1;
1108 m_rbPerSeLog2 = 0;
1109 break;
1110 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1111 m_rbPerSe = 2;
1112 m_rbPerSeLog2 = 1;
1113 break;
1114 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1115 m_rbPerSe = 4;
1116 m_rbPerSeLog2 = 2;
1117 break;
1118 default:
1119 ADDR_ASSERT_ALWAYS();
1120 break;
1121 }
1122
1123 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1124 {
1125 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1126 m_maxCompFrag = 1;
1127 m_maxCompFragLog2 = 0;
1128 break;
1129 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1130 m_maxCompFrag = 2;
1131 m_maxCompFragLog2 = 1;
1132 break;
1133 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1134 m_maxCompFrag = 4;
1135 m_maxCompFragLog2 = 2;
1136 break;
1137 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1138 m_maxCompFrag = 8;
1139 m_maxCompFragLog2 = 3;
1140 break;
1141 default:
1142 ADDR_ASSERT_ALWAYS();
1143 break;
1144 }
1145
1146 m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1147 ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1148 ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1149 m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1150 }
1151 else
1152 {
1153 valid = FALSE;
1154 ADDR_NOT_IMPLEMENTED();
1155 }
1156
1157 if (valid)
1158 {
1159 InitEquationTable();
1160 }
1161
1162 return valid;
1163 }
1164
1165 /**
1166 ************************************************************************************************************************
1167 * Gfx9Lib::HwlConvertChipFamily
1168 *
1169 * @brief
1170 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1171 * @return
1172 * ChipFamily
1173 ************************************************************************************************************************
1174 */
1175 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1176 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1177 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1178 {
1179 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1180
1181 switch (uChipFamily)
1182 {
1183 case FAMILY_AI:
1184 m_settings.isArcticIsland = 1;
1185 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1186
1187 if (m_settings.isVega10)
1188 {
1189 m_settings.isDce12 = 1;
1190 }
1191
1192 m_settings.metaBaseAlignFix = 1;
1193
1194 m_settings.depthPipeXorDisable = 1;
1195 break;
1196
1197 case FAMILY_RV:
1198 m_settings.isArcticIsland = 1;
1199 m_settings.isRaven = ASICREV_IS_RAVEN(uChipRevision);
1200
1201 if (m_settings.isRaven)
1202 {
1203 m_settings.isDcn1 = 1;
1204 }
1205
1206 m_settings.metaBaseAlignFix = 1;
1207
1208 m_settings.depthPipeXorDisable = 1;
1209 break;
1210
1211 default:
1212 ADDR_ASSERT(!"This should be a Fusion");
1213 break;
1214 }
1215
1216 return family;
1217 }
1218
1219 /**
1220 ************************************************************************************************************************
1221 * Gfx9Lib::InitRbEquation
1222 *
1223 * @brief
1224 * Init RB equation
1225 * @return
1226 * N/A
1227 ************************************************************************************************************************
1228 */
1229 VOID Gfx9Lib::GetRbEquation(
1230 CoordEq* pRbEq, ///< [out] rb equation
1231 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1232 UINT_32 numSeLog2) ///< [in] number of shader engine
1233 {
1234 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1235 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1236 Coordinate cx('x', rbRegion);
1237 Coordinate cy('y', rbRegion);
1238
1239 UINT_32 start = 0;
1240 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1241
1242 // Clear the rb equation
1243 pRbEq->resize(0);
1244 pRbEq->resize(numRbTotalLog2);
1245
1246 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1247 {
1248 // Special case when more than 1 SE, and 2 RB per SE
1249 (*pRbEq)[0].add(cx);
1250 (*pRbEq)[0].add(cy);
1251 cx++;
1252 cy++;
1253 (*pRbEq)[0].add(cy);
1254 start++;
1255 }
1256
1257 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1258
1259 for (UINT_32 i = 0; i < numBits; i++)
1260 {
1261 UINT_32 idx =
1262 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1263
1264 if ((i % 2) == 1)
1265 {
1266 (*pRbEq)[idx].add(cx);
1267 cx++;
1268 }
1269 else
1270 {
1271 (*pRbEq)[idx].add(cy);
1272 cy++;
1273 }
1274 }
1275 }
1276
1277 /**
1278 ************************************************************************************************************************
1279 * Gfx9Lib::GetDataEquation
1280 *
1281 * @brief
1282 * Get data equation for color, fmask and Z (depth) surfaces
1283 * @return
1284 * N/A
1285 ************************************************************************************************************************
1286 */
// Fills *pDataEq with the per-address-bit coordinate equation of a data surface.
// The equation is always reset to 27 empty bits first. Color surfaces are then laid out according to
// their swizzle class (linear, thick, or thin); every other surface type takes the shared
// "fmask or depth" layout at the bottom of the function.
1287 VOID Gfx9Lib::GetDataEquation(
1288 CoordEq* pDataEq, ///< [out] data surface equation
1289 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1290 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1291 AddrResourceType resourceType, ///< [in] data surface resource type
1292 UINT_32 elementBytesLog2, ///< [in] log2 of data surface element bytes
1293 UINT_32 numSamplesLog2) ///< [in] log2 of data surface sample count
1294 const
1295 {
// Running coordinate cursors: each starts at bit 0 of its channel and is advanced (++) every time
// one of its bits is placed, so the order of the statements below determines the final layout.
1296 Coordinate cx('x', 0);
1297 Coordinate cy('y', 0);
1298 Coordinate cz('z', 0);
1299 Coordinate cs('s', 0);
1300
1301 // Clear the equation
1302 pDataEq->resize(0);
1303 pDataEq->resize(27);
1304
1305 if (dataSurfaceType == Gfx9DataColor)
1306 {
1307 if (IsLinear(swizzleMode))
1308 {
// Linear color: the equation grows to 49 bits and address bit i is simply 'm' coordinate bit i.
1309 Coordinate cm('m', 0);
1310
1311 pDataEq->resize(49);
1312
1313 for (UINT_32 i = 0; i < 49; i++)
1314 {
1315 (*pDataEq)[i].add(cm);
1316 cm++;
1317 }
1318 }
1319 else if (IsThick(resourceType, swizzleMode))
1320 {
1321 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1322 UINT_32 i;
1323 if (IsStandardSwizzle(resourceType, swizzleMode))
1324 {
1325 // Standard 3d swizzle
1326 // Fill in bottom x bits
1327 for (i = elementBytesLog2; i < 4; i++)
1328 {
1329 (*pDataEq)[i].add(cx);
1330 cx++;
1331 }
1332 // Fill in 2 bits of y and then z
1333 for (i = 4; i < 6; i++)
1334 {
1335 (*pDataEq)[i].add(cy);
1336 cy++;
1337 }
1338 for (i = 6; i < 8; i++)
1339 {
1340 (*pDataEq)[i].add(cz);
1341 cz++;
1342 }
// Bits 8 and 9 depend on the element size: z,y below 4 bytes; y,x at exactly 4 bytes; x,x above.
1343 if (elementBytesLog2 < 2)
1344 {
1345 // fill in z & y bit
1346 (*pDataEq)[8].add(cz);
1347 (*pDataEq)[9].add(cy);
1348 cz++;
1349 cy++;
1350 }
1351 else if (elementBytesLog2 == 2)
1352 {
1353 // fill in y and x bit
1354 (*pDataEq)[8].add(cy);
1355 (*pDataEq)[9].add(cx);
1356 cy++;
1357 cx++;
1358 }
1359 else
1360 {
1361 // fill in 2 x bits
1362 (*pDataEq)[8].add(cx);
1363 cx++;
1364 (*pDataEq)[9].add(cx);
1365 cx++;
1366 }
1367 }
1368 else
1369 {
1370 // Z 3d swizzle
// m2dEnd is the last bit handed to mort2d() for the x/y interleave; numZs is how many z bits are
// placed directly above it. Both are selected purely from the element size.
1371 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1372 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1373 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1374 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1375 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1376 {
1377 (*pDataEq)[i].add(cz);
1378 cz++;
1379 }
// Bits 6 and 7 are only written here for element sizes of 1, 4 and 8 bytes (log2 0, 2, 3).
1380 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1381 {
1382 // add an x and z
1383 (*pDataEq)[6].add(cx);
1384 (*pDataEq)[7].add(cz);
1385 cx++;
1386 cz++;
1387 }
1388 else if (elementBytesLog2 == 2)
1389 {
1390 // add a y and z
1391 (*pDataEq)[6].add(cy);
1392 (*pDataEq)[7].add(cz);
1393 cy++;
1394 cz++;
1395 }
1396 // add y and x
1397 (*pDataEq)[8].add(cy);
1398 (*pDataEq)[9].add(cx);
1399 cy++;
1400 cx++;
1401 }
1402 // Fill in bit 10 and up
1403 pDataEq->mort3d( cz, cy, cx, 10 );
1404 }
1405 else if (IsThin(resourceType, swizzleMode))
1406 {
1407 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1408 // Color 2D
// microYBits: number of y bits placed directly above the bottom x bits (bits 4..).
// tileSplitStart: first address bit used for sample bits, i.e. the top numSamplesLog2 bits of the block.
1409 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1410 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1411 UINT_32 i;
1412 // Fill in bottom x bits
1413 for (i = elementBytesLog2; i < 4; i++)
1414 {
1415 (*pDataEq)[i].add(cx);
1416 cx++;
1417 }
1418 // Fill in bottom y bits
1419 for (i = 4; i < 4 + microYBits; i++)
1420 {
1421 (*pDataEq)[i].add(cy);
1422 cy++;
1423 }
1424 // Fill in last of the micro_x bits
1425 for (i = 4 + microYBits; i < 8; i++)
1426 {
1427 (*pDataEq)[i].add(cx);
1428 cx++;
1429 }
1430 // Fill in x/y bits below sample split
1431 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1432 // Fill in sample bits
1433 for (i = 0; i < numSamplesLog2; i++)
1434 {
1435 cs.set('s', i);
1436 (*pDataEq)[tileSplitStart + i].add(cs);
1437 }
1438 // Fill in x/y bits above sample split
// The leading coordinate of the interleave is x when numSamplesLog2 and blockSizeLog2 differ in
// parity, and y otherwise.
1439 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1440 {
1441 pDataEq->mort2d(cx, cy, blockSizeLog2);
1442 }
1443 else
1444 {
1445 pDataEq->mort2d(cy, cx, blockSizeLog2);
1446 }
1447 }
1448 else
1449 {
// Non-linear color surface that is neither thick nor thin: not expected, assert only.
1450 ADDR_ASSERT_ALWAYS();
1451 }
1452 }
1453 else
1454 {
1455 // Fmask or depth
// Layout from the LSB up: element byte bits (left empty), sample bits, an x-major x/y interleave
// up to bit (5 + numSamplesLog2), then a y-major x/y interleave for everything above.
1456 UINT_32 sampleStart = elementBytesLog2;
1457 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1458 UINT_32 ymajStart = 6 + numSamplesLog2;
1459
1460 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1461 {
1462 cs.set('s', s);
1463 (*pDataEq)[sampleStart + s].add(cs);
1464 }
1465
1466 // Put in the x-major order pixel bits
1467 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1468 // Put in the y-major order pixel bits
1469 pDataEq->mort2d(cy, cx, ymajStart);
1470 }
1471 }
1472
1473 /**
1474 ************************************************************************************************************************
1475 * Gfx9Lib::GetPipeEquation
1476 *
1477 * @brief
1478 * Get pipe equation
1479 * @return
1480 * N/A
1481 ************************************************************************************************************************
1482 */
// Derives the pipe-selection equation (*pPipeEq, numPipeLog2 bits) from a data surface equation.
// The pipe bits are the data-equation bits that sit directly above the pipe interleave; for
// non-color surfaces they may be taken from higher up, and for XOR swizzle modes additional
// data-equation bits (and, in one case, z bits) are XORed in.
1483 VOID Gfx9Lib::GetPipeEquation(
1484 CoordEq* pPipeEq, ///< [out] pipe equation
1485 CoordEq* pDataEq, ///< [in] data equation
1486 UINT_32 pipeInterleaveLog2, ///< [in] log2 of pipe interleave
1487 UINT_32 numPipeLog2, ///< [in] log2 of number of pipes
1488 UINT_32 numSamplesLog2, ///< [in] log2 of data surface sample count
1489 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1490 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1491 AddrResourceType resourceType ///< [in] data surface resource type
1492 ) const
1493 {
1494 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1495 CoordEq dataEq;
1496
// All edits below are made on the local dataEq; presumably copy() writes into its argument (it is
// used that way with the [out] pPipeEq further down), leaving *pDataEq untouched - TODO confirm.
1497 pDataEq->copy(dataEq);
1498
1499 if (dataSurfaceType == Gfx9DataColor)
1500 {
// Color only: shift by -numSamplesLog2 starting at bit (blockSizeLog2 - numSamplesLog2), which is
// where GetDataEquation places the sample bits for thin color surfaces.
1501 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1502 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1503 }
1504
// Default pipe equation: the numPipeLog2 data bits starting at the pipe interleave.
1505 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1506
1507 // This section should only apply to z/stencil, maybe fmask
1508 // If the pipe bit is below the comp block size,
1509 // then keep moving up the address until we find a bit that is above
1510 UINT_32 pipeStart = 0;
1511
1512 if (dataSurfaceType != Gfx9DataColor)
1513 {
1514 Coordinate tileMin('x', 3);
1515
// NOTE(review): this search has no upper bound on pipeStart; it relies on the data equation
// containing, at or above the pipe interleave, a bit whose first term is not below x3.
1516 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1517 {
1518 pipeStart++;
1519 }
1520
1521 // if pipe is 0, then the first pipe bit is above the comp block size,
1522 // so we don't need to do anything
1523 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1524 // we will get the same pipe equation
1525 if (pipeStart != 0)
1526 {
1527 for (UINT_32 i = 0; i < numPipeLog2; i++)
1528 {
1529 // Copy the ith bit above (pipe interleave + pipeStart) to the current pipe equation bit
1530 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1531 }
1532 }
1533 }
1534
1535 if (IsPrt(swizzleMode))
1536 {
1537 // Clear out bits above the block size if prt's are enabled
// Shrinking to blockSizeLog2 and growing back to 48 is what empties the upper bits.
1538 dataEq.resize(blockSizeLog2);
1539 dataEq.resize(48);
1540 }
1541
1542 if (IsXor(swizzleMode))
1543 {
1544 CoordEq xorMask;
1545
1546 if (IsThick(resourceType, swizzleMode))
1547 {
// Thick: take 2 * numPipeLog2 data bits above the pipe bits and fold each consecutive pair
// into a single mask bit.
1548 CoordEq xorMask2;
1549
1550 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1551
1552 xorMask.resize(numPipeLog2);
1553
1554 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1555 {
1556 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1557 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1558 }
1559 }
1560 else
1561 {
1562 // Xor in the bits above the pipe+gpu bits
1563 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1564
1565 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1566 {
1567 Coordinate co;
1568 CoordEq xorMask2;
1569 // if 1xaa and not prt, then xor in the z bits
// The z bits are assigned in descending order: pipe bit 0 gets z[numPipeLog2 - 1], the last gets z0.
1570 xorMask2.resize(0);
1571 xorMask2.resize(numPipeLog2);
1572 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1573 {
1574 co.set('z', numPipeLog2 - 1 - pipeIdx);
1575 xorMask2[pipeIdx].add(co);
1576 }
1577
1578 pPipeEq->xorin(xorMask2);
1579 }
1580 }
1581
// The mask collected from the data equation is reversed before being XORed into the pipe equation.
1582 xorMask.reverse();
1583 pPipeEq->xorin(xorMask);
1584 }
1585 }
1586
1587 /**
1588 ************************************************************************************************************************
1589 * Gfx9Lib::GetMetaEquation
1590 *
1591 * @brief
1592 * Get meta equation for cmask/htile/DCC
1593 * @return
1594 * N/A
1595 ************************************************************************************************************************
1596 */
// Builds the metadata (cmask/htile/DCC) address equation into *pMetaEq.
// Outline of the non-linear path: start from an x/y(/z) interleave plus compressed-fragment bits,
// filter it down to the coordinates between the compress-block and meta-block sizes, strip the
// coordinates already implied by the pipe and RB equations, append the 'm' (macro) bits, scale by
// the metadata element size, and finally re-insert pipe, RB and uncompressed-fragment bits just
// above the pipe interleave. The resulting equation addresses nibbles, not bytes.
1597 VOID Gfx9Lib::GetMetaEquation(
1598 CoordEq* pMetaEq, ///< [out] meta equation
1599 UINT_32 maxMip, ///< [in] max mip Id
1600 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1601 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1602 ADDR2_META_FLAGS metaFlag, ///< [in] meta flag
1603 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1604 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1605 AddrResourceType resourceType, ///< [in] data surface resource type
1606 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1607 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1608 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1609 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1610 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1611 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1612 const
1613 {
1614 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1615 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1616 //UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1617
1618 // Get the correct data address and rb equation
1619 CoordEq dataEq;
1620 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1621 elementBytesLog2, numSamplesLog2);
1622
1623 // Get pipe and rb equations
1624 CoordEq pipeEquation;
1625 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1626 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
// From here on the pipe bit count is whatever size the generated pipe equation reports.
1627 numPipeTotalLog2 = pipeEquation.getsize();
1628
1629 if (metaFlag.linear)
1630 {
1631 // Linear metadata supporting was removed for GFX9! No one can use this feature.
// The code below still runs after the assert in builds where asserts do not stop execution.
1632 ADDR_ASSERT_ALWAYS();
1633
1634 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1635
1636 dataEq.copy(*pMetaEq);
1637
1638 if (IsLinear(swizzleMode))
1639 {
1640 if (metaFlag.pipeAligned)
1641 {
1642 // Remove the pipe bits
1643 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1644 pMetaEq->shift(-shift, pipeInterleaveLog2);
1645 }
1646 // Divide by comp block size, which for linear (which is always color) is 256 B
1647 pMetaEq->shift(-8);
1648
1649 if (metaFlag.pipeAligned)
1650 {
1651 // Put pipe bits back in
1652 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1653
1654 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1655 {
1656 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1657 }
1658 }
1659 }
1660
// Final shift up by one bit, matching the nibble addressing used by the non-linear path.
1661 pMetaEq->shift(1);
1662 }
1663 else
1664 {
// NOTE(review): the cast is to INT_32 but the destination is UINT_32; the value is used as unsigned.
1665 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
// For color, the compressed fragment count is capped at maxCompFragLog2; other surface types use
// the full sample count. Any fragments beyond the cap are the "uncompressed" ones.
1666 UINT_32 compFragLog2 =
1667 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1668 maxCompFragLog2 : numSamplesLog2;
1669
1670 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1671
1672 // Make sure the metaaddr is cleared
1673 pMetaEq->resize(0);
1674 pMetaEq->resize(27);
1675
1676 if (IsThick(resourceType, swizzleMode))
1677 {
1678 Coordinate cx('x', 0);
1679 Coordinate cy('y', 0);
1680 Coordinate cz('z', 0);
1681
// The leading coordinate of the interleave is y when maxMip > 0 and x otherwise.
1682 if (maxMip > 0)
1683 {
1684 pMetaEq->mort3d(cy, cx, cz);
1685 }
1686 else
1687 {
1688 pMetaEq->mort3d(cx, cy, cz);
1689 }
1690 }
1691 else
1692 {
1693 Coordinate cx('x', 0);
1694 Coordinate cy('y', 0);
1695 Coordinate cs;
1696
// Same mip-dependent ordering as the thick case; the interleave starts at bit compFragLog2 so the
// low bits stay free for the compressed fragment bits added below.
1697 if (maxMip > 0)
1698 {
1699 pMetaEq->mort2d(cy, cx, compFragLog2);
1700 }
1701 else
1702 {
1703 pMetaEq->mort2d(cx, cy, compFragLog2);
1704 }
1705
1706 //------------------------------------------------------------------------------------------------------------------------
1707 // Put the compressible fragments at the lsb
1708 // the uncompressible frags will be at the msb of the micro address
1709 //------------------------------------------------------------------------------------------------------------------------
1710 for (UINT_32 s = 0; s < compFragLog2; s++)
1711 {
1712 cs.set('s', s);
1713 (*pMetaEq)[s].add(cs);
1714 }
1715 }
1716
1717 // Keep a copy of the pipe equations
// pipeEquation is filtered and consumed below; the untouched copy is what gets re-inserted at the end.
1718 CoordEq origPipeEquation;
1719 pipeEquation.copy(origPipeEquation);
1720
1721 Coordinate co;
1722 // filter out everything under the compressed block size
1723 co.set('x', compBlkWidthLog2);
1724 pMetaEq->Filter('<', co, 0, 'x');
1725 co.set('y', compBlkHeightLog2);
1726 pMetaEq->Filter('<', co, 0, 'y');
1727 co.set('z', compBlkDepthLog2);
1728 pMetaEq->Filter('<', co, 0, 'z');
1729
1730 // For non-color, filter out sample bits
1731 if (dataSurfaceType != Gfx9DataColor)
1732 {
1733 co.set('x', 0);
1734 pMetaEq->Filter('<', co, 0, 's');
1735 }
1736
1737 // filter out everything above the metablock size
1738 co.set('x', metaBlkWidthLog2 - 1);
1739 pMetaEq->Filter('>', co, 0, 'x');
1740 co.set('y', metaBlkHeightLog2 - 1);
1741 pMetaEq->Filter('>', co, 0, 'y');
1742 co.set('z', metaBlkDepthLog2 - 1);
1743 pMetaEq->Filter('>', co, 0, 'z');
1744
1745 // filter out everything above the metablock size for the channel bits
1746 co.set('x', metaBlkWidthLog2 - 1);
1747 pipeEquation.Filter('>', co, 0, 'x');
1748 co.set('y', metaBlkHeightLog2 - 1);
1749 pipeEquation.Filter('>', co, 0, 'y');
1750 co.set('z', metaBlkDepthLog2 - 1);
1751 pipeEquation.Filter('>', co, 0, 'z');
1752
1753 // Make sure we still have the same number of channel bits
1754 if (pipeEquation.getsize() != numPipeTotalLog2)
1755 {
1756 ADDR_ASSERT_ALWAYS();
1757 }
1758
1759 // Loop through all channel and rb bits,
1760 // and make sure these components exist in the metadata address
// Terms are visited from last to first (j counts down); a missing term only triggers an assert.
1761 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1762 {
1763 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1764 {
1765 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1766 {
1767 ADDR_ASSERT_ALWAYS();
1768 }
1769 }
1770 }
1771
// RB bits only take part when the metadata is RB-aligned; otherwise both counts are 0 and every
// RB loop below is skipped.
1772 UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1773 UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1774 CoordEq origRbEquation;
1775
1776 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1777
// Working copy: rbEquation is reduced below, origRbEquation is kept for the final re-insertion.
1778 CoordEq rbEquation = origRbEquation;
1779
1780 UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1781
1782 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1783 {
1784 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1785 {
1786 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1787 {
1788 ADDR_ASSERT_ALWAYS();
1789 }
1790 }
1791 }
1792
1793 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1794 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1795 {
1796 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1797 {
1798 if (rbEquation[i] == pipeEquation[j])
1799 {
1800 rbEquation[i].Clear();
1801 }
1802 }
1803 }
1804
1805 // Loop through each bit of the channel, get the smallest coordinate,
1806 // and remove it from the metaaddr, and rb_equation
1807 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1808 {
1809 pipeEquation[i].getsmallest(co);
1810
// Removing the coordinate is expected to shrink the meta equation by exactly one bit.
1811 UINT_32 old_size = pMetaEq->getsize();
1812 pMetaEq->Filter('=', co);
1813 UINT_32 new_size = pMetaEq->getsize();
1814 if (new_size != old_size-1)
1815 {
1816 ADDR_ASSERT_ALWAYS();
1817 }
1818 pipeEquation.remove(co);
1819 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
1820 {
1821 if (rbEquation[j].remove(co))
1822 {
1823 // if we actually removed something from this bit, then add the remaining
1824 // channel bits, as these can be removed for this bit
1825 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
1826 {
1827 if (pipeEquation[i][k] != co)
1828 {
1829 rbEquation[j].add(pipeEquation[i][k]);
1830 }
1831 }
1832 }
1833 }
1834 }
1835
1836 // Loop through the rb bits and see what remain;
1837 // filter out the smallest coordinate if it remains
1838 UINT_32 rbBitsLeft = 0;
1839 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1840 {
1841 if (rbEquation[i].getsize() > 0)
1842 {
1843 rbBitsLeft++;
1844 rbEquation[i].getsmallest(co);
1845 UINT_32 old_size = pMetaEq->getsize();
1846 pMetaEq->Filter('=', co);
1847 UINT_32 new_size = pMetaEq->getsize();
// NOTE(review): unlike the channel loop above, a size mismatch here is deliberately not asserted;
// the branch body is empty.
1848 if (new_size != old_size - 1)
1849 {
1850 // assert warning
1851 }
1852 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
1853 {
1854 if (rbEquation[j].remove(co))
1855 {
1856 // if we actually removed something from this bit, then add the remaining
1857 // rb bits, as these can be removed for this bit
1858 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
1859 {
1860 if (rbEquation[i][k] != co)
1861 {
1862 rbEquation[j].add(rbEquation[i][k]);
1863 }
1864 }
1865 }
1866 }
1867 }
1868 }
1869
1870 // capture the size of the metaaddr
1871 UINT_32 metaSize = pMetaEq->getsize();
1872 // resize to 49 bits...make this a nibble address
1873 pMetaEq->resize(49);
1874 // Concatenate the macro address above the current address
// Bits metaSize..48 become 'm' coordinate bits 0, 1, 2, ...
1875 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
1876 {
1877 co.set('m', j);
1878 (*pMetaEq)[i].add(co);
1879 }
1880
1881 // Multiply by meta element size (in nibbles)
// Color shifts by 1 bit (x2), depth/stencil by 3 bits (x8); any other surface type is not shifted.
1882 if (dataSurfaceType == Gfx9DataColor)
1883 {
1884 pMetaEq->shift(1);
1885 }
1886 else if (dataSurfaceType == Gfx9DataDepthStencil)
1887 {
1888 pMetaEq->shift(3);
1889 }
1890
1891 //------------------------------------------------------------------------------------------
1892 // Note the pipeInterleaveLog2+1 is because address is a nibble address
1893 // Shift up from pipe interleave number of channel
1894 // and rb bits left, and uncompressed fragments
1895 //------------------------------------------------------------------------------------------
1896
1897 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
1898
1899 // Put in the channel bits
1900 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1901 {
1902 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
1903 }
1904
1905 // Put in remaining rb bits
// i walks the RB bits and j counts placed bits. When rbBitsLeft is 0 the loop body and the
// modulo in the increment are never evaluated, so numRbTotalLog2 == 0 is safe here.
1906 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
1907 {
1908 if (rbEquation[i].getsize() > 0)
1909 {
1910 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
1911 // Mark any rb bit we add in to the rb mask
1912 j++;
1913 }
1914 }
1915
1916 //------------------------------------------------------------------------------------------
1917 // Put in the uncompressed fragment bits
1918 //------------------------------------------------------------------------------------------
1919 for (UINT_32 i = 0; i < uncompFragLog2; i++)
1920 {
1921 co.set('s', compFragLog2 + i);
1922 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
1923 }
1924 }
1925 }
1926
1927 /**
1928 ************************************************************************************************************************
1929 * Gfx9Lib::IsEquationSupported
1930 *
1931 * @brief
1932 * Check if equation is supported for given swizzle mode and resource type.
1933 *
1934 * @return
1935 * TRUE if supported
1936 ************************************************************************************************************************
1937 */
1938 BOOL_32 Gfx9Lib::IsEquationSupported(
1939 AddrResourceType rsrcType,
1940 AddrSwizzleMode swMode,
1941 UINT_32 elementBytesLog2) const
1942 {
1943 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
1944 (IsLinear(swMode) == FALSE) &&
1945 (((IsTex2d(rsrcType) == TRUE) &&
1946 ((elementBytesLog2 < 4) ||
1947 ((IsRotateSwizzle(swMode) == FALSE) &&
1948 (IsZOrderSwizzle(swMode) == FALSE)))) ||
1949 ((IsTex3d(rsrcType) == TRUE) &&
1950 (IsRotateSwizzle(swMode) == FALSE) &&
1951 (IsBlock256b(swMode) == FALSE)));
1952
1953 return supported;
1954 }
1955
1956 /**
1957 ************************************************************************************************************************
1958 * Gfx9Lib::InitEquationTable
1959 *
1960 * @brief
1961 * Initialize Equation table.
1962 *
1963 * @return
1964 * N/A
1965 ************************************************************************************************************************
1966 */
1967 VOID Gfx9Lib::InitEquationTable()
1968 {
1969 memset(m_equationTable, 0, sizeof(m_equationTable));
1970
1971 // Loop all possible resource type (2D/3D)
1972 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1973 {
1974 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1975
1976 // Loop all possible swizzle mode
1977 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
1978 {
1979 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1980
1981 // Loop all possible bpp
1982 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
1983 {
1984 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1985
1986 // Check if the input is supported
1987 if (IsEquationSupported(rsrcType, swMode, bppIdx))
1988 {
1989 ADDR_EQUATION equation;
1990 ADDR_E_RETURNCODE retCode;
1991
1992 memset(&equation, 0, sizeof(ADDR_EQUATION));
1993
1994 // Generate the equation
1995 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
1996 {
1997 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
1998 }
1999 else if (IsThin(rsrcType, swMode))
2000 {
2001 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2002 }
2003 else
2004 {
2005 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2006 }
2007
2008 // Only fill the equation into the table if the return code is ADDR_OK,
2009 // otherwise if the return code is not ADDR_OK, it indicates this is not
2010 // a valid input, we do nothing but just fill invalid equation index
2011 // into the lookup table.
2012 if (retCode == ADDR_OK)
2013 {
2014 equationIndex = m_numEquations;
2015 ADDR_ASSERT(equationIndex < EquationTableSize);
2016
2017 m_equationTable[equationIndex] = equation;
2018
2019 m_numEquations++;
2020 }
2021 else
2022 {
2023 ADDR_ASSERT_ALWAYS();
2024 }
2025 }
2026
2027 // Fill the index into the lookup table, if the combination is not supported
2028 // fill the invalid equation index
2029 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2030 }
2031 }
2032 }
2033 }
2034
2035 /**
2036 ************************************************************************************************************************
2037 * Gfx9Lib::HwlGetEquationIndex
2038 *
2039 * @brief
2040 * Interface function stub of GetEquationIndex
2041 *
2042 * @return
2043 * ADDR_E_RETURNCODE
2044 ************************************************************************************************************************
2045 */
2046 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2047 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2048 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2049 ) const
2050 {
2051 AddrResourceType rsrcType = pIn->resourceType;
2052 AddrSwizzleMode swMode = pIn->swizzleMode;
2053 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2054 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2055
2056 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2057 {
2058 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2059 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2060
2061 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2062 }
2063
2064 if (pOut->pMipInfo != NULL)
2065 {
2066 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2067 {
2068 pOut->pMipInfo[i].equationIndex = index;
2069 }
2070 }
2071
2072 return index;
2073 }
2074
2075 /**
2076 ************************************************************************************************************************
2077 * Gfx9Lib::HwlComputeBlock256Equation
2078 *
2079 * @brief
2080 * Interface function stub of ComputeBlock256Equation
2081 *
2082 * @return
2083 * ADDR_E_RETURNCODE
2084 ************************************************************************************************************************
2085 */
2086 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2087 AddrResourceType rsrcType,
2088 AddrSwizzleMode swMode,
2089 UINT_32 elementBytesLog2,
2090 ADDR_EQUATION* pEquation) const
2091 {
2092 ADDR_E_RETURNCODE ret = ADDR_OK;
2093
2094 pEquation->numBits = 8;
2095
2096 UINT_32 i = 0;
2097 for (; i < elementBytesLog2; i++)
2098 {
2099 InitChannel(1, 0 , i, &pEquation->addr[i]);
2100 }
2101
2102 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2103
2104 const UINT_32 maxBitsUsed = 4;
2105 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2106 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2107
2108 for (i = 0; i < maxBitsUsed; i++)
2109 {
2110 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2111 InitChannel(1, 1, i, &y[i]);
2112 }
2113
2114 if (IsStandardSwizzle(rsrcType, swMode))
2115 {
2116 switch (elementBytesLog2)
2117 {
2118 case 0:
2119 pixelBit[0] = x[0];
2120 pixelBit[1] = x[1];
2121 pixelBit[2] = x[2];
2122 pixelBit[3] = x[3];
2123 pixelBit[4] = y[0];
2124 pixelBit[5] = y[1];
2125 pixelBit[6] = y[2];
2126 pixelBit[7] = y[3];
2127 break;
2128 case 1:
2129 pixelBit[0] = x[0];
2130 pixelBit[1] = x[1];
2131 pixelBit[2] = x[2];
2132 pixelBit[3] = y[0];
2133 pixelBit[4] = y[1];
2134 pixelBit[5] = y[2];
2135 pixelBit[6] = x[3];
2136 break;
2137 case 2:
2138 pixelBit[0] = x[0];
2139 pixelBit[1] = x[1];
2140 pixelBit[2] = y[0];
2141 pixelBit[3] = y[1];
2142 pixelBit[4] = y[2];
2143 pixelBit[5] = x[2];
2144 break;
2145 case 3:
2146 pixelBit[0] = x[0];
2147 pixelBit[1] = y[0];
2148 pixelBit[2] = y[1];
2149 pixelBit[3] = x[1];
2150 pixelBit[4] = x[2];
2151 break;
2152 case 4:
2153 pixelBit[0] = y[0];
2154 pixelBit[1] = y[1];
2155 pixelBit[2] = x[0];
2156 pixelBit[3] = x[1];
2157 break;
2158 default:
2159 ADDR_ASSERT_ALWAYS();
2160 ret = ADDR_INVALIDPARAMS;
2161 break;
2162 }
2163 }
2164 else if (IsDisplaySwizzle(rsrcType, swMode))
2165 {
2166 switch (elementBytesLog2)
2167 {
2168 case 0:
2169 pixelBit[0] = x[0];
2170 pixelBit[1] = x[1];
2171 pixelBit[2] = x[2];
2172 pixelBit[3] = y[1];
2173 pixelBit[4] = y[0];
2174 pixelBit[5] = y[2];
2175 pixelBit[6] = x[3];
2176 pixelBit[7] = y[3];
2177 break;
2178 case 1:
2179 pixelBit[0] = x[0];
2180 pixelBit[1] = x[1];
2181 pixelBit[2] = x[2];
2182 pixelBit[3] = y[0];
2183 pixelBit[4] = y[1];
2184 pixelBit[5] = y[2];
2185 pixelBit[6] = x[3];
2186 break;
2187 case 2:
2188 pixelBit[0] = x[0];
2189 pixelBit[1] = x[1];
2190 pixelBit[2] = y[0];
2191 pixelBit[3] = x[2];
2192 pixelBit[4] = y[1];
2193 pixelBit[5] = y[2];
2194 break;
2195 case 3:
2196 pixelBit[0] = x[0];
2197 pixelBit[1] = y[0];
2198 pixelBit[2] = x[1];
2199 pixelBit[3] = x[2];
2200 pixelBit[4] = y[1];
2201 break;
2202 case 4:
2203 pixelBit[0] = x[0];
2204 pixelBit[1] = y[0];
2205 pixelBit[2] = x[1];
2206 pixelBit[3] = y[1];
2207 break;
2208 default:
2209 ADDR_ASSERT_ALWAYS();
2210 ret = ADDR_INVALIDPARAMS;
2211 break;
2212 }
2213 }
2214 else if (IsRotateSwizzle(swMode))
2215 {
2216 switch (elementBytesLog2)
2217 {
2218 case 0:
2219 pixelBit[0] = y[0];
2220 pixelBit[1] = y[1];
2221 pixelBit[2] = y[2];
2222 pixelBit[3] = x[1];
2223 pixelBit[4] = x[0];
2224 pixelBit[5] = x[2];
2225 pixelBit[6] = x[3];
2226 pixelBit[7] = y[3];
2227 break;
2228 case 1:
2229 pixelBit[0] = y[0];
2230 pixelBit[1] = y[1];
2231 pixelBit[2] = y[2];
2232 pixelBit[3] = x[0];
2233 pixelBit[4] = x[1];
2234 pixelBit[5] = x[2];
2235 pixelBit[6] = x[3];
2236 break;
2237 case 2:
2238 pixelBit[0] = y[0];
2239 pixelBit[1] = y[1];
2240 pixelBit[2] = x[0];
2241 pixelBit[3] = y[2];
2242 pixelBit[4] = x[1];
2243 pixelBit[5] = x[2];
2244 break;
2245 case 3:
2246 pixelBit[0] = y[0];
2247 pixelBit[1] = x[0];
2248 pixelBit[2] = y[1];
2249 pixelBit[3] = x[1];
2250 pixelBit[4] = x[2];
2251 break;
2252 default:
2253 ADDR_ASSERT_ALWAYS();
2254 case 4:
2255 ret = ADDR_INVALIDPARAMS;
2256 break;
2257 }
2258 }
2259 else
2260 {
2261 ADDR_ASSERT_ALWAYS();
2262 ret = ADDR_INVALIDPARAMS;
2263 }
2264
2265 // Post validation
2266 if (ret == ADDR_OK)
2267 {
2268 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2269 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2270 (microBlockDim.w * (1 << elementBytesLog2)));
2271 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2272 }
2273
2274 return ret;
2275 }
2276
2277 /**
2278 ************************************************************************************************************************
2279 * Gfx9Lib::HwlComputeThinEquation
2280 *
2281 * @brief
2282 * Interface function stub of ComputeThinEquation
2283 *
2284 * @return
2285 * ADDR_E_RETURNCODE
2286 ************************************************************************************************************************
2287 */
2288 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2289 AddrResourceType rsrcType,
2290 AddrSwizzleMode swMode,
2291 UINT_32 elementBytesLog2,
2292 ADDR_EQUATION* pEquation) const
2293 {
2294 ADDR_E_RETURNCODE ret = ADDR_OK;
2295
2296 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2297
2298 UINT_32 maxXorBits = blockSizeLog2;
2299 if (IsNonPrtXor(swMode))
2300 {
2301 // For non-prt-xor, maybe need to initialize some more bits for xor
2302 // The highest xor bit used in equation will be max the following 3 items:
2303 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2304 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2305 // 3. blockSizeLog2
2306
2307 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2308 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2309 GetPipeXorBits(blockSizeLog2) +
2310 2 * GetBankXorBits(blockSizeLog2));
2311 }
2312
2313 const UINT_32 maxBitsUsed = 14;
2314 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2315 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2316 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2317
2318 const UINT_32 extraXorBits = 16;
2319 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2320 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2321
2322 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2323 {
2324 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2325 InitChannel(1, 1, i, &y[i]);
2326 }
2327
2328 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2329
2330 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2331 {
2332 InitChannel(1, 0 , i, &pixelBit[i]);
2333 }
2334
2335 UINT_32 xIdx = 0;
2336 UINT_32 yIdx = 0;
2337 UINT_32 lowBits = 0;
2338
2339 if (IsZOrderSwizzle(swMode))
2340 {
2341 if (elementBytesLog2 <= 3)
2342 {
2343 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2344 {
2345 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2346 }
2347
2348 lowBits = 6;
2349 }
2350 else
2351 {
2352 ret = ADDR_INVALIDPARAMS;
2353 }
2354 }
2355 else
2356 {
2357 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2358
2359 if (ret == ADDR_OK)
2360 {
2361 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2362 xIdx = Log2(microBlockDim.w);
2363 yIdx = Log2(microBlockDim.h);
2364 lowBits = 8;
2365 }
2366 }
2367
2368 if (ret == ADDR_OK)
2369 {
2370 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2371 {
2372 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2373 }
2374
2375 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2376 {
2377 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2378 }
2379
2380 if (IsXor(swMode))
2381 {
2382 // Fill XOR bits
2383 UINT_32 pipeStart = m_pipeInterleaveLog2;
2384 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2385
2386 UINT_32 bankStart = pipeStart + pipeXorBits;
2387 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2388
2389 for (UINT_32 i = 0; i < pipeXorBits; i++)
2390 {
2391 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2392 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2393 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2394
2395 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2396 }
2397
2398 for (UINT_32 i = 0; i < bankXorBits; i++)
2399 {
2400 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2401 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2402 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2403
2404 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2405 }
2406
2407 if (IsPrt(swMode) == FALSE)
2408 {
2409 for (UINT_32 i = 0; i < pipeXorBits; i++)
2410 {
2411 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2412 }
2413
2414 for (UINT_32 i = 0; i < bankXorBits; i++)
2415 {
2416 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2417 }
2418 }
2419 }
2420
2421 pEquation->numBits = blockSizeLog2;
2422 }
2423
2424 return ret;
2425 }
2426
2427 /**
2428 ************************************************************************************************************************
2429 * Gfx9Lib::HwlComputeThickEquation
2430 *
2431 * @brief
2432 * Interface function stub of ComputeThickEquation
2433 *
2434 * @return
2435 * ADDR_E_RETURNCODE
2436 ************************************************************************************************************************
2437 */
2438 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2439 AddrResourceType rsrcType,
2440 AddrSwizzleMode swMode,
2441 UINT_32 elementBytesLog2,
2442 ADDR_EQUATION* pEquation) const
2443 {
2444 ADDR_E_RETURNCODE ret = ADDR_OK;
2445
2446 ADDR_ASSERT(IsTex3d(rsrcType));
2447
2448 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2449
2450 UINT_32 maxXorBits = blockSizeLog2;
2451 if (IsNonPrtXor(swMode))
2452 {
2453 // For non-prt-xor, maybe need to initialize some more bits for xor
2454 // The highest xor bit used in equation will be max the following 3:
2455 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2456 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2457 // 3. blockSizeLog2
2458
2459 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2460 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2461 GetPipeXorBits(blockSizeLog2) +
2462 3 * GetBankXorBits(blockSizeLog2));
2463 }
2464
2465 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2466 {
2467 InitChannel(1, 0 , i, &pEquation->addr[i]);
2468 }
2469
2470 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2471
2472 const UINT_32 maxBitsUsed = 12;
2473 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2474 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2475 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2476 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2477
2478 const UINT_32 extraXorBits = 24;
2479 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2480 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2481
2482 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2483 {
2484 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2485 InitChannel(1, 1, i, &y[i]);
2486 InitChannel(1, 2, i, &z[i]);
2487 }
2488
2489 if (IsZOrderSwizzle(swMode))
2490 {
2491 switch (elementBytesLog2)
2492 {
2493 case 0:
2494 pixelBit[0] = x[0];
2495 pixelBit[1] = y[0];
2496 pixelBit[2] = x[1];
2497 pixelBit[3] = y[1];
2498 pixelBit[4] = z[0];
2499 pixelBit[5] = z[1];
2500 pixelBit[6] = x[2];
2501 pixelBit[7] = z[2];
2502 pixelBit[8] = y[2];
2503 pixelBit[9] = x[3];
2504 break;
2505 case 1:
2506 pixelBit[0] = x[0];
2507 pixelBit[1] = y[0];
2508 pixelBit[2] = x[1];
2509 pixelBit[3] = y[1];
2510 pixelBit[4] = z[0];
2511 pixelBit[5] = z[1];
2512 pixelBit[6] = z[2];
2513 pixelBit[7] = y[2];
2514 pixelBit[8] = x[2];
2515 break;
2516 case 2:
2517 pixelBit[0] = x[0];
2518 pixelBit[1] = y[0];
2519 pixelBit[2] = x[1];
2520 pixelBit[3] = z[0];
2521 pixelBit[4] = y[1];
2522 pixelBit[5] = z[1];
2523 pixelBit[6] = y[2];
2524 pixelBit[7] = x[2];
2525 break;
2526 case 3:
2527 pixelBit[0] = x[0];
2528 pixelBit[1] = y[0];
2529 pixelBit[2] = z[0];
2530 pixelBit[3] = x[1];
2531 pixelBit[4] = z[1];
2532 pixelBit[5] = y[1];
2533 pixelBit[6] = x[2];
2534 break;
2535 case 4:
2536 pixelBit[0] = x[0];
2537 pixelBit[1] = y[0];
2538 pixelBit[2] = z[0];
2539 pixelBit[3] = z[1];
2540 pixelBit[4] = y[1];
2541 pixelBit[5] = x[1];
2542 break;
2543 default:
2544 ADDR_ASSERT_ALWAYS();
2545 ret = ADDR_INVALIDPARAMS;
2546 break;
2547 }
2548 }
2549 else if (IsStandardSwizzle(rsrcType, swMode))
2550 {
2551 switch (elementBytesLog2)
2552 {
2553 case 0:
2554 pixelBit[0] = x[0];
2555 pixelBit[1] = x[1];
2556 pixelBit[2] = x[2];
2557 pixelBit[3] = x[3];
2558 pixelBit[4] = y[0];
2559 pixelBit[5] = y[1];
2560 pixelBit[6] = z[0];
2561 pixelBit[7] = z[1];
2562 pixelBit[8] = z[2];
2563 pixelBit[9] = y[2];
2564 break;
2565 case 1:
2566 pixelBit[0] = x[0];
2567 pixelBit[1] = x[1];
2568 pixelBit[2] = x[2];
2569 pixelBit[3] = y[0];
2570 pixelBit[4] = y[1];
2571 pixelBit[5] = z[0];
2572 pixelBit[6] = z[1];
2573 pixelBit[7] = z[2];
2574 pixelBit[8] = y[2];
2575 break;
2576 case 2:
2577 pixelBit[0] = x[0];
2578 pixelBit[1] = x[1];
2579 pixelBit[2] = y[0];
2580 pixelBit[3] = y[1];
2581 pixelBit[4] = z[0];
2582 pixelBit[5] = z[1];
2583 pixelBit[6] = y[2];
2584 pixelBit[7] = x[2];
2585 break;
2586 case 3:
2587 pixelBit[0] = x[0];
2588 pixelBit[1] = y[0];
2589 pixelBit[2] = y[1];
2590 pixelBit[3] = z[0];
2591 pixelBit[4] = z[1];
2592 pixelBit[5] = x[1];
2593 pixelBit[6] = x[2];
2594 break;
2595 case 4:
2596 pixelBit[0] = y[0];
2597 pixelBit[1] = y[1];
2598 pixelBit[2] = z[0];
2599 pixelBit[3] = z[1];
2600 pixelBit[4] = x[0];
2601 pixelBit[5] = x[1];
2602 break;
2603 default:
2604 ADDR_ASSERT_ALWAYS();
2605 ret = ADDR_INVALIDPARAMS;
2606 break;
2607 }
2608 }
2609 else
2610 {
2611 ADDR_ASSERT_ALWAYS();
2612 ret = ADDR_INVALIDPARAMS;
2613 }
2614
2615 if (ret == ADDR_OK)
2616 {
2617 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2618 UINT_32 xIdx = Log2(microBlockDim.w);
2619 UINT_32 yIdx = Log2(microBlockDim.h);
2620 UINT_32 zIdx = Log2(microBlockDim.d);
2621
2622 pixelBit = pEquation->addr;
2623
2624 const UINT_32 lowBits = 10;
2625 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2626 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2627
2628 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2629 {
2630 if ((i % 3) == 0)
2631 {
2632 pixelBit[i] = x[xIdx++];
2633 }
2634 else if ((i % 3) == 1)
2635 {
2636 pixelBit[i] = z[zIdx++];
2637 }
2638 else
2639 {
2640 pixelBit[i] = y[yIdx++];
2641 }
2642 }
2643
2644 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2645 {
2646 if ((i % 3) == 0)
2647 {
2648 xorExtra[i - blockSizeLog2] = x[xIdx++];
2649 }
2650 else if ((i % 3) == 1)
2651 {
2652 xorExtra[i - blockSizeLog2] = z[zIdx++];
2653 }
2654 else
2655 {
2656 xorExtra[i - blockSizeLog2] = y[yIdx++];
2657 }
2658 }
2659
2660 if (IsXor(swMode))
2661 {
2662 // Fill XOR bits
2663 UINT_32 pipeStart = m_pipeInterleaveLog2;
2664 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2665 for (UINT_32 i = 0; i < pipeXorBits; i++)
2666 {
2667 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2668 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2669 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2670
2671 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2672
2673 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2674 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2675 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2676
2677 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2678 }
2679
2680 UINT_32 bankStart = pipeStart + pipeXorBits;
2681 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2682 for (UINT_32 i = 0; i < bankXorBits; i++)
2683 {
2684 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2685 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2686 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2687
2688 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2689
2690 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2691 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2692 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2693
2694 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2695 }
2696 }
2697
2698 pEquation->numBits = blockSizeLog2;
2699 }
2700
2701 return ret;
2702 }
2703
2704 /**
2705 ************************************************************************************************************************
2706 * Gfx9Lib::IsValidDisplaySwizzleMode
2707 *
2708 * @brief
2709 * Check if a swizzle mode is supported by display engine
2710 *
2711 * @return
2712 * TRUE is swizzle mode is supported by display engine
2713 ************************************************************************************************************************
2714 */
2715 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2716 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2717 {
2718 BOOL_32 support = FALSE;
2719
2720 //const AddrResourceType resourceType = pIn->resourceType;
2721 const AddrSwizzleMode swizzleMode = pIn->swizzleMode;
2722
2723 if (m_settings.isDce12)
2724 {
2725 switch (swizzleMode)
2726 {
2727 case ADDR_SW_256B_D:
2728 case ADDR_SW_256B_R:
2729 support = (pIn->bpp == 32);
2730 break;
2731
2732 case ADDR_SW_LINEAR:
2733 case ADDR_SW_4KB_D:
2734 case ADDR_SW_4KB_R:
2735 case ADDR_SW_64KB_D:
2736 case ADDR_SW_64KB_R:
2737 case ADDR_SW_VAR_D:
2738 case ADDR_SW_VAR_R:
2739 case ADDR_SW_4KB_D_X:
2740 case ADDR_SW_4KB_R_X:
2741 case ADDR_SW_64KB_D_X:
2742 case ADDR_SW_64KB_R_X:
2743 case ADDR_SW_VAR_D_X:
2744 case ADDR_SW_VAR_R_X:
2745 support = (pIn->bpp <= 64);
2746 break;
2747
2748 default:
2749 break;
2750 }
2751 }
2752 else if (m_settings.isDcn1)
2753 {
2754 switch (swizzleMode)
2755 {
2756 case ADDR_SW_4KB_D:
2757 case ADDR_SW_64KB_D:
2758 case ADDR_SW_VAR_D:
2759 case ADDR_SW_64KB_D_T:
2760 case ADDR_SW_4KB_D_X:
2761 case ADDR_SW_64KB_D_X:
2762 case ADDR_SW_VAR_D_X:
2763 support = (pIn->bpp == 64);
2764 break;
2765
2766 case ADDR_SW_LINEAR:
2767 case ADDR_SW_4KB_S:
2768 case ADDR_SW_64KB_S:
2769 case ADDR_SW_VAR_S:
2770 case ADDR_SW_64KB_S_T:
2771 case ADDR_SW_4KB_S_X:
2772 case ADDR_SW_64KB_S_X:
2773 case ADDR_SW_VAR_S_X:
2774 support = (pIn->bpp <= 64);
2775 break;
2776
2777 default:
2778 break;
2779 }
2780 }
2781 else
2782 {
2783 ADDR_NOT_IMPLEMENTED();
2784 }
2785
2786 return support;
2787 }
2788
2789 /**
2790 ************************************************************************************************************************
2791 * Gfx9Lib::HwlComputePipeBankXor
2792 *
2793 * @brief
2794 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2795 *
2796 * @return
2797 * PipeBankXor value
2798 ************************************************************************************************************************
2799 */
2800 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
2801 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
2802 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
2803 {
2804 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2805 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
2806 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
2807
2808 UINT_32 pipeXor = 0;
2809 UINT_32 bankXor = 0;
2810
2811 const UINT_32 bankMask = (1 << bankBits) - 1;
2812 const UINT_32 index = pIn->surfIndex & bankMask;
2813
2814 const UINT_32 bpp = pIn->flags.fmask ?
2815 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
2816 if (bankBits == 4)
2817 {
2818 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
2819 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
2820
2821 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
2822 }
2823 else if (bankBits > 0)
2824 {
2825 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
2826 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
2827 bankXor = (index * bankIncrease) & bankMask;
2828 }
2829
2830 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
2831
2832 return ADDR_OK;
2833 }
2834
2835 /**
2836 ************************************************************************************************************************
2837 * Gfx9Lib::HwlComputeSlicePipeBankXor
2838 *
2839 * @brief
2840 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2841 *
2842 * @return
2843 * PipeBankXor value
2844 ************************************************************************************************************************
2845 */
2846 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
2847 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
2848 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
2849 {
2850 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2851 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
2852 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
2853
2854 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
2855 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
2856
2857 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
2858
2859 return ADDR_OK;
2860 }
2861
2862 /**
2863 ************************************************************************************************************************
2864 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2865 *
2866 * @brief
2867 * Compute sub resource offset to support swizzle pattern
2868 *
2869 * @return
2870 * Offset
2871 ************************************************************************************************************************
2872 */
2873 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2874 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
2875 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
2876 {
2877 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2878
2879 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2880 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
2881 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
2882 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
2883 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
2884 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
2885
2886 pOut->offset = pIn->slice * pIn->sliceSize +
2887 pIn->macroBlockOffset +
2888 (pIn->mipTailOffset ^ pipeBankXor) -
2889 static_cast<UINT_64>(pipeBankXor);
2890 return ADDR_OK;
2891 }
2892
2893 /**
2894 ************************************************************************************************************************
2895 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
2896 *
2897 * @brief
2898 * Compute surface info sanity check
2899 *
2900 * @return
2901 * Offset
2902 ************************************************************************************************************************
2903 */
2904 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
2905 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2906 {
2907 BOOL_32 invalid = FALSE;
2908
2909 if ((pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2910 {
2911 invalid = TRUE;
2912 }
2913 else if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) ||
2914 (pIn->resourceType >= ADDR_RSRC_MAX_TYPE))
2915 {
2916 invalid = TRUE;
2917 }
2918
2919 BOOL_32 mipmap = (pIn->numMipLevels > 1);
2920 BOOL_32 msaa = (pIn->numFrags > 1);
2921
2922 ADDR2_SURFACE_FLAGS flags = pIn->flags;
2923 BOOL_32 zbuffer = (flags.depth || flags.stencil);
2924 BOOL_32 color = flags.color;
2925 BOOL_32 display = flags.display || flags.rotated;
2926
2927 AddrResourceType rsrcType = pIn->resourceType;
2928 BOOL_32 tex3d = IsTex3d(rsrcType);
2929 AddrSwizzleMode swizzle = pIn->swizzleMode;
2930 BOOL_32 linear = IsLinear(swizzle);
2931 BOOL_32 blk256B = IsBlock256b(swizzle);
2932 BOOL_32 blkVar = IsBlockVariable(swizzle);
2933 BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
2934 BOOL_32 prt = flags.prt;
2935 BOOL_32 stereo = flags.qbStereo;
2936
2937 if (invalid == FALSE)
2938 {
2939 if ((pIn->numFrags > 1) &&
2940 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2941 {
2942 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2943 invalid = TRUE;
2944 }
2945 }
2946
2947 if (invalid == FALSE)
2948 {
2949 switch (rsrcType)
2950 {
2951 case ADDR_RSRC_TEX_1D:
2952 invalid = msaa || zbuffer || display || (linear == FALSE) || stereo;
2953 break;
2954 case ADDR_RSRC_TEX_2D:
2955 invalid = (msaa && mipmap) || (stereo && msaa) || (stereo && mipmap);
2956 break;
2957 case ADDR_RSRC_TEX_3D:
2958 invalid = msaa || zbuffer || display || stereo;
2959 break;
2960 default:
2961 invalid = TRUE;
2962 break;
2963 }
2964 }
2965
2966 if (invalid == FALSE)
2967 {
2968 if (display)
2969 {
2970 invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE);
2971 }
2972 }
2973
2974 if (invalid == FALSE)
2975 {
2976 if (linear)
2977 {
2978 invalid = ((ADDR_RSRC_TEX_1D != rsrcType) && prt) ||
2979 zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0);
2980 }
2981 else
2982 {
2983 if (blk256B || blkVar || isNonPrtXor)
2984 {
2985 invalid = prt;
2986 if (blk256B)
2987 {
2988 invalid = invalid || zbuffer || tex3d || mipmap || msaa;
2989 }
2990 }
2991
2992 if (invalid == FALSE)
2993 {
2994 if (IsZOrderSwizzle(swizzle))
2995 {
2996 invalid = color && msaa;
2997 }
2998 else if (IsStandardSwizzle(rsrcType, swizzle))
2999 {
3000 invalid = zbuffer;
3001 }
3002 else if (IsDisplaySwizzle(rsrcType, swizzle))
3003 {
3004 invalid = zbuffer;
3005 }
3006 else if (IsRotateSwizzle(swizzle))
3007 {
3008 invalid = zbuffer || (pIn->bpp > 64) || tex3d;
3009 }
3010 else
3011 {
3012 ADDR_ASSERT(!"invalid swizzle mode");
3013 invalid = TRUE;
3014 }
3015 }
3016 }
3017 }
3018
3019 ADDR_ASSERT(invalid == FALSE);
3020
3021 return invalid ? ADDR_INVALIDPARAMS : ADDR_OK;
3022 }
3023
3024 /**
3025 ************************************************************************************************************************
3026 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3027 *
3028 * @brief
3029 * Internal function to get suggested surface information for client to use
3030 *
3031 * @return
3032 * ADDR_E_RETURNCODE
3033 ************************************************************************************************************************
3034 */
3035 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3036 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3037 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
3038 {
3039 // Macro define resource block type
3040 enum AddrBlockType
3041 {
3042 AddrBlockMicro = 0, // Resource uses 256B block
3043 AddrBlock4KB = 1, // Resource uses 4KB block
3044 AddrBlock64KB = 2, // Resource uses 64KB block
3045 AddrBlockVar = 3, // Resource uses var blcok
3046 AddrBlockLinear = 4, // Resource uses linear swizzle mode
3047
3048 AddrBlockMaxTiledType = AddrBlock64KB + 1,
3049 };
3050
3051 enum AddrBlockSet
3052 {
3053 AddrBlockSetMicro = 1 << AddrBlockMicro,
3054 AddrBlockSetMacro4KB = 1 << AddrBlock4KB,
3055 AddrBlockSetMacro64KB = 1 << AddrBlock64KB,
3056 AddrBlockSetVar = 1 << AddrBlockVar,
3057 AddrBlockSetLinear = 1 << AddrBlockLinear,
3058
3059 AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB,
3060 };
3061
3062 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3063 ElemLib* pElemLib = GetElemLib();
3064
3065 // Set format to INVALID will skip this conversion
3066 UINT_32 expandX = 1;
3067 UINT_32 expandY = 1;
3068 UINT_32 bpp = pIn->bpp;
3069 UINT_32 width = pIn->width;
3070 UINT_32 height = pIn->height;
3071
3072 if (pIn->format != ADDR_FMT_INVALID)
3073 {
3074 // Don't care for this case
3075 ElemMode elemMode = ADDR_UNCOMPRESSED;
3076
3077 // Get compression/expansion factors and element mode which indicates compression/expansion
3078 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3079 &elemMode,
3080 &expandX,
3081 &expandY);
3082
3083 UINT_32 basePitch = 0;
3084 GetElemLib()->AdjustSurfaceInfo(elemMode,
3085 expandX,
3086 expandY,
3087 &bpp,
3088 &basePitch,
3089 &width,
3090 &height);
3091 }
3092
3093 UINT_32 numSamples = Max(pIn->numSamples, 1u);
3094 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3095 UINT_32 slice = Max(pIn->numSlices, 1u);
3096 UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3097 UINT_32 minSizeAlign = NextPow2(pIn->minSizeAlign);
3098
3099 if (pIn->flags.fmask)
3100 {
3101 bpp = GetFmaskBpp(numSamples, numFrags);
3102 numFrags = 1;
3103 numSamples = 1;
3104 pOut->resourceType = ADDR_RSRC_TEX_2D;
3105 }
3106 else
3107 {
3108 // The output may get changed for volume(3D) texture resource in future
3109 pOut->resourceType = pIn->resourceType;
3110 }
3111
3112 ADDR_ASSERT(bpp >= 8u);
3113 UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u);
3114
3115 if (IsTex1d(pOut->resourceType))
3116 {
3117 pOut->swizzleMode = ADDR_SW_LINEAR;
3118 pOut->validBlockSet.value = AddrBlockSetLinear;
3119 pOut->canXor = FALSE;
3120 }
3121 else
3122 {
3123 ADDR2_BLOCK_SET blockSet;
3124 blockSet.value = 0;
3125
3126 AddrSwType swType = ADDR_SW_S;
3127
3128 // prt Xor and non-xor will have less height align requirement for stereo surface
3129 BOOL_32 prtXor = (pIn->flags.prt || pIn->flags.qbStereo) && (pIn->noXor == FALSE);
3130 BOOL_32 displayResource = FALSE;
3131
3132 pOut->canXor = (pIn->flags.prt == FALSE) && (pIn->noXor == FALSE);
3133
3134 // Filter out improper swType and blockSet by HW restriction
3135 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3136 {
3137 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3138 blockSet.value = AddrBlockSetMacro;
3139 swType = ADDR_SW_Z;
3140
3141 if (pIn->flags.depth && pIn->flags.texture)
3142 {
3143 if (((bpp == 16) && (numFrags >= 4)) ||
3144 ((bpp == 32) && (numFrags >= 2)))
3145 {
3146 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3147 // equation from wrong address within memory range a tile covered and use the
3148 // garbage data for compressed Z reading which finally leads to corruption.
3149 pOut->canXor = FALSE;
3150 prtXor = FALSE;
3151 }
3152 }
3153 }
3154 else if (ElemLib::IsBlockCompressed(pIn->format))
3155 {
3156 // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes. Not sure
3157 // under what circumstances "_D" would be appropriate as these formats are not
3158 // displayable.
3159 blockSet.value = AddrBlockSetMacro;
3160
3161 // This isn't to be used as texture and caller doesn't allow macro tiled.
3162 if ((pIn->flags.texture == FALSE) &&
3163 (pIn->forbiddenBlock.macro4KB && pIn->forbiddenBlock.macro64KB))
3164 {
3165 blockSet.value |= AddrBlockSetLinear;
3166 }
3167 swType = ADDR_SW_D;
3168 }
3169 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3170 {
3171 // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes. Its not
3172 // clear under what circumstances the D or R modes would be appropriate since
3173 // these formats are not displayable.
3174 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3175 swType = ADDR_SW_S;
3176 }
3177 else if (IsTex3d(pOut->resourceType))
3178 {
3179 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3180
3181 if (pIn->flags.prt)
3182 {
3183 // PRT cannot use SW_D which gives an unexpected block dimension
3184 swType = ADDR_SW_Z;
3185 }
3186 else if ((numMipLevels > 1) && (slice >= width) && (slice >= height))
3187 {
3188 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3189 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3190 swType = ADDR_SW_Z;
3191 }
3192 else if (pIn->flags.color)
3193 {
3194 swType = ADDR_SW_D;
3195 }
3196 else
3197 {
3198 swType = ADDR_SW_Z;
3199 }
3200 }
3201 else
3202 {
3203 swType = ((pIn->flags.display == TRUE) ||
3204 (pIn->flags.overlay == TRUE) ||
3205 (pIn->bpp == 128)) ? ADDR_SW_D : ADDR_SW_S;
3206
3207 if (numMipLevels > 1)
3208 {
3209 ADDR_ASSERT(numFrags == 1);
3210 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3211 }
3212 else if ((numFrags > 1) || (numSamples > 1))
3213 {
3214 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3215 blockSet.value = AddrBlockSetMacro;
3216 }
3217 else
3218 {
3219 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3220 blockSet.value = AddrBlockSetLinear | AddrBlockSetMicro | AddrBlockSetMacro;
3221
3222 displayResource = pIn->flags.rotated || pIn->flags.display;
3223
3224 if (displayResource)
3225 {
3226 swType = pIn->flags.rotated ? ADDR_SW_R : ADDR_SW_D;
3227
3228 if (pIn->bpp > 64)
3229 {
3230 blockSet.value = 0;
3231 }
3232 else if (m_settings.isDce12)
3233 {
3234 if (pIn->bpp != 32)
3235 {
3236 blockSet.micro = FALSE;
3237 }
3238
3239 // DCE12 does not support display surface to be _T swizzle mode
3240 prtXor = FALSE;
3241 }
3242 else if (m_settings.isDcn1)
3243 {
3244 // _R is not supported by Dcn1
3245 if (pIn->bpp == 64)
3246 {
3247 swType = ADDR_SW_D;
3248 }
3249 else
3250 {
3251 swType = ADDR_SW_S;
3252 }
3253
3254 blockSet.micro = FALSE;
3255 }
3256 else
3257 {
3258 ADDR_NOT_IMPLEMENTED();
3259 returnCode = ADDR_NOTSUPPORTED;
3260 }
3261 }
3262 }
3263 }
3264
3265 if ((numFrags > 1) &&
3266 (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
3267 {
3268 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3269 blockSet.macro4KB = FALSE;
3270 }
3271
3272 if (pIn->flags.prt)
3273 {
3274 blockSet.value &= AddrBlockSetMacro64KB;
3275 }
3276
3277 // Apply customized forbidden setting
3278 blockSet.value &= ~pIn->forbiddenBlock.value;
3279
3280 if (pIn->maxAlign > 0)
3281 {
3282 if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
3283 {
3284 blockSet.macro64KB = FALSE;
3285 }
3286
3287 if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
3288 {
3289 blockSet.macro4KB = FALSE;
3290 }
3291
3292 if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
3293 {
3294 blockSet.micro = FALSE;
3295 }
3296 }
3297
3298 Dim3d blkAlign[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3299 Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3300 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3301
3302 if (blockSet.micro)
3303 {
3304 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w,
3305 &blkAlign[AddrBlockMicro].h,
3306 &blkAlign[AddrBlockMicro].d,
3307 bpp,
3308 numFrags,
3309 pOut->resourceType,
3310 ADDR_SW_256B);
3311
3312 if (returnCode == ADDR_OK)
3313 {
3314 if (displayResource)
3315 {
3316 blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32);
3317 }
3318 else if ((blkAlign[AddrBlockMicro].w >= width) && (blkAlign[AddrBlockMicro].h >= height) &&
3319 (minSizeAlign <= GetBlockSize(ADDR_SW_256B)))
3320 {
3321 // If one 256B block can contain the surface, don't bother bigger block type
3322 blockSet.macro4KB = FALSE;
3323 blockSet.macro64KB = FALSE;
3324 blockSet.var = FALSE;
3325 }
3326
3327 padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height,
3328 slice, &paddedDim[AddrBlockMicro]);
3329 }
3330 }
3331
3332 if ((returnCode == ADDR_OK) && blockSet.macro4KB)
3333 {
3334 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w,
3335 &blkAlign[AddrBlock4KB].h,
3336 &blkAlign[AddrBlock4KB].d,
3337 bpp,
3338 numFrags,
3339 pOut->resourceType,
3340 ADDR_SW_4KB);
3341
3342 if (returnCode == ADDR_OK)
3343 {
3344 if (displayResource)
3345 {
3346 blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32);
3347 }
3348
3349 padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height,
3350 slice, &paddedDim[AddrBlock4KB]);
3351
3352 ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]);
3353 }
3354 }
3355
3356 if ((returnCode == ADDR_OK) && blockSet.macro64KB)
3357 {
3358 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w,
3359 &blkAlign[AddrBlock64KB].h,
3360 &blkAlign[AddrBlock64KB].d,
3361 bpp,
3362 numFrags,
3363 pOut->resourceType,
3364 ADDR_SW_64KB);
3365
3366 if (returnCode == ADDR_OK)
3367 {
3368 if (displayResource)
3369 {
3370 blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32);
3371 }
3372
3373 padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height,
3374 slice, &paddedDim[AddrBlock64KB]);
3375
3376 ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]);
3377 ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]);
3378 }
3379 }
3380
3381 if (returnCode == ADDR_OK)
3382 {
3383 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3384 {
3385 padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement);
3386 }
3387
3388 // Use minimum block type which meets all conditions above if flag minimizeAlign was set
3389 if (pIn->flags.minimizeAlign)
3390 {
3391 // If padded size of 64KB block is larger than padded size of 256B block or 4KB
3392 // block, filter out 64KB block from candidate list
3393 if (blockSet.macro64KB &&
3394 ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) ||
3395 (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB]))))
3396 {
3397 blockSet.macro64KB = FALSE;
3398 }
3399
3400 // If padded size of 4KB block is larger than padded size of 256B block,
3401 // filter out 4KB block from candidate list
3402 if (blockSet.macro4KB &&
3403 blockSet.micro &&
3404 (padSize[AddrBlockMicro] < padSize[AddrBlock4KB]))
3405 {
3406 blockSet.macro4KB = FALSE;
3407 }
3408 }
3409 // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint
3410 else if (pIn->flags.opt4space)
3411 {
3412 UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] :
3413 (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]);
3414
3415 threshold += threshold >> 1;
3416
3417 if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold))
3418 {
3419 blockSet.macro64KB = FALSE;
3420 }
3421
3422 if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold))
3423 {
3424 blockSet.macro4KB = FALSE;
3425 }
3426 }
3427 else
3428 {
3429 if (blockSet.macro64KB &&
3430 (padSize[AddrBlock64KB] >= static_cast<UINT_64>(width) * height * slice * 2) &&
3431 ((blockSet.value & ~AddrBlockSetMacro64KB) != 0))
3432 {
3433 // If 64KB block waste more than half memory on padding, filter it out from
3434 // candidate list when it is not the only choice left
3435 blockSet.macro64KB = FALSE;
3436 }
3437 }
3438
3439 if (blockSet.value == 0)
3440 {
3441 // Bad things happen, client will not get any useful information from AddrLib.
3442 // Maybe we should fill in some output earlier instead of outputting nothing?
3443 ADDR_ASSERT_ALWAYS();
3444 returnCode = ADDR_INVALIDPARAMS;
3445 }
3446 else
3447 {
3448 pOut->validBlockSet = blockSet;
3449 pOut->canXor = pOut->canXor &&
3450 (blockSet.macro4KB || blockSet.macro64KB || blockSet.var);
3451
3452 if (blockSet.macro64KB || blockSet.macro4KB)
3453 {
3454 if (swType == ADDR_SW_Z)
3455 {
3456 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z;
3457 }
3458 else if (swType == ADDR_SW_S)
3459 {
3460 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S;
3461 }
3462 else if (swType == ADDR_SW_D)
3463 {
3464 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D;
3465 }
3466 else
3467 {
3468 ADDR_ASSERT(swType == ADDR_SW_R);
3469 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R;
3470 }
3471
3472 if (prtXor && blockSet.macro64KB)
3473 {
3474 // Client wants PRTXOR, give back _T swizzle mode if 64KB is available
3475 const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z;
3476 pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + prtGap);
3477 }
3478 else if (pOut->canXor)
3479 {
3480 // Client wants XOR and this is allowed, return XOR version swizzle mode
3481 const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z;
3482 pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + xorGap);
3483 }
3484 }
3485 else if (blockSet.micro)
3486 {
3487 if (swType == ADDR_SW_S)
3488 {
3489 pOut->swizzleMode = ADDR_SW_256B_S;
3490 }
3491 else if (swType == ADDR_SW_D)
3492 {
3493 pOut->swizzleMode = ADDR_SW_256B_D;
3494 }
3495 else
3496 {
3497 ADDR_ASSERT(swType == ADDR_SW_R);
3498 pOut->swizzleMode = ADDR_SW_256B_R;
3499 }
3500 }
3501 else if (blockSet.linear)
3502 {
3503 // Fall into this branch doesn't mean linear is suitable, only no other choices!
3504 pOut->swizzleMode = ADDR_SW_LINEAR;
3505 }
3506 else
3507 {
3508 ADDR_ASSERT(blockSet.var);
3509
3510 // Designer considers VAR swizzle mode useless for most cases
3511 ADDR_UNHANDLED_CASE();
3512
3513 returnCode = ADDR_NOTSUPPORTED;
3514 }
3515
3516 #if DEBUG
3517 // Post sanity check, at least AddrLib should accept the output generated by its own
3518 if (pOut->swizzleMode != ADDR_SW_LINEAR)
3519 {
3520 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3521 localIn.flags = pIn->flags;
3522 localIn.swizzleMode = pOut->swizzleMode;
3523 localIn.resourceType = pOut->resourceType;
3524 localIn.format = pIn->format;
3525 localIn.bpp = bpp;
3526 localIn.width = width;
3527 localIn.height = height;
3528 localIn.numSlices = slice;
3529 localIn.numMipLevels = numMipLevels;
3530 localIn.numSamples = numSamples;
3531 localIn.numFrags = numFrags;
3532
3533 HwlComputeSurfaceInfoSanityCheck(&localIn);
3534
3535 // TODO : check all valid block type available in validBlockSet?
3536 }
3537 #endif
3538 }
3539 }
3540 }
3541
3542 return returnCode;
3543 }
3544
3545 /**
3546 ************************************************************************************************************************
3547 * Gfx9Lib::ComputeStereoInfo
3548 *
3549 * @brief
3550 * Compute height alignment and right eye pipeBankXor for stereo surface
3551 *
3552 * @return
3553 * Error code
3554 *
3555 ************************************************************************************************************************
3556 */
3557 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3558 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3559 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
3560 UINT_32* pHeightAlign
3561 ) const
3562 {
3563 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3564
3565 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3566
3567 if (eqIndex < m_numEquations)
3568 {
3569 if (IsXor(pIn->swizzleMode))
3570 {
3571 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3572 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
3573 const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
3574 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
3575 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3576 const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
3577
3578 ADDR_ASSERT(maxYCoordBlock256 ==
3579 GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
3580
3581 const UINT_32 maxYCoordInBaseEquation =
3582 (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256;
3583
3584 ADDR_ASSERT(maxYCoordInBaseEquation ==
3585 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3586
3587 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3588
3589 ADDR_ASSERT(maxYCoordInPipeXor ==
3590 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3591
3592 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3593 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3594
3595 ADDR_ASSERT(maxYCoordInBankXor ==
3596 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3597
3598 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3599
3600 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3601 {
3602 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3603
3604 if (pOut->pStereoInfo != NULL)
3605 {
3606 pOut->pStereoInfo->rightSwizzle = 0;
3607
3608 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3609 {
3610 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3611 {
3612 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3613 }
3614
3615 if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3616 {
3617 pOut->pStereoInfo->rightSwizzle |=
3618 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3619 }
3620
3621 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3622 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3623 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3624 }
3625 }
3626 }
3627 }
3628 }
3629 else
3630 {
3631 ADDR_ASSERT_ALWAYS();
3632 returnCode = ADDR_ERROR;
3633 }
3634
3635 return returnCode;
3636 }
3637
3638 /**
3639 ************************************************************************************************************************
3640 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3641 *
3642 * @brief
3643 * Internal function to calculate alignment for tiled surface
3644 *
3645 * @return
3646 * ADDR_E_RETURNCODE
3647 ************************************************************************************************************************
3648 */
3649 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3650 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3651 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3652 ) const
3653 {
3654 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3655 &pOut->blockHeight,
3656 &pOut->blockSlices,
3657 pIn->bpp,
3658 pIn->numFrags,
3659 pIn->resourceType,
3660 pIn->swizzleMode);
3661
3662 if (returnCode == ADDR_OK)
3663 {
3664 UINT_32 pitchAlignInElement = pOut->blockWidth;
3665
3666 if ((IsTex2d(pIn->resourceType) == TRUE) &&
3667 (pIn->flags.display || pIn->flags.rotated) &&
3668 (pIn->numMipLevels <= 1) &&
3669 (pIn->numSamples <= 1) &&
3670 (pIn->numFrags <= 1))
3671 {
3672 // Display engine needs pitch align to be at least 32 pixels.
3673 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3674 }
3675
3676 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3677
3678 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3679 {
3680 if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3681 {
3682 returnCode = ADDR_INVALIDPARAMS;
3683 }
3684 else if (pIn->pitchInElement < pOut->pitch)
3685 {
3686 returnCode = ADDR_INVALIDPARAMS;
3687 }
3688 else
3689 {
3690 pOut->pitch = pIn->pitchInElement;
3691 }
3692 }
3693
3694 UINT_32 heightAlign = 0;
3695
3696 if (pIn->flags.qbStereo)
3697 {
3698 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3699 }
3700
3701 if (returnCode == ADDR_OK)
3702 {
3703 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3704
3705 if (heightAlign > 1)
3706 {
3707 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3708 }
3709
3710 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3711
3712 pOut->epitchIsHeight = FALSE;
3713 pOut->mipChainInTail = FALSE;
3714
3715 pOut->mipChainPitch = pOut->pitch;
3716 pOut->mipChainHeight = pOut->height;
3717 pOut->mipChainSlice = pOut->numSlices;
3718
3719 if (pIn->numMipLevels > 1)
3720 {
3721 UINT_32 numMipLevel;
3722 ADDR2_MIP_INFO *pMipInfo;
3723 ADDR2_MIP_INFO mipInfo[4];
3724
3725 if (pOut->pMipInfo != NULL)
3726 {
3727 pMipInfo = pOut->pMipInfo;
3728 numMipLevel = pIn->numMipLevels;
3729 }
3730 else
3731 {
3732 pMipInfo = mipInfo;
3733 numMipLevel = Min(pIn->numMipLevels, 4u);
3734 }
3735
3736 UINT_32 endingMip = GetMipChainInfo(pIn->resourceType,
3737 pIn->swizzleMode,
3738 pIn->bpp,
3739 pIn->width,
3740 pIn->height,
3741 pIn->numSlices,
3742 pOut->blockWidth,
3743 pOut->blockHeight,
3744 pOut->blockSlices,
3745 numMipLevel,
3746 pMipInfo);
3747
3748 if (endingMip == 0)
3749 {
3750 pOut->epitchIsHeight = TRUE;
3751 pOut->pitch = pMipInfo[0].pitch;
3752 pOut->height = pMipInfo[0].height;
3753 pOut->numSlices = pMipInfo[0].depth;
3754 pOut->mipChainInTail = TRUE;
3755 }
3756 else
3757 {
3758 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
3759 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
3760
3761 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
3762 pIn->swizzleMode,
3763 mip0WidthInBlk,
3764 mip0HeightInBlk,
3765 pOut->numSlices / pOut->blockSlices);
3766 if (majorMode == ADDR_MAJOR_Y)
3767 {
3768 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
3769
3770 if ((mip1WidthInBlk == 1) && (endingMip > 2))
3771 {
3772 mip1WidthInBlk++;
3773 }
3774
3775 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
3776
3777 pOut->epitchIsHeight = FALSE;
3778 }
3779 else
3780 {
3781 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
3782
3783 if ((mip1HeightInBlk == 1) && (endingMip > 2))
3784 {
3785 mip1HeightInBlk++;
3786 }
3787
3788 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
3789
3790 pOut->epitchIsHeight = TRUE;
3791 }
3792 }
3793
3794 if (pOut->pMipInfo != NULL)
3795 {
3796 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
3797
3798 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3799 {
3800 Dim3d mipStartPos = {0};
3801 UINT_32 mipTailOffsetInBytes = 0;
3802
3803 mipStartPos = GetMipStartPos(pIn->resourceType,
3804 pIn->swizzleMode,
3805 pOut->pitch,
3806 pOut->height,
3807 pOut->numSlices,
3808 pOut->blockWidth,
3809 pOut->blockHeight,
3810 pOut->blockSlices,
3811 i,
3812 elementBytesLog2,
3813 &mipTailOffsetInBytes);
3814
3815 UINT_32 pitchInBlock =
3816 pOut->mipChainPitch / pOut->blockWidth;
3817 UINT_32 sliceInBlock =
3818 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
3819 UINT_64 blockIndex =
3820 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
3821 UINT_64 macroBlockOffset =
3822 blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
3823
3824 pMipInfo[i].macroBlockOffset = macroBlockOffset;
3825 pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
3826 }
3827 }
3828 }
3829 else if (pOut->pMipInfo != NULL)
3830 {
3831 pOut->pMipInfo[0].pitch = pOut->pitch;
3832 pOut->pMipInfo[0].height = pOut->height;
3833 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3834 pOut->pMipInfo[0].offset = 0;
3835 }
3836
3837 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
3838 (pIn->bpp >> 3) * pIn->numFrags;
3839 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
3840 pOut->baseAlign = HwlComputeSurfaceBaseAlign(pIn->swizzleMode);
3841
3842 if (pIn->flags.prt)
3843 {
3844 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
3845 }
3846 }
3847 }
3848
3849 return returnCode;
3850 }
3851
3852 /**
3853 ************************************************************************************************************************
3854 * Gfx9Lib::GetMipChainInfo
3855 *
3856 * @brief
3857 * Internal function to get out information about mip chain
3858 *
3859 * @return
3860 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
3861 ************************************************************************************************************************
3862 */
3863 UINT_32 Gfx9Lib::GetMipChainInfo(
3864 AddrResourceType resourceType,
3865 AddrSwizzleMode swizzleMode,
3866 UINT_32 bpp,
3867 UINT_32 mip0Width,
3868 UINT_32 mip0Height,
3869 UINT_32 mip0Depth,
3870 UINT_32 blockWidth,
3871 UINT_32 blockHeight,
3872 UINT_32 blockDepth,
3873 UINT_32 numMipLevel,
3874 ADDR2_MIP_INFO* pMipInfo) const
3875 {
3876 const Dim3d tailMaxDim =
3877 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
3878
3879 UINT_32 mipPitch = mip0Width;
3880 UINT_32 mipHeight = mip0Height;
3881 UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
3882 UINT_32 offset = 0;
3883 UINT_32 endingMip = numMipLevel - 1;
3884 BOOL_32 inTail = FALSE;
3885 BOOL_32 finalDim = FALSE;
3886
3887 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
3888 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
3889
3890 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
3891 {
3892 if (inTail)
3893 {
3894 if (finalDim == FALSE)
3895 {
3896 UINT_32 mipSize;
3897
3898 if (is3dThick)
3899 {
3900 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
3901 }
3902 else
3903 {
3904 mipSize = mipPitch * mipHeight * (bpp >> 3);
3905 }
3906
3907 if (mipSize <= 256)
3908 {
3909 UINT_32 index = Log2(bpp >> 3);
3910
3911 if (is3dThick)
3912 {
3913 mipPitch = Block256_3dZ[index].w;
3914 mipHeight = Block256_3dZ[index].h;
3915 mipDepth = Block256_3dZ[index].d;
3916 }
3917 else
3918 {
3919 mipPitch = Block256_2d[index].w;
3920 mipHeight = Block256_2d[index].h;
3921 }
3922
3923 finalDim = TRUE;
3924 }
3925 }
3926 }
3927 else
3928 {
3929 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
3930 mipPitch, mipHeight, mipDepth);
3931
3932 if (inTail)
3933 {
3934 endingMip = mipId;
3935
3936 mipPitch = tailMaxDim.w;
3937 mipHeight = tailMaxDim.h;
3938
3939 if (is3dThick)
3940 {
3941 mipDepth = tailMaxDim.d;
3942 }
3943 }
3944 else
3945 {
3946 mipPitch = PowTwoAlign(mipPitch, blockWidth);
3947 mipHeight = PowTwoAlign(mipHeight, blockHeight);
3948
3949 if (is3dThick)
3950 {
3951 mipDepth = PowTwoAlign(mipDepth, blockDepth);
3952 }
3953 }
3954 }
3955
3956 pMipInfo[mipId].pitch = mipPitch;
3957 pMipInfo[mipId].height = mipHeight;
3958 pMipInfo[mipId].depth = mipDepth;
3959 pMipInfo[mipId].offset = offset;
3960 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
3961
3962 if (finalDim)
3963 {
3964 if (is3dThin)
3965 {
3966 mipDepth = Max(mipDepth >> 1, 1u);
3967 }
3968 }
3969 else
3970 {
3971 mipPitch = Max(mipPitch >> 1, 1u);
3972 mipHeight = Max(mipHeight >> 1, 1u);
3973
3974 if (is3dThick || is3dThin)
3975 {
3976 mipDepth = Max(mipDepth >> 1, 1u);
3977 }
3978 }
3979 }
3980
3981 return endingMip;
3982 }
3983
3984 /**
3985 ************************************************************************************************************************
3986 * Gfx9Lib::GetMetaMiptailInfo
3987 *
3988 * @brief
3989 * Get mip tail coordinate information.
3990 *
3991 * @return
3992 * N/A
3993 ************************************************************************************************************************
3994 */
3995 VOID Gfx9Lib::GetMetaMiptailInfo(
3996 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord
3997 Dim3d mipCoord, ///< [in] mip tail base coord
3998 UINT_32 numMipInTail, ///< [in] number of mips in tail
3999 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
4000 ) const
4001 {
4002 BOOL_32 isThick = (pMetaBlkDim->d > 1);
4003 UINT_32 mipWidth = pMetaBlkDim->w;
4004 UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4005 UINT_32 mipDepth = pMetaBlkDim->d;
4006 UINT_32 minInc;
4007
4008 if (isThick)
4009 {
4010 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4011 }
4012 else if (pMetaBlkDim->h >= 1024)
4013 {
4014 minInc = 256;
4015 }
4016 else if (pMetaBlkDim->h == 512)
4017 {
4018 minInc = 128;
4019 }
4020 else
4021 {
4022 minInc = 64;
4023 }
4024
4025 UINT_32 blk32MipId = 0xFFFFFFFF;
4026
4027 for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4028 {
4029 pInfo[mip].inMiptail = TRUE;
4030 pInfo[mip].startX = mipCoord.w;
4031 pInfo[mip].startY = mipCoord.h;
4032 pInfo[mip].startZ = mipCoord.d;
4033 pInfo[mip].width = mipWidth;
4034 pInfo[mip].height = mipHeight;
4035 pInfo[mip].depth = mipDepth;
4036
4037 if (mipWidth <= 32)
4038 {
4039 if (blk32MipId == 0xFFFFFFFF)
4040 {
4041 blk32MipId = mip;
4042 }
4043
4044 mipCoord.w = pInfo[blk32MipId].startX;
4045 mipCoord.h = pInfo[blk32MipId].startY;
4046 mipCoord.d = pInfo[blk32MipId].startZ;
4047
4048 switch (mip - blk32MipId)
4049 {
4050 case 0:
4051 mipCoord.w += 32; // 16x16
4052 break;
4053 case 1:
4054 mipCoord.h += 32; // 8x8
4055 break;
4056 case 2:
4057 mipCoord.h += 32; // 4x4
4058 mipCoord.w += 16;
4059 break;
4060 case 3:
4061 mipCoord.h += 32; // 2x2
4062 mipCoord.w += 32;
4063 break;
4064 case 4:
4065 mipCoord.h += 32; // 1x1
4066 mipCoord.w += 48;
4067 break;
4068 // The following are for BC/ASTC formats
4069 case 5:
4070 mipCoord.h += 48; // 1/2 x 1/2
4071 break;
4072 case 6:
4073 mipCoord.h += 48; // 1/4 x 1/4
4074 mipCoord.w += 16;
4075 break;
4076 case 7:
4077 mipCoord.h += 48; // 1/8 x 1/8
4078 mipCoord.w += 32;
4079 break;
4080 case 8:
4081 mipCoord.h += 48; // 1/16 x 1/16
4082 mipCoord.w += 48;
4083 break;
4084 default:
4085 ADDR_ASSERT_ALWAYS();
4086 break;
4087 }
4088
4089 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4090 mipHeight = mipWidth;
4091
4092 if (isThick)
4093 {
4094 mipDepth = mipWidth;
4095 }
4096 }
4097 else
4098 {
4099 if (mipWidth <= minInc)
4100 {
4101 // if we're below the minimal increment...
4102 if (isThick)
4103 {
4104 // For 3d, just go in z direction
4105 mipCoord.d += mipDepth;
4106 }
4107 else
4108 {
4109 // For 2d, first go across, then down
4110 if ((mipWidth * 2) == minInc)
4111 {
4112 // if we're 2 mips below, that's when we go back in x, and down in y
4113 mipCoord.w -= minInc;
4114 mipCoord.h += minInc;
4115 }
4116 else
4117 {
4118 // otherwise, just go across in x
4119 mipCoord.w += minInc;
4120 }
4121 }
4122 }
4123 else
4124 {
4125 // On even mip, go down, otherwise, go across
4126 if (mip & 1)
4127 {
4128 mipCoord.w += mipWidth;
4129 }
4130 else
4131 {
4132 mipCoord.h += mipHeight;
4133 }
4134 }
4135 // Divide the width by 2
4136 mipWidth >>= 1;
4137 // After the first mip in tail, the mip is always a square
4138 mipHeight = mipWidth;
4139 // ...or for 3d, a cube
4140 if (isThick)
4141 {
4142 mipDepth = mipWidth;
4143 }
4144 }
4145 }
4146 }
4147
4148 /**
4149 ************************************************************************************************************************
4150 * Gfx9Lib::GetMipStartPos
4151 *
4152 * @brief
4153 * Internal function to get out information about mip logical start position
4154 *
4155 * @return
4156 * logical start position in macro block width/height/depth of one mip level within one slice
4157 ************************************************************************************************************************
4158 */
4159 Dim3d Gfx9Lib::GetMipStartPos(
4160 AddrResourceType resourceType,
4161 AddrSwizzleMode swizzleMode,
4162 UINT_32 width,
4163 UINT_32 height,
4164 UINT_32 depth,
4165 UINT_32 blockWidth,
4166 UINT_32 blockHeight,
4167 UINT_32 blockDepth,
4168 UINT_32 mipId,
4169 UINT_32 log2ElementBytes,
4170 UINT_32* pMipTailBytesOffset) const
4171 {
4172 Dim3d mipStartPos = {0};
4173 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4174
4175 // Report mip in tail if Mip0 is already in mip tail
4176 BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4177 UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode);
4178 UINT_32 mipIndexInTail = mipId;
4179
4180 if (inMipTail == FALSE)
4181 {
4182 // Mip 0 dimension, unit in block
4183 UINT_32 mipWidthInBlk = width / blockWidth;
4184 UINT_32 mipHeightInBlk = height / blockHeight;
4185 UINT_32 mipDepthInBlk = depth / blockDepth;
4186 AddrMajorMode majorMode = GetMajorMode(resourceType,
4187 swizzleMode,
4188 mipWidthInBlk,
4189 mipHeightInBlk,
4190 mipDepthInBlk);
4191
4192 UINT_32 endingMip = mipId + 1;
4193
4194 for (UINT_32 i = 1; i <= mipId; i++)
4195 {
4196 if ((i == 1) || (i == 3))
4197 {
4198 if (majorMode == ADDR_MAJOR_Y)
4199 {
4200 mipStartPos.w += mipWidthInBlk;
4201 }
4202 else
4203 {
4204 mipStartPos.h += mipHeightInBlk;
4205 }
4206 }
4207 else
4208 {
4209 if (majorMode == ADDR_MAJOR_X)
4210 {
4211 mipStartPos.w += mipWidthInBlk;
4212 }
4213 else if (majorMode == ADDR_MAJOR_Y)
4214 {
4215 mipStartPos.h += mipHeightInBlk;
4216 }
4217 else
4218 {
4219 mipStartPos.d += mipDepthInBlk;
4220 }
4221 }
4222
4223 BOOL_32 inTail = FALSE;
4224
4225 if (IsThick(resourceType, swizzleMode))
4226 {
4227 UINT_32 dim = log2blkSize % 3;
4228
4229 if (dim == 0)
4230 {
4231 inTail =
4232 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4233 }
4234 else if (dim == 1)
4235 {
4236 inTail =
4237 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4238 }
4239 else
4240 {
4241 inTail =
4242 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4243 }
4244 }
4245 else
4246 {
4247 if (log2blkSize & 1)
4248 {
4249 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4250 }
4251 else
4252 {
4253 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4254 }
4255 }
4256
4257 if (inTail)
4258 {
4259 endingMip = i;
4260 break;
4261 }
4262
4263 mipWidthInBlk = RoundHalf(mipWidthInBlk);
4264 mipHeightInBlk = RoundHalf(mipHeightInBlk);
4265 mipDepthInBlk = RoundHalf(mipDepthInBlk);
4266 }
4267
4268 if (mipId >= endingMip)
4269 {
4270 inMipTail = TRUE;
4271 mipIndexInTail = mipId - endingMip;
4272 }
4273 }
4274
4275 if (inMipTail)
4276 {
4277 UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize;
4278 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4279 *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4280 }
4281
4282 return mipStartPos;
4283 }
4284
4285 /**
4286 ************************************************************************************************************************
4287 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4288 *
4289 * @brief
4290 * Internal function to calculate address from coord for tiled swizzle surface
4291 *
4292 * @return
4293 * ADDR_E_RETURNCODE
4294 ************************************************************************************************************************
4295 */
4296 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4297 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4298 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4299 ) const
4300 {
4301 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4302 localIn.swizzleMode = pIn->swizzleMode;
4303 localIn.flags = pIn->flags;
4304 localIn.resourceType = pIn->resourceType;
4305 localIn.bpp = pIn->bpp;
4306 localIn.width = Max(pIn->unalignedWidth, 1u);
4307 localIn.height = Max(pIn->unalignedHeight, 1u);
4308 localIn.numSlices = Max(pIn->numSlices, 1u);
4309 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4310 localIn.numSamples = Max(pIn->numSamples, 1u);
4311 localIn.numFrags = Max(pIn->numFrags, 1u);
4312 if (localIn.numMipLevels <= 1)
4313 {
4314 localIn.pitchInElement = pIn->pitchInElement;
4315 }
4316
4317 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4318 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4319
4320 BOOL_32 valid = (returnCode == ADDR_OK) &&
4321 (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4322 IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4323 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4324
4325 if (valid)
4326 {
4327 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
4328 Dim3d mipStartPos = {0};
4329 UINT_32 mipTailBytesOffset = 0;
4330
4331 if (pIn->numMipLevels > 1)
4332 {
4333 // Mip-map chain cannot be MSAA surface
4334 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4335
4336 mipStartPos = GetMipStartPos(pIn->resourceType,
4337 pIn->swizzleMode,
4338 localOut.pitch,
4339 localOut.height,
4340 localOut.numSlices,
4341 localOut.blockWidth,
4342 localOut.blockHeight,
4343 localOut.blockSlices,
4344 pIn->mipId,
4345 log2ElementBytes,
4346 &mipTailBytesOffset);
4347 }
4348
4349 UINT_32 interleaveOffset = 0;
4350 UINT_32 pipeBits = 0;
4351 UINT_32 pipeXor = 0;
4352 UINT_32 bankBits = 0;
4353 UINT_32 bankXor = 0;
4354
4355 if (IsThin(pIn->resourceType, pIn->swizzleMode))
4356 {
4357 UINT_32 blockOffset = 0;
4358 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4359
4360 if (IsZOrderSwizzle(pIn->swizzleMode))
4361 {
4362 // Morton generation
4363 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4364 {
4365 UINT_32 totalLowBits = 6 - log2ElementBytes;
4366 UINT_32 mortBits = totalLowBits / 2;
4367 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4368 // Are 9 bits enough?
4369 UINT_32 highBitsValue =
4370 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4371 blockOffset = lowBitsValue | highBitsValue;
4372 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4373 }
4374 else
4375 {
4376 blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4377 }
4378
4379 // Fill LSBs with sample bits
4380 if (pIn->numSamples > 1)
4381 {
4382 blockOffset *= pIn->numSamples;
4383 blockOffset |= pIn->sample;
4384 }
4385
4386 // Shift according to BytesPP
4387 blockOffset <<= log2ElementBytes;
4388 }
4389 else
4390 {
4391 // Micro block offset
4392 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4393 blockOffset = microBlockOffset;
4394
4395 // Micro block dimension
4396 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4397 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4398 // Morton generation; are 12 bits enough?
4399 blockOffset |=
4400 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4401
4402 // Sample bits start location
4403 UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples);
4404 // Join sample bits information to the highest Macro block bits
4405 if (IsNonPrtXor(pIn->swizzleMode))
4406 {
4407 // Non-prt-Xor : xor highest Macro block bits with sample bits
4408 blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4409 }
4410 else
4411 {
4412 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4413 // after this op, the blockOffset only contains log2 Macro block size bits
4414 blockOffset %= (1 << sampleStart);
4415 blockOffset |= (pIn->sample << sampleStart);
4416 ADDR_ASSERT((blockOffset >> log2blkSize) == 0);
4417 }
4418 }
4419
4420 if (IsXor(pIn->swizzleMode))
4421 {
4422 // Mask off bits above Macro block bits to keep page synonyms working for prt
4423 if (IsPrt(pIn->swizzleMode))
4424 {
4425 blockOffset &= ((1 << log2blkSize) - 1);
4426 }
4427
4428 // Preserve offset inside pipe interleave
4429 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4430 blockOffset >>= m_pipeInterleaveLog2;
4431
4432 // Pipe/Se xor bits
4433 pipeBits = GetPipeXorBits(log2blkSize);
4434 // Pipe xor
4435 pipeXor = FoldXor2d(blockOffset, pipeBits);
4436 blockOffset >>= pipeBits;
4437
4438 // Bank xor bits
4439 bankBits = GetBankXorBits(log2blkSize);
4440 // Bank Xor
4441 bankXor = FoldXor2d(blockOffset, bankBits);
4442 blockOffset >>= bankBits;
4443
4444 // Put all the parts back together
4445 blockOffset <<= bankBits;
4446 blockOffset |= bankXor;
4447 blockOffset <<= pipeBits;
4448 blockOffset |= pipeXor;
4449 blockOffset <<= m_pipeInterleaveLog2;
4450 blockOffset |= interleaveOffset;
4451 }
4452
4453 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4454 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4455
4456 blockOffset |= mipTailBytesOffset;
4457
4458 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4459 {
4460 // Apply slice xor if not MSAA/PRT
4461 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4462 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4463 (m_pipeInterleaveLog2 + pipeBits));
4464 }
4465
4466 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4467 bankBits, pipeBits, &blockOffset);
4468
4469 blockOffset %= (1 << log2blkSize);
4470
4471 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4472 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4473 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4474 UINT_32 macroBlockIndex =
4475 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4476 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4477 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4478
4479 UINT_64 macroBlockOffset = (static_cast<UINT_64>(macroBlockIndex) <<
4480 GetBlockSizeLog2(pIn->swizzleMode));
4481
4482 pOut->addr = blockOffset | macroBlockOffset;
4483 }
4484 else
4485 {
4486 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4487
4488 Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4489
4490 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4491 (pIn->y / microBlockDim.h),
4492 (pIn->slice / microBlockDim.d),
4493 8);
4494
4495 blockOffset <<= 10;
4496 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4497
4498 if (IsXor(pIn->swizzleMode))
4499 {
4500 // Mask off bits above Macro block bits to keep page synonyms working for prt
4501 if (IsPrt(pIn->swizzleMode))
4502 {
4503 blockOffset &= ((1 << log2blkSize) - 1);
4504 }
4505
4506 // Preserve offset inside pipe interleave
4507 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4508 blockOffset >>= m_pipeInterleaveLog2;
4509
4510 // Pipe/Se xor bits
4511 pipeBits = GetPipeXorBits(log2blkSize);
4512 // Pipe xor
4513 pipeXor = FoldXor3d(blockOffset, pipeBits);
4514 blockOffset >>= pipeBits;
4515
4516 // Bank xor bits
4517 bankBits = GetBankXorBits(log2blkSize);
4518 // Bank Xor
4519 bankXor = FoldXor3d(blockOffset, bankBits);
4520 blockOffset >>= bankBits;
4521
4522 // Put all the parts back together
4523 blockOffset <<= bankBits;
4524 blockOffset |= bankXor;
4525 blockOffset <<= pipeBits;
4526 blockOffset |= pipeXor;
4527 blockOffset <<= m_pipeInterleaveLog2;
4528 blockOffset |= interleaveOffset;
4529 }
4530
4531 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4532 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4533 blockOffset |= mipTailBytesOffset;
4534
4535 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4536 bankBits, pipeBits, &blockOffset);
4537
4538 blockOffset %= (1 << log2blkSize);
4539
4540 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
4541 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4542 UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4543
4544 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4545 UINT_32 sliceSizeInBlock =
4546 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4547 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4548
4549 pOut->addr = blockOffset | (blockIndex << log2blkSize);
4550 }
4551 }
4552 else
4553 {
4554 returnCode = ADDR_INVALIDPARAMS;
4555 }
4556
4557 return returnCode;
4558 }
4559
4560 } // V2
4561 } // Addr