amd/addrlib: import gfx9 support
[mesa.git] / src / amd / addrlib / gfx9 / gfx9addrlib.cpp
1 /*
2 * Copyright © 2017 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ****************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ****************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37 #include "gfx9_enum.h"
38
39 #if BRAHMA_BUILD
40 #include "amdgpu_id.h"
41 #else
42 #include "ai_id.h"
43 #include "rv_id.h"
44 #endif
45
46 ////////////////////////////////////////////////////////////////////////////////////////////////////
47 ////////////////////////////////////////////////////////////////////////////////////////////////////
48
49 namespace Addr
50 {
51
52 /**
53 ****************************************************************************************************
54 * Gfx9HwlInit
55 *
56 * @brief
57 * Creates a Gfx9Lib object.
58 *
59 * @return
60 * Returns a Gfx9Lib object pointer.
61 ****************************************************************************************************
62 */
63 Addr::Lib* Gfx9HwlInit(const Client* pClient)
64 {
65 return V2::Gfx9Lib::CreateObj(pClient);
66 }
67
68 namespace V2
69 {
70
71 /**
72 ****************************************************************************************************
73 * Gfx9Lib::Gfx9Lib
74 *
75 * @brief
76 * Constructor
77 *
78 ****************************************************************************************************
79 */
80 Gfx9Lib::Gfx9Lib(const Client* pClient)
81 :
82 Lib(pClient),
83 m_numEquations(0)
84 {
85 m_class = AI_ADDRLIB;
86 memset(&m_settings, 0, sizeof(m_settings));
87 }
88
89 /**
90 ****************************************************************************************************
91 * Gfx9Lib::~Gfx9Lib
92 *
93 * @brief
94 * Destructor
95 ****************************************************************************************************
96 */
97 Gfx9Lib::~Gfx9Lib()
98 {
99 }
100
101 /**
102 ****************************************************************************************************
103 * Gfx9Lib::HwlComputeHtileInfo
104 *
105 * @brief
106 * Interface function stub of AddrComputeHtileInfo
107 *
108 * @return
109 * ADDR_E_RETURNCODE
110 ****************************************************************************************************
111 */
112 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
113 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
114 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
115 ) const
116 {
117 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
118 pIn->swizzleMode);
119
120 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
121
122 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
123
124 if ((numPipeTotal == 1) && (numRbTotal == 1))
125 {
126 numCompressBlkPerMetaBlkLog2 = 10;
127 }
128 else
129 {
130 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
131 }
132
133 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
134
135 Dim3d metaBlkDim = {8, 8, 1};
136 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
137 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
138 UINT_32 heightAmp = totalAmpBits - widthAmp;
139 metaBlkDim.w <<= widthAmp;
140 metaBlkDim.h <<= heightAmp;
141
142 #if DEBUG
143 Dim3d metaBlkDimDbg = {8, 8, 1};
144 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
145 {
146 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
147 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
148 {
149 metaBlkDimDbg.h <<= 1;
150 }
151 else
152 {
153 metaBlkDimDbg.w <<= 1;
154 }
155 }
156 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
157 #endif
158
159 UINT_32 numMetaBlkX;
160 UINT_32 numMetaBlkY;
161 UINT_32 numMetaBlkZ;
162
163 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
164 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
165 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
166
167 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
168
169 pOut->pitch = numMetaBlkX * metaBlkDim.w;
170 pOut->height = numMetaBlkY * metaBlkDim.h;
171 pOut->sliceSize = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4;
172
173 pOut->metaBlkWidth = metaBlkDim.w;
174 pOut->metaBlkHeight = metaBlkDim.h;
175 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
176
177 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
178 {
179 UINT_32 additionalAlign = numPipeTotal * numCompressBlkPerMetaBlk * 2;
180
181 if (additionalAlign > sizeAlign)
182 {
183 sizeAlign = additionalAlign;
184 }
185 }
186
187 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
188 pOut->baseAlign = Max(numCompressBlkPerMetaBlk * 4, sizeAlign);
189
190 if (m_settings.metaBaseAlignFix)
191 {
192 pOut->baseAlign = Max(pOut->baseAlign, HwlComputeSurfaceBaseAlign(pIn->swizzleMode));
193 }
194
195 return ADDR_OK;
196 }
197
198 /**
199 ****************************************************************************************************
200 * Gfx9Lib::HwlComputeCmaskInfo
201 *
202 * @brief
203 * Interface function stub of AddrComputeCmaskInfo
204 *
205 * @return
206 * ADDR_E_RETURNCODE
207 ****************************************************************************************************
208 */
209 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
210 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
211 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
212 ) const
213 {
214 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
215
216 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
217 pIn->swizzleMode);
218
219 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
220
221 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
222
223 if ((numPipeTotal == 1) && (numRbTotal == 1))
224 {
225 numCompressBlkPerMetaBlkLog2 = 13;
226 }
227 else
228 {
229 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
230
231 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
232 }
233
234 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
235
236 Dim2d metaBlkDim = {8, 8};
237 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
238 UINT_32 heightAmp = totalAmpBits >> 1;
239 UINT_32 widthAmp = totalAmpBits - heightAmp;
240 metaBlkDim.w <<= widthAmp;
241 metaBlkDim.h <<= heightAmp;
242
243 #if DEBUG
244 Dim2d metaBlkDimDbg = {8, 8};
245 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
246 {
247 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
248 {
249 metaBlkDimDbg.h <<= 1;
250 }
251 else
252 {
253 metaBlkDimDbg.w <<= 1;
254 }
255 }
256 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
257 #endif
258
259 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
260 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
261 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
262
263 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
264
265 pOut->pitch = numMetaBlkX * metaBlkDim.w;
266 pOut->height = numMetaBlkY * metaBlkDim.h;
267 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
268 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
269 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
270
271 if (m_settings.metaBaseAlignFix)
272 {
273 pOut->baseAlign = Max(pOut->baseAlign, HwlComputeSurfaceBaseAlign(pIn->swizzleMode));
274 }
275
276 pOut->metaBlkWidth = metaBlkDim.w;
277 pOut->metaBlkHeight = metaBlkDim.h;
278
279 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
280
281 return ADDR_OK;
282 }
283
284 /**
285 ****************************************************************************************************
286 * Gfx9Lib::GetMetaMipInfo
287 *
288 * @brief
289 * Get meta mip info
290 *
291 * @return
292 * N/A
293 ****************************************************************************************************
294 */
295 VOID Gfx9Lib::GetMetaMipInfo(
296 UINT_32 numMipLevels, ///< [in] number of mip levels
297 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
298 BOOL_32 dataThick, ///< [in] data surface is thick
299 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
300 UINT_32 mip0Width, ///< [in] mip0 width
301 UINT_32 mip0Height, ///< [in] mip0 height
302 UINT_32 mip0Depth, ///< [in] mip0 depth
303 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
304 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
305 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
306 const
307 {
308 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
309 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
310 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
311 UINT_32 tailWidth = pMetaBlkDim->w;
312 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
313 UINT_32 tailDepth = pMetaBlkDim->d;
314 BOOL_32 inTail = FALSE;
315 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
316
317 if (numMipLevels > 1)
318 {
319 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
320 {
321 // Z major
322 major = ADDR_MAJOR_Z;
323 }
324 else if (numMetaBlkX >= numMetaBlkY)
325 {
326 // X major
327 major = ADDR_MAJOR_X;
328 }
329 else
330 {
331 // Y major
332 major = ADDR_MAJOR_Y;
333 }
334
335 inTail = ((mip0Width <= tailWidth) &&
336 (mip0Height <= tailHeight) &&
337 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
338
339 if (inTail == FALSE)
340 {
341 UINT_32 orderLimit;
342 UINT_32 *pMipDim;
343 UINT_32 *pOrderDim;
344
345 if (major == ADDR_MAJOR_Z)
346 {
347 // Z major
348 pMipDim = &numMetaBlkY;
349 pOrderDim = &numMetaBlkZ;
350 orderLimit = 4;
351 }
352 else if (major == ADDR_MAJOR_X)
353 {
354 // X major
355 pMipDim = &numMetaBlkY;
356 pOrderDim = &numMetaBlkX;
357 orderLimit = 4;
358 }
359 else
360 {
361 // Y major
362 pMipDim = &numMetaBlkX;
363 pOrderDim = &numMetaBlkY;
364 orderLimit = 2;
365 }
366
367 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
368 {
369 *pMipDim += 2;
370 }
371 else
372 {
373 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
374 }
375 }
376 }
377
378 if (pInfo != NULL)
379 {
380 UINT_32 mipWidth = mip0Width;
381 UINT_32 mipHeight = mip0Height;
382 UINT_32 mipDepth = mip0Depth;
383 Dim3d mipCoord = {0};
384
385 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
386 {
387 if (inTail)
388 {
389 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
390 pMetaBlkDim);
391 break;
392 }
393 else
394 {
395 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
396 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
397 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
398
399 pInfo[mip].inMiptail = FALSE;
400 pInfo[mip].startX = mipCoord.w;
401 pInfo[mip].startY = mipCoord.h;
402 pInfo[mip].startZ = mipCoord.d;
403 pInfo[mip].width = mipWidth;
404 pInfo[mip].height = mipHeight;
405 pInfo[mip].depth = dataThick ? mipDepth : 1;
406
407 if ((mip >= 3) || (mip & 1))
408 {
409 switch (major)
410 {
411 case ADDR_MAJOR_X:
412 mipCoord.w += mipWidth;
413 break;
414 case ADDR_MAJOR_Y:
415 mipCoord.h += mipHeight;
416 break;
417 case ADDR_MAJOR_Z:
418 mipCoord.d += mipDepth;
419 break;
420 default:
421 break;
422 }
423 }
424 else
425 {
426 switch (major)
427 {
428 case ADDR_MAJOR_X:
429 mipCoord.h += mipHeight;
430 break;
431 case ADDR_MAJOR_Y:
432 mipCoord.w += mipWidth;
433 break;
434 case ADDR_MAJOR_Z:
435 mipCoord.h += mipHeight;
436 break;
437 default:
438 break;
439 }
440 }
441
442 mipWidth = Max(mipWidth >> 1, 1u);
443 mipHeight = Max(mipHeight >> 1, 1u);
444 mipDepth = Max(mipDepth >> 1, 1u);
445
446 inTail = ((mipWidth <= tailWidth) &&
447 (mipHeight <= tailHeight) &&
448 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
449 }
450 }
451 }
452
453 *pNumMetaBlkX = numMetaBlkX;
454 *pNumMetaBlkY = numMetaBlkY;
455 *pNumMetaBlkZ = numMetaBlkZ;
456 }
457
458 /**
459 ****************************************************************************************************
460 * Gfx9Lib::HwlComputeDccInfo
461 *
462 * @brief
463 * Interface function to compute DCC key info
464 *
465 * @return
466 * ADDR_E_RETURNCODE
467 ****************************************************************************************************
468 */
469 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
470 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
471 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
472 ) const
473 {
474 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
475 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
476 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
477
478 if (dataLinear)
479 {
480 metaLinear = TRUE;
481 }
482 else if (metaLinear == TRUE)
483 {
484 pipeAligned = FALSE;
485 }
486
487 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
488
489 if (metaLinear)
490 {
491 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
492 ADDR_ASSERT_ALWAYS();
493
494 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
495 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
496 }
497 else
498 {
499 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
500
501 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
502
503 UINT_32 numFrags = (pIn->numFrags == 0) ? 1 : pIn->numFrags;
504 UINT_32 numSlices = (pIn->numSlices == 0) ? 1 : pIn->numSlices;
505
506 minMetaBlkSize /= numFrags;
507
508 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
509
510 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
511
512 if ((numPipeTotal > 1) || (numRbTotal > 1))
513 {
514 numCompressBlkPerMetaBlk =
515 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : 1024));
516
517 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
518 {
519 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
520 }
521 }
522
523 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
524 Dim3d metaBlkDim = compressBlkDim;
525
526 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
527 {
528 if ((metaBlkDim.h < metaBlkDim.w) ||
529 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
530 {
531 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
532 {
533 metaBlkDim.h <<= 1;
534 }
535 else
536 {
537 metaBlkDim.d <<= 1;
538 }
539 }
540 else
541 {
542 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
543 {
544 metaBlkDim.w <<= 1;
545 }
546 else
547 {
548 metaBlkDim.d <<= 1;
549 }
550 }
551 }
552
553 UINT_32 numMetaBlkX;
554 UINT_32 numMetaBlkY;
555 UINT_32 numMetaBlkZ;
556
557 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
558 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
559 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
560
561 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
562
563 if (numFrags > m_maxCompFrag)
564 {
565 sizeAlign *= (numFrags / m_maxCompFrag);
566 }
567
568 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
569 numCompressBlkPerMetaBlk * numFrags;
570 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
571 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
572
573 if (m_settings.metaBaseAlignFix)
574 {
575 pOut->dccRamBaseAlign = Max(pOut->dccRamBaseAlign, HwlComputeSurfaceBaseAlign(pIn->swizzleMode));
576 }
577
578 pOut->pitch = numMetaBlkX * metaBlkDim.w;
579 pOut->height = numMetaBlkY * metaBlkDim.h;
580 pOut->depth = numMetaBlkZ * metaBlkDim.d;
581
582 pOut->compressBlkWidth = compressBlkDim.w;
583 pOut->compressBlkHeight = compressBlkDim.h;
584 pOut->compressBlkDepth = compressBlkDim.d;
585
586 pOut->metaBlkWidth = metaBlkDim.w;
587 pOut->metaBlkHeight = metaBlkDim.h;
588 pOut->metaBlkDepth = metaBlkDim.d;
589
590 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
591 pOut->fastClearSizePerSlice =
592 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
593 }
594
595 return ADDR_OK;
596 }
597
598 /**
599 ****************************************************************************************************
600 * Gfx9Lib::HwlGetMaxAlignments
601 *
602 * @brief
603 * Gets maximum alignments
604 * @return
605 * ADDR_E_RETURNCODE
606 ****************************************************************************************************
607 */
608 ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments(
609 ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut ///< [out] output structure
610 ) const
611 {
612 pOut->baseAlign = HwlComputeSurfaceBaseAlign(ADDR_SW_64KB);
613
614 return ADDR_OK;
615 }
616
617 /**
618 ****************************************************************************************************
619 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
620 *
621 * @brief
622 * Interface function stub of AddrComputeCmaskAddrFromCoord
623 *
624 * @return
625 * ADDR_E_RETURNCODE
626 ****************************************************************************************************
627 */
628 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
629 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
630 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
631 ) const
632 {
633 ADDR2_COMPUTE_CMASK_INFO_INPUT input;
634 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output;
635
636 memset(&input, 0, sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT));
637 input.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT);
638 input.cMaskFlags = pIn->cMaskFlags;
639 input.colorFlags = pIn->colorFlags;
640 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
641 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
642 input.numSlices = Max(pIn->numSlices, 1u);
643 input.swizzleMode = pIn->swizzleMode;
644 input.resourceType = pIn->resourceType;
645
646 memset(&output, 0, sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT));
647 output.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT);
648
649 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
650
651 if (returnCode == ADDR_OK)
652 {
653 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
654
655 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
656
657 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
658 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
659
660 CoordEq metaEq;
661
662 GetMetaEquation(&metaEq, 0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
663 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
664 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
665
666 UINT_32 xb = pIn->x / output.metaBlkWidth;
667 UINT_32 yb = pIn->y / output.metaBlkHeight;
668 UINT_32 zb = pIn->slice;
669
670 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
671 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
672 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
673
674 UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
675
676 pOut->addr = address >> 1;
677 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
678
679
680 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
681 pIn->swizzleMode);
682
683 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
684
685 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
686 }
687
688 return returnCode;
689 }
690
691 /**
692 ****************************************************************************************************
693 * Gfx9Lib::HwlComputeHtileAddrFromCoord
694 *
695 * @brief
696 * Interface function stub of AddrComputeHtileAddrFromCoord
697 *
698 * @return
699 * ADDR_E_RETURNCODE
700 ****************************************************************************************************
701 */
702 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
703 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
704 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
705 ) const
706 {
707 ADDR_E_RETURNCODE returnCode = ADDR_OK;
708
709 if (pIn->numMipLevels > 1)
710 {
711 returnCode = ADDR_NOTIMPLEMENTED;
712 }
713 else
714 {
715 ADDR2_COMPUTE_HTILE_INFO_INPUT input;
716 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output;
717
718 memset(&input, 0, sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT));
719 input.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);
720 input.hTileFlags = pIn->hTileFlags;
721 input.depthFlags = pIn->depthflags;
722 input.swizzleMode = pIn->swizzleMode;
723 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
724 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
725 input.numSlices = Max(pIn->numSlices, 1u);
726 input.numMipLevels = Max(pIn->numMipLevels, 1u);
727
728 memset(&output, 0, sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT));
729 output.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);
730
731 returnCode = ComputeHtileInfo(&input, &output);
732
733 if (returnCode == ADDR_OK)
734 {
735 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
736
737 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
738 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
739
740 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
741
742 CoordEq metaEq;
743
744 GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
745 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
746 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
747
748 UINT_32 xb = pIn->x / output.metaBlkWidth;
749 UINT_32 yb = pIn->y / output.metaBlkHeight;
750 UINT_32 zb = pIn->slice;
751
752 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
753 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
754 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
755
756 UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
757
758 pOut->addr = address >> 1;
759
760 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
761 pIn->swizzleMode);
762
763 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
764
765 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
766 }
767 }
768
769 return returnCode;
770 }
771
772 /**
773 ****************************************************************************************************
774 * Gfx9Lib::HwlComputeHtileCoordFromAddr
775 *
776 * @brief
777 * Interface function stub of AddrComputeHtileCoordFromAddr
778 *
779 * @return
780 * ADDR_E_RETURNCODE
781 ****************************************************************************************************
782 */
783 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
784 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
785 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure
786 ) const
787 {
788 ADDR_E_RETURNCODE returnCode = ADDR_OK;
789
790 if (pIn->numMipLevels > 1)
791 {
792 returnCode = ADDR_NOTIMPLEMENTED;
793 }
794 else
795 {
796 ADDR2_COMPUTE_HTILE_INFO_INPUT input;
797 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output;
798
799 memset(&input, 0, sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT));
800 input.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);
801 input.hTileFlags = pIn->hTileFlags;
802 input.swizzleMode = pIn->swizzleMode;
803 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
804 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
805 input.numSlices = Max(pIn->numSlices, 1u);
806 input.numMipLevels = Max(pIn->numMipLevels, 1u);
807
808 memset(&output, 0, sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT));
809 output.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);
810
811 returnCode = ComputeHtileInfo(&input, &output);
812
813 if (returnCode == ADDR_OK)
814 {
815 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
816
817 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
818 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
819
820 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
821
822 CoordEq metaEq;
823
824 GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
825 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
826 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
827
828 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
829 pIn->swizzleMode);
830
831 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
832
833 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
834
835 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
836 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
837
838 UINT_32 x, y, z, s, m;
839
840 metaEq.solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
841
842 pOut->slice = m / sliceSizeInBlock;
843 pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
844 pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x;
845 }
846 }
847
848 return returnCode;
849 }
850
851 /**
852 ****************************************************************************************************
853 * Gfx9Lib::HwlInitGlobalParams
854 *
855 * @brief
856 * Initializes global parameters
857 *
858 * @return
859 * TRUE if all settings are valid
860 *
861 ****************************************************************************************************
862 */
863 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
864 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
865 {
866 BOOL_32 valid = TRUE;
867
868 if (m_settings.isArcticIsland)
869 {
870 GB_ADDR_CONFIG gbAddrConfig;
871
872 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
873
874 // These values are copied from CModel code
875 switch (gbAddrConfig.bits.NUM_PIPES)
876 {
877 case ADDR_CONFIG_1_PIPE:
878 m_pipes = 1;
879 m_pipesLog2 = 0;
880 break;
881 case ADDR_CONFIG_2_PIPE:
882 m_pipes = 2;
883 m_pipesLog2 = 1;
884 break;
885 case ADDR_CONFIG_4_PIPE:
886 m_pipes = 4;
887 m_pipesLog2 = 2;
888 break;
889 case ADDR_CONFIG_8_PIPE:
890 m_pipes = 8;
891 m_pipesLog2 = 3;
892 break;
893 case ADDR_CONFIG_16_PIPE:
894 m_pipes = 16;
895 m_pipesLog2 = 4;
896 break;
897 case ADDR_CONFIG_32_PIPE:
898 m_pipes = 32;
899 m_pipesLog2 = 5;
900 break;
901 default:
902 break;
903 }
904
905 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
906 {
907 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
908 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
909 m_pipeInterleaveLog2 = 8;
910 break;
911 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
912 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
913 m_pipeInterleaveLog2 = 9;
914 break;
915 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
916 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
917 m_pipeInterleaveLog2 = 10;
918 break;
919 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
920 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
921 m_pipeInterleaveLog2 = 11;
922 break;
923 default:
924 break;
925 }
926
927 switch (gbAddrConfig.bits.NUM_BANKS)
928 {
929 case ADDR_CONFIG_1_BANK:
930 m_banks = 1;
931 m_banksLog2 = 0;
932 break;
933 case ADDR_CONFIG_2_BANK:
934 m_banks = 2;
935 m_banksLog2 = 1;
936 break;
937 case ADDR_CONFIG_4_BANK:
938 m_banks = 4;
939 m_banksLog2 = 2;
940 break;
941 case ADDR_CONFIG_8_BANK:
942 m_banks = 8;
943 m_banksLog2 = 3;
944 break;
945 case ADDR_CONFIG_16_BANK:
946 m_banks = 16;
947 m_banksLog2 = 4;
948 break;
949 default:
950 break;
951 }
952
953 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
954 {
955 case ADDR_CONFIG_1_SHADER_ENGINE:
956 m_se = 1;
957 m_seLog2 = 0;
958 break;
959 case ADDR_CONFIG_2_SHADER_ENGINE:
960 m_se = 2;
961 m_seLog2 = 1;
962 break;
963 case ADDR_CONFIG_4_SHADER_ENGINE:
964 m_se = 4;
965 m_seLog2 = 2;
966 break;
967 case ADDR_CONFIG_8_SHADER_ENGINE:
968 m_se = 8;
969 m_seLog2 = 3;
970 break;
971 default:
972 break;
973 }
974
975 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
976 {
977 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
978 m_rbPerSe = 1;
979 m_rbPerSeLog2 = 0;
980 break;
981 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
982 m_rbPerSe = 2;
983 m_rbPerSeLog2 = 1;
984 break;
985 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
986 m_rbPerSe = 4;
987 m_rbPerSeLog2 = 2;
988 break;
989 default:
990 break;
991 }
992
993 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
994 {
995 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
996 m_maxCompFrag = 1;
997 m_maxCompFragLog2 = 0;
998 break;
999 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1000 m_maxCompFrag = 2;
1001 m_maxCompFragLog2 = 1;
1002 break;
1003 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1004 m_maxCompFrag = 4;
1005 m_maxCompFragLog2 = 2;
1006 break;
1007 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1008 m_maxCompFrag = 8;
1009 m_maxCompFragLog2 = 3;
1010 break;
1011 default:
1012 break;
1013 }
1014
1015 m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1016 ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1017 ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1018 m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1019 }
1020 else
1021 {
1022 valid = FALSE;
1023 ADDR_NOT_IMPLEMENTED();
1024 }
1025
1026 if (valid)
1027 {
1028 InitEquationTable();
1029 }
1030
1031 return valid;
1032 }
1033
1034 /**
1035 ****************************************************************************************************
1036 * Gfx9Lib::HwlConvertChipFamily
1037 *
1038 * @brief
1039 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1040 * @return
1041 * ChipFamily
1042 ****************************************************************************************************
1043 */
1044 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1045 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1046 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1047 {
1048 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1049
1050 switch (uChipFamily)
1051 {
1052 case FAMILY_AI:
1053 m_settings.isArcticIsland = 1;
1054 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1055
1056 if (m_settings.isVega10)
1057 {
1058 m_settings.isDce12 = 1;
1059 }
1060
1061 // Bug ID DEGGIGX90-1056
1062 m_settings.metaBaseAlignFix = 1;
1063 break;
1064
1065 default:
1066 ADDR_ASSERT(!"This should be a Fusion");
1067 break;
1068 }
1069
1070 return family;
1071 }
1072
1073 /**
1074 ****************************************************************************************************
1075 * Gfx9Lib::GetRbEquation
1076 *
1077 * @brief
1078 * Init RB equation
1079 * @return
1080 * N/A
1081 ****************************************************************************************************
1082 */
1083 VOID Gfx9Lib::GetRbEquation(
1084 CoordEq* pRbEq, ///< [out] rb equation
1085 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1086 UINT_32 numSeLog2) ///< [in] number of shader engine
1087 {
1088 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1089 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1090 Coordinate cx('x', rbRegion);
1091 Coordinate cy('y', rbRegion);
1092
1093 UINT_32 start = 0;
1094 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1095
1096 // Clear the rb equation
1097 pRbEq->resize(0);
1098 pRbEq->resize(numRbTotalLog2);
1099
1100 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1101 {
1102 // Special case when more than 1 SE, and 2 RB per SE
1103 (*pRbEq)[0].add(cx);
1104 (*pRbEq)[0].add(cy);
1105 cx++;
1106 cy++;
1107 (*pRbEq)[0].add(cy);
1108 start++;
1109 }
1110
1111 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1112
1113 for (UINT_32 i = 0; i < numBits; i++)
1114 {
1115 UINT_32 idx =
1116 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1117
1118 if ((i % 2) == 1)
1119 {
1120 (*pRbEq)[idx].add(cx);
1121 cx++;
1122 }
1123 else
1124 {
1125 (*pRbEq)[idx].add(cy);
1126 cy++;
1127 }
1128 }
1129 }
1130
1131 /**
1132 ****************************************************************************************************
1133 * Gfx9Lib::GetDataEquation
1134 *
1135 * @brief
1136 * Get data equation for fmask and Z
1137 * @return
1138 * N/A
1139 ****************************************************************************************************
1140 */
1141 VOID Gfx9Lib::GetDataEquation(
1142 CoordEq* pDataEq, ///< [out] data surface equation
1143 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1144 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1145 AddrResourceType resourceType, ///< [in] data surface resource type
1146 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1147 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1148 const
1149 {
1150 Coordinate cx('x', 0);
1151 Coordinate cy('y', 0);
1152 Coordinate cz('z', 0);
1153 Coordinate cs('s', 0);
1154
1155 // Clear the equation
1156 pDataEq->resize(0);
1157 pDataEq->resize(27);
1158
1159 if (dataSurfaceType == Gfx9DataColor)
1160 {
1161 if (IsLinear(swizzleMode))
1162 {
1163 Coordinate cm('m', 0);
1164
1165 pDataEq->resize(49);
1166
1167 for (UINT_32 i = 0; i < 49; i++)
1168 {
1169 (*pDataEq)[i].add(cm);
1170 cm++;
1171 }
1172 }
1173 else if (IsThick(resourceType, swizzleMode))
1174 {
1175 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1176 UINT_32 i;
1177 if (IsStandardSwizzle(resourceType, swizzleMode))
1178 {
1179 // Standard 3d swizzle
1180 // Fill in bottom x bits
1181 for (i = elementBytesLog2; i < 4; i++)
1182 {
1183 (*pDataEq)[i].add(cx);
1184 cx++;
1185 }
1186 // Fill in 2 bits of y and then z
1187 for (i = 4; i < 6; i++)
1188 {
1189 (*pDataEq)[i].add(cy);
1190 cy++;
1191 }
1192 for (i = 6; i < 8; i++)
1193 {
1194 (*pDataEq)[i].add(cz);
1195 cz++;
1196 }
1197 if (elementBytesLog2 < 2)
1198 {
1199 // fill in z & y bit
1200 (*pDataEq)[8].add(cz);
1201 (*pDataEq)[9].add(cy);
1202 cz++;
1203 cy++;
1204 }
1205 else if (elementBytesLog2 == 2)
1206 {
1207 // fill in y and x bit
1208 (*pDataEq)[8].add(cy);
1209 (*pDataEq)[9].add(cx);
1210 cy++;
1211 cx++;
1212 }
1213 else
1214 {
1215 // fill in 2 x bits
1216 (*pDataEq)[8].add(cx);
1217 cx++;
1218 (*pDataEq)[9].add(cx);
1219 cx++;
1220 }
1221 }
1222 else
1223 {
1224 // Z 3d swizzle
1225 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1226 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1227 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1228 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1229 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1230 {
1231 (*pDataEq)[i].add(cz);
1232 cz++;
1233 }
1234 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1235 {
1236 // add an x and z
1237 (*pDataEq)[6].add(cx);
1238 (*pDataEq)[7].add(cz);
1239 cx++;
1240 cz++;
1241 }
1242 else if (elementBytesLog2 == 2)
1243 {
1244 // add a y and z
1245 (*pDataEq)[6].add(cy);
1246 (*pDataEq)[7].add(cz);
1247 cy++;
1248 cz++;
1249 }
1250 // add y and x
1251 (*pDataEq)[8].add(cy);
1252 (*pDataEq)[9].add(cx);
1253 cy++;
1254 cx++;
1255 }
1256 // Fill in bit 10 and up
1257 pDataEq->mort3d( cz, cy, cx, 10 );
1258 }
1259 else if (IsThin(resourceType, swizzleMode))
1260 {
1261 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1262 // Color 2D
1263 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1264 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1265 UINT_32 i;
1266 // Fill in bottom x bits
1267 for (i = elementBytesLog2; i < 4; i++)
1268 {
1269 (*pDataEq)[i].add(cx);
1270 cx++;
1271 }
1272 // Fill in bottom y bits
1273 for (i = 4; i < 4 + microYBits; i++)
1274 {
1275 (*pDataEq)[i].add(cy);
1276 cy++;
1277 }
1278 // Fill in last of the micro_x bits
1279 for (i = 4 + microYBits; i < 8; i++)
1280 {
1281 (*pDataEq)[i].add(cx);
1282 cx++;
1283 }
1284 // Fill in x/y bits below sample split
1285 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1286 // Fill in sample bits
1287 for (i = 0; i < numSamplesLog2; i++)
1288 {
1289 cs.set('s', i);
1290 (*pDataEq)[tileSplitStart + i].add(cs);
1291 }
1292 // Fill in x/y bits above sample split
1293 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1294 {
1295 pDataEq->mort2d(cx, cy, blockSizeLog2);
1296 }
1297 else
1298 {
1299 pDataEq->mort2d(cy, cx, blockSizeLog2);
1300 }
1301 }
1302 else
1303 {
1304 ADDR_ASSERT_ALWAYS();
1305 }
1306 }
1307 else
1308 {
1309 // Fmask or depth
1310 UINT_32 sampleStart = elementBytesLog2;
1311 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1312 UINT_32 ymajStart = 6 + numSamplesLog2;
1313
1314 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1315 {
1316 cs.set('s', s);
1317 (*pDataEq)[sampleStart + s].add(cs);
1318 }
1319
1320 // Put in the x-major order pixel bits
1321 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1322 // Put in the y-major order pixel bits
1323 pDataEq->mort2d(cy, cx, ymajStart);
1324 }
1325 }
1326
1327 /**
1328 ****************************************************************************************************
1329 * Gfx9Lib::GetPipeEquation
1330 *
1331 * @brief
1332 * Get pipe equation
1333 * @return
1334 * N/A
1335 ****************************************************************************************************
1336 */
1337 VOID Gfx9Lib::GetPipeEquation(
1338 CoordEq* pPipeEq, ///< [out] pipe equation
1339 CoordEq* pDataEq, ///< [in] data equation
1340 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1341 UINT_32 numPipeLog2, ///< [in] number of pipes
1342 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1343 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1344 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1345 AddrResourceType resourceType ///< [in] data surface resource type
1346 ) const
1347 {
1348 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1349 CoordEq dataEq;
1350
1351 pDataEq->copy(dataEq);
1352
1353 if (dataSurfaceType == Gfx9DataColor)
1354 {
1355 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1356 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1357 }
1358
1359 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1360
1361 // This section should only apply to z/stencil, maybe fmask
1362 // If the pipe bit is below the comp block size,
1363 // then keep moving up the address until we find a bit that is above
1364 UINT_32 pipeStart = 0;
1365
1366 if (dataSurfaceType != Gfx9DataColor)
1367 {
1368 Coordinate tileMin('x', 3);
1369
1370 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1371 {
1372 pipeStart++;
1373 }
1374
1375 // if pipe is 0, then the first pipe bit is above the comp block size,
1376 // so we don't need to do anything
1377 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1378 // we will get the same pipe equation
1379 if (pipeStart != 0)
1380 {
1381 for (UINT_32 i = 0; i < numPipeLog2; i++)
1382 {
1383 // Copy the jth bit above pipe interleave to the current pipe equation bit
1384 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1385 }
1386 }
1387 }
1388
1389 if (IsPrt(swizzleMode))
1390 {
1391 // Clear out bits above the block size if prt's are enabled
1392 dataEq.resize(blockSizeLog2);
1393 dataEq.resize(48);
1394 }
1395
1396 if (IsXor(swizzleMode))
1397 {
1398 CoordEq xorMask;
1399
1400 if (IsThick(resourceType, swizzleMode))
1401 {
1402 CoordEq xorMask2;
1403
1404 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1405
1406 xorMask.resize(numPipeLog2);
1407
1408 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1409 {
1410 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1411 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1412 }
1413 }
1414 else
1415 {
1416 // Xor in the bits above the pipe+gpu bits
1417 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1418
1419 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1420 {
1421 Coordinate co;
1422 CoordEq xorMask2;
1423 // if 1xaa and not prt, then xor in the z bits
1424 xorMask2.resize(0);
1425 xorMask2.resize(numPipeLog2);
1426 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1427 {
1428 co.set('z', numPipeLog2 - 1 - pipeIdx);
1429 xorMask2[pipeIdx].add(co);
1430 }
1431
1432 pPipeEq->xorin(xorMask2);
1433 }
1434 }
1435
1436 xorMask.reverse();
1437 pPipeEq->xorin(xorMask);
1438 }
1439 }
1440
1441 /**
1442 ****************************************************************************************************
1443 * Gfx9Lib::GetMetaEquation
1444 *
1445 * @brief
1446 * Get meta equation for cmask/htile/DCC
1447 * @return
1448 * N/A
1449 ****************************************************************************************************
1450 */
1451 VOID Gfx9Lib::GetMetaEquation(
1452 CoordEq* pMetaEq, ///< [out] meta equation
1453 UINT_32 maxMip, ///< [in] max mip Id
1454 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1455 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1456 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1457 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1458 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1459 AddrResourceType resourceType, ///< [in] data surface resource type
1460 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1461 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1462 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1463 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1464 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1465 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1466 const
1467 {
1468 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1469 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1470 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1471
1472 // Get the correct data address and rb equation
1473 CoordEq dataEq;
1474 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1475 elementBytesLog2, numSamplesLog2);
1476
1477 // Get pipe and rb equations
1478 CoordEq pipeEquation;
1479 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1480 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1481 numPipeTotalLog2 = pipeEquation.getsize();
1482
1483 if (metaFlag.linear)
1484 {
1485 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1486 ADDR_ASSERT_ALWAYS();
1487
1488 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1489
1490 dataEq.copy(*pMetaEq);
1491
1492 if (IsLinear(swizzleMode))
1493 {
1494 if (metaFlag.pipeAligned)
1495 {
1496 // Remove the pipe bits
1497 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1498 pMetaEq->shift(-shift, pipeInterleaveLog2);
1499 }
1500 // Divide by comp block size, which for linear (which is always color) is 256 B
1501 pMetaEq->shift(-8);
1502
1503 if (metaFlag.pipeAligned)
1504 {
1505 // Put pipe bits back in
1506 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1507
1508 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1509 {
1510 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1511 }
1512 }
1513 }
1514
1515 pMetaEq->shift(1);
1516 }
1517 else
1518 {
1519 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1520 UINT_32 compFragLog2 =
1521 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1522 maxCompFragLog2 : numSamplesLog2;
1523
1524 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1525
1526 // Make sure the metaaddr is cleared
1527 pMetaEq->resize(0);
1528 pMetaEq->resize(27);
1529
1530 if (IsThick(resourceType, swizzleMode))
1531 {
1532 Coordinate cx('x', 0);
1533 Coordinate cy('y', 0);
1534 Coordinate cz('z', 0);
1535
1536 if (maxMip > 0)
1537 {
1538 pMetaEq->mort3d(cy, cx, cz);
1539 }
1540 else
1541 {
1542 pMetaEq->mort3d(cx, cy, cz);
1543 }
1544 }
1545 else
1546 {
1547 Coordinate cx('x', 0);
1548 Coordinate cy('y', 0);
1549 Coordinate cs;
1550
1551 if (maxMip > 0)
1552 {
1553 pMetaEq->mort2d(cy, cx, compFragLog2);
1554 }
1555 else
1556 {
1557 pMetaEq->mort2d(cx, cy, compFragLog2);
1558 }
1559
1560 //------------------------------------------------------------------------------------------------------------------------
1561 // Put the compressible fragments at the lsb
1562 // the uncompressible frags will be at the msb of the micro address
1563 //------------------------------------------------------------------------------------------------------------------------
1564 for (UINT_32 s = 0; s < compFragLog2; s++)
1565 {
1566 cs.set('s', s);
1567 (*pMetaEq)[s].add(cs);
1568 }
1569 }
1570
1571 // Keep a copy of the pipe equations
1572 CoordEq origPipeEquation;
1573 pipeEquation.copy(origPipeEquation);
1574
1575 Coordinate co;
1576 // filter out everything under the compressed block size
1577 co.set('x', compBlkWidthLog2);
1578 pMetaEq->Filter('<', co, 0, 'x');
1579 co.set('y', compBlkHeightLog2);
1580 pMetaEq->Filter('<', co, 0, 'y');
1581 co.set('z', compBlkDepthLog2);
1582 pMetaEq->Filter('<', co, 0, 'z');
1583
1584 // For non-color, filter out sample bits
1585 if (dataSurfaceType != Gfx9DataColor)
1586 {
1587 co.set('x', 0);
1588 pMetaEq->Filter('<', co, 0, 's');
1589 }
1590
1591 // filter out everything above the metablock size
1592 co.set('x', metaBlkWidthLog2 - 1);
1593 pMetaEq->Filter('>', co, 0, 'x');
1594 co.set('y', metaBlkHeightLog2 - 1);
1595 pMetaEq->Filter('>', co, 0, 'y');
1596 co.set('z', metaBlkDepthLog2 - 1);
1597 pMetaEq->Filter('>', co, 0, 'z');
1598
1599 // filter out everything above the metablock size for the channel bits
1600 co.set('x', metaBlkWidthLog2 - 1);
1601 pipeEquation.Filter('>', co, 0, 'x');
1602 co.set('y', metaBlkHeightLog2 - 1);
1603 pipeEquation.Filter('>', co, 0, 'y');
1604 co.set('z', metaBlkDepthLog2 - 1);
1605 pipeEquation.Filter('>', co, 0, 'z');
1606
1607 // Make sure we still have the same number of channel bits
1608 if (pipeEquation.getsize() != numPipeTotalLog2)
1609 {
1610 ADDR_ASSERT_ALWAYS();
1611 }
1612
1613 // Loop through all channel and rb bits,
1614 // and make sure these components exist in the metadata address
1615 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1616 {
1617 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1618 {
1619 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1620 {
1621 ADDR_ASSERT_ALWAYS();
1622 }
1623 }
1624 }
1625
1626 UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1627 UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1628 CoordEq origRbEquation;
1629
1630 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1631
1632 CoordEq rbEquation = origRbEquation;
1633
1634 UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1635
1636 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1637 {
1638 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1639 {
1640 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1641 {
1642 ADDR_ASSERT_ALWAYS();
1643 }
1644 }
1645 }
1646
1647 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1648 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1649 {
1650 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1651 {
1652 if (rbEquation[i] == pipeEquation[j])
1653 {
1654 rbEquation[i].Clear();
1655 }
1656 }
1657 }
1658
1659 // Loop through each bit of the channel, get the smallest coordinate,
1660 // and remove it from the metaaddr, and rb_equation
1661 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1662 {
1663 pipeEquation[i].getsmallest(co);
1664
1665 UINT_32 old_size = pMetaEq->getsize();
1666 pMetaEq->Filter('=', co);
1667 UINT_32 new_size = pMetaEq->getsize();
1668 if (new_size != old_size-1)
1669 {
1670 ADDR_ASSERT_ALWAYS();
1671 }
1672 pipeEquation.remove(co);
1673 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
1674 {
1675 if (rbEquation[j].remove(co))
1676 {
1677 // if we actually removed something from this bit, then add the remaining
1678 // channel bits, as these can be removed for this bit
1679 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
1680 {
1681 if (pipeEquation[i][k] != co)
1682 {
1683 rbEquation[j].add(pipeEquation[i][k]);
1684 }
1685 }
1686 }
1687 }
1688 }
1689
1690 // Loop through the rb bits and see what remain;
1691 // filter out the smallest coordinate if it remains
1692 UINT_32 rbBitsLeft = 0;
1693 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1694 {
1695 if (rbEquation[i].getsize() > 0)
1696 {
1697 rbBitsLeft++;
1698 rbEquation[i].getsmallest(co);
1699 UINT_32 old_size = pMetaEq->getsize();
1700 pMetaEq->Filter('=', co);
1701 UINT_32 new_size = pMetaEq->getsize();
1702 if (new_size != old_size - 1)
1703 {
1704 // assert warning
1705 }
1706 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
1707 {
1708 if (rbEquation[j].remove(co))
1709 {
1710 // if we actually removed something from this bit, then add the remaining
1711 // rb bits, as these can be removed for this bit
1712 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
1713 {
1714 if (rbEquation[i][k] != co)
1715 {
1716 rbEquation[j].add(rbEquation[i][k]);
1717 }
1718 }
1719 }
1720 }
1721 }
1722 }
1723
1724 // capture the size of the metaaddr
1725 UINT_32 metaSize = pMetaEq->getsize();
1726 // resize to 49 bits...make this a nibble address
1727 pMetaEq->resize(49);
1728 // Concatenate the macro address above the current address
1729 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
1730 {
1731 co.set('m', j);
1732 (*pMetaEq)[i].add(co);
1733 }
1734
1735 // Multiply by meta element size (in nibbles)
1736 if (dataSurfaceType == Gfx9DataColor)
1737 {
1738 pMetaEq->shift(1);
1739 }
1740 else if (dataSurfaceType == Gfx9DataDepthStencil)
1741 {
1742 pMetaEq->shift(3);
1743 }
1744
1745 //------------------------------------------------------------------------------------------
1746 // Note the pipeInterleaveLog2+1 is because address is a nibble address
1747 // Shift up from pipe interleave number of channel
1748 // and rb bits left, and uncompressed fragments
1749 //------------------------------------------------------------------------------------------
1750
1751 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
1752
1753 // Put in the channel bits
1754 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1755 {
1756 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
1757 }
1758
1759 // Put in remaining rb bits
1760 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
1761 {
1762 if (rbEquation[i].getsize() > 0)
1763 {
1764 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
1765 // Mark any rb bit we add in to the rb mask
1766 j++;
1767 }
1768 }
1769
1770 //------------------------------------------------------------------------------------------
1771 // Put in the uncompressed fragment bits
1772 //------------------------------------------------------------------------------------------
1773 for (UINT_32 i = 0; i < uncompFragLog2; i++)
1774 {
1775 co.set('s', compFragLog2 + i);
1776 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
1777 }
1778 }
1779 }
1780
1781 /**
1782 ****************************************************************************************************
1783 * Gfx9Lib::IsEquationSupported
1784 *
1785 * @brief
1786 * Check if equation is supported for given swizzle mode and resource type.
1787 *
1788 * @return
1789 * TRUE if supported
1790 ****************************************************************************************************
1791 */
1792 BOOL_32 Gfx9Lib::IsEquationSupported(
1793 AddrResourceType rsrcType,
1794 AddrSwizzleMode swMode,
1795 UINT_32 elementBytesLog2) const
1796 {
1797 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
1798 (IsLinear(swMode) == FALSE) &&
1799 ((IsTex2d(rsrcType) == TRUE) ||
1800 ((IsTex3d(rsrcType) == TRUE) &&
1801 (IsRotateSwizzle(swMode) == FALSE) &&
1802 (IsBlock256b(swMode) == FALSE)));
1803
1804 return supported;
1805 }
1806
1807 /**
1808 ****************************************************************************************************
1809 * Gfx9Lib::InitEquationTable
1810 *
1811 * @brief
1812 * Initialize Equation table.
1813 *
1814 * @return
1815 * N/A
1816 ****************************************************************************************************
1817 */
1818 VOID Gfx9Lib::InitEquationTable()
1819 {
1820 memset(m_equationTable, 0, sizeof(m_equationTable));
1821
1822 // Loop all possible resource type (2D/3D)
1823 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1824 {
1825 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1826
1827 // Loop all possible swizzle mode
1828 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
1829 {
1830 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1831
1832 // Loop all possible bpp
1833 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
1834 {
1835 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1836
1837 // Check if the input is supported
1838 if (IsEquationSupported(rsrcType, swMode, bppIdx))
1839 {
1840 ADDR_EQUATION equation;
1841 ADDR_E_RETURNCODE retCode;
1842
1843 memset(&equation, 0, sizeof(ADDR_EQUATION));
1844
1845 // Generate the equation
1846 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
1847 {
1848 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
1849 }
1850 else if (IsThin(rsrcType, swMode))
1851 {
1852 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
1853 }
1854 else
1855 {
1856 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
1857 }
1858
1859 // Only fill the equation into the table if the return code is ADDR_OK,
1860 // otherwise if the return code is not ADDR_OK, it indicates this is not
1861 // a valid input, we do nothing but just fill invalid equation index
1862 // into the lookup table.
1863 if (retCode == ADDR_OK)
1864 {
1865 equationIndex = m_numEquations;
1866 ADDR_ASSERT(equationIndex < EquationTableSize);
1867
1868 m_equationTable[equationIndex] = equation;
1869
1870 m_numEquations++;
1871 }
1872 }
1873
1874 // Fill the index into the lookup table, if the combination is not supported
1875 // fill the invalid equation index
1876 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
1877 }
1878 }
1879 }
1880 }
1881
1882 /**
1883 ****************************************************************************************************
1884 * Gfx9Lib::HwlGetEquationIndex
1885 *
1886 * @brief
1887 * Interface function stub of GetEquationIndex
1888 *
1889 * @return
1890 * ADDR_E_RETURNCODE
1891 ****************************************************************************************************
1892 */
1893 UINT_32 Gfx9Lib::HwlGetEquationIndex(
1894 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
1895 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
1896 ) const
1897 {
1898 AddrResourceType rsrcType = pIn->resourceType;
1899 AddrSwizzleMode swMode = pIn->swizzleMode;
1900 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1901 UINT_32 numMipLevels = pIn->numMipLevels;
1902 ADDR2_MIP_INFO* pMipInfo = pOut->pMipInfo;
1903
1904 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
1905
1906 BOOL_32 eqSupported = (pOut->firstMipInTail == FALSE) &&
1907 IsEquationSupported(rsrcType, swMode, elementBytesLog2);
1908
1909 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
1910 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
1911
1912 if (eqSupported)
1913 {
1914 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
1915
1916 if (pMipInfo != NULL)
1917 {
1918 pMipInfo->equationIndex = index;
1919 pMipInfo->mipOffsetXBytes = 0;
1920 pMipInfo->mipOffsetYPixel = 0;
1921 pMipInfo->mipOffsetZPixel = 0;
1922 pMipInfo->postSwizzleOffset = 0;
1923
1924 static const UINT_32 Prt_Xor_Gap =
1925 static_cast<UINT_32>(ADDR_SW_64KB_Z_T) - static_cast<UINT_32>(ADDR_SW_64KB_Z);
1926
1927 for (UINT_32 i = 1; i < numMipLevels; i++)
1928 {
1929 Dim3d mipStartPos = {0};
1930 UINT_32 mipTailOffset = 0;
1931
1932 mipStartPos = GetMipStartPos(rsrcType,
1933 swMode,
1934 pOut->pitch,
1935 pOut->height,
1936 pOut->numSlices,
1937 pOut->blockWidth,
1938 pOut->blockHeight,
1939 pOut->blockSlices,
1940 i,
1941 &mipTailOffset);
1942
1943 UINT_32 mipSwModeIdx = swModeIdx;
1944
1945 pMipInfo[i].equationIndex =
1946 m_equationLookupTable[rsrcTypeIdx][mipSwModeIdx][elementBytesLog2];
1947 pMipInfo[i].mipOffsetXBytes = mipStartPos.w * pOut->blockWidth * (pOut->bpp >> 3);
1948 pMipInfo[i].mipOffsetYPixel = mipStartPos.h * pOut->blockHeight;
1949 pMipInfo[i].mipOffsetZPixel = mipStartPos.d * pOut->blockSlices;
1950 pMipInfo[i].postSwizzleOffset = mipTailOffset;
1951 }
1952 }
1953 }
1954 else if (pMipInfo != NULL)
1955 {
1956 for (UINT_32 i = 0; i < numMipLevels; i++)
1957 {
1958 pMipInfo[i].equationIndex = ADDR_INVALID_EQUATION_INDEX;
1959 pMipInfo[i].mipOffsetXBytes = 0;
1960 pMipInfo[i].mipOffsetYPixel = 0;
1961 pMipInfo[i].mipOffsetZPixel = 0;
1962 pMipInfo[i].postSwizzleOffset = 0;
1963 }
1964 }
1965
1966 return index;
1967 }
1968
1969 /**
1970 ****************************************************************************************************
1971 * Gfx9Lib::HwlComputeBlock256Equation
1972 *
1973 * @brief
1974 * Interface function stub of ComputeBlock256Equation
1975 *
1976 * @return
1977 * ADDR_E_RETURNCODE
1978 ****************************************************************************************************
1979 */
1980 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
1981 AddrResourceType rsrcType,
1982 AddrSwizzleMode swMode,
1983 UINT_32 elementBytesLog2,
1984 ADDR_EQUATION* pEquation) const
1985 {
1986 ADDR_E_RETURNCODE ret = ADDR_OK;
1987
1988 pEquation->numBits = 8;
1989
1990 UINT_32 i = 0;
1991 for (; i < elementBytesLog2; i++)
1992 {
1993 InitChannel(1, 0 , i, &pEquation->addr[i]);
1994 }
1995
1996 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
1997
1998 const UINT_32 MaxBitsUsed = 4;
1999 ADDR_CHANNEL_SETTING x[MaxBitsUsed] = {};
2000 ADDR_CHANNEL_SETTING y[MaxBitsUsed] = {};
2001
2002 for (i = 0; i < MaxBitsUsed; i++)
2003 {
2004 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2005 InitChannel(1, 1, i, &y[i]);
2006 }
2007
2008 if (IsStandardSwizzle(rsrcType, swMode))
2009 {
2010 switch (elementBytesLog2)
2011 {
2012 case 0:
2013 pixelBit[0] = x[0];
2014 pixelBit[1] = x[1];
2015 pixelBit[2] = x[2];
2016 pixelBit[3] = x[3];
2017 pixelBit[4] = y[0];
2018 pixelBit[5] = y[1];
2019 pixelBit[6] = y[2];
2020 pixelBit[7] = y[3];
2021 break;
2022 case 1:
2023 pixelBit[0] = x[0];
2024 pixelBit[1] = x[1];
2025 pixelBit[2] = x[2];
2026 pixelBit[3] = y[0];
2027 pixelBit[4] = y[1];
2028 pixelBit[5] = y[2];
2029 pixelBit[6] = x[3];
2030 break;
2031 case 2:
2032 pixelBit[0] = x[0];
2033 pixelBit[1] = x[1];
2034 pixelBit[2] = y[0];
2035 pixelBit[3] = y[1];
2036 pixelBit[4] = y[2];
2037 pixelBit[5] = x[2];
2038 break;
2039 case 3:
2040 pixelBit[0] = x[0];
2041 pixelBit[1] = y[0];
2042 pixelBit[2] = y[1];
2043 pixelBit[3] = x[1];
2044 pixelBit[4] = x[2];
2045 break;
2046 case 4:
2047 pixelBit[0] = y[0];
2048 pixelBit[1] = y[1];
2049 pixelBit[2] = x[0];
2050 pixelBit[3] = x[1];
2051 break;
2052 default:
2053 ADDR_ASSERT_ALWAYS();
2054 ret = ADDR_INVALIDPARAMS;
2055 break;
2056 }
2057 }
2058 else if (IsDisplaySwizzle(rsrcType, swMode))
2059 {
2060 switch (elementBytesLog2)
2061 {
2062 case 0:
2063 pixelBit[0] = x[0];
2064 pixelBit[1] = x[1];
2065 pixelBit[2] = x[2];
2066 pixelBit[3] = y[1];
2067 pixelBit[4] = y[0];
2068 pixelBit[5] = y[2];
2069 pixelBit[6] = x[3];
2070 pixelBit[7] = y[3];
2071 break;
2072 case 1:
2073 pixelBit[0] = x[0];
2074 pixelBit[1] = x[1];
2075 pixelBit[2] = x[2];
2076 pixelBit[3] = y[0];
2077 pixelBit[4] = y[1];
2078 pixelBit[5] = y[2];
2079 pixelBit[6] = x[3];
2080 break;
2081 case 2:
2082 pixelBit[0] = x[0];
2083 pixelBit[1] = x[1];
2084 pixelBit[2] = y[0];
2085 pixelBit[3] = x[2];
2086 pixelBit[4] = y[1];
2087 pixelBit[5] = y[2];
2088 break;
2089 case 3:
2090 pixelBit[0] = x[0];
2091 pixelBit[1] = y[0];
2092 pixelBit[2] = x[1];
2093 pixelBit[3] = x[2];
2094 pixelBit[4] = y[1];
2095 break;
2096 case 4:
2097 pixelBit[0] = x[0];
2098 pixelBit[1] = y[0];
2099 pixelBit[2] = x[1];
2100 pixelBit[3] = y[1];
2101 break;
2102 default:
2103 ADDR_ASSERT_ALWAYS();
2104 ret = ADDR_INVALIDPARAMS;
2105 break;
2106 }
2107 }
2108 else if (IsRotateSwizzle(swMode))
2109 {
2110 switch (elementBytesLog2)
2111 {
2112 case 0:
2113 pixelBit[0] = y[0];
2114 pixelBit[1] = y[1];
2115 pixelBit[2] = y[2];
2116 pixelBit[3] = x[1];
2117 pixelBit[4] = x[0];
2118 pixelBit[5] = x[2];
2119 pixelBit[6] = x[3];
2120 pixelBit[7] = y[3];
2121 break;
2122 case 1:
2123 pixelBit[0] = y[0];
2124 pixelBit[1] = y[1];
2125 pixelBit[2] = y[2];
2126 pixelBit[3] = x[0];
2127 pixelBit[4] = x[1];
2128 pixelBit[5] = x[2];
2129 pixelBit[6] = x[3];
2130 break;
2131 case 2:
2132 pixelBit[0] = y[0];
2133 pixelBit[1] = y[1];
2134 pixelBit[2] = x[0];
2135 pixelBit[3] = y[2];
2136 pixelBit[4] = x[1];
2137 pixelBit[5] = x[2];
2138 break;
2139 case 3:
2140 pixelBit[0] = y[0];
2141 pixelBit[1] = x[0];
2142 pixelBit[2] = y[1];
2143 pixelBit[3] = x[1];
2144 pixelBit[4] = x[2];
2145 break;
2146 default:
2147 ADDR_ASSERT_ALWAYS();
2148 case 4:
2149 ret = ADDR_INVALIDPARAMS;
2150 break;
2151 }
2152 }
2153 else
2154 {
2155 ADDR_ASSERT_ALWAYS();
2156 ret = ADDR_INVALIDPARAMS;
2157 }
2158
2159 // Post validation
2160 if (ret == ADDR_OK)
2161 {
2162 Dim2d microBlockDim = Block256b[elementBytesLog2];
2163 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2164 (microBlockDim.w * (1 << elementBytesLog2)));
2165 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2166 }
2167
2168 return ret;
2169 }
2170
2171 /**
2172 ****************************************************************************************************
2173 * Gfx9Lib::HwlComputeThinEquation
2174 *
2175 * @brief
2176 * Interface function stub of ComputeThinEquation
2177 *
2178 * @return
2179 * ADDR_E_RETURNCODE
2180 ****************************************************************************************************
2181 */
2182 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2183 AddrResourceType rsrcType,
2184 AddrSwizzleMode swMode,
2185 UINT_32 elementBytesLog2,
2186 ADDR_EQUATION* pEquation) const
2187 {
2188 ADDR_E_RETURNCODE ret = ADDR_OK;
2189
2190 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2191
2192 UINT_32 maxXorBits = blockSizeLog2;
2193 if (IsNonPrtXor(swMode))
2194 {
2195 // For non-prt-xor, maybe need to initialize some more bits for xor
2196 // The highest xor bit used in equation will be max the following 3 items:
2197 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2198 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2199 // 3. blockSizeLog2
2200
2201 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2202 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2203 GetPipeXorBits(blockSizeLog2) +
2204 2 * GetBankXorBits(blockSizeLog2));
2205 }
2206
2207 const UINT_32 MaxBitsUsed = 14;
2208 ADDR_ASSERT((2 * MaxBitsUsed) >= maxXorBits);
2209 ADDR_CHANNEL_SETTING x[MaxBitsUsed] = {};
2210 ADDR_CHANNEL_SETTING y[MaxBitsUsed] = {};
2211
2212 const UINT_32 ExtraXorBits = 16;
2213 ADDR_ASSERT(ExtraXorBits >= maxXorBits - blockSizeLog2);
2214 ADDR_CHANNEL_SETTING xorExtra[ExtraXorBits] = {};
2215
2216 for (UINT_32 i = 0; i < MaxBitsUsed; i++)
2217 {
2218 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2219 InitChannel(1, 1, i, &y[i]);
2220 }
2221
2222 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2223
2224 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2225 {
2226 InitChannel(1, 0 , i, &pixelBit[i]);
2227 }
2228
2229 UINT_32 xIdx = 0;
2230 UINT_32 yIdx = 0;
2231 UINT_32 lowBits = 0;
2232
2233 if (IsZOrderSwizzle(swMode))
2234 {
2235 if (elementBytesLog2 <= 3)
2236 {
2237 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2238 {
2239 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2240 }
2241
2242 lowBits = 6;
2243 }
2244 else
2245 {
2246 ret = ADDR_INVALIDPARAMS;
2247 }
2248 }
2249 else
2250 {
2251 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2252 if (ret == ADDR_OK)
2253 {
2254 Dim2d microBlockDim = Block256b[elementBytesLog2];
2255 xIdx = Log2(microBlockDim.w);
2256 yIdx = Log2(microBlockDim.h);
2257 lowBits = 8;
2258 }
2259 }
2260
2261 if (ret == ADDR_OK)
2262 {
2263 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2264 {
2265 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2266 }
2267
2268 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2269 {
2270 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2271 }
2272 }
2273
2274 if ((ret == ADDR_OK) && IsXor(swMode))
2275 {
2276 // Fill XOR bits
2277 UINT_32 pipeStart = m_pipeInterleaveLog2;
2278 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2279 for (UINT_32 i = 0; i < pipeXorBits; i++)
2280 {
2281 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2282 ADDR_CHANNEL_SETTING* pXor1Src =
2283 (xor1BitPos < blockSizeLog2) ?
2284 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2285
2286 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2287 }
2288
2289 UINT_32 bankStart = pipeStart + pipeXorBits;
2290 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2291 for (UINT_32 i = 0; i < bankXorBits; i++)
2292 {
2293 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2294 ADDR_CHANNEL_SETTING* pXor1Src =
2295 (xor1BitPos < blockSizeLog2) ?
2296 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2297
2298 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2299 }
2300
2301 pEquation->numBits = blockSizeLog2;
2302 }
2303
2304 if ((ret == ADDR_OK) && IsTex3d(rsrcType))
2305 {
2306 pEquation->stackedDepthSlices = TRUE;
2307 }
2308
2309 return ret;
2310 }
2311
2312 /**
2313 ****************************************************************************************************
2314 * Gfx9Lib::HwlComputeThickEquation
2315 *
2316 * @brief
2317 * Gfx9 implementation of the ComputeThickEquation hook: fills in pEquation (address bits and XOR terms) for the given swizzle mode and element size
2318 *
2319 * @return
2320 * ADDR_E_RETURNCODE
2321 ****************************************************************************************************
2322 */
2323 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2324 AddrResourceType rsrcType,
2325 AddrSwizzleMode swMode,
2326 UINT_32 elementBytesLog2,
2327 ADDR_EQUATION* pEquation) const
2328 {
2329 ADDR_E_RETURNCODE ret = ADDR_OK;
2330
2331 ADDR_ASSERT(IsTex3d(rsrcType));
2332
2333 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2334
2335 UINT_32 maxXorBits = blockSizeLog2;
2336 if (IsNonPrtXor(swMode))
2337 {
2338 // For non-prt-xor, maybe need to initialize some more bits for xor
2339 // The highest xor bit used in equation will be max the following 3:
2340 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2341 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2342 // 3. blockSizeLog2
2343
2344 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2345 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2346 GetPipeXorBits(blockSizeLog2) +
2347 3 * GetBankXorBits(blockSizeLog2));
2348 }
2349
2350 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2351 {
2352 InitChannel(1, 0 , i, &pEquation->addr[i]);
2353 }
2354
2355 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2356
2357 const UINT_32 MaxBitsUsed = 12;
2358 ADDR_ASSERT((3 * MaxBitsUsed) >= maxXorBits);
2359 ADDR_CHANNEL_SETTING x[MaxBitsUsed] = {};
2360 ADDR_CHANNEL_SETTING y[MaxBitsUsed] = {};
2361 ADDR_CHANNEL_SETTING z[MaxBitsUsed] = {};
2362
2363 const UINT_32 ExtraXorBits = 24;
2364 ADDR_ASSERT(ExtraXorBits >= maxXorBits - blockSizeLog2);
2365 ADDR_CHANNEL_SETTING xorExtra[ExtraXorBits] = {};
2366
2367 for (UINT_32 i = 0; i < MaxBitsUsed; i++)
2368 {
2369 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2370 InitChannel(1, 1, i, &y[i]);
2371 InitChannel(1, 2, i, &z[i]);
2372 }
2373
2374 if (IsZOrderSwizzle(swMode))
2375 {
2376 switch (elementBytesLog2)
2377 {
2378 case 0:
2379 pixelBit[0] = x[0];
2380 pixelBit[1] = y[0];
2381 pixelBit[2] = x[1];
2382 pixelBit[3] = y[1];
2383 pixelBit[4] = z[0];
2384 pixelBit[5] = z[1];
2385 pixelBit[6] = x[2];
2386 pixelBit[7] = z[2];
2387 pixelBit[8] = y[2];
2388 pixelBit[9] = x[3];
2389 break;
2390 case 1:
2391 pixelBit[0] = x[0];
2392 pixelBit[1] = y[0];
2393 pixelBit[2] = x[1];
2394 pixelBit[3] = y[1];
2395 pixelBit[4] = z[0];
2396 pixelBit[5] = z[1];
2397 pixelBit[6] = z[2];
2398 pixelBit[7] = y[2];
2399 pixelBit[8] = x[2];
2400 break;
2401 case 2:
2402 pixelBit[0] = x[0];
2403 pixelBit[1] = y[0];
2404 pixelBit[2] = x[1];
2405 pixelBit[3] = z[0];
2406 pixelBit[4] = y[1];
2407 pixelBit[5] = z[1];
2408 pixelBit[6] = y[2];
2409 pixelBit[7] = x[2];
2410 break;
2411 case 3:
2412 pixelBit[0] = x[0];
2413 pixelBit[1] = y[0];
2414 pixelBit[2] = z[0];
2415 pixelBit[3] = x[1];
2416 pixelBit[4] = z[1];
2417 pixelBit[5] = y[1];
2418 pixelBit[6] = x[2];
2419 break;
2420 case 4:
2421 pixelBit[0] = x[0];
2422 pixelBit[1] = y[0];
2423 pixelBit[2] = z[0];
2424 pixelBit[3] = z[1];
2425 pixelBit[4] = y[1];
2426 pixelBit[5] = x[1];
2427 break;
2428 default:
2429 ADDR_ASSERT_ALWAYS();
2430 ret = ADDR_INVALIDPARAMS;
2431 break;
2432 }
2433 }
2434 else if (IsStandardSwizzle(rsrcType, swMode))
2435 {
2436 switch (elementBytesLog2)
2437 {
2438 case 0:
2439 pixelBit[0] = x[0];
2440 pixelBit[1] = x[1];
2441 pixelBit[2] = x[2];
2442 pixelBit[3] = x[3];
2443 pixelBit[4] = y[0];
2444 pixelBit[5] = y[1];
2445 pixelBit[6] = z[0];
2446 pixelBit[7] = z[1];
2447 pixelBit[8] = z[2];
2448 pixelBit[9] = y[2];
2449 break;
2450 case 1:
2451 pixelBit[0] = x[0];
2452 pixelBit[1] = x[1];
2453 pixelBit[2] = x[2];
2454 pixelBit[3] = y[0];
2455 pixelBit[4] = y[1];
2456 pixelBit[5] = z[0];
2457 pixelBit[6] = z[1];
2458 pixelBit[7] = z[2];
2459 pixelBit[8] = y[2];
2460 break;
2461 case 2:
2462 pixelBit[0] = x[0];
2463 pixelBit[1] = x[1];
2464 pixelBit[2] = y[0];
2465 pixelBit[3] = y[1];
2466 pixelBit[4] = z[0];
2467 pixelBit[5] = z[1];
2468 pixelBit[6] = y[2];
2469 pixelBit[7] = x[2];
2470 break;
2471 case 3:
2472 pixelBit[0] = x[0];
2473 pixelBit[1] = y[0];
2474 pixelBit[2] = y[1];
2475 pixelBit[3] = z[0];
2476 pixelBit[4] = z[1];
2477 pixelBit[5] = x[1];
2478 pixelBit[6] = x[2];
2479 break;
2480 case 4:
2481 pixelBit[0] = y[0];
2482 pixelBit[1] = y[1];
2483 pixelBit[2] = z[0];
2484 pixelBit[3] = z[1];
2485 pixelBit[4] = x[0];
2486 pixelBit[5] = x[1];
2487 break;
2488 default:
2489 ADDR_ASSERT_ALWAYS();
2490 ret = ADDR_INVALIDPARAMS;
2491 break;
2492 }
2493 }
2494 else
2495 {
2496 ADDR_ASSERT_ALWAYS();
2497 ret = ADDR_INVALIDPARAMS;
2498 }
2499
2500 if (ret == ADDR_OK)
2501 {
2502 Dim3d microBlockDim = Block1kb[elementBytesLog2];
2503 UINT_32 xIdx = Log2(microBlockDim.w);
2504 UINT_32 yIdx = Log2(microBlockDim.h);
2505 UINT_32 zIdx = Log2(microBlockDim.d);
2506
2507 pixelBit = pEquation->addr;
2508
2509 static const UINT_32 lowBits = 10;
2510 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2511 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2512
2513 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2514 {
2515 if (((i - lowBits) % 3) == 0)
2516 {
2517 pixelBit[i] = x[xIdx++];
2518 }
2519 else if (((i - lowBits) % 3) == 1)
2520 {
2521 pixelBit[i] = z[zIdx++];
2522 }
2523 else
2524 {
2525 pixelBit[i] = y[yIdx++];
2526 }
2527 }
2528
2529 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2530 {
2531 if (((i - lowBits) % 3) == 0)
2532 {
2533 xorExtra[i - blockSizeLog2] = x[xIdx++];
2534 }
2535 else if (((i - lowBits) % 3) == 1)
2536 {
2537 xorExtra[i - blockSizeLog2] = z[zIdx++];
2538 }
2539 else
2540 {
2541 xorExtra[i - blockSizeLog2] = y[yIdx++];
2542 }
2543 }
2544 }
2545
2546 if ((ret == ADDR_OK) && IsXor(swMode))
2547 {
2548 // Fill XOR bits
2549 UINT_32 pipeStart = m_pipeInterleaveLog2;
2550 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2551 for (UINT_32 i = 0; i < pipeXorBits; i++)
2552 {
2553 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2554 ADDR_CHANNEL_SETTING* pXor1Src =
2555 (xor1BitPos < blockSizeLog2) ?
2556 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2557
2558 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2559
2560 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2561 ADDR_CHANNEL_SETTING* pXor2Src =
2562 (xor2BitPos < blockSizeLog2) ?
2563 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2564
2565 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2566 }
2567
2568 UINT_32 bankStart = pipeStart + pipeXorBits;
2569 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2570 for (UINT_32 i = 0; i < bankXorBits; i++)
2571 {
2572 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2573 ADDR_CHANNEL_SETTING* pXor1Src =
2574 (xor1BitPos < blockSizeLog2) ?
2575 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2576
2577 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2578
2579 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2580 ADDR_CHANNEL_SETTING* pXor2Src =
2581 (xor2BitPos < blockSizeLog2) ?
2582 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2583
2584 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2585 }
2586
2587 pEquation->numBits = blockSizeLog2;
2588 }
2589
2590 return ret;
2591 }
2592
2593 /**
2594 ****************************************************************************************************
2595 * Gfx9Lib::HwlIsValidDisplaySwizzleMode
2596 *
2597 * @brief
2598 * Check if a swizzle mode is supported by display engine
2599 *
2600 * @return
2601 * TRUE if the swizzle mode is supported by the display engine
2602 ****************************************************************************************************
2603 */
2604 BOOL_32 Gfx9Lib::HwlIsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2605 {
2606 BOOL_32 support = FALSE;
2607
2608 const AddrResourceType resourceType = pIn->resourceType;
2609 const AddrSwizzleMode swizzleMode = pIn->swizzleMode;
2610
2611 if (m_settings.isDce12)
2612 {
2613 switch (swizzleMode)
2614 {
2615 case ADDR_SW_256B_D:
2616 case ADDR_SW_256B_R:
2617 support = (pIn->bpp == 32);
2618 break;
2619
2620 case ADDR_SW_LINEAR:
2621 case ADDR_SW_4KB_D:
2622 case ADDR_SW_4KB_R:
2623 case ADDR_SW_64KB_D:
2624 case ADDR_SW_64KB_R:
2625 case ADDR_SW_VAR_D:
2626 case ADDR_SW_VAR_R:
2627 case ADDR_SW_4KB_D_X:
2628 case ADDR_SW_4KB_R_X:
2629 case ADDR_SW_64KB_D_X:
2630 case ADDR_SW_64KB_R_X:
2631 case ADDR_SW_VAR_D_X:
2632 case ADDR_SW_VAR_R_X:
2633 support = (pIn->bpp <= 64);
2634 break;
2635
2636 default:
2637 break;
2638 }
2639 }
2640 else
2641 {
2642 ADDR_NOT_IMPLEMENTED();
2643 }
2644
2645 return support;
2646 }
2647
2648 } // V2
2649 } // Addr