2 * Copyright © 2017 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
27 // This class generates the rb id map based on rb id equations
30 // Unlock more verbose debug messages (V* borrows from dj -v * to indicate most verbosity)
31 //#define DPI_DEBUG_V4 1
32 //#define DPI_DEBUG_V5 1
33 //#define DPI_DEBUG_PIPE_CASES 1
34 // "----+----|----+----|----+----|----+----|"
35 #include "addrcommon.h"
// Get_Comp_Block_Screen_Space
// Walks address-equation bits 0 .. bytes_log2-1 of `addr` and, for every
// coordinate term found in those bits, raises *w / *h / *d to (ord + 1) when
// the term's dimension is 'x' / 'y' / 'z' and its order is >= the current
// value. Each of w, h and d is optional: a null pointer skips that dimension.
// NOTE(review): the embedded listing numbers jump from 43 to 49, so the
// opening brace, the declarations of n and i, and any initialisation of the
// outputs are not visible in this view; the closing braces are missing too.
43 VOID
RB_MAP::Get_Comp_Block_Screen_Space( CoordEq
& addr
, int bytes_log2
, int* w
, int* h
, int* d
)
49 for( n
=0; n
<bytes_log2
; n
++ ) { // go up to the bytes_log2 bit
// Visit every coordinate term that feeds address bit n
50 for( i
=0; i
<addr
[n
].getsize(); i
++ ) {
51 char dim
= addr
[n
][i
].getdim();
52 int ord
= addr
[n
][i
].getord();
// Track (highest order seen + 1) per dimension, only for requested outputs
53 if( w
&& dim
== 'x' && ord
>= *w
) *w
= ord
+1;
54 if( h
&& dim
== 'y' && ord
>= *h
) *h
= ord
+1;
55 if( d
&& dim
== 'z' && ord
>= *d
) *d
= ord
+1;
// Get_Meta_Block_Screen_Space
// Starts the meta block dimensions (log2) at the compressed block dimensions
// and then runs num_comp_blocks_log2 iterations, each of which increments one
// of the three output dimensions by one.
//   - height is the candidate when it is smaller than width, or equal to width
//     with y_biased set; otherwise width is the candidate.
//   - the candidate is incremented when !is_thick or when it is <= depth.
// NOTE(review): the listing numbers skip 77, 80 and 83, which presumably hold
// the `else` lines that route the remaining cases to the depth increments;
// the return type line, opening brace, declaration of n and closing braces
// are also not visible here.
61 RB_MAP::Get_Meta_Block_Screen_Space( int num_comp_blocks_log2
, bool is_thick
, bool y_biased
,
62 int comp_block_width_log2
, int comp_block_height_log2
, int comp_block_depth_log2
,
65 int& meta_block_width_log2
, int& meta_block_height_log2
, int& meta_block_depth_log2
)
// Seed the outputs with the compressed block dimensions
67 meta_block_width_log2
= comp_block_width_log2
;
68 meta_block_height_log2
= comp_block_height_log2
;
69 meta_block_depth_log2
= comp_block_depth_log2
;
// One doubling per compressed-block bit
72 for( n
=0; n
<num_comp_blocks_log2
; n
++ ) {
// Height is the candidate: it is behind width, or tied with y bias requested
73 if( (meta_block_height_log2
< meta_block_width_log2
) ||
74 (y_biased
&& (meta_block_height_log2
== meta_block_width_log2
)) ) {
75 if ( !is_thick
|| (meta_block_height_log2
<= meta_block_depth_log2
) )
76 meta_block_height_log2
++;
78 meta_block_depth_log2
++;
// Width is the candidate
81 if ( !is_thick
|| (meta_block_width_log2
<= meta_block_depth_log2
) )
82 meta_block_width_log2
++;
84 meta_block_depth_log2
++;
// cap_pipe
// Adjusts num_pipes_log2 (passed by reference) against the limits visible
// below: pipes + SEs capped at 2^5, and, when xmode != NONE, pipes capped at
// block_size_log2 - pipe_interleave_log2.
// num_ses_log2 and block_size_log2 are also taken by reference, but no
// assignment to them survives in this view.
// NOTE(review): many listing numbers are skipped (95-96, 99-100, 103, 105-106,
// 109, 112-113, 120-122, 126-127, 129+), so the branch structure between the
// visible statements (else lines, closing braces, any writes to num_ses_log2
// or block_size_log2) cannot be read from here. is_thick has no visible use.
90 RB_MAP::cap_pipe( int xmode
, bool is_thick
, int& num_ses_log2
, int bpp_log2
, int num_samples_log2
, int pipe_interleave_log2
, int& block_size_log2
, int& num_pipes_log2
)
92 // pipes+SEs can't exceed 32 for now
93 if( num_pipes_log2
+num_ses_log2
> 5 ) {
94 num_pipes_log2
= 5-num_ses_log2
;
97 // Since we are not supporting SE affinity anymore, just add num_ses to num_pipes, and set num_ses to 0
98 num_pipes_log2
+= num_ses_log2
;
101 // If block size is set to variable (0), compute the size
102 if( block_size_log2
== 0 ) {
104 //TODO Temporary disable till RTL can drive Var signals properly
107 if( xmode
!= NONE
) {
// Number of address bits available for pipes inside one block
108 int max_pipes_log2
= block_size_log2
- pipe_interleave_log2
;
110 // For 3d, treat the num_pipes as the sum of num_pipes and gpus
111 num_pipes_log2
= num_pipes_log2
+ num_ses_log2
;
// Bits consumed by pipe + interleave, with a floor of 10 + bpp_log2
114 int block_space_used
= num_pipes_log2
+pipe_interleave_log2
;
115 if( block_space_used
< 10+bpp_log2
) block_space_used
= 10+bpp_log2
;
116 // if the num gpus exceeds however many bits we have left between block size and block_space_used+num_samples
117 // then set num_ses_log2 to 0
118 if( num_ses_log2
> block_size_log2
- block_space_used
- num_samples_log2
) {
119 num_pipes_log2
= num_pipes_log2
+ num_ses_log2
;
123 if( num_pipes_log2
> max_pipes_log2
) {
124 // If it exceeds the space we have left, cap it to that
125 num_pipes_log2
= max_pipes_log2
;
128 num_pipes_log2
= num_pipes_log2
+ num_ses_log2
;
// Get_Data_Offset_Equation
// Fills data_eq, the per-bit coordinate equation of the data address, for the
// given data_type. Four layouts are distinguished by the flags computed at the
// top:
//   - is_linear : DATA_COLOR1D / DATA_COLOR2D_LINEAR
//   - is_thick  : DATA_COLOR3D_S / DATA_COLOR3D_Z (3D swizzles)
//   - is_color  : DATA_COLOR2D and the 3D colour types
//   - otherwise : Z, stencil or fmask
// A block_size_log2 of 0 is replaced by 16 before use.
// NOTE(review): most loop bodies and several branch headers are missing from
// this view (the listing numbers skip e.g. 144-145, 153-155, 163-165,
// 172-180, 183-195, 200-202, 204-208, 210-221), so only the visible
// statements are described; the linear branch body is not visible at all.
133 void RB_MAP::Get_Data_Offset_Equation( CoordEq
& data_eq
, int data_type
, int bpp_log2
, int num_samples_log2
, int block_size_log2
)
// Classify the surface type
135 bool is_linear
= ( data_type
== DATA_COLOR1D
|| data_type
== DATA_COLOR2D_LINEAR
);
136 bool is_thick
= ( data_type
== DATA_COLOR3D_S
|| data_type
== DATA_COLOR3D_Z
);
137 bool is_color
= ( data_type
== DATA_COLOR2D
|| data_type
== DATA_COLOR3D_S
|| data_type
== DATA_COLOR3D_Z
|| data_type
== DATA_COLOR3D_D_NOT_USED
);
138 bool is_s
= ( data_type
== DATA_COLOR3D_S
);
// Running coordinates for x, y, z and sample, all starting at order 0
139 Coordinate
cx( 'x', 0 );
140 Coordinate
cy( 'y', 0 );
141 Coordinate
cz( 'z', 0 );
142 Coordinate
cs( 's', 0 );
143 // Clear the equation
// Variable block size (0) is treated as 2^16 bytes
146 if( block_size_log2
== 0 ) block_size_log2
= 16;
149 Coordinate
cm( 'm', 0 );
152 for( i
=0; i
<49; i
++ ) {
156 } else if( is_thick
) {
157 // Color 3d (_S and _Z modes; _D is same as color 2d)
160 // Standard 3d swizzle
161 // Fill in bottom x bits
162 for( i
=bpp_log2
; i
<4; i
++ ) {
166 // Fill in 2 bits of y and then z
167 for( i
=4; i
<6; i
++ ) {
171 for( i
=6; i
<8; i
++ ) {
181 } else if( bpp_log2
== 2 ) {
182 // fill in y and x bit
// Last bit of the 2D morton section and number of z bits after it, per bpp
196 int m2d_end
= (bpp_log2
==0) ? 3 : ((bpp_log2
< 4) ? 4 : 5);
197 int num_zs
= (bpp_log2
==0 || bpp_log2
==4) ? 2 : ((bpp_log2
==1) ? 3 : 1);
198 data_eq
.mort2d( cx
, cy
, bpp_log2
, m2d_end
);
199 for( i
=m2d_end
+1; i
<=m2d_end
+num_zs
; i
++ ) {
203 if( bpp_log2
== 0 || bpp_log2
== 3 ) {
209 } else if( bpp_log2
== 2 ) {
222 // Fill in bit 10 and up
223 data_eq
.mort3d( cz
, cy
, cx
, 10 );
224 } else if( is_color
) {
// Number of y bits in the micro tile, and the bit where sample bits start
226 int micro_y_bits
= (8-bpp_log2
) / 2;
227 int tile_split_start
= block_size_log2
- num_samples_log2
;
229 // Fill in bottom x bits
230 for( i
=bpp_log2
;i
<4; i
++ ) {
234 // Fill in bottom y bits
235 for( i
=4; i
<4+micro_y_bits
; i
++ ) {
239 // Fill in last of the micro_x bits
240 for( i
=4+micro_y_bits
; i
<8; i
++ ) {
244 // Fill in x/y bits below sample split
245 data_eq
.mort2d( cy
, cx
, 8, tile_split_start
-1 );
246 // Fill in sample bits
247 for( i
=0; i
<num_samples_log2
; i
++ ) {
249 data_eq
[tile_split_start
+i
].add(cs
);
251 // Fill in x/y bits above sample split
// Which coordinate leads depends on the parity of samples vs. block size
252 if( (num_samples_log2
& 1) ^ (block_size_log2
& 1) ) data_eq
.mort2d( cx
, cy
, block_size_log2
);
253 else data_eq
.mort2d( cy
, cx
, block_size_log2
);
255 // Z, stencil or fmask
256 // First, figure out where each section of bits starts
257 int sample_start
= bpp_log2
;
258 int pixel_start
= bpp_log2
+ num_samples_log2
;
259 int y_maj_start
= 6 + num_samples_log2
;
261 // Put in sample bits
263 for( s
=0; s
<num_samples_log2
; s
++ ) {
265 data_eq
[sample_start
+s
].add(cs
);
267 // Put in the x-major order pixel bits
268 data_eq
.mort2d( cx
, cy
, pixel_start
, y_maj_start
-1 );
269 // Put in the y-major order pixel bits
270 data_eq
.mort2d( cy
, cx
, y_maj_start
);
// Get_RB_Equation
// Builds rb_equation, one entry per RB id bit (num_ses_log2 + num_rbs_log2
// entries), from x/y coordinates that start at order rb_region.
//   rb_region is 5 (32x32 per the comment below) when num_rbs_log2 == 0,
//   i.e. one RB per SE, and 4 (16x16) otherwise.
// With more than one SE and one RB per SE, bit 0 is seeded as a special case.
// The main loop then adds alternating y/x coordinates (even i -> cy,
// odd i -> cx), first to ascending bits and, once start+i reaches the bit
// count, to descending bits.
// FIX: the special case previously tested num_rbs_log2 == 1 (two RBs per SE),
// contradicting both its own comment and the rb_region test above, which use
// log2 == 0 for "1 RB per SE".
// NOTE(review): listing numbers 288-289, 291-292 and 297+ are not in this
// view; presumably they advance cx/cy/start/c and close the braces. They are
// left exactly as captured here, so confirm them against the full file.
274 void RB_MAP::Get_RB_Equation( CoordEq
& rb_equation
, int num_ses_log2
, int num_rbs_log2
)
276 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case it's 32x32
277 int rb_region
= (num_rbs_log2
== 0) ? 5 : 4;
278 Coordinate
cx( 'x', rb_region
);
279 Coordinate
cy( 'y', rb_region
);
280 int i
, start
= 0, num_total_rbs_log2
= num_ses_log2
+ num_rbs_log2
;
281 // Clear the rb equation
282 rb_equation
.resize(0);
283 rb_equation
.resize(num_total_rbs_log2
);
284 if( num_ses_log2
> 0 && num_rbs_log2
== 0 ) {
285 // Special case when more than 1 SE, and only 1 RB per SE
286 rb_equation
[0].add(cx
);
287 rb_equation
[0].add(cy
);
290 rb_equation
[0].add(cy
);
// Two passes over the remaining bits: indices ascend, then mirror back down
293 for( i
=0; i
<2*(num_total_rbs_log2
-start
); i
++ ) {
294 int index
= start
+ (((start
+i
)>=num_total_rbs_log2
) ? 2*(num_total_rbs_log2
-start
)-i
-1 : i
);
// Odd iterations contribute x, even iterations contribute y
295 Coordinate
& c
= ((i
% 2) == 1) ? cx
: cy
;
296 rb_equation
[index
].add(c
);
301 //void getcheq( CoordEq& pipe_equation, CoordEq& addr, int pipe_interleave_log2, int num_pipes_log2,
// Get_Pipe_Equation
// Derives pipe_equation from the data address equation `addr`. The visible
// steps are:
//   1. work on a filtered copy (addr_f) of the address,
//   2. copy num_pipes_log2 bits starting at pipe_interleave_log2 into
//      pipe_equation,
//   3. skip upwards past pipe bits whose first term is below tile_min,
//   4. when xmode != NONE, build xor masks from the bits above the pipe bits
//      (and from z bits for 1xAA non-PRT) and xor them into pipe_equation.
// NOTE(review): the parameter list is only partly visible (listing numbers
// 305-307, 309, 311-313 are skipped; num_pipes_log2 and block_size_log2 are
// used below and presumably declared there), and many statements/conditions
// inside the body are missing, so the exact branch structure is not shown.
303 RB_MAP::Get_Pipe_Equation( CoordEq
& pipe_equation
, CoordEq
& addr
,
304 int pipe_interleave_log2
,
308 int num_samples_log2
,
310 int xmode
, int data_type
314 CoordEq addr_f
, xormask
, xormask2
;
// Default filter threshold: x bit 3
315 Coordinate
tile_min( 'x', 3 );
317 bool is_color
= ( data_type
== DATA_COLOR1D
|| data_type
== DATA_COLOR2D
|| data_type
== DATA_COLOR3D_S
|| data_type
== DATA_COLOR3D_Z
|| data_type
== DATA_COLOR2D_LINEAR
|| data_type
== DATA_COLOR3D_D_NOT_USED
);
318 bool is_thick
= ( data_type
== DATA_COLOR3D_S
|| data_type
== DATA_COLOR3D_Z
);
320 // For color, filter out sample bits only
321 // otherwise filter out everything under an 8x8 tile
323 tile_min
.set( 'x', 0 );
327 // Z/stencil is no longer tile split
329 addr_f
.shift( -num_samples_log2
, block_size_log2
- num_samples_log2
);
// Initial pipe equation: num_pipes_log2 bits starting at the pipe interleave
332 addr_f
.copy( pipe_equation
, pipe_interleave_log2
, num_pipes_log2
); //@todo kr needs num_ses_log2??
335 // This section should only apply to z/stencil, maybe fmask
336 // If the pipe bit is below the comp block size, then keep moving up the address until we find a bit that is above
337 for( pipe
=0; addr_f
[pipe_interleave_log2
+ pipe
][0] < tile_min
; pipe
++ ) {
340 // if pipe is 0, then the first pipe bit is above the comp block size, so we don't need to do anything
341 // Note, this if condition is not necessary, since if we execute the loop when pipe==0, we will get the same pipe equation
346 for( i
=0; i
<num_pipes_log2
; i
++ ) {
347 // Copy the jth bit above pipe interleave to the current pipe equation bit
// NOTE(review): the computation of j is not visible in this view
348 addr_f
[pipe_interleave_log2
+ j
].copyto(pipe_equation
[i
]);
358 // Clear out bits above the block size if prt's are enabled
359 addr_f
.resize(block_size_log2
);
363 if( xmode
!= NONE
) {
// Take 2*num_pipes_log2 bits above the pipe bits and fold pairs into xormask
365 addr_f
.copy( xormask2
, pipe_interleave_log2
+num_pipes_log2
, 2*num_pipes_log2
);
367 xormask
.resize( num_pipes_log2
);
368 for( pipe
=0; pipe
<num_pipes_log2
; pipe
++ ) {
369 xormask
[pipe
].add( xormask2
[2*pipe
] );
370 xormask
[pipe
].add( xormask2
[2*pipe
+1] );
374 // Xor in the bits above the pipe+gpu bits
375 addr_f
.copy( xormask
, pipe_interleave_log2
+ pipe
+ num_pipes_log2
, num_pipes_log2
);
376 if( num_samples_log2
== 0 && (xmode
!= PRT
) ) {
377 // if 1xaa and not prt, then xor in the z bits
379 xormask2
.resize(num_pipes_log2
);
// z bits are assigned in descending order across the pipe bits
380 for( pipe
=0; pipe
<num_pipes_log2
; pipe
++ ) {
381 co
.set( 'z', num_pipes_log2
-1 - pipe
);
382 xormask2
[pipe
].add( co
);
385 pipe_equation
.xorin( xormask2
);
390 pipe_equation
.xorin( xormask
);
// get_meta_miptail_coord
// Advances x / y / z (passed by reference) to the position of mip level
// `mip_in_tail` inside the mip tail of a meta block whose log2 dimensions are
// blk_width_log2 x blk_height_log2 x blk_depth_log2.
// The first tail mip is tracked as full block width, half block height and
// full block depth; after each step the width is halved and height (and
// depth when thick) are set equal to it. Once the mip width is <= 32 a fixed
// table of offsets is applied and the loop is terminated.
// NOTE(review): the statements that actually move x/y/z for the larger mips
// (listing numbers 435-456, apart from the surviving comments) and the
// min_inc branches 404-414 are mostly missing from this view.
395 void RB_MAP::get_meta_miptail_coord( int& x
, int& y
, int& z
, int mip_in_tail
, int blk_width_log2
, int blk_height_log2
, int blk_depth_log2
)
// A non-zero depth marks a thick (3d) block
397 bool is_thick
= (blk_depth_log2
>0);
399 int mip_width
= 1 << blk_width_log2
;
400 int mip_height
= 1 << (blk_height_log2
-1);
401 int mip_depth
= 1 << blk_depth_log2
;
403 // Find the minimal increment, based on the block size and 2d/3d
406 min_inc
= (blk_height_log2
>= 9) ? 128 : ((blk_height_log2
== 8) ? 64 : 32);
407 } else if(blk_height_log2
>=10) {
409 } else if(blk_height_log2
==9) {
415 for( m
=0; m
<mip_in_tail
; m
++ ) {
416 if( mip_width
<= 32 ) {
417 // special case when below 32x32 mipmap
// Remaining tail levels select a fixed offset
418 switch(mip_in_tail
-m
) {
419 case 0: break; // 32x32
420 case 1: x
+=32; break; // 16x16
421 case 2: y
+=32; break; // 8x8
422 case 3: y
+=32; x
+=16; break;// 4x4
423 case 4: y
+=32; x
+=32; break;// 2x2
424 case 5: y
+=32; x
+=48; break;// 1x1
425 // The following are for BC/ASTC formats
426 case 6: y
+=48; break; // 1/2 x 1/2
427 case 7: y
+=48; x
+=16; break;// 1/4 x 1/4
428 case 8: y
+=48; x
+=32; break;// 1/8 x 1/8
429 default:y
+=48; x
+=48; break;// 1/16 x 1/16
431 m
= mip_in_tail
; // break the loop
433 if( mip_width
<= min_inc
) {
434 // if we're below the minimal increment...
436 // For 3d, just go in z direction
439 // For 2d, first go across, then down
440 if( mip_width
* 2 == min_inc
) {
441 // if we're 2 mips below, that's when we go back in x, and down in y
445 // otherwise, just go across in x
450 // On even mip, go down, otherwise, go across
457 // Divide the width by 2
458 mip_width
= mip_width
/ 2;
459 // After the first mip in tail, the mip is always a square
460 mip_height
= mip_width
;
461 // ...or for 3d, a cube
462 if(is_thick
) mip_depth
= mip_width
;
// get_mip_coord
// Thin dispatcher: forwards to get_mip_coord_linear (using the data block
// dimensions and bpp_log2) or to get_mip_coord_nonlinear (using the meta
// block dimensions). x/y/z and surf_width/height/depth are passed through by
// reference to the selected helper.
// NOTE(review): the condition choosing between the two calls (listing
// numbers 472-473 and 476) is not visible; presumably it tests meta_linear,
// the only parameter not forwarded to either helper.
467 void RB_MAP::get_mip_coord( int& x
, int& y
, int& z
, int mip
,
468 int meta_blk_width_log2
, int meta_blk_height_log2
, int meta_blk_depth_log2
,
469 int data_blk_width_log2
, int data_blk_height_log2
,
470 int& surf_width
, int& surf_height
, int& surf_depth
, int epitch
, int max_mip
,
471 int data_type
, int bpp_log2
, bool meta_linear
)
// Linear path: sized by the data block
474 get_mip_coord_linear( x
, y
, z
, mip
, data_blk_width_log2
, data_blk_height_log2
,
475 surf_width
, surf_height
, surf_depth
, epitch
, max_mip
, data_type
, bpp_log2
);
// Non-linear path: sized by the meta block
477 get_mip_coord_nonlinear( x
, y
, z
, mip
, meta_blk_width_log2
, meta_blk_height_log2
, meta_blk_depth_log2
,
478 surf_width
, surf_height
, surf_depth
, epitch
, max_mip
, data_type
);
// get_mip_coord_linear
// Computes the padded surface dimensions (written back through surf_width /
// surf_height) for linear metadata.
// Two cases are visible:
//   - linear data (DATA_COLOR1D / DATA_COLOR2D_LINEAR): width is padded to a
//     multiple of 2^(8 - bpp_log2) elements, mip heights are accumulated into
//     padded_mip_height (each level is ceil(previous / 2)), and epitch is
//     padded the same way as width;
//   - tiled data: width/height are padded up to the data block dimensions and
//     epitch is padded up to the data block width.
// NOTE(review): the branch headers separating these cases and several
// assignments (e.g. to x/y/z, mip_base's declaration, the bodies of the
// "max of epitch" tests) are on listing lines missing from this view; the
// `mip` parameter used at listing line 505 is presumably declared on the
// skipped line 483.
482 void RB_MAP::get_mip_coord_linear( int& x
, int& y
, int& z
,
484 int data_blk_width_log2
, int data_blk_height_log2
,
485 int& surf_width
, int& surf_height
, int& surf_depth
, int epitch
,
486 int max_mip
, int data_type
, int bpp_log2
489 bool data_linear
= ( data_type
== DATA_COLOR1D
|| data_type
== DATA_COLOR2D_LINEAR
);
492 // linear width is padded out to 256 Bytes
// 2^8 bytes expressed in elements of 2^bpp_log2 bytes
493 int width_padding
= 8 - bpp_log2
;
494 int width_pad_mask
= ~(0xffffffff << width_padding
);
495 int padded_surf_width
= surf_width
;
// 1D surfaces are treated as a single row
496 int padded_surf_height
= (data_type
== DATA_COLOR1D
) ? 1 : surf_height
;
499 int mip_width
= padded_surf_width
;
500 int mip_height
= padded_surf_height
;
501 int padded_mip_height
= 0;
// Stack mip levels vertically, remembering where the requested mip starts
504 while( (mip_width
>= 1 || mip_height
>= 1) && m
<= max_mip
) {
505 if( mip
== m
) mip_base
= padded_mip_height
;
506 padded_mip_height
+= mip_height
;
// Next level: halve, rounding up
508 mip_width
= (mip_width
/ 2) + (mip_width
& 1);
509 mip_height
= (mip_height
/ 2) + (mip_height
& 1);
513 mip_base
= padded_mip_height
- mip_height
;
515 padded_surf_height
= padded_mip_height
;
518 padded_surf_height
= epitch
;
// Round the width up to the next multiple of 2^width_padding
521 padded_surf_width
= ((surf_width
>> width_padding
) + ((surf_width
& width_pad_mask
) ? 1 : 0)) << width_padding
;
524 padded_surf_width
= ((surf_width
>> width_padding
) + ((surf_width
& width_pad_mask
) ? 1 : 0)) << width_padding
;
526 // Pad up epitch to meta block width
527 if( (epitch
& width_pad_mask
) != 0 ) {
528 epitch
= ((epitch
>> width_padding
) + 1) << width_padding
;
530 // Take max of epitch and computed surf width
531 if( epitch
< padded_surf_width
) {
534 padded_surf_width
= epitch
;
// Publish the padded dimensions to the caller
538 surf_width
= padded_surf_width
;
539 surf_height
= padded_surf_height
;
542 // padding based data block size
543 int width_pad_mask
= ~(0xffffffff << data_blk_width_log2
);
544 int height_pad_mask
= ~(0xffffffff << data_blk_height_log2
);
546 // Pad the data surface dimensions by the block dimensions, and put the result in compressed block dimension units
547 surf_width
= ((surf_width
>> data_blk_width_log2
) + ((surf_width
& width_pad_mask
) ? 1 : 0)) << data_blk_width_log2
;
548 surf_height
= ((surf_height
>> data_blk_height_log2
) + ((surf_height
& height_pad_mask
) ? 1 : 0)) << data_blk_height_log2
;
550 // Tiled data, linear metadata
552 // we don't allow mipmapping on tiled data, with linear metadata
556 // Pad up epitch to data block width
557 if( (epitch
& width_pad_mask
) != 0 ) {
558 epitch
= ((epitch
>> data_blk_width_log2
) + 1) << data_blk_width_log2
;
560 // Take max of epitch and computed surf width
561 if( epitch
< surf_width
) {
// get_mip_coord_nonlinear
// Computes the origin (x, y, z, by reference) of mip level `mip` and the
// padded surface dimensions (surf_width/height/depth, by reference) for
// non-linear metadata, all in terms of the meta block dimensions.
// Visible flow:
//   1. convert surface dims and epitch to whole meta blocks (rounding up),
//   2. pick the major order (0 = x, 1 = y, 2 = z) from the block counts,
//   3. test whether mip 0 is already in the tail (width <= block width,
//      height <= half block height, depth <= block depth for 3d),
//   4. pad the dimension that stores the mip chain,
//   5. walk levels 0 .. mip-1, moving x/y/z by the current mip size or handing
//      over to get_meta_miptail_coord once in the tail,
//   6. take max(surf_width, epitch) and convert block counts back to elements.
// NOTE(review): the `mip` parameter and several branch headers/switch lines
// (listing numbers 570, 572-573, 575, 628, 631, 633-634, 638-640) are not
// visible here.
569 void RB_MAP::get_mip_coord_nonlinear( int& x
, int& y
, int& z
,
571 int meta_blk_width_log2
, int meta_blk_height_log2
, int meta_blk_depth_log2
,
574 int& surf_width
, int& surf_height
, int& surf_depth
,
576 int epitch
, int max_mip
, int data_type
579 bool is3d
= (data_type
== DATA_COLOR3D_S
|| data_type
== DATA_COLOR3D_Z
|| data_type
== DATA_COLOR3D_D_NOT_USED
);
580 int order
; // 0 = xmajor, 1 = ymajor, 2 = zmajor
// Remember the unpadded mip 0 size before surf_* are converted to blocks
582 int mip_width
= surf_width
;
583 int mip_height
= surf_height
;
584 int mip_depth
= (is3d
) ? surf_depth
: 1;
586 // Divide surface w/h/d by block size, padding if needed
587 surf_width
= (((surf_width
& ((1<<meta_blk_width_log2
)-1)) != 0) ? 1 : 0) + (surf_width
>> meta_blk_width_log2
);
588 surf_height
= (((surf_height
& ((1<<meta_blk_height_log2
)-1)) != 0) ? 1 : 0) + (surf_height
>> meta_blk_height_log2
);
589 surf_depth
= (((surf_depth
& ((1<<meta_blk_depth_log2
)-1)) != 0) ? 1 : 0) + (surf_depth
>> meta_blk_depth_log2
);
590 epitch
= (((epitch
& ((1<<meta_blk_width_log2
)-1)) != 0) ? 1 : 0) + (epitch
>> meta_blk_width_log2
);
593 // Determine major order
594 if( is3d
&& surf_depth
> surf_width
&& surf_depth
> surf_height
) {
595 order
= 2; // Z major
597 else if( surf_width
>= surf_height
) {
598 order
= 0; // X major
601 order
= 1; // Y major
604 // Check if mip 0 is in the tail
605 bool in_tail
= (mip_width
<= (1<<meta_blk_width_log2
)) &&
606 (mip_height
<= (1<<(meta_blk_height_log2
-1))) &&
607 (!is3d
|| (mip_depth
<= (1<<meta_blk_depth_log2
)));
608 // Pad the mip w/h/d, which is just the surf w/h/d times blk dim
609 mip_width
= surf_width
<< meta_blk_width_log2
;
610 mip_height
= surf_height
<< meta_blk_height_log2
;
611 mip_depth
= surf_depth
<< meta_blk_depth_log2
;
614 // Select the dimension that stores the mip chain, based on major order
615 // Then pad it out to max(2, ceil(mip_dim/2))
// mip_dim and order_dim alias surf_* members, so the updates below change them
616 int& mip_dim
= (order
== 1) ? surf_width
: surf_height
;
617 // in y-major, if height > 2 blocks, then we need extra padding;
618 // in x or z major, it only occurs if width/depth is greater than 4 blocks
619 // Height is special, since we can enter the mip tail when height is 1/2 block high
620 int order_dim_limit
= (order
== 1) ? 2 : 4;
621 int& order_dim
= (order
== 0) ? surf_width
: ((order
== 1) ? surf_height
: surf_depth
);
622 if( mip_dim
< 3 && order_dim
> order_dim_limit
&& max_mip
>= 3 ) mip_dim
+= 2;
623 else mip_dim
+= (mip_dim
/2) + (mip_dim
&1);
// Walk the chain up to the requested mip
627 for( m
=0; m
<mip
; m
++ ) {
// Remaining levels live in the tail: delegate and stop
629 get_meta_miptail_coord( x
, y
, z
, mip
-m
, meta_blk_width_log2
, meta_blk_height_log2
, meta_blk_depth_log2
);
630 m
= mip
; // break the loop
632 // Move either x, y, or z by the mip dimension based on which mip we're on and the order
635 case 0: x
+= mip_width
; break;
636 case 1: y
+= mip_height
; break;
637 case 2: z
+= mip_depth
; break;
641 case 0: y
+= mip_height
; break;
642 case 1: x
+= mip_width
; break;
643 case 2: y
+= mip_height
; break;
646 // Compute next mip's dimensions
647 mip_width
= (mip_width
/2);
648 mip_height
= (mip_height
/2);
649 mip_depth
= (mip_depth
/2);
650 // See if it's in the tail
651 in_tail
= (mip_width
<= (1<<meta_blk_width_log2
)) &&
652 (mip_height
<= (1<<(meta_blk_height_log2
-1))) &&
653 (!is3d
|| (mip_depth
<= (1<<meta_blk_depth_log2
)));
654 // Pad out mip dimensions
655 mip_width
= ((mip_width
>> meta_blk_width_log2
) + ((mip_width
& ((1<<meta_blk_width_log2
) -1)) != 0)) << meta_blk_width_log2
;
656 mip_height
= ((mip_height
>> meta_blk_height_log2
) + ((mip_height
& ((1<<meta_blk_height_log2
)-1)) != 0)) << meta_blk_height_log2
;
657 mip_depth
= ((mip_depth
>> meta_blk_depth_log2
) + ((mip_depth
& ((1<<meta_blk_depth_log2
) -1)) != 0)) << meta_blk_depth_log2
;
661 // Take max of epitch and computed surf width
662 surf_width
= (surf_width
> epitch
) ? surf_width
: epitch
;
665 // Multiply the surface dimension by block size
666 surf_width
= surf_width
<< meta_blk_width_log2
;
667 surf_height
= surf_height
<< meta_blk_height_log2
;
668 surf_depth
= surf_depth
<< meta_blk_depth_log2
;
// get_meta_eq
// Builds metaaddr, the per-bit equation of the metadata address, for the
// given surface/hardware configuration. Visible flow:
//   1. classify data_type and meta_alignment into boolean flags,
//   2. cap the pipe bits (cap_pipe), then build the data address equation and
//      the pipe equation, and pick the RB equation from the rb_equation table,
//   3. linear metadata: derive metaaddr from the data address, dividing by the
//      256-byte compressed block and re-inserting pipe bits when pipe aligned,
//   4. otherwise: start from a growing-square / growing-cube order, filter it
//      to the range between compressed block and meta block, remove
//      coordinates already covered by pipe and RB bits, then shift and insert
//      pipe bits, remaining RB bits and uncompressed fragment bits,
//   5. compare the SE bits of metaaddr against the data address / RB equation
//      (the reporting is commented out).
// NOTE(review): several parameters (e.g. data_type, xmode, num_pipes_log2, on
// listing numbers 675, 678-679) and many branch headers, error paths and
// statements are missing from this view, so the comments below describe only
// what the surviving lines show.
673 RB_MAP::get_meta_eq( CoordEq
& metaaddr
,
674 int max_mip
, int num_ses_log2
, int num_rbs_log2
,
676 int block_size_log2
, int bpp_log2
, int num_samples_log2
, int max_comp_frag_log2
,
677 int pipe_interleave_log2
,
680 int meta_alignment
, bool meta_linear
)
684 CoordEq cur_rbeq
, pipe_equation
, orig_pipe_equation
;
// Surface classification flags
686 bool data_linear
= ( data_type
== DATA_COLOR1D
|| data_type
== DATA_COLOR2D_LINEAR
);
687 bool is_color
= ( data_linear
|| data_type
== DATA_COLOR2D
|| data_type
== DATA_COLOR3D_S
|| data_type
== DATA_COLOR3D_Z
|| data_type
== DATA_COLOR3D_D_NOT_USED
);
688 bool is3d
= ( data_type
== DATA_COLOR3D_S
|| data_type
== DATA_COLOR3D_Z
|| data_type
== DATA_COLOR3D_D_NOT_USED
);
689 bool is_thick
= ( data_type
== DATA_COLOR3D_S
|| data_type
== DATA_COLOR3D_Z
);
691 bool is_fmask
= (data_type
== DATA_FMASK
);
// Alignment flags: META_ALIGN_PIPE_RB sets both
692 bool is_pipe_aligned
= (meta_alignment
== META_ALIGN_PIPE
) || (meta_alignment
== META_ALIGN_PIPE_RB
);
693 bool is_rb_aligned
= (meta_alignment
== META_ALIGN_RB
) || (meta_alignment
== META_ALIGN_PIPE_RB
);
695 bool is_mipmapped
= (max_mip
> 0) ? true : false;
// For colour, compressed fragments are capped at max_comp_frag_log2
698 int comp_frag_log2
= (is_color
&& (num_samples_log2
> max_comp_frag_log2
)) ? max_comp_frag_log2
: num_samples_log2
;
700 int uncomp_frag_log2
= num_samples_log2
- comp_frag_log2
;
702 // Constraints on linear
705 num_samples_log2
= 0;
706 is_rb_aligned
= false;
709 if( meta_linear
&& !data_linear
) {
710 is_pipe_aligned
= false;
713 // Min metablock size if thick is 64KB, otherwise 4KB
714 int min_meta_block_size_log2
= (is_thick
) ? 16 : 12;
716 // metadata word size is 1/2 byte for cmask, 1 byte for color, and 4 bytes for z/stencil
717 int metadata_word_size_log2
= (is_fmask
) ? -1 : ((is_color
) ? 0 : 2);
719 int metadata_words_per_page_log2
= min_meta_block_size_log2
- metadata_word_size_log2
;
721 // Get the total # of RB's before modifying due to rb align
722 int num_total_rbs_pre_rb_align_log2
= num_ses_log2
+ num_rbs_log2
;
724 // Cap the pipe bits to block size
725 int num_ses_data_log2
= num_ses_log2
;
726 cap_pipe( xmode
, is_thick
, num_ses_data_log2
, bpp_log2
,
727 num_samples_log2
, pipe_interleave_log2
, block_size_log2
, num_pipes_log2
);
729 // if not pipe aligned, set num_pipes_log2, num_ses_log2 to 0
730 if( !is_pipe_aligned
) {
732 num_ses_data_log2
= 0;
735 // Get the correct data address and rb equation
// Linear metadata uses the DATA_COLOR1D data equation regardless of data_type
737 Get_Data_Offset_Equation( dataaddr
,
738 (meta_linear
) ? DATA_COLOR1D
: data_type
,
739 bpp_log2
, num_samples_log2
, block_size_log2
);
742 // if not rb aligned, set num_ses_log2/rbs_log2 to 0; note, this is done after generating the data equation
743 if( !is_rb_aligned
) {
748 // Get pipe and rb equations
749 Get_Pipe_Equation( pipe_equation
, dataaddr
, pipe_interleave_log2
,
750 num_pipes_log2
, block_size_log2
, num_samples_log2
, xmode
, data_type
);
// RB equation is looked up by (SE count, RB count), both log2
752 CoordEq
& this_rbeq
= rb_equation
[num_ses_log2
][num_rbs_log2
];
754 num_pipes_log2
= pipe_equation
.getsize();
757 dataaddr
.copy( metaaddr
);
759 if( is_pipe_aligned
) {
760 // Remove the pipe bits
761 metaaddr
.shift( -num_pipes_log2
, pipe_interleave_log2
);
763 // Divide by comp block size, which for linear (which is always color) is 256 B
764 metaaddr
.shift( -8 );
765 if( is_pipe_aligned
) {
766 // Put pipe bits back in
767 metaaddr
.shift( num_pipes_log2
, pipe_interleave_log2
);
769 for( i
=0; i
<num_pipes_log2
; i
++ ) {
770 pipe_equation
[i
].copyto(metaaddr
[pipe_interleave_log2
+i
]);
778 int i
, j
, k
, old_size
, new_size
;
779 int num_total_rbs_log2
= num_ses_log2
+ num_rbs_log2
;
781 // For non-color surfaces, compressed block size is always 8x8; for color, it's always a 256 bytes sized region
782 int comp_blk_width_log2
= 3, comp_blk_height_log2
= 3, comp_blk_depth_log2
= 0;
783 int comp_blk_size_log2
= 8;
785 // For color surfaces, compute the comp block width, height, and depth
786 // For non-color surfaces, compute the comp block size
788 Get_Comp_Block_Screen_Space( dataaddr
, comp_blk_size_log2
, &comp_blk_width_log2
, &comp_blk_height_log2
, &comp_blk_depth_log2
);
789 metadata_words_per_page_log2
-= num_samples_log2
; // factor out num fragments for color surfaces
792 comp_blk_size_log2
= 6 + num_samples_log2
+ bpp_log2
;
795 // Compute meta block width and height
// NOTE(review): despite its name this variable holds a log2 count
796 int num_comp_blks_per_meta_blk
;
797 if (num_pipes_log2
==0 && num_ses_log2
==0 && num_rbs_log2
==0) {
798 num_comp_blks_per_meta_blk
= metadata_words_per_page_log2
;
801 num_comp_blks_per_meta_blk
= num_total_rbs_pre_rb_align_log2
+ ((is_thick
) ? 18 : 10);
// Clamp so that blocks * block size stays within 2^(27 + bpp_log2)
803 if( num_comp_blks_per_meta_blk
+ comp_blk_size_log2
> 27+bpp_log2
)
804 num_comp_blks_per_meta_blk
= 27+bpp_log2
- comp_blk_size_log2
;
// Never go below one page of metadata words
806 if( metadata_words_per_page_log2
> num_comp_blks_per_meta_blk
)
807 num_comp_blks_per_meta_blk
= metadata_words_per_page_log2
;
810 int meta_block_width_log2
, meta_block_height_log2
, meta_block_depth_log2
;
811 Get_Meta_Block_Screen_Space( num_comp_blks_per_meta_blk
, is_thick
, is_mipmapped
, // mipmaps should be y-biased
812 comp_blk_width_log2
, comp_blk_height_log2
, comp_blk_depth_log2
,
813 meta_block_width_log2
, meta_block_height_log2
, meta_block_depth_log2
);
815 // Make sure the metaaddr is cleared
819 //------------------------------------------------------------------------------------------------------------------------
820 // Use the growing square or growing cube order for thick as a starting point for the metadata address
821 //------------------------------------------------------------------------------------------------------------------------
823 Coordinate
cx( 'x', 0 );
824 Coordinate
cy( 'y', 0 );
825 Coordinate
cz( 'z', 0 );
827 metaaddr
.mort3d( cy
, cx
, cz
);
829 metaaddr
.mort3d( cx
, cy
, cz
);
833 Coordinate
cx( 'x', 0 );
834 Coordinate
cy( 'y', 0 );
838 metaaddr
.mort2d( cy
, cx
, comp_frag_log2
);
840 metaaddr
.mort2d( cx
, cy
, comp_frag_log2
);
843 //------------------------------------------------------------------------------------------------------------------------
844 // Put the compressible fragments at the lsb
845 // the uncompressible frags will be at the msb of the micro address
846 //------------------------------------------------------------------------------------------------------------------------
848 for( s
=0; s
<comp_frag_log2
; s
++ ) {
854 // Keep a copy of the pipe and rb equations
855 this_rbeq
.copy( cur_rbeq
);
856 pipe_equation
.copy( orig_pipe_equation
);
858 // filter out everything under the compressed block size
859 co
.set( 'x', comp_blk_width_log2
);
860 metaaddr
.Filter( '<', co
, 0, 'x' );
861 co
.set( 'y', comp_blk_height_log2
);
862 metaaddr
.Filter( '<', co
, 0, 'y' );
863 co
.set( 'z', comp_blk_depth_log2
);
864 metaaddr
.Filter( '<', co
, 0, 'z' );
865 // For non-color, filter out sample bits
868 metaaddr
.Filter( '<', co
, 0, 's' );
871 // filter out everything above the metablock size
872 co
.set( 'x', meta_block_width_log2
-1 );
873 metaaddr
.Filter( '>', co
, 0, 'x' );
874 co
.set( 'y', meta_block_height_log2
-1 );
875 metaaddr
.Filter( '>', co
, 0, 'y' );
876 co
.set( 'z', meta_block_depth_log2
-1 );
877 metaaddr
.Filter( '>', co
, 0, 'z' );
879 // filter out everything above the metablock size for the channel bits
880 co
.set( 'x', meta_block_width_log2
-1 );
881 pipe_equation
.Filter( '>', co
, 0, 'x' );
882 co
.set( 'y', meta_block_height_log2
-1 );
883 pipe_equation
.Filter( '>', co
, 0, 'y' );
884 co
.set( 'z', meta_block_depth_log2
-1 );
885 pipe_equation
.Filter( '>', co
, 0, 'z' );
887 // Make sure we still have the same number of channel bits
888 if( pipe_equation
.getsize() != static_cast<UINT_32
>(num_pipes_log2
) ) {
892 // Loop through all channel and rb bits, and make sure these components exist in the metadata address
893 for( i
=0; i
<num_pipes_log2
; i
++ ) {
894 for( j
=pipe_equation
[i
].getsize()-1; j
>=0; j
-- ) {
895 if( !metaaddr
.Exists( pipe_equation
[i
][j
] ) ) {
900 for( i
=0; i
<num_total_rbs_log2
; i
++ ) {
901 for( j
=cur_rbeq
[i
].getsize()-1; j
>=0; j
-- ) {
902 if( !metaaddr
.Exists( cur_rbeq
[i
][j
] ) ) {
908 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
909 int old_rb_bits_left
= num_total_rbs_log2
;
910 for( i
=0; i
<num_total_rbs_log2
; i
++ ) {
911 for(j
=0; j
<num_pipes_log2
; j
++ ) {
912 if( cur_rbeq
[i
] == pipe_equation
[j
] ) {
915 // Mark which pipe bit caused the RB bit to be dropped
916 pipe_mask
|= (1 << j
);
921 // Loop through each bit of the channel, get the smallest coordinate, and remove it from the metaaddr, and rb_equation
922 for( i
=0; i
<num_pipes_log2
; i
++ ) {
923 pipe_equation
[i
].getsmallest( co
);
// Filtering must drop exactly one bit from metaaddr
925 old_size
= metaaddr
.getsize();
926 metaaddr
.Filter( '=', co
);
927 new_size
= metaaddr
.getsize();
928 if( new_size
!= old_size
-1 ) {
931 pipe_equation
.remove( co
);
932 for( j
=0; j
<num_total_rbs_log2
; j
++ ) {
933 if( cur_rbeq
[j
].remove( co
) ) {
934 // if we actually removed something from this bit, then add the remaining
935 // channel bits, as these can be removed for this bit
936 for( k
=0; k
<pipe_equation
[i
].getsize(); k
++ ) {
937 if( pipe_equation
[i
][k
] != co
) {
938 cur_rbeq
[j
].add( pipe_equation
[i
][k
] );
941 // if the rb bit is still empty, then we have to mark all pipe bits as affecting the RB
942 if( cur_rbeq
[j
].getsize() == 0 ) {
943 pipe_mask
= (1 << num_pipes_log2
) - 1;
949 // Loop through the rb bits and see what remain; filter out the smallest coordinate if it remains
950 int rb_bits_left
= 0;
951 for( i
=0; i
<num_total_rbs_log2
; i
++ ) {
952 if( cur_rbeq
[i
].getsize() > 0 ) {
954 cur_rbeq
[i
].getsmallest( co
);
955 old_size
= metaaddr
.getsize();
956 metaaddr
.Filter( '=', co
);
957 new_size
= metaaddr
.getsize();
958 if( new_size
!= old_size
-1 ) {
// Propagate the removal to the higher rb bits only
961 for( j
=i
+1; j
<num_total_rbs_log2
; j
++ ) {
962 if( cur_rbeq
[j
].remove( co
) ) {
963 // if we actually removed something from this bit, then add the remaining
964 // rb bits, as these can be removed for this bit
965 for( k
=0; k
<cur_rbeq
[i
].getsize(); k
++ ) {
966 if( cur_rbeq
[i
][k
] != co
) {
967 cur_rbeq
[j
].add( cur_rbeq
[i
][k
] );
975 // capture the size of the metaaddr
976 i
= metaaddr
.getsize();
977 // resize to 49 bits...make this a nibble address
979 // Concatenate the macro address above the current address
980 for( j
=0; i
<49; i
++, j
++ ) {
982 metaaddr
[i
].add( co
);
985 // Multiply by meta element size (in nibbles)
987 metaaddr
.shift( 1 ); // Byte size element
988 } else if( data_type
== DATA_Z_STENCIL
) {
989 metaaddr
.shift( 3 ); // 4 Byte size elements
992 //------------------------------------------------------------------------------------------------------------------------
993 // Note the pipe_interleave_log2+1 is because address is a nibble address
994 // Shift up from pipe interleave number of channel and rb bits left, and uncompressed fragments
995 //------------------------------------------------------------------------------------------------------------------------
997 metaaddr
.shift( num_pipes_log2
+ rb_bits_left
+ uncomp_frag_log2
,
998 pipe_interleave_log2
+1 );
1000 // Put in the channel bits
1001 for( i
=0; i
<num_pipes_log2
; i
++ ) {
1002 orig_pipe_equation
[i
].copyto( metaaddr
[pipe_interleave_log2
+1 + i
] );
1005 // Put in remaining rb bits
// i cycles through the rb bits; NOTE(review): j's increment is not visible here
1007 for( j
=0; j
<rb_bits_left
; i
=(i
+1) % num_total_rbs_log2
) {
1008 if( cur_rbeq
[i
].getsize() > 0 ) {
1009 rb_equation
[num_ses_log2
][num_rbs_log2
][i
].copyto( metaaddr
[pipe_interleave_log2
+1 + num_pipes_log2
+ j
] );
1010 // Mark any rb bit we add in to the rb mask
1015 //------------------------------------------------------------------------------------------------------------------------
1016 // Put in the uncompressed fragment bits
1017 //------------------------------------------------------------------------------------------------------------------------
1018 for( i
=0; i
<uncomp_frag_log2
; i
++ ) {
1019 co
.set( 's', comp_frag_log2
+i
);
1020 metaaddr
[pipe_interleave_log2
+1 + num_pipes_log2
+ rb_bits_left
+ i
].add( co
);
1024 //------------------------------------------------------------------------------------------------------------------------
1025 // Check that the metadata SE bits match the data address
1026 //------------------------------------------------------------------------------------------------------------------------
1027 for( i
=0; i
<num_ses_data_log2
; i
++ ) {
// Guard against a negative index into the RB equation
1028 if(num_total_rbs_log2
-num_ses_data_log2
+i
>= 0){
1029 if( metaaddr
[ pipe_interleave_log2
+1 + num_pipes_log2
-num_ses_data_log2
+ i
] != dataaddr
[ pipe_interleave_log2
+ num_pipes_log2
-num_ses_data_log2
+ i
] ||
1030 metaaddr
[ pipe_interleave_log2
+1 + num_pipes_log2
-num_ses_data_log2
+ i
] != rb_equation
[num_ses_log2
][num_rbs_log2
][num_total_rbs_log2
-num_ses_data_log2
+i
]) {
1031 //FIXME: Removed to prevent logs from growing large in size // cout << "Warning: GPU bit " << i << " differs from data addr or RB equation on " << data_name << title << endl;
1032 //FIXME: Removed to prevent logs from growing large in size // cout << " Data: " << dataaddr[ pipe_interleave_log2 + num_pipes_log2-num_ses_data_log2 + i ] << endl;
1033 //FIXME: Removed to prevent logs from growing large in size // cout << "MData: " << metaaddr[ pipe_interleave_log2+1 + num_pipes_log2-num_ses_data_log2 + i ] << endl;
1034 //FIXME: Removed to prevent logs from growing large in size // cout << " RBeq: " << rb_equation[num_ses_log2][num_rbs_log2][num_total_rbs_log2-num_ses_data_log2+i] << endl;
1035 //FIXME: Removed to prevent logs from growing large in size // cout << " Pipe: " << orig_pipe_equation << endl;
1036 //FIXME: Removed to prevent logs from growing large in size // cout << " DEq: " << dataaddr << endl;
// get_meta_addr_calc: computes the metadata address, expressed in nibbles, of
// sample s at position (x, y, z) inside a mip level.
//
// Unlike get_meta_addr, the mip origin (x_mip_org, y_mip_org, z_mip_org), the
// pitch/slice and the meta block dimensions (l2_metablk_w/h/d, log2) are taken
// from the caller instead of being derived here.
//
//   x, y, z, s            position and sample index within the mip level
//   surf_base             surface base; shifted left by one to form a nibble address
//   element_bytes_log2    log2 element size, used below as bpp_log2
//   pitch, slice          scaled down by the block dimensions before indexing
//   pipe_xor              masked to num_pipes_data_log2 bits and XORed into the offset
//
// NOTE(review): addresses are carried in `long`, which is 32 bits wide on some
// data models -- confirm the target range fits.
1043 RB_MAP::get_meta_addr_calc( int x
, int y
, int z
, int s
,
1044 long surf_base
, int element_bytes_log2
, int num_samples_log2
, int max_comp_frag_log2
,
1045 long pitch
, long slice
,
1049 int xmode
, int pipe_xor
, int block_size_log2
,
1051 /*int num_banks_log2,*/
1053 int pipe_interleave_log2
,
1057 int x_mip_org
, int y_mip_org
, int z_mip_org
,
1059 int num_ses_log2
, int num_rbs_log2
,
1060 /*bool se_affinity_enable, */
1064 int l2_metablk_w
, int l2_metablk_h
, int l2_metablk_d
,
// Local aliases for the caller-supplied element size and mip origin.
1068 int bpp_log2
= element_bytes_log2
;
1069 int mip_base_x
= x_mip_org
;
1070 int mip_base_y
= y_mip_org
;
1071 int mip_base_z
= z_mip_org
;
// Fixed to false here; the matching parameter is commented out in the signature.
1075 bool se_affinity_enable
= false;
1076 //int max_pipe_bytes = std::max(1<<num_pipes_log2 * 1<<pipe_interleave_log2, 1024 * 1<<log2_element_bytes);
1077 //int max_banks_samples = std::max(1<<num_banks_log2, 1<<num_samples_log2);
1078 //int block_size_log2 = max(4096, max_pipe_bytes * max_bank_samples * 1<<num_ses_log2);
// Classify the surface from data_type and the requested alignment from meta_alignment.
1080 bool data_linear
= ( data_type
== DATA_COLOR1D
|| data_type
== DATA_COLOR2D_LINEAR
);
1081 bool is_color
= ( data_linear
|| data_type
== DATA_COLOR2D
|| data_type
== DATA_COLOR3D_S
|| data_type
== DATA_COLOR3D_Z
|| data_type
== DATA_COLOR3D_D_NOT_USED
);
1082 bool is_thick
= ( data_type
== DATA_COLOR3D_S
|| data_type
== DATA_COLOR3D_Z
);
1083 bool is_fmask
= (data_type
== DATA_FMASK
);
1085 bool is_pipe_aligned
= (meta_alignment
== META_ALIGN_PIPE
) || (meta_alignment
== META_ALIGN_PIPE_RB
);
1086 bool is_rb_aligned
= (meta_alignment
== META_ALIGN_RB
) || (meta_alignment
== META_ALIGN_PIPE_RB
);
1091 if ( !data_linear
&& meta_linear
)
1094 // Min metablock size if thick is 64KB, otherwise 4KB
1095 int min_meta_block_size_log2
= (is_thick
) ? 16 : 12;
1097 // metadata word size is 1/2 byte for cmask, 1 byte for color, and 4 bytes for z/stencil
1098 int metadata_word_size_log2
= (is_fmask
) ? -1 : ((is_color
) ? 0 : 2);
1099 int metadata_words_per_page_log2
= min_meta_block_size_log2
- metadata_word_size_log2
;
// Separate *_data copies are handed to cap_pipe(); get_meta_eq() below is still
// given the original num_ses_log2 / num_pipes_log2 / block_size_log2.
// NOTE(review): presumably cap_pipe() adjusts the copies in place -- confirm
// against its declaration.
1101 int num_ses_data_log2
= num_ses_log2
;
1102 int block_size_data_log2
= block_size_log2
;
1103 int num_pipes_data_log2
= num_pipes_log2
;
1105 //int num_banks_data_log2 = num_banks_log2;
1106 cap_pipe( xmode
, is_thick
, num_ses_data_log2
, bpp_log2
, num_samples_log2
, pipe_interleave_log2
, block_size_data_log2
, num_pipes_data_log2
/*, num_banks_data_log2 */);
1108 // Get the correct data address and rb equation
1110 Get_Data_Offset_Equation( dataaddr
, data_type
, bpp_log2
, num_samples_log2
, block_size_data_log2
);
1112 get_meta_eq( metaaddr
, max_mip
, num_ses_log2
, num_rbs_log2
, num_pipes_log2
, /*num_banks_log2,*/ block_size_log2
,
1113 bpp_log2
, num_samples_log2
, max_comp_frag_log2
, pipe_interleave_log2
, xmode
,
1114 data_type
, meta_alignment
, meta_linear
);
1115 // For non-color surfaces, compressed block size is always 8x8; for color, it's always a 256 bytes sized region
1116 int comp_blk_width_log2
= 3, comp_blk_height_log2
= 3, comp_blk_depth_log2
= 0;
1117 int comp_blk_size_log2
= 8;
1120 Get_Comp_Block_Screen_Space( dataaddr
, comp_blk_size_log2
, &comp_blk_width_log2
, &comp_blk_height_log2
, &comp_blk_depth_log2
);
1121 metadata_words_per_page_log2
-= num_samples_log2
; // factor out num fragments for color surfaces
// 8x8 block: 2^6 elements, scaled by sample count and element size (all log2).
1124 comp_blk_size_log2
= 6 + num_samples_log2
+ bpp_log2
;
1127 // Compute meta block width and height
1128 int num_total_rbs_log2
= num_ses_log2
+ num_rbs_log2
;
1129 int num_comp_blks_per_meta_blk
;
// No effective pipe or RB alignment: a meta block holds one page worth of words.
1130 if((!is_pipe_aligned
|| num_pipes_log2
==0) && (!is_rb_aligned
|| (num_ses_log2
==0 && num_rbs_log2
==0))) {
1131 num_comp_blks_per_meta_blk
= metadata_words_per_page_log2
;
1134 num_comp_blks_per_meta_blk
= num_total_rbs_log2
+ ((is_thick
) ? 18 : 10);
// Cap so that (blocks per meta block + block size) does not exceed 27 + bpp_log2.
1135 if( num_comp_blks_per_meta_blk
+ comp_blk_size_log2
> 27+bpp_log2
) num_comp_blks_per_meta_blk
= 27+bpp_log2
- comp_blk_size_log2
;
// ...but never smaller than the per-page word count.
1136 if( metadata_words_per_page_log2
> num_comp_blks_per_meta_blk
)
1137 num_comp_blks_per_meta_blk
= metadata_words_per_page_log2
;
1140 int meta_block_width_log2
, meta_block_height_log2
, meta_block_depth_log2
;
1142 //@@todo kr missing meta_block_width*
1144 // Get the data block size
1145 int data_block_width_log2
, data_block_height_log2
, data_block_depth_log2
;
// First argument is the log2 number of compressed blocks in one data block.
1147 Get_Meta_Block_Screen_Space( block_size_log2
- comp_blk_size_log2
,
1149 comp_blk_width_log2
, comp_blk_height_log2
, comp_blk_depth_log2
,
1150 data_block_width_log2
, data_block_height_log2
, data_block_depth_log2
);
// Meta block dimensions are taken from the caller, not derived here.
1152 meta_block_width_log2
= l2_metablk_w
;
1153 meta_block_height_log2
= l2_metablk_h
;
1154 meta_block_depth_log2
= l2_metablk_d
;
// Absolute position: mip origin plus position within the mip.
1156 int meta_x
= mip_base_x
+ x
;
1157 int meta_y
= mip_base_y
+ y
;
1158 int meta_z
= mip_base_z
+ z
;
1162 // Tiled data, linear metadata
1163 meta_x
= meta_x
>> comp_blk_width_log2
;
1164 meta_y
= meta_y
>> comp_blk_height_log2
;
1165 meta_z
= meta_z
>> comp_blk_depth_log2
;
1166 pitch
= pitch
>> comp_blk_width_log2
;
1167 slice
= slice
>> (comp_blk_width_log2
+ comp_blk_height_log2
);
1170 meta_x
= meta_x
<< bpp_log2
;
1171 meta_y
= meta_y
<< bpp_log2
;
1172 meta_z
= meta_z
<< bpp_log2
;
// Convert coordinates, pitch and slice to units of whole meta blocks.
1176 meta_x
= meta_x
>> meta_block_width_log2
;
1177 meta_y
= meta_y
>> meta_block_height_log2
;
1178 meta_z
= meta_z
>> meta_block_depth_log2
;
1180 pitch
= pitch
>> meta_block_width_log2
;
1181 slice
= slice
>> (meta_block_width_log2
+ meta_block_height_log2
);
// Linear block index x + y*pitch + z*slice; operands are widened to long
// before multiplying.
1184 long macroaddr
= (long)meta_x
+ (long)meta_y
*(long)pitch
+ (long)meta_z
*(long)slice
;
// Low bits of the mip origin, i.e. its offset inside a meta block.
1186 int mip_tail_x
, mip_tail_y
, mip_tail_z
;
1187 mip_tail_x
= mip_base_x
& ((1 << meta_block_width_log2
)-1);
1188 mip_tail_y
= mip_base_y
& ((1 << meta_block_height_log2
)-1);
1189 mip_tail_z
= mip_base_z
& ((1 << meta_block_depth_log2
)-1);
// Coordinates handed to the equation: position within the mip plus that offset.
1191 int mip_x
= x
+ mip_tail_x
;
1192 int mip_y
= y
+ mip_tail_y
;
1193 int mip_z
= z
+ mip_tail_z
;
1195 // the pipe_interleave_log2+1 is because we are dealing with nibble addresses
1196 long pipe_xor_mask
= (pipe_xor
& ((1 << num_pipes_data_log2
)-1)) << (pipe_interleave_log2
+1);
1198 // shift surf_base to make it a nibble address
1199 long meta_offset_from_base_nibble_address
= metaaddr
.solve( mip_x
, mip_y
, mip_z
, s
, macroaddr
);
// Apply the pipe XOR to the offset, then add the base converted to nibbles (<< 1).
1201 long address
= (surf_base
<< 1) + (meta_offset_from_base_nibble_address
^ pipe_xor_mask
);
// get_meta_addr: computes the metadata address, expressed in nibbles, of
// sample s at position (x, y, z) of mip level `mip`.
//
// The meta block and data block dimensions are derived here with
// Get_Meta_Block_Screen_Space, and the mip origin is obtained through
// get_mip_coord; get_meta_addr_calc instead takes those values from its caller.
//
//   x, y, z, s, mip                 position, sample index and mip level
//   surf_width/height/depth, lpitch surface extent, forwarded to get_mip_coord
//   surf_base                       surface base; shifted left by one to form a nibble address
//   pipe_xor                        masked to num_pipes_data_log2 bits and XORed into the offset
//   max_mip                         > 0 marks the surface as mipmapped; may be forced to 0 below
//   data_type, meta_alignment       select surface class and pipe/RB alignment
//   meta_linear                     forced to true when the data itself is linear
//
// NOTE(review): addresses are carried in `long`, which is 32 bits wide on some
// data models -- confirm the target range fits.
1208 RB_MAP::get_meta_addr( int x
, int y
, int z
, int s
, int mip
,
1209 int surf_width
, int surf_height
, int surf_depth
, int lpitch
,
1210 long surf_base
, int pipe_xor
, int max_mip
,
1211 int num_ses_log2
, int num_rbs_log2
, int num_pipes_log2
,
1212 int block_size_log2
, int bpp_log2
, int num_samples_log2
, int max_comp_frag_log2
,
1213 int pipe_interleave_log2
, int xmode
, int data_type
, int meta_alignment
, bool meta_linear
)
// Classify the surface from data_type and the requested alignment from meta_alignment.
1217 bool data_linear
= ( data_type
== DATA_COLOR1D
|| data_type
== DATA_COLOR2D_LINEAR
);
1218 bool is_color
= ( data_linear
|| data_type
== DATA_COLOR2D
|| data_type
== DATA_COLOR3D_S
|| data_type
== DATA_COLOR3D_Z
|| data_type
== DATA_COLOR3D_D_NOT_USED
);
1219 bool is_thick
= ( data_type
== DATA_COLOR3D_S
|| data_type
== DATA_COLOR3D_Z
);
1220 bool is_fmask
= (data_type
== DATA_FMASK
);
1222 bool is_pipe_aligned
= (meta_alignment
== META_ALIGN_PIPE
) || (meta_alignment
== META_ALIGN_PIPE_RB
);
1223 bool is_rb_aligned
= (meta_alignment
== META_ALIGN_RB
) || (meta_alignment
== META_ALIGN_PIPE_RB
);
// Evaluated before max_mip is possibly cleared below.
1225 bool is_mipmapped
= (max_mip
> 0) ? true : false;
// Linear data forces linear metadata.
1227 if( data_linear
) meta_linear
= true;
1228 // Don't allow mipmapping on the tiled data, meta linear case
1229 // or if we have linear 2d/3d surface
1231 #ifdef ADDRESS__LPITCH_DISABLE__0
1232 if( (!data_linear
&& meta_linear
) || (data_type
== DATA_COLOR2D_LINEAR
) ) max_mip
= 0;
1234 if( !data_linear
&& meta_linear
) max_mip
= 0;
1237 // Min metablock size if thick is 64KB, otherwise 4KB
1238 int min_meta_block_size_log2
= (is_thick
) ? 16 : 12;
1241 // metadata word size is 1/2 byte for cmask, 1 byte for color, and 4 bytes for z/stencil
1242 int metadata_word_size_log2
= (is_fmask
) ? -1 : ((is_color
) ? 0 : 2);
1243 int metadata_words_per_page_log2
= min_meta_block_size_log2
- metadata_word_size_log2
;
1245 // Cap the pipe bits to block size
// Separate *_data copies are handed to cap_pipe(); get_meta_eq() below is still
// given the original num_ses_log2 / num_pipes_log2 / block_size_log2.
// NOTE(review): presumably cap_pipe() adjusts the copies in place -- confirm
// against its declaration.
1246 int num_ses_data_log2
= num_ses_log2
;
1247 int block_size_data_log2
= block_size_log2
;
1248 int num_pipes_data_log2
= num_pipes_log2
;
1250 cap_pipe( xmode
, is_thick
, num_ses_data_log2
, bpp_log2
, num_samples_log2
, pipe_interleave_log2
, block_size_data_log2
, num_pipes_data_log2
);
1252 // Get the correct data address and rb equation
1254 Get_Data_Offset_Equation( dataaddr
, data_type
, bpp_log2
, num_samples_log2
, block_size_data_log2
);
1256 get_meta_eq( metaaddr
, max_mip
, num_ses_log2
, num_rbs_log2
, num_pipes_log2
, block_size_log2
,
1257 bpp_log2
, num_samples_log2
, max_comp_frag_log2
, pipe_interleave_log2
, xmode
, data_type
,
1258 meta_alignment
, meta_linear
);
1260 // For non-color surfaces, compressed block size is always 8x8; for color, it's always a 256 bytes sized region
1261 int comp_blk_width_log2
= 3, comp_blk_height_log2
= 3, comp_blk_depth_log2
= 0;
1262 int comp_blk_size_log2
= 8;
1265 Get_Comp_Block_Screen_Space( dataaddr
, comp_blk_size_log2
, &comp_blk_width_log2
, &comp_blk_height_log2
, &comp_blk_depth_log2
);
1266 metadata_words_per_page_log2
-= num_samples_log2
; // factor out num fragments for color surfaces
// 8x8 block: 2^6 elements, scaled by sample count and element size (all log2).
1268 comp_blk_size_log2
= 6 + num_samples_log2
+ bpp_log2
;
1271 // Compute meta block width and height
1272 int num_total_rbs_log2
= num_ses_log2
+ num_rbs_log2
;
1274 int num_comp_blks_per_meta_blk
;
// No effective pipe or RB alignment: a meta block holds one page worth of words.
1275 if((!is_pipe_aligned
|| num_pipes_log2
==0) && (!is_rb_aligned
|| (num_ses_log2
==0 && num_rbs_log2
==0))) {
1276 num_comp_blks_per_meta_blk
= metadata_words_per_page_log2
;
1279 num_comp_blks_per_meta_blk
= num_total_rbs_log2
+ ((is_thick
) ? 18 : 10);
// Cap so that (blocks per meta block + block size) does not exceed 27 + bpp_log2.
1281 if( num_comp_blks_per_meta_blk
+ comp_blk_size_log2
> 27+bpp_log2
) num_comp_blks_per_meta_blk
= 27+bpp_log2
- comp_blk_size_log2
;
// ...but never smaller than the per-page word count.
1283 if( metadata_words_per_page_log2
> num_comp_blks_per_meta_blk
)
1284 num_comp_blks_per_meta_blk
= metadata_words_per_page_log2
;
1287 int meta_block_width_log2
, meta_block_height_log2
, meta_block_depth_log2
;
// Meta block dimensions, derived from the number of compressed blocks per meta block.
1290 Get_Meta_Block_Screen_Space( num_comp_blks_per_meta_blk
, is_thick
, is_mipmapped
,
1291 comp_blk_width_log2
, comp_blk_height_log2
, comp_blk_depth_log2
,
1292 meta_block_width_log2
, meta_block_height_log2
, meta_block_depth_log2
);
1294 // Get the data block size
1295 int data_block_width_log2
, data_block_height_log2
, data_block_depth_log2
;
// Same helper, but with the compressed-block count of one data block and a
// constant `true` where the meta block call passes is_mipmapped.
1297 Get_Meta_Block_Screen_Space( block_size_log2
- comp_blk_size_log2
, is_thick
, true,
1298 comp_blk_width_log2
, comp_blk_height_log2
, comp_blk_depth_log2
,
1299 data_block_width_log2
, data_block_height_log2
, data_block_depth_log2
);
1301 int meta_x
, meta_y
, meta_z
;
// Local copies of the surface extent; width and height are rescaled below.
1302 int meta_surf_width
= surf_width
;
1303 int meta_surf_height
= surf_height
;
1304 int meta_surf_depth
= surf_depth
;
// NOTE(review): presumably get_mip_coord() writes the origin of mip level `mip`
// into mip_base_x/y/z (zero-initialised here) -- confirm against its declaration.
1306 int mip_base_x
=0, mip_base_y
=0, mip_base_z
=0;
1307 get_mip_coord( mip_base_x
, mip_base_y
, mip_base_z
, mip
,
1308 meta_block_width_log2
, meta_block_height_log2
, meta_block_depth_log2
,
1309 data_block_width_log2
, data_block_height_log2
,
1310 meta_surf_width
, meta_surf_height
, meta_surf_depth
, lpitch
, max_mip
,
1311 data_type
, bpp_log2
, meta_linear
);
// Absolute position: mip origin plus position within the mip.
1313 meta_x
= mip_base_x
+ x
;
1314 meta_y
= mip_base_y
+ y
;
1315 meta_z
= mip_base_z
+ z
;
1318 if( !data_linear
) {
1319 // Tiled data, linear metadata
1320 meta_x
= meta_x
>> comp_blk_width_log2
;
1321 meta_y
= meta_y
>> comp_blk_height_log2
;
1322 meta_z
= meta_z
>> comp_blk_depth_log2
;
1323 meta_surf_width
= meta_surf_width
>> comp_blk_width_log2
;
1324 meta_surf_height
= meta_surf_height
>> comp_blk_height_log2
;
1327 meta_x
= meta_x
<< bpp_log2
;
1328 meta_y
= meta_y
<< bpp_log2
;
1329 meta_z
= meta_z
<< bpp_log2
;
// Convert coordinates and surface extent to units of whole meta blocks.
1332 meta_x
= meta_x
>> meta_block_width_log2
;
1333 meta_y
= meta_y
>> meta_block_height_log2
;
1334 meta_z
= meta_z
>> meta_block_depth_log2
;
1335 meta_surf_width
= meta_surf_width
>> meta_block_width_log2
;
1336 meta_surf_height
= meta_surf_height
>> meta_block_height_log2
;
// Linear block index: the scaled width acts as pitch and width*height as slice;
// operands are widened to long before multiplying.
1339 long macroaddr
= (long)meta_x
+ (long)meta_y
*(long)meta_surf_width
+ (long)meta_z
*(long)meta_surf_width
*(long)meta_surf_height
;
// Low bits of the mip origin, i.e. its offset inside a meta block.
1341 int mip_tail_x
, mip_tail_y
, mip_tail_z
;
1342 mip_tail_x
= mip_base_x
& ((1 << meta_block_width_log2
)-1);
1343 mip_tail_y
= mip_base_y
& ((1 << meta_block_height_log2
)-1);
1344 mip_tail_z
= mip_base_z
& ((1 << meta_block_depth_log2
)-1);
// Coordinates handed to the equation: position within the mip plus that offset.
1346 int mip_x
= x
+ mip_tail_x
;
1347 int mip_y
= y
+ mip_tail_y
;
1348 int mip_z
= z
+ mip_tail_z
;
1350 // the pipe_interleave_log2+1 is because we are dealing with nibble addresses
1351 long pipe_xor_mask
= (pipe_xor
& ((1 << num_pipes_data_log2
)-1)) << (pipe_interleave_log2
+1);
1353 // shift surf_base to make it a nibble address
// The pipe XOR is applied to the solved offset before the base is added.
1354 long address
= (surf_base
<< 1) + (metaaddr
.solve( mip_x
, mip_y
, mip_z
, s
, macroaddr
) ^ pipe_xor_mask
);
1361 RB_MAP::Initialize()
1363 int num_se_log2
, num_rb_per_se_log2
;
1364 for( num_se_log2
=0; num_se_log2
<5; num_se_log2
++ ) {
1365 for( num_rb_per_se_log2
=0; num_rb_per_se_log2
<3; num_rb_per_se_log2
++ ) {
1366 Get_RB_Equation( rb_equation
[num_se_log2
][num_rb_per_se_log2
], num_se_log2
, num_rb_per_se_log2
);
1370 int pix_size_log2
, num_samples_log2
;
1371 for( pix_size_log2
=0; pix_size_log2
<4; pix_size_log2
++ ) {
1372 for( num_samples_log2
=0; num_samples_log2
<4; num_samples_log2
++ ) {
1373 Get_Data_Offset_Equation( zaddr
[pix_size_log2
][num_samples_log2
], DATA_Z_STENCIL
, pix_size_log2
, num_samples_log2
, 16 );
1377 for( pix_size_log2
=0; pix_size_log2
<5; pix_size_log2
++ ) {
1378 for( num_samples_log2
=0; num_samples_log2
<4; num_samples_log2
++ ) {
1379 Get_Data_Offset_Equation( caddr
[pix_size_log2
][num_samples_log2
], DATA_COLOR2D
, pix_size_log2
, num_samples_log2
, 16 );
1383 for( pix_size_log2
=0; pix_size_log2
<5; pix_size_log2
++ ) {
1384 Get_Data_Offset_Equation( c3addr
[pix_size_log2
][0], DATA_COLOR3D_S
, pix_size_log2
, 0, 16 );
1385 Get_Data_Offset_Equation( c3addr
[pix_size_log2
][1], DATA_COLOR3D_Z
, pix_size_log2
, 0, 16 );