amd/addrlib: import gfx9 support
[mesa.git] / src / amd / addrlib / gfx9 / rbmap.cpp
1 /*
2 * Copyright © 2017 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 // This class generates the rb id map based on rb id equations
28
29 //#define DPI_DEBUG 1
30 // Unlock more verbose debug messages (V* borrows from dj -v * to indicate most verbosity)
31 //#define DPI_DEBUG_V4 1
32 //#define DPI_DEBUG_V5 1
33 //#define DPI_DEBUG_PIPE_CASES 1
34 // "----+----|----+----|----+----|----+----|"
35 #include "addrcommon.h"
36 #include "rbmap.h"
37
38 RB_MAP::RB_MAP(void)
39 {
40 Initialize();
41 }
42
43 VOID RB_MAP::Get_Comp_Block_Screen_Space( CoordEq& addr, int bytes_log2, int* w, int* h, int* d)
44 {
45 int n, i;
46 if( w ) *w = 0;
47 if( h ) *h = 0;
48 if( d ) *d = 0;
49 for( n=0; n<bytes_log2; n++ ) { // go up to the bytes_log2 bit
50 for( i=0; i<addr[n].getsize(); i++ ) {
51 char dim = addr[n][i].getdim();
52 int ord = addr[n][i].getord();
53 if( w && dim == 'x' && ord >= *w ) *w = ord+1;
54 if( h && dim == 'y' && ord >= *h ) *h = ord+1;
55 if( d && dim == 'z' && ord >= *d ) *d = ord+1;
56 }
57 }
58 }
59
60 void
61 RB_MAP::Get_Meta_Block_Screen_Space( int num_comp_blocks_log2, bool is_thick, bool y_biased,
62 int comp_block_width_log2, int comp_block_height_log2, int comp_block_depth_log2,
63
64 // Outputs
65 int& meta_block_width_log2, int& meta_block_height_log2, int& meta_block_depth_log2 )
66 {
67 meta_block_width_log2 = comp_block_width_log2;
68 meta_block_height_log2 = comp_block_height_log2;
69 meta_block_depth_log2 = comp_block_depth_log2;
70 int n;
71
72 for( n=0; n<num_comp_blocks_log2; n++ ) {
73 if( (meta_block_height_log2 < meta_block_width_log2) ||
74 (y_biased && (meta_block_height_log2 == meta_block_width_log2)) ) {
75 if ( !is_thick || (meta_block_height_log2 <= meta_block_depth_log2) )
76 meta_block_height_log2++;
77 else
78 meta_block_depth_log2++;
79 }
80 else {
81 if ( !is_thick || (meta_block_width_log2 <= meta_block_depth_log2) )
82 meta_block_width_log2++;
83 else
84 meta_block_depth_log2++;
85 }
86 }
87 }
88
89 void
90 RB_MAP::cap_pipe( int xmode, bool is_thick, int& num_ses_log2, int bpp_log2, int num_samples_log2, int pipe_interleave_log2, int& block_size_log2, int& num_pipes_log2 )
91 {
92 // pipes+SEs can't exceed 32 for now
93 if( num_pipes_log2+num_ses_log2 > 5 ) {
94 num_pipes_log2 = 5-num_ses_log2;
95 }
96
97 // Since we are not supporting SE affinity anymore, just add nu_ses to num_pipes, and set num_ses to 0
98 num_pipes_log2 += num_ses_log2;
99 num_ses_log2 = 0;
100
101 // If block size is set to variable (0), compute the size
102 if( block_size_log2 == 0 ) {
103 //
104 //TODO Temporary disable till RTL can drive Var signals properly
105 }
106
107 if( xmode != NONE ) {
108 int max_pipes_log2 = block_size_log2 - pipe_interleave_log2;
109 if( is_thick ) {
110 // For 3d, treat the num_pipes as the sum of num_pipes and gpus
111 num_pipes_log2 = num_pipes_log2 + num_ses_log2;
112 num_ses_log2 = 0;
113 } else {
114 int block_space_used = num_pipes_log2+pipe_interleave_log2;
115 if( block_space_used < 10+bpp_log2 ) block_space_used = 10+bpp_log2;
116 // if the num gpus exceeds however many bits we have left between block size and block_space_used+num_samples
117 // then set num_ses_log2 to 0
118 if( num_ses_log2 > block_size_log2 - block_space_used - num_samples_log2) {
119 num_pipes_log2 = num_pipes_log2 + num_ses_log2;
120 num_ses_log2 = 0;
121 }
122 }
123 if( num_pipes_log2 > max_pipes_log2 ) {
124 // If it exceeds the space we have left, cap it to that
125 num_pipes_log2 = max_pipes_log2;
126 }
127 } else {
128 num_pipes_log2 = num_pipes_log2 + num_ses_log2;
129 num_ses_log2 = 0;
130 }
131 }
132
133 void RB_MAP::Get_Data_Offset_Equation( CoordEq& data_eq, int data_type, int bpp_log2, int num_samples_log2, int block_size_log2 )
134 {
135 bool is_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR );
136 bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z );
137 bool is_color = ( data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED );
138 bool is_s = ( data_type == DATA_COLOR3D_S );
139 Coordinate cx( 'x', 0 );
140 Coordinate cy( 'y', 0 );
141 Coordinate cz( 'z', 0 );
142 Coordinate cs( 's', 0 );
143 // Clear the equation
144 data_eq.resize(0);
145 data_eq.resize(27);
146 if( block_size_log2 == 0 ) block_size_log2 = 16;
147
148 if( is_linear ) {
149 Coordinate cm( 'm', 0 );
150 int i;
151 data_eq.resize(49);
152 for( i=0; i<49; i++ ) {
153 data_eq[i].add(cm);
154 cm++;
155 }
156 } else if( is_thick ) {
157 // Color 3d (_S and _Z modes; _D is same as color 2d)
158 int i;
159 if( is_s ) {
160 // Standard 3d swizzle
161 // Fill in bottom x bits
162 for( i=bpp_log2; i<4; i++ ) {
163 data_eq[i].add(cx);
164 cx++;
165 }
166 // Fill in 2 bits of y and then z
167 for( i=4; i<6; i++ ) {
168 data_eq[i].add(cy);
169 cy++;
170 }
171 for( i=6; i<8; i++ ) {
172 data_eq[i].add(cz);
173 cz++;
174 }
175 if (bpp_log2 < 2) {
176 // fill in z & y bit
177 data_eq[8].add(cz);
178 data_eq[9].add(cy);
179 cz++;
180 cy++;
181 } else if( bpp_log2 == 2 ) {
182 // fill in y and x bit
183 data_eq[8].add(cy);
184 data_eq[9].add(cx);
185 cy++;
186 cx++;
187 } else {
188 // fill in 2 x bits
189 data_eq[8].add(cx);
190 cx++;
191 data_eq[9].add(cx);
192 cx++;
193 }
194 } else {
195 // Z 3d swizzle
196 int m2d_end = (bpp_log2==0) ? 3 : ((bpp_log2 < 4) ? 4 : 5);
197 int num_zs = (bpp_log2==0 || bpp_log2==4) ? 2 : ((bpp_log2==1) ? 3 : 1);
198 data_eq.mort2d( cx, cy, bpp_log2, m2d_end );
199 for( i=m2d_end+1; i<=m2d_end+num_zs; i++ ) {
200 data_eq[i].add(cz);
201 cz++;
202 }
203 if( bpp_log2 == 0 || bpp_log2 == 3 ) {
204 // add an x and z
205 data_eq[6].add(cx);
206 data_eq[7].add(cz);
207 cx++;
208 cz++;
209 } else if( bpp_log2 == 2 ) {
210 // add a y and z
211 data_eq[6].add(cy);
212 data_eq[7].add(cz);
213 cy++;
214 cz++;
215 }
216 // add y and x
217 data_eq[8].add(cy);
218 data_eq[9].add(cx);
219 cy++;
220 cx++;
221 }
222 // Fill in bit 10 and up
223 data_eq.mort3d( cz, cy, cx, 10 );
224 } else if( is_color ) {
225 // Color 2D
226 int micro_y_bits = (8-bpp_log2) / 2;
227 int tile_split_start = block_size_log2 - num_samples_log2;
228 int i;
229 // Fill in bottom x bits
230 for( i=bpp_log2;i<4; i++ ) {
231 data_eq[i].add(cx);
232 cx++;
233 }
234 // Fill in bottom y bits
235 for( i=4; i<4+micro_y_bits; i++ ) {
236 data_eq[i].add(cy);
237 cy++;
238 }
239 // Fill in last of the micro_x bits
240 for( i=4+micro_y_bits; i<8; i++ ) {
241 data_eq[i].add(cx);
242 cx++;
243 }
244 // Fill in x/y bits below sample split
245 data_eq.mort2d( cy, cx, 8, tile_split_start-1 );
246 // Fill in sample bits
247 for( i=0; i<num_samples_log2; i++ ) {
248 cs.set( 's', i );
249 data_eq[tile_split_start+i].add(cs);
250 }
251 // Fill in x/y bits above sample split
252 if( (num_samples_log2 & 1) ^ (block_size_log2 & 1) ) data_eq.mort2d( cx, cy, block_size_log2 );
253 else data_eq.mort2d( cy, cx, block_size_log2 );
254 } else {
255 // Z, stencil or fmask
256 // First, figure out where each section of bits starts
257 int sample_start = bpp_log2;
258 int pixel_start = bpp_log2 + num_samples_log2;
259 int y_maj_start = 6 + num_samples_log2;
260
261 // Put in sample bits
262 int s;
263 for( s=0; s<num_samples_log2; s++ ) {
264 cs.set( 's', s );
265 data_eq[sample_start+s].add(cs);
266 }
267 // Put in the x-major order pixel bits
268 data_eq.mort2d( cx, cy, pixel_start, y_maj_start-1 );
269 // Put in the y-major order pixel bits
270 data_eq.mort2d( cy, cx, y_maj_start );
271 }
272 }
273
274 void RB_MAP::Get_RB_Equation( CoordEq& rb_equation, int num_ses_log2, int num_rbs_log2 )
275 {
276 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
277 int rb_region = (num_rbs_log2 == 0) ? 5 : 4;
278 Coordinate cx( 'x', rb_region );
279 Coordinate cy( 'y', rb_region );
280 int i, start = 0, num_total_rbs_log2 = num_ses_log2 + num_rbs_log2;
281 // Clear the rb equation
282 rb_equation.resize(0);
283 rb_equation.resize(num_total_rbs_log2);
284 if( num_ses_log2 > 0 && num_rbs_log2 == 1 ) {
285 // Special case when more than 1 SE, and only 1 RB per SE
286 rb_equation[0].add(cx);
287 rb_equation[0].add(cy);
288 cx++;
289 cy++;
290 rb_equation[0].add(cy);
291 start++;
292 }
293 for( i=0; i<2*(num_total_rbs_log2-start); i++ ) {
294 int index = start + (((start+i)>=num_total_rbs_log2) ? 2*(num_total_rbs_log2-start)-i-1 : i);
295 Coordinate& c = ((i % 2) == 1) ? cx : cy;
296 rb_equation[index].add(c);
297 c++;
298 }
299 }
300
301 //void getcheq( CoordEq& pipe_equation, CoordEq& addr, int pipe_interleave_log2, int num_pipes_log2,
302 void
303 RB_MAP::Get_Pipe_Equation( CoordEq& pipe_equation, CoordEq& addr,
304 int pipe_interleave_log2,
305 int num_pipes_log2,
306
307 int block_size_log2,
308 int num_samples_log2,
309
310 int xmode, int data_type
311 )
312 {
313 int pipe;
314 CoordEq addr_f, xormask, xormask2;
315 Coordinate tile_min( 'x', 3 );
316
317 bool is_color = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR2D_LINEAR || data_type == DATA_COLOR3D_D_NOT_USED );
318 bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z );
319
320 // For color, filter out sample bits only
321 // otherwise filter out everything under an 8x8 tile
322 if( is_color )
323 tile_min.set( 'x', 0 );
324
325 addr.copy( addr_f );
326
327 // Z/stencil is no longer tile split
328 if( is_color )
329 addr_f.shift( -num_samples_log2, block_size_log2- num_samples_log2 );
330
331 int i;
332 addr_f.copy( pipe_equation, pipe_interleave_log2, num_pipes_log2 ); //@todo kr needs num_ses_log2??
333
334
335 // This section should only apply to z/stencil, maybe fmask
336 // If the pipe bit is below the comp block size, then keep moving up the address until we find a bit that is above
337 for( pipe=0; addr_f[pipe_interleave_log2 + pipe][0] < tile_min; pipe++ ) {
338 }
339
340 // if pipe is 0, then the first pipe bit is above the comp block size, so we don't need to do anything
341 // Note, this if condition is not necessary, since if we execute the loop when pipe==0, we will get the same pipe equation
342 if ( pipe != 0 ) {
343 int j = pipe;
344
345
346 for( i=0; i<num_pipes_log2; i++ ) {
347 // Copy the jth bit above pipe interleave to the current pipe equation bit
348 addr_f[pipe_interleave_log2 + j].copyto(pipe_equation[i]);
349 j++;
350
351
352 }
353
354
355 }
356
357 if( xmode == PRT ) {
358 // Clear out bits above the block size if prt's are enabled
359 addr_f.resize(block_size_log2);
360 addr_f.resize(48);
361 }
362
363 if( xmode != NONE ) {
364 if( is_thick ) {
365 addr_f.copy( xormask2, pipe_interleave_log2+num_pipes_log2, 2*num_pipes_log2 );
366
367 xormask.resize( num_pipes_log2 );
368 for( pipe=0; pipe<num_pipes_log2; pipe++ ) {
369 xormask[pipe].add( xormask2[2*pipe] );
370 xormask[pipe].add( xormask2[2*pipe+1] );
371 }
372 } else {
373 Coordinate co;
374 // Xor in the bits above the pipe+gpu bits
375 addr_f.copy( xormask, pipe_interleave_log2 + pipe + num_pipes_log2, num_pipes_log2 );
376 if( num_samples_log2 == 0 && (xmode != PRT) ) {
377 // if 1xaa and not prt, then xor in the z bits
378 xormask2.resize(0);
379 xormask2.resize(num_pipes_log2);
380 for( pipe=0; pipe<num_pipes_log2; pipe++ ) {
381 co.set( 'z', num_pipes_log2-1 - pipe );
382 xormask2[pipe].add( co );
383 }
384
385 pipe_equation.xorin( xormask2 );
386 }
387 }
388
389 xormask.reverse();
390 pipe_equation.xorin( xormask );
391
392 }
393 }
394
395 void RB_MAP::get_meta_miptail_coord( int& x, int& y, int& z, int mip_in_tail, int blk_width_log2, int blk_height_log2, int blk_depth_log2 )
396 {
397 bool is_thick = (blk_depth_log2>0);
398 int m;
399 int mip_width = 1 << blk_width_log2;
400 int mip_height = 1 << (blk_height_log2-1);
401 int mip_depth = 1 << blk_depth_log2;
402
403 // Find the minimal increment, based on the block size and 2d/3d
404 int min_inc;
405 if(is_thick) {
406 min_inc = (blk_height_log2 >= 9) ? 128 : ((blk_height_log2 == 8) ? 64 : 32);
407 } else if(blk_height_log2>=10) {
408 min_inc = 256;
409 } else if(blk_height_log2==9) {
410 min_inc = 128;
411 } else {
412 min_inc = 64;
413 }
414
415 for( m=0; m<mip_in_tail; m++ ) {
416 if( mip_width <= 32 ) {
417 // special case when below 32x32 mipmap
418 switch(mip_in_tail-m) {
419 case 0: break; // 32x32
420 case 1: x+=32; break; // 16x16
421 case 2: y+=32; break; // 8x8
422 case 3: y+=32; x+=16; break;// 4x4
423 case 4: y+=32; x+=32; break;// 2x2
424 case 5: y+=32; x+=48; break;// 1x1
425 // The following are for BC/ASTC formats
426 case 6: y+=48; break; // 1/2 x 1/2
427 case 7: y+=48; x+=16; break;// 1/4 x 1/4
428 case 8: y+=48; x+=32; break;// 1/8 x 1/8
429 default:y+=48; x+=48; break;// 1/16 x 1/16
430 }
431 m = mip_in_tail; // break the loop
432 } else {
433 if( mip_width <= min_inc ) {
434 // if we're below the minimal increment...
435 if( is_thick ) {
436 // For 3d, just go in z direction
437 z += mip_depth;
438 } else {
439 // For 2d, first go across, then down
440 if( mip_width * 2 == min_inc ) {
441 // if we're 2 mips below, that's when we go back in x, and down in y
442 x -= min_inc;
443 y += min_inc;
444 } else {
445 // otherwise, just go across in x
446 x += min_inc;
447 }
448 }
449 } else {
450 // On even mip, go down, otherwise, go across
451 if( m&1 ) {
452 x += mip_width;
453 } else {
454 y += mip_height;
455 }
456 }
457 // Divide the width by 2
458 mip_width = mip_width / 2;
459 // After the first mip in tail, the mip is always a square
460 mip_height = mip_width;
461 // ...or for 3d, a cube
462 if(is_thick) mip_depth = mip_width;
463 }
464 }
465 }
466
467 void RB_MAP::get_mip_coord( int& x, int& y, int& z, int mip,
468 int meta_blk_width_log2, int meta_blk_height_log2, int meta_blk_depth_log2,
469 int data_blk_width_log2, int data_blk_height_log2,
470 int& surf_width, int& surf_height, int& surf_depth, int epitch, int max_mip,
471 int data_type, int bpp_log2, bool meta_linear )
472 {
473 if( meta_linear ) {
474 get_mip_coord_linear( x, y, z, mip, data_blk_width_log2, data_blk_height_log2,
475 surf_width, surf_height, surf_depth, epitch, max_mip, data_type, bpp_log2 );
476 } else {
477 get_mip_coord_nonlinear( x, y, z, mip, meta_blk_width_log2, meta_blk_height_log2, meta_blk_depth_log2,
478 surf_width, surf_height, surf_depth, epitch, max_mip, data_type );
479 }
480 }
481
482 void RB_MAP::get_mip_coord_linear( int& x, int& y, int& z,
483 int mip,
484 int data_blk_width_log2, int data_blk_height_log2,
485 int& surf_width, int& surf_height, int& surf_depth, int epitch,
486 int max_mip, int data_type, int bpp_log2
487 )
488 {
489 bool data_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR );
490
491 if( data_linear ) {
492 // linear width is padded out to 256 Bytes
493 int width_padding = 8 - bpp_log2;
494 int width_pad_mask = ~(0xffffffff << width_padding);
495 int padded_surf_width = surf_width;
496 int padded_surf_height = (data_type == DATA_COLOR1D) ? 1 : surf_height;
497
498 if( max_mip > 0 ) {
499 int mip_width = padded_surf_width;
500 int mip_height = padded_surf_height;
501 int padded_mip_height = 0;
502 int mip_base = 0;
503 int m = 0;
504 while( (mip_width >= 1 || mip_height >= 1) && m <= max_mip ) {
505 if( mip == m ) mip_base = padded_mip_height;
506 padded_mip_height += mip_height;
507 m++;
508 mip_width = (mip_width / 2) + (mip_width & 1);
509 mip_height = (mip_height / 2) + (mip_height & 1);
510 }
511 if( mip >= m ) {
512 // assert error
513 mip_base = padded_mip_height - mip_height;
514 }
515 padded_surf_height = padded_mip_height;
516
517 if(epitch > 0){
518 padded_surf_height = epitch;
519 }
520 y += mip_base;
521 padded_surf_width = ((surf_width >> width_padding) + ((surf_width & width_pad_mask) ? 1 : 0)) << width_padding;
522 }
523 else{
524 padded_surf_width = ((surf_width >> width_padding) + ((surf_width & width_pad_mask) ? 1 : 0)) << width_padding;
525
526 // Pad up epitch to meta block width
527 if( (epitch & width_pad_mask) != 0 ) {
528 epitch = ((epitch >> width_padding) + 1) << width_padding;
529 }
530 // Take max of epitch and computed surf width
531 if( epitch < padded_surf_width ) {
532 // assert error
533 } else {
534 padded_surf_width = epitch;
535 }
536 }
537
538 surf_width = padded_surf_width;
539 surf_height = padded_surf_height;
540 }
541 else {
542 // padding based data block size
543 int width_pad_mask = ~(0xffffffff << data_blk_width_log2);
544 int height_pad_mask = ~(0xffffffff << data_blk_height_log2);
545
546 // Pad the data surface dimensions by the block dimensions, and put the result in compressed block dimension units
547 surf_width = ((surf_width >> data_blk_width_log2) + ((surf_width & width_pad_mask) ? 1 : 0)) << data_blk_width_log2;
548 surf_height = ((surf_height >> data_blk_height_log2) + ((surf_height & height_pad_mask) ? 1 : 0)) << data_blk_height_log2;
549
550 // Tiled data, linear metadata
551 if( max_mip > 0 ) {
552 // we don't allow mipmapping on tiled data, with linear metadata
553 // assert error
554 }
555
556 // Pad up epitch to data block width
557 if( (epitch & width_pad_mask) != 0 ) {
558 epitch = ((epitch >> data_blk_width_log2) + 1) << data_blk_width_log2;
559 }
560 // Take max of epitch and computed surf width
561 if( epitch < surf_width ) {
562 // assert error
563 } else {
564 surf_width = epitch;
565 }
566 }
567 }
568
569 void RB_MAP::get_mip_coord_nonlinear( int& x, int& y, int& z,
570 int mip,
571 int meta_blk_width_log2, int meta_blk_height_log2, int meta_blk_depth_log2,
572
573 // Outputs
574 int& surf_width, int& surf_height, int& surf_depth,
575
576 int epitch, int max_mip, int data_type
577 )
578 {
579 bool is3d = (data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED );
580 int order; // 0 = xmajor, 1 = ymajor, 2 = zmajor
581
582 int mip_width = surf_width;
583 int mip_height = surf_height;
584 int mip_depth = (is3d) ? surf_depth : 1;
585
586 // Divide surface w/h/d by block size, padding if needed
587 surf_width = (((surf_width & ((1<<meta_blk_width_log2 )-1)) != 0) ? 1 : 0) + (surf_width >> meta_blk_width_log2);
588 surf_height = (((surf_height & ((1<<meta_blk_height_log2)-1)) != 0) ? 1 : 0) + (surf_height >> meta_blk_height_log2);
589 surf_depth = (((surf_depth & ((1<<meta_blk_depth_log2 )-1)) != 0) ? 1 : 0) + (surf_depth >> meta_blk_depth_log2);
590 epitch = (((epitch & ((1<<meta_blk_width_log2 )-1)) != 0) ? 1 : 0) + (epitch >> meta_blk_width_log2);
591
592 if( max_mip > 0 ) {
593 // Determine major order
594 if( is3d && surf_depth > surf_width && surf_depth > surf_height ) {
595 order = 2; // Z major
596 }
597 else if( surf_width >= surf_height ) {
598 order = 0; // X major
599 }
600 else {
601 order = 1; // Y major
602 }
603
604 // Check if mip 0 is in the tail
605 bool in_tail = (mip_width <= (1<<meta_blk_width_log2)) &&
606 (mip_height <= (1<<(meta_blk_height_log2-1))) &&
607 (!is3d || (mip_depth <= (1<<meta_blk_depth_log2)));
608 // Pad the mip w/h/d, which is just the surf w/h/d times blk dim
609 mip_width = surf_width << meta_blk_width_log2;
610 mip_height = surf_height << meta_blk_height_log2;
611 mip_depth = surf_depth << meta_blk_depth_log2;
612
613 if( !in_tail ) {
614 // Select the dimension that stores the mip chain, based on major order
615 // Then pad it out to max(2, ceil(mip_dim/2))
616 int& mip_dim = (order == 1) ? surf_width : surf_height;
617 // in y-major, if height > 2 blocks, then we need extra padding;
618 // in x or z major, it only occurs if width/depth is greater than 4 blocks
619 // Height is special, since we can enter the mip tail when height is 1/2 block high
620 int order_dim_limit = (order == 1) ? 2 : 4;
621 int& order_dim = (order == 0) ? surf_width : ((order == 1) ? surf_height : surf_depth);
622 if( mip_dim < 3 && order_dim > order_dim_limit && max_mip >= 3 ) mip_dim += 2;
623 else mip_dim += (mip_dim/2) + (mip_dim&1);
624 }
625
626 int m;
627 for( m=0; m<mip; m++ ) {
628 if( in_tail ) {
629 get_meta_miptail_coord( x, y, z, mip-m, meta_blk_width_log2, meta_blk_height_log2, meta_blk_depth_log2 );
630 m = mip; // break the loop
631 } else {
632 // Move either x, y, or z by the mip dimension based on which mip we're on and the order
633 if(m>=3 || m&1) {
634 switch(order) {
635 case 0: x += mip_width; break;
636 case 1: y += mip_height; break;
637 case 2: z += mip_depth; break;
638 }
639 } else {
640 switch(order) {
641 case 0: y += mip_height; break;
642 case 1: x += mip_width; break;
643 case 2: y += mip_height; break;
644 }
645 }
646 // Compute next mip's dimensions
647 mip_width = (mip_width/2);
648 mip_height = (mip_height/2);
649 mip_depth = (mip_depth/2);
650 // See if it's in the tail
651 in_tail = (mip_width <= (1<<meta_blk_width_log2)) &&
652 (mip_height <= (1<<(meta_blk_height_log2-1))) &&
653 (!is3d || (mip_depth <= (1<<meta_blk_depth_log2)));
654 // Pad out mip dimensions
655 mip_width = ((mip_width >> meta_blk_width_log2) + ((mip_width & ((1<<meta_blk_width_log2) -1)) != 0)) << meta_blk_width_log2;
656 mip_height = ((mip_height >> meta_blk_height_log2) + ((mip_height & ((1<<meta_blk_height_log2)-1)) != 0)) << meta_blk_height_log2;
657 mip_depth = ((mip_depth >> meta_blk_depth_log2) + ((mip_depth & ((1<<meta_blk_depth_log2) -1)) != 0)) << meta_blk_depth_log2;
658 }
659 }
660 } else {
661 // Take max of epitch and computed surf width
662 surf_width = (surf_width > epitch) ? surf_width : epitch;
663 }
664
665 // Multiply the surface dimension by block size
666 surf_width = surf_width << meta_blk_width_log2;
667 surf_height = surf_height << meta_blk_height_log2;
668 surf_depth = surf_depth << meta_blk_depth_log2;
669
670 }
671
672 void
673 RB_MAP::get_meta_eq( CoordEq& metaaddr,
674 int max_mip, int num_ses_log2, int num_rbs_log2,
675 int &num_pipes_log2,
676 int block_size_log2, int bpp_log2, int num_samples_log2, int max_comp_frag_log2,
677 int pipe_interleave_log2,
678 int xmode,
679 int data_type,
680 int meta_alignment, bool meta_linear)
681 {
682 // Metaaddressing
683 Coordinate co;
684 CoordEq cur_rbeq, pipe_equation, orig_pipe_equation;
685
686 bool data_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR );
687 bool is_color = ( data_linear || data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED );
688 bool is3d = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED );
689 bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z );
690
691 bool is_fmask = (data_type == DATA_FMASK);
692 bool is_pipe_aligned = (meta_alignment == META_ALIGN_PIPE) || (meta_alignment == META_ALIGN_PIPE_RB);
693 bool is_rb_aligned = (meta_alignment == META_ALIGN_RB) || (meta_alignment == META_ALIGN_PIPE_RB);
694
695 bool is_mipmapped = (max_mip > 0) ? true : false;
696
697 int pipe_mask = 0x0;
698 int comp_frag_log2 = (is_color && (num_samples_log2 > max_comp_frag_log2)) ? max_comp_frag_log2 : num_samples_log2;
699
700 int uncomp_frag_log2 = num_samples_log2 - comp_frag_log2;
701
702 // Constraints on linear
703 if ( data_linear ) {
704 xmode = NONE;
705 num_samples_log2 = 0;
706 is_rb_aligned = false;
707 meta_linear = true;
708 }
709 if( meta_linear && !data_linear ) {
710 is_pipe_aligned = false;
711 }
712
713 // Min metablock size if thick is 64KB, otherwise 4KB
714 int min_meta_block_size_log2 = (is_thick) ? 16 : 12;
715
716 // metadata word size is 1/2 byte for cmask, 1 byte for color, and 4 bytes for z/stencil
717 int metadata_word_size_log2 = (is_fmask) ? -1 : ((is_color) ? 0 : 2);
718
719 int metadata_words_per_page_log2 = min_meta_block_size_log2 - metadata_word_size_log2;
720
721 // Get the total # of RB's before modifying due to rb align
722 int num_total_rbs_pre_rb_align_log2 = num_ses_log2 + num_rbs_log2;
723
724 // Cap the pipe bits to block size
725 int num_ses_data_log2 = num_ses_log2;
726 cap_pipe( xmode, is_thick, num_ses_data_log2, bpp_log2,
727 num_samples_log2, pipe_interleave_log2, block_size_log2, num_pipes_log2 );
728
729 // if not pipe aligned, set num_pipes_log2, num_ses_log2 to 0
730 if( !is_pipe_aligned ) {
731 num_pipes_log2 = 0;
732 num_ses_data_log2 = 0;
733 }
734
735 // Get the correct data address and rb equation
736 CoordEq dataaddr;
737 Get_Data_Offset_Equation( dataaddr,
738 (meta_linear) ? DATA_COLOR1D : data_type,
739 bpp_log2, num_samples_log2, block_size_log2 );
740
741
742 // if not rb aligned, set num_ses_log2/rbs_log2 to 0; note, this is done after generating the data equation
743 if( !is_rb_aligned ) {
744 num_ses_log2 = 0;
745 num_rbs_log2 = 0;
746 }
747
748 // Get pipe and rb equations
749 Get_Pipe_Equation( pipe_equation, dataaddr, pipe_interleave_log2,
750 num_pipes_log2, block_size_log2, num_samples_log2, xmode, data_type );
751
752 CoordEq& this_rbeq = rb_equation[num_ses_log2][num_rbs_log2];
753
754 num_pipes_log2 = pipe_equation.getsize();
755
756 if( meta_linear ) {
757 dataaddr.copy( metaaddr );
758 if( data_linear ) {
759 if( is_pipe_aligned ) {
760 // Remove the pipe bits
761 metaaddr.shift( -num_pipes_log2, pipe_interleave_log2 );
762 }
763 // Divide by comp block size, which for linear (which is always color) is 256 B
764 metaaddr.shift( -8 );
765 if( is_pipe_aligned ) {
766 // Put pipe bits back in
767 metaaddr.shift( num_pipes_log2, pipe_interleave_log2 );
768 int i;
769 for( i=0; i<num_pipes_log2; i++ ) {
770 pipe_equation[i].copyto(metaaddr[pipe_interleave_log2+i]);
771 }
772 }
773 }
774 metaaddr.shift( 1 );
775 return;
776 }
777
778 int i, j, k, old_size, new_size;
779 int num_total_rbs_log2 = num_ses_log2 + num_rbs_log2;
780
781 // For non-color surfaces, compessed block size is always 8x8; for color, it's always a 256 bytes sized region
782 int comp_blk_width_log2 = 3, comp_blk_height_log2 = 3, comp_blk_depth_log2 = 0;
783 int comp_blk_size_log2 = 8;
784
785 // For color surfaces, compute the comp block width, height, and depth
786 // For non-color surfaces, compute the comp block size
787 if( is_color ) {
788 Get_Comp_Block_Screen_Space( dataaddr, comp_blk_size_log2, &comp_blk_width_log2, &comp_blk_height_log2, &comp_blk_depth_log2 );
789 metadata_words_per_page_log2 -= num_samples_log2; // factor out num fragments for color surfaces
790 }
791 else {
792 comp_blk_size_log2 = 6 + num_samples_log2 + bpp_log2;
793 }
794
795 // Compute meta block width and height
796 int num_comp_blks_per_meta_blk;
797 if (num_pipes_log2==0 && num_ses_log2==0 && num_rbs_log2==0) {
798 num_comp_blks_per_meta_blk = metadata_words_per_page_log2;
799 }
800 else {
801 num_comp_blks_per_meta_blk = num_total_rbs_pre_rb_align_log2 + ((is_thick) ? 18 : 10);
802
803 if( num_comp_blks_per_meta_blk + comp_blk_size_log2 > 27+bpp_log2)
804 num_comp_blks_per_meta_blk = 27+bpp_log2 - comp_blk_size_log2;
805
806 if( metadata_words_per_page_log2 > num_comp_blks_per_meta_blk )
807 num_comp_blks_per_meta_blk = metadata_words_per_page_log2;
808 }
809
810 int meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2;
811 Get_Meta_Block_Screen_Space( num_comp_blks_per_meta_blk, is_thick, is_mipmapped, // mipmaps should be y-biased
812 comp_blk_width_log2, comp_blk_height_log2, comp_blk_depth_log2,
813 meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2 );
814
815 // Make sure the metaaddr is cleared
816 metaaddr.resize(0);
817 metaaddr.resize(27);
818
819 //------------------------------------------------------------------------------------------------------------------------
820 // Use the growing square or growing cube order for thick as a starting point for the metadata address
821 //------------------------------------------------------------------------------------------------------------------------
822 if( is_thick ) {
823 Coordinate cx( 'x', 0 );
824 Coordinate cy( 'y', 0 );
825 Coordinate cz( 'z', 0 );
826 if(is_mipmapped) {
827 metaaddr.mort3d( cy, cx, cz );
828 } else {
829 metaaddr.mort3d( cx, cy, cz );
830 }
831 }
832 else {
833 Coordinate cx( 'x', 0 );
834 Coordinate cy( 'y', 0 );
835 Coordinate cs;
836
837 if(is_mipmapped) {
838 metaaddr.mort2d( cy, cx, comp_frag_log2 );
839 } else {
840 metaaddr.mort2d( cx, cy, comp_frag_log2 );
841 }
842
843 //------------------------------------------------------------------------------------------------------------------------
844 // Put the compressible fragments at the lsb
845 // the uncompressible frags will be at the msb of the micro address
846 //------------------------------------------------------------------------------------------------------------------------
847 int s;
848 for( s=0; s<comp_frag_log2; s++ ) {
849 cs.set( 's', s );
850 metaaddr[s].add(cs);
851 }
852 }
853
854 // Keep a copy of the pipe and rb equations
855 this_rbeq.copy( cur_rbeq );
856 pipe_equation.copy( orig_pipe_equation );
857
858 // filter out everything under the compressed block size
859 co.set( 'x', comp_blk_width_log2 );
860 metaaddr.Filter( '<', co, 0, 'x' );
861 co.set( 'y', comp_blk_height_log2 );
862 metaaddr.Filter( '<', co, 0, 'y' );
863 co.set( 'z', comp_blk_depth_log2 );
864 metaaddr.Filter( '<', co, 0, 'z' );
865 // For non-color, filter out sample bits
866 if( !is_color ) {
867 co.set( 'x', 0 );
868 metaaddr.Filter( '<', co, 0, 's' );
869 }
870
871 // filter out everything above the metablock size
872 co.set( 'x', meta_block_width_log2-1 );
873 metaaddr.Filter( '>', co, 0, 'x' );
874 co.set( 'y', meta_block_height_log2-1 );
875 metaaddr.Filter( '>', co, 0, 'y' );
876 co.set( 'z', meta_block_depth_log2-1 );
877 metaaddr.Filter( '>', co, 0, 'z' );
878
879 // filter out everything above the metablock size for the channel bits
880 co.set( 'x', meta_block_width_log2-1 );
881 pipe_equation.Filter( '>', co, 0, 'x' );
882 co.set( 'y', meta_block_height_log2-1 );
883 pipe_equation.Filter( '>', co, 0, 'y' );
884 co.set( 'z', meta_block_depth_log2-1 );
885 pipe_equation.Filter( '>', co, 0, 'z' );
886
887 // Make sure we still have the same number of channel bits
888 if( pipe_equation.getsize() != static_cast<UINT_32>(num_pipes_log2) ) {
889 // assert
890 }
891
892 // Loop through all channel and rb bits, and make sure these components exist in the metadata address
893 for( i=0; i<num_pipes_log2; i++ ) {
894 for( j=pipe_equation[i].getsize()-1; j>=0; j-- ) {
895 if( !metaaddr.Exists( pipe_equation[i][j] ) ) {
896 // assert
897 }
898 }
899 }
900 for( i=0; i<num_total_rbs_log2; i++ ) {
901 for( j=cur_rbeq[i].getsize()-1; j>=0; j-- ) {
902 if( !metaaddr.Exists( cur_rbeq[i][j] ) ) {
903 // assert
904 }
905 }
906 }
907
908 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
909 int old_rb_bits_left = num_total_rbs_log2;
910 for( i=0; i<num_total_rbs_log2; i++ ) {
911 for(j=0; j<num_pipes_log2; j++ ) {
912 if( cur_rbeq[i] == pipe_equation[j] ) {
913 cur_rbeq[i].Clear();
914 old_rb_bits_left--;
915 // Mark which pipe bit caused the RB bit to be dropped
916 pipe_mask |= (1 << j);
917 }
918 }
919 }
920
921 // Loop through each bit of the channel, get the smallest coordinate, and remove it from the metaaddr, and rb_equation
922 for( i=0; i<num_pipes_log2; i++ ) {
923 pipe_equation[i].getsmallest( co );
924
925 old_size = metaaddr.getsize();
926 metaaddr.Filter( '=', co );
927 new_size = metaaddr.getsize();
928 if( new_size != old_size-1 ) {
929 // assert warning
930 }
931 pipe_equation.remove( co );
932 for( j=0; j<num_total_rbs_log2; j++ ) {
933 if( cur_rbeq[j].remove( co ) ) {
934 // if we actually removed something from this bit, then add the remaining
935 // channel bits, as these can be removed for this bit
936 for( k=0; k<pipe_equation[i].getsize(); k++ ) {
937 if( pipe_equation[i][k] != co ) {
938 cur_rbeq[j].add( pipe_equation[i][k] );
939 }
940 }
941 // if the rb bit is still empty, then we have to mark all pipe bits as affecting the RB
942 if( cur_rbeq[j].getsize() == 0 ) {
943 pipe_mask = (1 << num_pipes_log2) - 1;
944 }
945 }
946 }
947 }
948
949 // Loop through the rb bits and see what remain; filter out the smallest coordinate if it remains
950 int rb_bits_left = 0;
951 for( i=0; i<num_total_rbs_log2; i++ ) {
952 if( cur_rbeq[i].getsize() > 0 ) {
953 rb_bits_left++;
954 cur_rbeq[i].getsmallest( co );
955 old_size = metaaddr.getsize();
956 metaaddr.Filter( '=', co );
957 new_size = metaaddr.getsize();
958 if( new_size != old_size-1 ) {
959 // assert warning
960 }
961 for( j=i+1; j<num_total_rbs_log2; j++ ) {
962 if( cur_rbeq[j].remove( co ) ) {
963 // if we actually removed something from this bit, then add the remaining
964 // rb bits, as these can be removed for this bit
965 for( k=0; k<cur_rbeq[i].getsize(); k++ ) {
966 if( cur_rbeq[i][k] != co ) {
967 cur_rbeq[j].add( cur_rbeq[i][k] );
968 }
969 }
970 }
971 }
972 }
973 }
974
975 // capture the size of the metaaddr
976 i = metaaddr.getsize();
977 // resize to 49 bits...make this a nibble address
978 metaaddr.resize(49);
979 // Concatenate the macro address above the current address
980 for( j=0; i<49; i++, j++ ) {
981 co.set( 'm', j );
982 metaaddr[i].add( co );
983 }
984
985 // Multiply by meta element size (in nibbles)
986 if( is_color ) {
987 metaaddr.shift( 1 ); // Byte size element
988 } else if( data_type == DATA_Z_STENCIL ) {
989 metaaddr.shift( 3 ); // 4 Byte size elements
990 }
991
992 //------------------------------------------------------------------------------------------------------------------------
993 // Note the pipe_interleave_log2+1 is because address is a nibble address
994 // Shift up from pipe interleave number of channel and rb bits left, and uncompressed fragments
995 //------------------------------------------------------------------------------------------------------------------------
996
997 metaaddr.shift( num_pipes_log2 + rb_bits_left + uncomp_frag_log2,
998 pipe_interleave_log2+1 );
999
1000 // Put in the channel bits
1001 for( i=0; i<num_pipes_log2; i++ ) {
1002 orig_pipe_equation[i].copyto( metaaddr[pipe_interleave_log2+1 + i] );
1003 }
1004
1005 // Put in remaining rb bits
1006 i = 0;
1007 for( j=0; j<rb_bits_left; i=(i+1) % num_total_rbs_log2 ) {
1008 if( cur_rbeq[i].getsize() > 0 ) {
1009 rb_equation[num_ses_log2][num_rbs_log2][i].copyto( metaaddr[pipe_interleave_log2+1 + num_pipes_log2 + j] );
1010 // Mark any rb bit we add in to the rb mask
1011 j++;
1012 }
1013 }
1014
1015 //------------------------------------------------------------------------------------------------------------------------
1016 // Put in the uncompressed fragment bits
1017 //------------------------------------------------------------------------------------------------------------------------
1018 for( i=0; i<uncomp_frag_log2; i++ ) {
1019 co.set( 's', comp_frag_log2+i );
1020 metaaddr[pipe_interleave_log2+1 + num_pipes_log2 + rb_bits_left + i].add( co );
1021 }
1022
1023
1024 //------------------------------------------------------------------------------------------------------------------------
1025 // Check that the metadata SE bits match the data address
1026 //------------------------------------------------------------------------------------------------------------------------
1027 for( i=0; i<num_ses_data_log2; i++ ) {
1028 if(num_total_rbs_log2-num_ses_data_log2+i >= 0){
1029 if( metaaddr[ pipe_interleave_log2+1 + num_pipes_log2-num_ses_data_log2 + i ] != dataaddr[ pipe_interleave_log2 + num_pipes_log2-num_ses_data_log2 + i ] ||
1030 metaaddr[ pipe_interleave_log2+1 + num_pipes_log2-num_ses_data_log2 + i ] != rb_equation[num_ses_log2][num_rbs_log2][num_total_rbs_log2-num_ses_data_log2+i]) {
1031 //FIXME: Removed to prevent logs from growing large in size // cout << "Warning: GPU bit " << i << " differs from data addr or RB equation on " << data_name << title << endl;
1032 //FIXME: Removed to prevent logs from growing large in size // cout << " Data: " << dataaddr[ pipe_interleave_log2 + num_pipes_log2-num_ses_data_log2 + i ] << endl;
1033 //FIXME: Removed to prevent logs from growing large in size // cout << "MData: " << metaaddr[ pipe_interleave_log2+1 + num_pipes_log2-num_ses_data_log2 + i ] << endl;
1034 //FIXME: Removed to prevent logs from growing large in size // cout << " RBeq: " << rb_equation[num_ses_log2][num_rbs_log2][num_total_rbs_log2-num_ses_data_log2+i] << endl;
1035 //FIXME: Removed to prevent logs from growing large in size // cout << " Pipe: " << orig_pipe_equation << endl;
1036 //FIXME: Removed to prevent logs from growing large in size // cout << " DEq: " << dataaddr << endl;
1037 }
1038 }
1039 }
1040 }
1041
1042 long
1043 RB_MAP::get_meta_addr_calc( int x, int y, int z, int s,
1044 long surf_base, int element_bytes_log2, int num_samples_log2, int max_comp_frag_log2,
1045 long pitch, long slice,
1046 int max_mip,
1047
1048 //int swizzle_mode,
1049 int xmode, int pipe_xor, int block_size_log2,
1050
1051 /*int num_banks_log2,*/
1052 int num_pipes_log2,
1053 int pipe_interleave_log2,
1054
1055 int meta_alignment,
1056 int dim_type,
1057 int x_mip_org, int y_mip_org, int z_mip_org,
1058
1059 int num_ses_log2, int num_rbs_log2,
1060 /*bool se_affinity_enable, */
1061
1062 int data_type,
1063
1064 int l2_metablk_w, int l2_metablk_h, int l2_metablk_d,
1065 bool meta_linear
1066 )
1067 {
1068 int bpp_log2 = element_bytes_log2;
1069 int mip_base_x = x_mip_org;
1070 int mip_base_y = y_mip_org;
1071 int mip_base_z = z_mip_org;
1072
1073 CoordEq metaaddr;
1074
1075 bool se_affinity_enable = false;
1076 //int max_pipe_bytes = std::max(1<<num_pipes_log2 * 1<<pipe_interleave_log2, 1024 * 1<<log2_element_bytes);
1077 //int max_banks_samples = std::max(1<<num_banks_log2, 1<<num_samples_log2);
1078 //int block_size_log2 = max(4096, max_pipe_bytes * max_bank_samples * 1<<num_ses_log2);
1079
1080 bool data_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR );
1081 bool is_color = ( data_linear || data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED );
1082 bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z );
1083 bool is_fmask = (data_type == DATA_FMASK);
1084
1085 bool is_pipe_aligned = (meta_alignment == META_ALIGN_PIPE) || (meta_alignment == META_ALIGN_PIPE_RB);
1086 bool is_rb_aligned = (meta_alignment == META_ALIGN_RB) || (meta_alignment == META_ALIGN_PIPE_RB);
1087
1088 if ( data_linear )
1089 meta_linear = true;
1090
1091 if ( !data_linear && meta_linear)
1092 max_mip = 0;
1093
1094 // Min metablock size if thick is 64KB, otherwise 4KB
1095 int min_meta_block_size_log2 = (is_thick) ? 16 : 12;
1096
1097 // metadata word size is 1/2 byte for cmask, 1 byte for color, and 4 bytes for z/stencil
1098 int metadata_word_size_log2 = (is_fmask) ? -1 : ((is_color) ? 0 : 2);
1099 int metadata_words_per_page_log2 = min_meta_block_size_log2 - metadata_word_size_log2;
1100
1101 int num_ses_data_log2 = num_ses_log2;
1102 int block_size_data_log2 = block_size_log2;
1103 int num_pipes_data_log2 = num_pipes_log2;
1104
1105 //int num_banks_data_log2 = num_banks_log2;
1106 cap_pipe( xmode, is_thick, num_ses_data_log2, bpp_log2, num_samples_log2, pipe_interleave_log2, block_size_data_log2, num_pipes_data_log2/*, num_banks_data_log2 */);
1107
1108 // Get the correct data address and rb equation
1109 CoordEq dataaddr;
1110 Get_Data_Offset_Equation( dataaddr, data_type, bpp_log2, num_samples_log2, block_size_data_log2 );
1111
1112 get_meta_eq( metaaddr, max_mip, num_ses_log2, num_rbs_log2, num_pipes_log2, /*num_banks_log2,*/ block_size_log2,
1113 bpp_log2, num_samples_log2, max_comp_frag_log2, pipe_interleave_log2, xmode,
1114 data_type, meta_alignment, meta_linear);
1115 // For non-color surfaces, compessed block size is always 8x8; for color, it's always a 256 bytes sized region
1116 int comp_blk_width_log2 = 3, comp_blk_height_log2 = 3, comp_blk_depth_log2 = 0;
1117 int comp_blk_size_log2 = 8;
1118
1119 if ( is_color ){
1120 Get_Comp_Block_Screen_Space( dataaddr, comp_blk_size_log2, &comp_blk_width_log2, &comp_blk_height_log2, &comp_blk_depth_log2 );
1121 metadata_words_per_page_log2 -= num_samples_log2; // factor out num fragments for color surfaces
1122 }
1123 else {
1124 comp_blk_size_log2 = 6 + num_samples_log2 + bpp_log2;
1125 }
1126
1127 // Compute meta block width and height
1128 int num_total_rbs_log2 = num_ses_log2 + num_rbs_log2;
1129 int num_comp_blks_per_meta_blk;
1130 if((!is_pipe_aligned || num_pipes_log2==0) && (!is_rb_aligned || (num_ses_log2==0 && num_rbs_log2==0))) {
1131 num_comp_blks_per_meta_blk = metadata_words_per_page_log2;
1132 }
1133 else {
1134 num_comp_blks_per_meta_blk = num_total_rbs_log2 + ((is_thick) ? 18 : 10);
1135 if( num_comp_blks_per_meta_blk + comp_blk_size_log2 > 27+bpp_log2) num_comp_blks_per_meta_blk = 27+bpp_log2 - comp_blk_size_log2;
1136 if( metadata_words_per_page_log2 > num_comp_blks_per_meta_blk )
1137 num_comp_blks_per_meta_blk = metadata_words_per_page_log2;
1138 }
1139
1140 int meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2;
1141
1142 //@@todo kr missing meta_block_width*
1143
1144 // Get the data block size
1145 int data_block_width_log2, data_block_height_log2, data_block_depth_log2;
1146
1147 Get_Meta_Block_Screen_Space( block_size_log2 - comp_blk_size_log2,
1148 is_thick, true,
1149 comp_blk_width_log2, comp_blk_height_log2, comp_blk_depth_log2,
1150 data_block_width_log2, data_block_height_log2, data_block_depth_log2 );
1151
1152 meta_block_width_log2 = l2_metablk_w;
1153 meta_block_height_log2 = l2_metablk_h;
1154 meta_block_depth_log2 = l2_metablk_d;
1155
1156 int meta_x = mip_base_x + x ;
1157 int meta_y = mip_base_y + y ;
1158 int meta_z = mip_base_z + z ;
1159
1160 if( meta_linear ){
1161 if(!data_linear) {
1162 // Tiled data, linear metadata
1163 meta_x = meta_x >> comp_blk_width_log2;
1164 meta_y = meta_y >> comp_blk_height_log2;
1165 meta_z = meta_z >> comp_blk_depth_log2;
1166 pitch = pitch >> comp_blk_width_log2;
1167 slice = slice >> (comp_blk_width_log2 + comp_blk_height_log2);
1168 }
1169 else{
1170 meta_x = meta_x << bpp_log2;
1171 meta_y = meta_y << bpp_log2;
1172 meta_z = meta_z << bpp_log2;
1173 }
1174 }
1175 else{
1176 meta_x = meta_x >> meta_block_width_log2;
1177 meta_y = meta_y >> meta_block_height_log2;
1178 meta_z = meta_z >> meta_block_depth_log2;
1179
1180 pitch = pitch >> meta_block_width_log2;
1181 slice = slice >> (meta_block_width_log2 + meta_block_height_log2);
1182 }
1183
1184 long macroaddr = (long)meta_x + (long)meta_y*(long)pitch + (long)meta_z*(long)slice;
1185
1186 int mip_tail_x, mip_tail_y, mip_tail_z;
1187 mip_tail_x = mip_base_x & ((1 << meta_block_width_log2 )-1);
1188 mip_tail_y = mip_base_y & ((1 << meta_block_height_log2)-1);
1189 mip_tail_z = mip_base_z & ((1 << meta_block_depth_log2)-1);
1190
1191 int mip_x = x + mip_tail_x;
1192 int mip_y = y + mip_tail_y;
1193 int mip_z = z + mip_tail_z;
1194
1195 // the pipe_interleave_log2+1 is because we are dealing with nibble addresses
1196 long pipe_xor_mask = (pipe_xor & ((1 << num_pipes_data_log2)-1)) << (pipe_interleave_log2+1);
1197
1198 // shift surf_base to make it a nibble address
1199 long meta_offset_from_base_nibble_address = metaaddr.solve( mip_x, mip_y, mip_z, s, macroaddr );
1200
1201 long address = (surf_base << 1) + (meta_offset_from_base_nibble_address ^ pipe_xor_mask);
1202
1203 return address;
1204 }
1205
#if 0
// Disabled (compiled out) variant of the metadata address calculation. Unlike
// get_meta_addr_calc it derives the meta block dimensions and the mip origin
// itself, from the surface dimensions and the requested mip level.
long
RB_MAP::get_meta_addr( int x, int y, int z, int s, int mip,
                       int surf_width, int surf_height, int surf_depth, int lpitch,
                       long surf_base, int pipe_xor, int max_mip,
                       int num_ses_log2, int num_rbs_log2, int num_pipes_log2,
                       int block_size_log2, int bpp_log2, int num_samples_log2, int max_comp_frag_log2,
                       int pipe_interleave_log2, int xmode, int data_type, int meta_alignment, bool meta_linear)
{
    CoordEq metaaddr;

    // Classify the surface
    bool data_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR );
    bool is_color    = ( data_linear || data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S ||
                         data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED );
    bool is_thick    = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z );
    bool is_fmask    = ( data_type == DATA_FMASK );

    bool is_pipe_aligned = (meta_alignment == META_ALIGN_PIPE) || (meta_alignment == META_ALIGN_PIPE_RB);
    bool is_rb_aligned   = (meta_alignment == META_ALIGN_RB)   || (meta_alignment == META_ALIGN_PIPE_RB);

    // Sampled before max_mip is possibly cleared below
    bool is_mipmapped = (max_mip > 0);

    if( data_linear ) {
        meta_linear = true;
    }

    // Don't allow mipmapping on the tiled data, meta linear case
    // or if we have linear 2d/3d surface
#ifdef ADDRESS__LPITCH_DISABLE__0
    if( (!data_linear && meta_linear) || (data_type == DATA_COLOR2D_LINEAR) ) {
        max_mip = 0;
    }
#else
    if( !data_linear && meta_linear ) {
        max_mip = 0;
    }
#endif

    // Min metablock size if thick is 64KB, otherwise 4KB
    int min_meta_block_size_log2 = 12;
    if( is_thick ) {
        min_meta_block_size_log2 = 16;
    }

    // metadata word size is 1/2 byte for cmask, 1 byte for color, and 4 bytes for z/stencil
    int metadata_word_size_log2;
    if( is_fmask ) {
        metadata_word_size_log2 = -1;
    } else if( is_color ) {
        metadata_word_size_log2 = 0;
    } else {
        metadata_word_size_log2 = 2;
    }
    int metadata_words_per_page_log2 = min_meta_block_size_log2 - metadata_word_size_log2;

    // Cap the pipe bits to block size
    int num_ses_data_log2    = num_ses_log2;
    int block_size_data_log2 = block_size_log2;
    int num_pipes_data_log2  = num_pipes_log2;

    cap_pipe( xmode, is_thick, num_ses_data_log2, bpp_log2, num_samples_log2, pipe_interleave_log2,
              block_size_data_log2, num_pipes_data_log2 );

    // Get the correct data address and rb equation
    CoordEq dataaddr;
    Get_Data_Offset_Equation( dataaddr, data_type, bpp_log2, num_samples_log2, block_size_data_log2 );

    get_meta_eq( metaaddr, max_mip, num_ses_log2, num_rbs_log2, num_pipes_log2, block_size_log2,
                 bpp_log2, num_samples_log2, max_comp_frag_log2, pipe_interleave_log2, xmode, data_type,
                 meta_alignment, meta_linear );

    // For non-color surfaces, compressed block size is always 8x8; for color, it's always a 256 bytes sized region
    int comp_blk_width_log2  = 3;
    int comp_blk_height_log2 = 3;
    int comp_blk_depth_log2  = 0;
    int comp_blk_size_log2   = 8;

    if( !is_color ) {
        comp_blk_size_log2 = 6 + num_samples_log2 + bpp_log2;
    } else {
        Get_Comp_Block_Screen_Space( dataaddr, comp_blk_size_log2,
                                     &comp_blk_width_log2, &comp_blk_height_log2, &comp_blk_depth_log2 );
        metadata_words_per_page_log2 -= num_samples_log2;  // factor out num fragments for color surfaces
    }

    // Compute meta block width and height
    int num_total_rbs_log2 = num_ses_log2 + num_rbs_log2;

    // Alignment only matters when there is more than one pipe / rb to align to
    bool pipe_align_applies = is_pipe_aligned && num_pipes_log2 != 0;
    bool rb_align_applies   = is_rb_aligned && (num_ses_log2 != 0 || num_rbs_log2 != 0);

    int num_comp_blks_per_meta_blk = metadata_words_per_page_log2;
    if( pipe_align_applies || rb_align_applies ) {
        num_comp_blks_per_meta_blk = num_total_rbs_log2 + ((is_thick) ? 18 : 10);

        const int size_limit_log2 = 27 + bpp_log2;
        if( num_comp_blks_per_meta_blk + comp_blk_size_log2 > size_limit_log2 ) {
            num_comp_blks_per_meta_blk = size_limit_log2 - comp_blk_size_log2;
        }

        if( metadata_words_per_page_log2 > num_comp_blks_per_meta_blk ) {
            num_comp_blks_per_meta_blk = metadata_words_per_page_log2;
        }
    }

    int meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2;

    Get_Meta_Block_Screen_Space( num_comp_blks_per_meta_blk, is_thick, is_mipmapped,
                                 comp_blk_width_log2, comp_blk_height_log2, comp_blk_depth_log2,
                                 meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2 );

    // Get the data block size
    int data_block_width_log2, data_block_height_log2, data_block_depth_log2;

    Get_Meta_Block_Screen_Space( block_size_log2 - comp_blk_size_log2, is_thick, true,
                                 comp_blk_width_log2, comp_blk_height_log2, comp_blk_depth_log2,
                                 data_block_width_log2, data_block_height_log2, data_block_depth_log2 );

    int meta_surf_width  = surf_width;
    int meta_surf_height = surf_height;
    int meta_surf_depth  = surf_depth;

    // Locate the origin of the requested mip level
    int mip_base_x = 0;
    int mip_base_y = 0;
    int mip_base_z = 0;
    get_mip_coord( mip_base_x, mip_base_y, mip_base_z, mip,
                   meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2,
                   data_block_width_log2, data_block_height_log2,
                   meta_surf_width, meta_surf_height, meta_surf_depth, lpitch, max_mip,
                   data_type, bpp_log2, meta_linear );

    int meta_x = mip_base_x + x;
    int meta_y = mip_base_y + y;
    int meta_z = mip_base_z + z;

    if( !meta_linear ) {
        // Tiled metadata: one macro entry per meta block
        meta_x >>= meta_block_width_log2;
        meta_y >>= meta_block_height_log2;
        meta_z >>= meta_block_depth_log2;
        meta_surf_width  >>= meta_block_width_log2;
        meta_surf_height >>= meta_block_height_log2;
    } else if( !data_linear ) {
        // Tiled data, linear metadata
        meta_x >>= comp_blk_width_log2;
        meta_y >>= comp_blk_height_log2;
        meta_z >>= comp_blk_depth_log2;
        meta_surf_width  >>= comp_blk_width_log2;
        meta_surf_height >>= comp_blk_height_log2;
    } else {
        // Linear data, linear metadata
        meta_x <<= bpp_log2;
        meta_y <<= bpp_log2;
        meta_z <<= bpp_log2;
    }

    long macroaddr = (long)meta_x
                   + (long)meta_y*(long)meta_surf_width
                   + (long)meta_z*(long)meta_surf_width*(long)meta_surf_height;

    // Offset of the mip origin inside its meta block
    int mip_tail_x = mip_base_x & ((1 << meta_block_width_log2 )-1);
    int mip_tail_y = mip_base_y & ((1 << meta_block_height_log2)-1);
    int mip_tail_z = mip_base_z & ((1 << meta_block_depth_log2 )-1);

    int mip_x = x + mip_tail_x;
    int mip_y = y + mip_tail_y;
    int mip_z = z + mip_tail_z;

    // the pipe_interleave_log2+1 is because we are dealing with nibble addresses
    long pipe_xor_mask = (pipe_xor & ((1 << num_pipes_data_log2)-1)) << (pipe_interleave_log2+1);

    // shift surf_base to make it a nibble address
    return (surf_base << 1) + (metaaddr.solve( mip_x, mip_y, mip_z, s, macroaddr ) ^ pipe_xor_mask);
}
#endif
1359
1360 void
1361 RB_MAP::Initialize()
1362 {
1363 int num_se_log2, num_rb_per_se_log2;
1364 for( num_se_log2=0; num_se_log2<5; num_se_log2++ ) {
1365 for( num_rb_per_se_log2=0; num_rb_per_se_log2<3; num_rb_per_se_log2++ ) {
1366 Get_RB_Equation( rb_equation[num_se_log2][num_rb_per_se_log2], num_se_log2, num_rb_per_se_log2 );
1367 }
1368 }
1369
1370 int pix_size_log2, num_samples_log2;
1371 for( pix_size_log2=0; pix_size_log2<4; pix_size_log2++ ) {
1372 for( num_samples_log2=0; num_samples_log2<4; num_samples_log2++ ) {
1373 Get_Data_Offset_Equation( zaddr[pix_size_log2][num_samples_log2], DATA_Z_STENCIL, pix_size_log2, num_samples_log2, 16 );
1374 }
1375 }
1376
1377 for( pix_size_log2=0; pix_size_log2<5; pix_size_log2++ ) {
1378 for( num_samples_log2=0; num_samples_log2<4; num_samples_log2++ ) {
1379 Get_Data_Offset_Equation( caddr[pix_size_log2][num_samples_log2], DATA_COLOR2D, pix_size_log2, num_samples_log2, 16 );
1380 }
1381 }
1382
1383 for( pix_size_log2=0; pix_size_log2<5; pix_size_log2++ ) {
1384 Get_Data_Offset_Equation( c3addr[pix_size_log2][0], DATA_COLOR3D_S, pix_size_log2, 0, 16 );
1385 Get_Data_Offset_Equation( c3addr[pix_size_log2][1], DATA_COLOR3D_Z, pix_size_log2, 0, 16 );
1386 }
1387 }
1388