2910666dd512e745801a7f57d4e42eff99f56db7
[mesa.git] / src / gallium / drivers / r300 / r300_texture_desc.c
1 /*
2 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
3 * Copyright 2010 Marek Olšák <maraeo@gmail.com>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "r300_texture_desc.h"
25
26 #include "r300_context.h"
27 #include "r300_winsys.h"
28
29 #include "util/u_format.h"
30
31 /* Returns the number of pixels that the texture should be aligned to
32 * in the given dimension. */
33 unsigned r300_get_pixel_alignment(enum pipe_format format,
34 unsigned num_samples,
35 enum r300_buffer_tiling microtile,
36 enum r300_buffer_tiling macrotile,
37 enum r300_dim dim, boolean is_rs690)
38 {
39 static const unsigned table[2][5][3][2] =
40 {
41 {
42 /* Macro: linear linear linear
43 Micro: linear tiled square-tiled */
44 {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */
45 {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */
46 {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */
47 {{ 4, 1}, { 2, 2}, { 0, 0}}, /* 64 bits per pixel */
48 {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */
49 },
50 {
51 /* Macro: tiled tiled tiled
52 Micro: linear tiled square-tiled */
53 {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */
54 {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */
55 {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */
56 {{ 32, 8}, {16, 16}, { 0, 0}}, /* 64 bits per pixel */
57 {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */
58 }
59 };
60
61 static const unsigned aa_block[2] = {4, 8};
62 unsigned tile = 0;
63 unsigned pixsize = util_format_get_blocksize(format);
64
65 assert(macrotile <= R300_BUFFER_TILED);
66 assert(microtile <= R300_BUFFER_SQUARETILED);
67 assert(pixsize <= 16);
68 assert(dim <= DIM_HEIGHT);
69
70 if (num_samples > 1) {
71 /* Multisampled textures have their own alignment scheme. */
72 if (pixsize == 4)
73 tile = aa_block[dim];
74 /* XXX FP16 AA. */
75 } else {
76 /* Standard alignment. */
77 tile = table[macrotile][util_logbase2(pixsize)][microtile][dim];
78 if (macrotile == 0 && is_rs690 && dim == DIM_WIDTH) {
79 int align;
80 int h_tile;
81 h_tile = table[macrotile][util_logbase2(pixsize)][microtile][DIM_HEIGHT];
82 align = 64 / (pixsize * h_tile);
83 if (tile < align)
84 tile = align;
85 }
86 }
87
88 assert(tile);
89 return tile;
90 }
91
92 /* Return true if macrotiling should be enabled on the miplevel. */
93 static boolean r300_texture_macro_switch(struct r300_resource *tex,
94 unsigned level,
95 boolean rv350_mode,
96 enum r300_dim dim)
97 {
98 unsigned tile, texdim;
99
100 tile = r300_get_pixel_alignment(tex->b.b.b.format, tex->b.b.b.nr_samples,
101 tex->tex.microtile, R300_BUFFER_TILED, dim, 0);
102 if (dim == DIM_WIDTH) {
103 texdim = u_minify(tex->tex.width0, level);
104 } else {
105 texdim = u_minify(tex->tex.height0, level);
106 }
107
108 /* See TX_FILTER1_n.MACRO_SWITCH. */
109 if (rv350_mode) {
110 return texdim >= tile;
111 } else {
112 return texdim > tile;
113 }
114 }
115
116 /**
117 * Return the stride, in bytes, of the texture image of the given texture
118 * at the given level.
119 */
120 static unsigned r300_texture_get_stride(struct r300_screen *screen,
121 struct r300_resource *tex,
122 unsigned level)
123 {
124 unsigned tile_width, width, stride;
125 boolean is_rs690 = (screen->caps.family == CHIP_FAMILY_RS600 ||
126 screen->caps.family == CHIP_FAMILY_RS690 ||
127 screen->caps.family == CHIP_FAMILY_RS740);
128
129 if (tex->tex.stride_in_bytes_override)
130 return tex->tex.stride_in_bytes_override;
131
132 /* Check the level. */
133 if (level > tex->b.b.b.last_level) {
134 SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n",
135 __FUNCTION__, level, tex->b.b.b.last_level);
136 return 0;
137 }
138
139 width = u_minify(tex->tex.width0, level);
140
141 if (util_format_is_plain(tex->b.b.b.format)) {
142 tile_width = r300_get_pixel_alignment(tex->b.b.b.format,
143 tex->b.b.b.nr_samples,
144 tex->tex.microtile,
145 tex->tex.macrotile[level],
146 DIM_WIDTH, is_rs690);
147 width = align(width, tile_width);
148
149 stride = util_format_get_stride(tex->b.b.b.format, width);
150 /* The alignment to 32 bytes is sort of implied by the layout... */
151 return stride;
152 } else {
153 return align(util_format_get_stride(tex->b.b.b.format, width), is_rs690 ? 64 : 32);
154 }
155 }
156
157 static unsigned r300_texture_get_nblocksy(struct r300_resource *tex,
158 unsigned level,
159 boolean *out_aligned_for_cbzb)
160 {
161 unsigned height, tile_height;
162
163 height = u_minify(tex->tex.height0, level);
164
165 /* Mipmapped and 3D textures must have their height aligned to POT. */
166 if ((tex->b.b.b.target != PIPE_TEXTURE_1D &&
167 tex->b.b.b.target != PIPE_TEXTURE_2D &&
168 tex->b.b.b.target != PIPE_TEXTURE_RECT) ||
169 tex->b.b.b.last_level != 0) {
170 height = util_next_power_of_two(height);
171 }
172
173 if (util_format_is_plain(tex->b.b.b.format)) {
174 tile_height = r300_get_pixel_alignment(tex->b.b.b.format,
175 tex->b.b.b.nr_samples,
176 tex->tex.microtile,
177 tex->tex.macrotile[level],
178 DIM_HEIGHT, 0);
179 height = align(height, tile_height);
180
181 /* See if the CBZB clear can be used on the buffer,
182 * taking the texture size into account. */
183 if (out_aligned_for_cbzb) {
184 if (tex->tex.macrotile[level]) {
185 /* When clearing, the layer (width*height) is horizontally split
186 * into two, and the upper and lower halves are cleared by the CB
187 * and ZB units, respectively. Therefore, the number of macrotiles
188 * in the Y direction must be even. */
189
190 /* Align the height so that there is an even number of macrotiles.
191 * Do so for 3 or more macrotiles in the Y direction. */
192 if (level == 0 && tex->b.b.b.last_level == 0 &&
193 (tex->b.b.b.target == PIPE_TEXTURE_1D ||
194 tex->b.b.b.target == PIPE_TEXTURE_2D ||
195 tex->b.b.b.target == PIPE_TEXTURE_RECT) &&
196 height >= tile_height * 3) {
197 height = align(height, tile_height * 2);
198 }
199
200 *out_aligned_for_cbzb = height % (tile_height * 2) == 0;
201 } else {
202 *out_aligned_for_cbzb = FALSE;
203 }
204 }
205 }
206
207 return util_format_get_nblocksy(tex->b.b.b.format, height);
208 }
209
210 /* Get a width in pixels from a stride in bytes. */
211 static unsigned stride_to_width(enum pipe_format format,
212 unsigned stride_in_bytes)
213 {
214 return (stride_in_bytes / util_format_get_blocksize(format)) *
215 util_format_get_blockwidth(format);
216 }
217
218 static void r300_setup_miptree(struct r300_screen *screen,
219 struct r300_resource *tex,
220 boolean align_for_cbzb)
221 {
222 struct pipe_resource *base = &tex->b.b.b;
223 unsigned stride, size, layer_size, nblocksy, i;
224 boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350;
225 boolean aligned_for_cbzb;
226
227 tex->tex.size_in_bytes = 0;
228
229 SCREEN_DBG(screen, DBG_TEXALLOC,
230 "r300: Making miptree for texture, format %s\n",
231 util_format_short_name(base->format));
232
233 for (i = 0; i <= base->last_level; i++) {
234 /* Let's see if this miplevel can be macrotiled. */
235 tex->tex.macrotile[i] =
236 (tex->tex.macrotile[0] == R300_BUFFER_TILED &&
237 r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) &&
238 r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ?
239 R300_BUFFER_TILED : R300_BUFFER_LINEAR;
240
241 stride = r300_texture_get_stride(screen, tex, i);
242
243 /* Compute the number of blocks in Y, see if the CBZB clear can be
244 * used on the texture. */
245 aligned_for_cbzb = FALSE;
246 if (align_for_cbzb && tex->tex.cbzb_allowed[i])
247 nblocksy = r300_texture_get_nblocksy(tex, i, &aligned_for_cbzb);
248 else
249 nblocksy = r300_texture_get_nblocksy(tex, i, NULL);
250
251 layer_size = stride * nblocksy;
252
253 if (base->nr_samples) {
254 layer_size *= base->nr_samples;
255 }
256
257 if (base->target == PIPE_TEXTURE_CUBE)
258 size = layer_size * 6;
259 else
260 size = layer_size * u_minify(tex->tex.depth0, i);
261
262 tex->tex.offset_in_bytes[i] = tex->tex.size_in_bytes;
263 tex->tex.size_in_bytes = tex->tex.offset_in_bytes[i] + size;
264 tex->tex.layer_size_in_bytes[i] = layer_size;
265 tex->tex.stride_in_bytes[i] = stride;
266 tex->tex.stride_in_pixels[i] = stride_to_width(tex->b.b.b.format, stride);
267 tex->tex.cbzb_allowed[i] = tex->tex.cbzb_allowed[i] && aligned_for_cbzb;
268
269 SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d "
270 "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n",
271 i, u_minify(tex->tex.width0, i), u_minify(tex->tex.height0, i),
272 u_minify(tex->tex.depth0, i), stride, tex->tex.size_in_bytes,
273 tex->tex.macrotile[i] ? "TRUE" : "FALSE");
274 }
275 }
276
277 static void r300_setup_flags(struct r300_resource *tex)
278 {
279 tex->tex.uses_stride_addressing =
280 !util_is_power_of_two(tex->b.b.b.width0) ||
281 (tex->tex.stride_in_bytes_override &&
282 stride_to_width(tex->b.b.b.format,
283 tex->tex.stride_in_bytes_override) != tex->b.b.b.width0);
284
285 tex->tex.is_npot =
286 tex->tex.uses_stride_addressing ||
287 !util_is_power_of_two(tex->b.b.b.height0) ||
288 !util_is_power_of_two(tex->b.b.b.depth0);
289 }
290
291 static void r300_setup_cbzb_flags(struct r300_screen *rscreen,
292 struct r300_resource *tex)
293 {
294 unsigned i, bpp;
295 boolean first_level_valid;
296
297 bpp = util_format_get_blocksizebits(tex->b.b.b.format);
298
299 /* 1) The texture must be point-sampled,
300 * 2) The depth must be 16 or 32 bits.
301 * 3) If the midpoint ZB offset is not aligned to 2048, it returns garbage
302 * with certain texture sizes. Macrotiling ensures the alignment. */
303 first_level_valid = tex->b.b.b.nr_samples <= 1 &&
304 (bpp == 16 || bpp == 32) &&
305 tex->tex.macrotile[0];
306
307 if (SCREEN_DBG_ON(rscreen, DBG_NO_CBZB))
308 first_level_valid = FALSE;
309
310 for (i = 0; i <= tex->b.b.b.last_level; i++)
311 tex->tex.cbzb_allowed[i] = first_level_valid && tex->tex.macrotile[i];
312 }
313
/* Return how many dwords cover an area of stride x height pixels when one
 * dword stands for a block of xblock x yblock pixels. Both dimensions are
 * rounded up to whole blocks first; xblock does not have to be a power of
 * two. */
static unsigned r300_pixels_to_dwords(unsigned stride,
                                      unsigned height,
                                      unsigned xblock, unsigned yblock)
{
    return (util_align_npot(stride, xblock) * align(height, yblock)) /
           (xblock * yblock);
}
320
321 static void r300_setup_hyperz_properties(struct r300_screen *screen,
322 struct r300_resource *tex)
323 {
324 /* The tile size of 1 DWORD in ZMASK RAM is:
325 *
326 * GPU Pipes 4x4 mode 8x8 mode
327 * ------------------------------------------
328 * R580 4P/1Z 32x32 64x64
329 * RV570 3P/1Z 48x16 96x32
330 * RV530 1P/2Z 32x16 64x32
331 * 1P/1Z 16x16 32x32
332 */
333 static unsigned zmask_blocks_x_per_dw[4] = {4, 8, 12, 8};
334 static unsigned zmask_blocks_y_per_dw[4] = {4, 4, 4, 8};
335
336 /* In HIZ RAM, one dword is always 8x8 pixels (each byte is 4x4 pixels),
337 * but the blocks have very weird ordering.
338 *
339 * With 2 pipes and an image of size 8xY, where Y >= 1,
340 * clearing 4 dwords clears blocks like this:
341 *
342 * 01012323
343 *
344 * where numbers correspond to dword indices. The blocks are interleaved
345 * in the X direction, so the alignment must be 4x1 blocks (32x8 pixels).
346 *
347 * With 4 pipes and an image of size 8xY, where Y >= 4,
348 * clearing 8 dwords clears blocks like this:
349 * 01012323
350 * 45456767
351 * 01012323
352 * 45456767
353 * where numbers correspond to dword indices. The blocks are interleaved
354 * in both directions, so the alignment must be 4x4 blocks (32x32 pixels)
355 */
356 static unsigned hiz_align_x[4] = {8, 32, 48, 32};
357 static unsigned hiz_align_y[4] = {8, 8, 8, 32};
358
359 if (util_format_is_depth_or_stencil(tex->b.b.b.format) &&
360 util_format_get_blocksizebits(tex->b.b.b.format) == 32 &&
361 tex->tex.microtile) {
362 unsigned i, pipes;
363
364 if (screen->caps.family == CHIP_FAMILY_RV530) {
365 pipes = screen->caps.num_z_pipes;
366 } else {
367 pipes = screen->caps.num_frag_pipes;
368 }
369
370 for (i = 0; i <= tex->b.b.b.last_level; i++) {
371 unsigned zcomp_numdw, zcompsize, hiz_numdw, stride, height;
372
373 stride = align(tex->tex.stride_in_pixels[i], 16);
374 height = u_minify(tex->b.b.b.height0, i);
375
376 /* The 8x8 compression mode needs macrotiling. */
377 zcompsize = screen->caps.z_compress == R300_ZCOMP_8X8 &&
378 tex->tex.macrotile[i] &&
379 tex->b.b.b.nr_samples <= 1 ? 8 : 4;
380
381 /* Get the ZMASK buffer size in dwords. */
382 zcomp_numdw = r300_pixels_to_dwords(stride, height,
383 zmask_blocks_x_per_dw[pipes-1] * zcompsize,
384 zmask_blocks_y_per_dw[pipes-1] * zcompsize);
385
386 /* Check whether we have enough ZMASK memory. */
387 if (util_format_get_blocksizebits(tex->b.b.b.format) == 32 &&
388 zcomp_numdw <= screen->caps.zmask_ram * pipes) {
389 tex->tex.zmask_dwords[i] = zcomp_numdw;
390 tex->tex.zcomp8x8[i] = zcompsize == 8;
391
392 tex->tex.zmask_stride_in_pixels[i] =
393 util_align_npot(stride, zmask_blocks_x_per_dw[pipes-1] * zcompsize);
394 } else {
395 tex->tex.zmask_dwords[i] = 0;
396 tex->tex.zcomp8x8[i] = FALSE;
397 tex->tex.zmask_stride_in_pixels[i] = 0;
398 }
399
400 /* Now setup HIZ. */
401 stride = util_align_npot(stride, hiz_align_x[pipes-1]);
402 height = align(height, hiz_align_y[pipes-1]);
403
404 /* Get the HIZ buffer size in dwords. */
405 hiz_numdw = (stride * height) / (8*8 * pipes);
406
407 /* Check whether we have enough HIZ memory. */
408 if (hiz_numdw <= screen->caps.hiz_ram * pipes) {
409 tex->tex.hiz_dwords[i] = hiz_numdw;
410 tex->tex.hiz_stride_in_pixels[i] = stride;
411 } else {
412 tex->tex.hiz_dwords[i] = 0;
413 tex->tex.hiz_stride_in_pixels[i] = 0;
414 }
415 }
416 }
417 }
418
419 static void r300_setup_tiling(struct r300_screen *screen,
420 struct r300_resource *tex)
421 {
422 enum pipe_format format = tex->b.b.b.format;
423 boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350;
424 boolean is_zb = util_format_is_depth_or_stencil(format);
425 boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING);
426
427 tex->tex.microtile = R300_BUFFER_LINEAR;
428 tex->tex.macrotile[0] = R300_BUFFER_LINEAR;
429
430 if (!util_format_is_plain(format)) {
431 return;
432 }
433
434 /* If height == 1, disable microtiling except for zbuffer. */
435 if (!is_zb && (tex->b.b.b.height0 == 1 || dbg_no_tiling)) {
436 return;
437 }
438
439 /* Set microtiling. */
440 switch (util_format_get_blocksize(format)) {
441 case 1:
442 case 4:
443 case 8:
444 tex->tex.microtile = R300_BUFFER_TILED;
445 break;
446
447 case 2:
448 tex->tex.microtile = R300_BUFFER_SQUARETILED;
449 break;
450 }
451
452 if (dbg_no_tiling) {
453 return;
454 }
455
456 /* Set macrotiling. */
457 if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) &&
458 r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) {
459 tex->tex.macrotile[0] = R300_BUFFER_TILED;
460 }
461 }
462
463 static void r300_tex_print_info(struct r300_resource *tex,
464 const char *func)
465 {
466 fprintf(stderr,
467 "r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, "
468 "LastLevel: %i, Size: %i, Format: %s\n",
469 func,
470 tex->tex.macrotile[0] ? "YES" : " NO",
471 tex->tex.microtile ? "YES" : " NO",
472 tex->tex.stride_in_pixels[0],
473 tex->b.b.b.width0, tex->b.b.b.height0, tex->b.b.b.depth0,
474 tex->b.b.b.last_level, tex->tex.size_in_bytes,
475 util_format_short_name(tex->b.b.b.format));
476 }
477
478 boolean r300_texture_desc_init(struct r300_screen *rscreen,
479 struct r300_resource *tex,
480 const struct pipe_resource *base)
481 {
482 tex->b.b.b.target = base->target;
483 tex->b.b.b.format = base->format;
484 tex->b.b.b.width0 = base->width0;
485 tex->b.b.b.height0 = base->height0;
486 tex->b.b.b.depth0 = base->depth0;
487 tex->b.b.b.array_size = base->array_size;
488 tex->b.b.b.last_level = base->last_level;
489 tex->b.b.b.nr_samples = base->nr_samples;
490 tex->tex.width0 = base->width0;
491 tex->tex.height0 = base->height0;
492 tex->tex.depth0 = base->depth0;
493
494 r300_setup_flags(tex);
495
496 /* Align a 3D NPOT texture to POT. */
497 if (base->target == PIPE_TEXTURE_3D && tex->tex.is_npot) {
498 tex->tex.width0 = util_next_power_of_two(tex->tex.width0);
499 tex->tex.height0 = util_next_power_of_two(tex->tex.height0);
500 tex->tex.depth0 = util_next_power_of_two(tex->tex.depth0);
501 }
502
503 /* Setup tiling. */
504 if (tex->tex.microtile == R300_BUFFER_SELECT_LAYOUT) {
505 r300_setup_tiling(rscreen, tex);
506 }
507
508 r300_setup_cbzb_flags(rscreen, tex);
509
510 /* Setup the miptree description. */
511 r300_setup_miptree(rscreen, tex, TRUE);
512 /* If the required buffer size is larger the given max size,
513 * try again without the alignment for the CBZB clear. */
514 if (tex->buf_size && tex->tex.size_in_bytes > tex->buf_size) {
515 r300_setup_miptree(rscreen, tex, FALSE);
516 }
517
518 r300_setup_hyperz_properties(rscreen, tex);
519
520 if (tex->buf_size) {
521 /* Make sure the buffer we got is large enough. */
522 if (tex->tex.size_in_bytes > tex->buf_size) {
523 fprintf(stderr, "r300: texture_desc_init: The buffer is not "
524 "large enough. Got: %i, Need: %i, Info:\n",
525 tex->buf_size, tex->tex.size_in_bytes);
526 r300_tex_print_info(tex, "texture_desc_init");
527 return FALSE;
528 }
529
530 tex->tex.buffer_size_in_bytes = tex->buf_size;
531 } else {
532 tex->tex.buffer_size_in_bytes = tex->tex.size_in_bytes;
533 }
534
535 if (SCREEN_DBG_ON(rscreen, DBG_TEX))
536 r300_tex_print_info(tex, "texture_desc_init");
537
538 return TRUE;
539 }
540
541 unsigned r300_texture_get_offset(struct r300_resource *tex,
542 unsigned level, unsigned layer)
543 {
544 unsigned offset = tex->tex.offset_in_bytes[level];
545
546 switch (tex->b.b.b.target) {
547 case PIPE_TEXTURE_3D:
548 case PIPE_TEXTURE_CUBE:
549 return offset + layer * tex->tex.layer_size_in_bytes[level];
550
551 default:
552 assert(layer == 0);
553 return offset;
554 }
555 }