2 * Mesa 3-D graphics library
4 * Copyright (C) 2014 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "ilo_debug.h"
29 #include "ilo_image.h"
32 IMAGE_TILING_NONE
= 1 << GEN6_TILING_NONE
,
33 IMAGE_TILING_X
= 1 << GEN6_TILING_X
,
34 IMAGE_TILING_Y
= 1 << GEN6_TILING_Y
,
35 IMAGE_TILING_W
= 1 << GEN8_TILING_W
,
37 IMAGE_TILING_ALL
= (IMAGE_TILING_NONE
|
43 struct ilo_image_params
{
44 const struct ilo_dev
*dev
;
45 const struct ilo_image_info
*info
;
46 unsigned valid_tilings
;
51 unsigned max_x
, max_y
;
55 img_get_slice_size(const struct ilo_image
*img
,
56 const struct ilo_image_params
*params
,
57 unsigned level
, unsigned *width
, unsigned *height
)
59 const struct ilo_image_info
*info
= params
->info
;
62 w
= u_minify(img
->width0
, level
);
63 h
= u_minify(img
->height0
, level
);
66 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
68 * "The dimensions of the mip maps are first determined by applying the
69 * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
70 * if necessary, they are padded out to compression block boundaries."
72 w
= align(w
, img
->block_width
);
73 h
= align(h
, img
->block_height
);
76 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
78 * "If the surface is multisampled (4x), these values must be adjusted
79 * as follows before proceeding:
81 * W_L = ceiling(W_L / 2) * 4
82 * H_L = ceiling(H_L / 2) * 4"
84 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
86 * "If the surface is multisampled and it is a depth or stencil surface
87 * or Multisampled Surface StorageFormat in SURFACE_STATE is
88 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
91 * #samples W_L = H_L =
92 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
93 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
94 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
95 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
97 * For interleaved samples (4x), where pixels
100 * (x, y+1) (x+1, y+1)
102 * would be occupied by
104 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
105 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
106 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
107 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
111 * w = align(w, 2) * 2;
112 * h = align(h, 2) * 2;
114 if (img
->interleaved_samples
) {
115 switch (info
->sample_count
) {
134 assert(!"unsupported sample count");
140 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
142 * "For separate stencil buffer, the width must be mutiplied by 2 and
143 * height divided by 2..."
145 * To make things easier (for transfer), we will just double the stencil
146 * stride in 3DSTATE_STENCIL_BUFFER.
148 w
= align(w
, img
->align_i
);
149 h
= align(h
, img
->align_j
);
156 img_get_num_layers(const struct ilo_image
*img
,
157 const struct ilo_image_params
*params
)
159 const struct ilo_image_info
*info
= params
->info
;
160 unsigned num_layers
= info
->array_size
;
162 /* samples of the same index are stored in a layer */
163 if (info
->sample_count
> 1 && !img
->interleaved_samples
)
164 num_layers
*= info
->sample_count
;
170 img_init_layer_height(struct ilo_image
*img
,
171 struct ilo_image_params
*params
)
173 const struct ilo_image_info
*info
= params
->info
;
176 if (img
->walk
!= ILO_IMAGE_WALK_LAYER
)
179 num_layers
= img_get_num_layers(img
, params
);
184 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
186 * "The following equation is used for surface formats other than
187 * compressed textures:
189 * QPitch = (h0 + h1 + 11j)"
191 * "The equation for compressed textures (BC* and FXT1 surface formats)
194 * QPitch = (h0 + h1 + 11j) / 4"
196 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
197 * value calculated in the equation above, for every other odd Surface
198 * Height starting from 1 i.e. 1,5,9,13"
200 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
202 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
203 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
205 * QPitch = (h0 + h1 + 12j)
206 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
208 * (There are many typos or missing words here...)"
210 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
211 * the base address. The PRM divides QPitch by 4 for compressed formats
212 * because the block height for those formats is 4, and it wants QPitch to
213 * mean the number of memory rows, as opposed to texel rows, between
214 * slices. Since we use texel rows everywhere, we do not need to divide
217 img
->walk_layer_height
= params
->h0
+ params
->h1
+
218 ((ilo_dev_gen(params
->dev
) >= ILO_GEN(7)) ? 12 : 11) * img
->align_j
;
220 if (ilo_dev_gen(params
->dev
) == ILO_GEN(6) && info
->sample_count
> 1 &&
221 img
->height0
% 4 == 1)
222 img
->walk_layer_height
+= 4;
224 params
->max_y
+= img
->walk_layer_height
* (num_layers
- 1);
228 img_init_lods(struct ilo_image
*img
,
229 struct ilo_image_params
*params
)
231 const struct ilo_image_info
*info
= params
->info
;
232 unsigned cur_x
, cur_y
;
237 for (lv
= 0; lv
< info
->level_count
; lv
++) {
238 unsigned lod_w
, lod_h
;
240 img_get_slice_size(img
, params
, lv
, &lod_w
, &lod_h
);
242 img
->lods
[lv
].x
= cur_x
;
243 img
->lods
[lv
].y
= cur_y
;
244 img
->lods
[lv
].slice_width
= lod_w
;
245 img
->lods
[lv
].slice_height
= lod_h
;
248 case ILO_IMAGE_WALK_LAYER
:
249 /* MIPLAYOUT_BELOW */
255 case ILO_IMAGE_WALK_LOD
:
256 lod_h
*= img_get_num_layers(img
, params
);
262 /* every LOD begins at tile boundaries */
263 if (info
->level_count
> 1) {
264 assert(img
->format
== PIPE_FORMAT_S8_UINT
);
265 cur_x
= align(cur_x
, 64);
266 cur_y
= align(cur_y
, 64);
269 case ILO_IMAGE_WALK_3D
:
271 const unsigned num_slices
= u_minify(info
->depth
, lv
);
272 const unsigned num_slices_per_row
= 1 << lv
;
273 const unsigned num_rows
=
274 (num_slices
+ num_slices_per_row
- 1) / num_slices_per_row
;
276 lod_w
*= num_slices_per_row
;
284 if (params
->max_x
< img
->lods
[lv
].x
+ lod_w
)
285 params
->max_x
= img
->lods
[lv
].x
+ lod_w
;
286 if (params
->max_y
< img
->lods
[lv
].y
+ lod_h
)
287 params
->max_y
= img
->lods
[lv
].y
+ lod_h
;
290 if (img
->walk
== ILO_IMAGE_WALK_LAYER
) {
291 params
->h0
= img
->lods
[0].slice_height
;
293 if (info
->level_count
> 1)
294 params
->h1
= img
->lods
[1].slice_height
;
296 img_get_slice_size(img
, params
, 1, &cur_x
, ¶ms
->h1
);
301 img_init_alignments(struct ilo_image
*img
,
302 const struct ilo_image_params
*params
)
304 const struct ilo_image_info
*info
= params
->info
;
307 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
309 * "surface format align_i align_j
310 * YUV 4:2:2 formats 4 *see below
313 * all other formats 4 *see below"
315 * "- align_j = 4 for any depth buffer
316 * - align_j = 2 for separate stencil buffer
317 * - align_j = 4 for any render target surface is multisampled (4x)
318 * - align_j = 4 for any render target surface with Surface Vertical
319 * Alignment = VALIGN_4
320 * - align_j = 2 for any render target surface with Surface Vertical
321 * Alignment = VALIGN_2
322 * - align_j = 2 for all other render target surface
323 * - align_j = 2 for any sampling engine surface with Surface Vertical
324 * Alignment = VALIGN_2
325 * - align_j = 4 for any sampling engine surface with Surface Vertical
326 * Alignment = VALIGN_4"
328 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
330 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
331 * the Surface Format is 96 bits per element (BPE)."
333 * They can be rephrased as
336 * compressed formats block width block height
337 * PIPE_FORMAT_S8_UINT 4 2
338 * other depth/stencil formats 4 4
339 * 4x multisampled 4 4
345 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
347 * "surface defined by surface format align_i align_j
348 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
350 * 3DSTATE_STENCIL_BUFFER N/A 8 8
351 * SURFACE_STATE BC*, ETC*, EAC* 4 4
353 * all others (set by SURFACE_STATE)"
355 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
357 * "- This field (Surface Vertical Aligment) is intended to be set to
358 * VALIGN_4 if the surface was rendered as a depth buffer, for a
359 * multisampled (4x) render target, or for a multisampled (8x)
360 * render target, since these surfaces support only alignment of 4.
361 * - Use of VALIGN_4 for other surfaces is supported, but uses more
363 * - This field must be set to VALIGN_4 for all tiled Y Render Target
365 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
366 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
367 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
368 * must be set to VALIGN_4."
369 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
371 * "- This field (Surface Horizontal Aligment) is intended to be set to
372 * HALIGN_8 only if the surface was rendered as a depth buffer with
373 * Z16 format or a stencil buffer, since these surfaces support only
375 * - Use of HALIGN_8 for other surfaces is supported, but uses more
377 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
378 * - This field must be set to HALIGN_8 if the Surface Format is
381 * They can be rephrased as
384 * compressed formats block width block height
385 * PIPE_FORMAT_Z16_UNORM 8 4
386 * PIPE_FORMAT_S8_UINT 8 8
387 * other depth/stencil formats 4 4
388 * 2x or 4x multisampled 4 or 8 4
389 * tiled Y 4 or 8 4 (if rt)
390 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
391 * others 4 or 8 2 or 4
394 if (params
->compressed
) {
395 /* this happens to be the case */
396 img
->align_i
= img
->block_width
;
397 img
->align_j
= img
->block_height
;
398 } else if (info
->bind_zs
) {
399 if (ilo_dev_gen(params
->dev
) >= ILO_GEN(7)) {
400 switch (img
->format
) {
401 case PIPE_FORMAT_Z16_UNORM
:
405 case PIPE_FORMAT_S8_UINT
:
415 switch (img
->format
) {
416 case PIPE_FORMAT_S8_UINT
:
427 const bool valign_4
=
428 (info
->sample_count
> 1) ||
429 (ilo_dev_gen(params
->dev
) >= ILO_GEN(8)) ||
430 (ilo_dev_gen(params
->dev
) >= ILO_GEN(7) &&
431 img
->tiling
== GEN6_TILING_Y
&&
432 info
->bind_surface_dp_render
);
434 if (ilo_dev_gen(params
->dev
) >= ILO_GEN(7) &&
435 ilo_dev_gen(params
->dev
) <= ILO_GEN(7.5) && valign_4
)
436 assert(img
->format
!= PIPE_FORMAT_R32G32B32_FLOAT
);
439 img
->align_j
= (valign_4
) ? 4 : 2;
443 * the fact that align i and j are multiples of block width and height
444 * respectively is what makes the size of the bo a multiple of the block
445 * size, slices start at block boundaries, and many of the computations
448 assert(img
->align_i
% img
->block_width
== 0);
449 assert(img
->align_j
% img
->block_height
== 0);
451 /* make sure align() works */
452 assert(util_is_power_of_two(img
->align_i
) &&
453 util_is_power_of_two(img
->align_j
));
454 assert(util_is_power_of_two(img
->block_width
) &&
455 util_is_power_of_two(img
->block_height
));
459 img_init_tiling(struct ilo_image
*img
,
460 const struct ilo_image_params
*params
)
462 const struct ilo_image_info
*info
= params
->info
;
463 unsigned preferred_tilings
= params
->valid_tilings
;
465 /* no fencing nor BLT support */
466 if (preferred_tilings
& ~IMAGE_TILING_W
)
467 preferred_tilings
&= ~IMAGE_TILING_W
;
469 if (info
->bind_surface_dp_render
|| info
->bind_surface_sampler
) {
471 * heuristically set a minimum width/height for enabling tiling
473 if (img
->width0
< 64 && (preferred_tilings
& ~IMAGE_TILING_X
))
474 preferred_tilings
&= ~IMAGE_TILING_X
;
476 if ((img
->width0
< 32 || img
->height0
< 16) &&
477 (img
->width0
< 16 || img
->height0
< 32) &&
478 (preferred_tilings
& ~IMAGE_TILING_Y
))
479 preferred_tilings
&= ~IMAGE_TILING_Y
;
481 /* force linear if we are not sure where the texture is bound to */
482 if (preferred_tilings
& IMAGE_TILING_NONE
)
483 preferred_tilings
&= IMAGE_TILING_NONE
;
486 /* prefer tiled over linear */
487 if (preferred_tilings
& IMAGE_TILING_Y
)
488 img
->tiling
= GEN6_TILING_Y
;
489 else if (preferred_tilings
& IMAGE_TILING_X
)
490 img
->tiling
= GEN6_TILING_X
;
491 else if (preferred_tilings
& IMAGE_TILING_W
)
492 img
->tiling
= GEN8_TILING_W
;
494 img
->tiling
= GEN6_TILING_NONE
;
498 img_init_walk_gen7(struct ilo_image
*img
,
499 const struct ilo_image_params
*params
)
501 const struct ilo_image_info
*info
= params
->info
;
504 * It is not explicitly stated, but render targets are expected to be
505 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
506 * to be IMS (samples interleaved).
508 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
512 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
514 * "note that the depth buffer and stencil buffer have an implied
515 * value of ARYSPC_FULL"
517 img
->walk
= (info
->type
== GEN6_SURFTYPE_3D
) ?
518 ILO_IMAGE_WALK_3D
: ILO_IMAGE_WALK_LAYER
;
520 img
->interleaved_samples
= true;
523 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
525 * "If Multisampled Surface Storage Format is MSFMT_MSS and Number
526 * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
527 * Array Spacing) must be set to ARYSPC_LOD0."
529 * As multisampled resources are not mipmapped, we never use
530 * ARYSPC_FULL for them.
532 if (info
->sample_count
> 1)
533 assert(info
->level_count
== 1);
536 (info
->type
== GEN6_SURFTYPE_3D
) ? ILO_IMAGE_WALK_3D
:
537 (info
->level_count
> 1) ? ILO_IMAGE_WALK_LAYER
:
540 img
->interleaved_samples
= false;
545 img_init_walk_gen6(struct ilo_image
*img
,
546 const struct ilo_image_params
*params
)
549 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
551 * "The separate stencil buffer does not support mip mapping, thus the
552 * storage for LODs other than LOD 0 is not needed. The following
553 * QPitch equation applies only to the separate stencil buffer:
557 * GEN6 does not support compact spacing otherwise.
560 (params
->info
->type
== GEN6_SURFTYPE_3D
) ? ILO_IMAGE_WALK_3D
:
561 (img
->format
== PIPE_FORMAT_S8_UINT
) ? ILO_IMAGE_WALK_LOD
:
562 ILO_IMAGE_WALK_LAYER
;
564 /* GEN6 supports only interleaved samples */
565 img
->interleaved_samples
= true;
569 img_init_walk(struct ilo_image
*img
,
570 const struct ilo_image_params
*params
)
572 if (ilo_dev_gen(params
->dev
) >= ILO_GEN(7))
573 img_init_walk_gen7(img
, params
);
575 img_init_walk_gen6(img
, params
);
579 img_get_valid_tilings(const struct ilo_image
*img
,
580 const struct ilo_image_params
*params
)
582 const struct ilo_image_info
*info
= params
->info
;
583 const enum pipe_format format
= img
->format
;
584 unsigned valid_tilings
= params
->valid_tilings
;
587 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
589 * "Display/Overlay Y-Major not supported.
590 * X-Major required for Async Flips"
592 if (unlikely(info
->bind_scanout
))
593 valid_tilings
&= IMAGE_TILING_X
;
596 * From the Sandy Bridge PRM, volume 3 part 2, page 158:
598 * "The cursor surface address must be 4K byte aligned. The cursor must
599 * be in linear memory, it cannot be tiled."
601 if (unlikely(info
->bind_cursor
))
602 valid_tilings
&= IMAGE_TILING_NONE
;
605 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
607 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
608 * Depth Buffer is not supported."
610 * "The Depth Buffer, if tiled, must use Y-Major tiling."
612 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
614 * "W-Major Tile Format is used for separate stencil."
618 case PIPE_FORMAT_S8_UINT
:
619 valid_tilings
&= IMAGE_TILING_W
;
622 valid_tilings
&= IMAGE_TILING_Y
;
627 if (info
->bind_surface_dp_render
) {
629 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
631 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
632 * either TileX or Linear."
634 * From the Haswell PRM, volume 5, page 32:
636 * "NOTE: 128 BPP format color buffer (render target) supports
637 * Linear, TiledX and TiledY."
639 if (ilo_dev_gen(params
->dev
) < ILO_GEN(7.5) && img
->block_size
== 16)
640 valid_tilings
&= ~IMAGE_TILING_Y
;
643 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
645 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
646 * for all tiled Y Render Target surfaces."
648 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
650 if (ilo_dev_gen(params
->dev
) >= ILO_GEN(7) &&
651 ilo_dev_gen(params
->dev
) <= ILO_GEN(7.5) &&
652 img
->format
== PIPE_FORMAT_R32G32B32_FLOAT
)
653 valid_tilings
&= ~IMAGE_TILING_Y
;
655 valid_tilings
&= ~IMAGE_TILING_W
;
658 if (info
->bind_surface_sampler
) {
659 if (ilo_dev_gen(params
->dev
) < ILO_GEN(8))
660 valid_tilings
&= ~IMAGE_TILING_W
;
663 /* no conflicting binding flags */
664 assert(valid_tilings
);
666 return valid_tilings
;
670 img_init_size_and_format(struct ilo_image
*img
,
671 struct ilo_image_params
*params
)
673 const struct ilo_image_info
*info
= params
->info
;
674 enum pipe_format format
= info
->format
;
675 bool require_separate_stencil
= false;
677 img
->type
= info
->type
;
678 img
->width0
= info
->width
;
679 img
->height0
= info
->height
;
680 img
->depth0
= info
->depth
;
681 img
->array_size
= info
->array_size
;
682 img
->level_count
= info
->level_count
;
683 img
->sample_count
= info
->sample_count
;
686 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
688 * "This field (Separate Stencil Buffer Enable) must be set to the same
689 * value (enabled or disabled) as Hierarchical Depth Buffer Enable."
691 * GEN7+ requires separate stencil buffers.
694 if (ilo_dev_gen(params
->dev
) >= ILO_GEN(7))
695 require_separate_stencil
= true;
697 require_separate_stencil
= (img
->aux
.type
== ILO_IMAGE_AUX_HIZ
);
701 case PIPE_FORMAT_ETC1_RGB8
:
702 format
= PIPE_FORMAT_R8G8B8X8_UNORM
;
704 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
705 if (require_separate_stencil
) {
706 format
= PIPE_FORMAT_Z24X8_UNORM
;
707 img
->separate_stencil
= true;
710 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
711 if (require_separate_stencil
) {
712 format
= PIPE_FORMAT_Z32_FLOAT
;
713 img
->separate_stencil
= true;
720 img
->format
= format
;
721 img
->block_width
= util_format_get_blockwidth(format
);
722 img
->block_height
= util_format_get_blockheight(format
);
723 img
->block_size
= util_format_get_blocksize(format
);
725 params
->valid_tilings
= img_get_valid_tilings(img
, params
);
726 params
->compressed
= util_format_is_compressed(img
->format
);
730 img_want_mcs(const struct ilo_image
*img
,
731 const struct ilo_image_params
*params
)
733 const struct ilo_image_info
*info
= params
->info
;
734 bool want_mcs
= false;
736 /* MCS is for RT on GEN7+ */
737 if (ilo_dev_gen(params
->dev
) < ILO_GEN(7))
740 if (info
->type
!= GEN6_SURFTYPE_2D
|| !info
->bind_surface_dp_render
)
744 * From the Ivy Bridge PRM, volume 4 part 1, page 77:
746 * "For Render Target and Sampling Engine Surfaces:If the surface is
747 * multisampled (Number of Multisamples any value other than
748 * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
750 * "This field must be set to 0 for all SINT MSRTs when all RT channels
753 if (info
->sample_count
> 1 && !util_format_is_pure_sint(info
->format
)) {
755 } else if (info
->sample_count
== 1 && !info
->aux_disable
) {
757 * From the Ivy Bridge PRM, volume 2 part 1, page 326:
759 * "When MCS is buffer is used for color clear of non-multisampler
760 * render target, the following restrictions apply.
761 * - Support is limited to tiled render targets.
762 * - Support is for non-mip-mapped and non-array surface types
764 * - Clear is supported only on the full RT; i.e., no partial clear
765 * or overlapping clears.
766 * - MCS buffer for non-MSRT is supported only for RT formats
767 * 32bpp, 64bpp and 128bpp.
770 if (img
->tiling
!= GEN6_TILING_NONE
&&
771 info
->level_count
== 1 && info
->array_size
== 1) {
772 switch (img
->block_size
) {
788 img_want_hiz(const struct ilo_image
*img
,
789 const struct ilo_image_params
*params
)
791 const struct ilo_image_info
*info
= params
->info
;
792 const struct util_format_description
*desc
=
793 util_format_description(info
->format
);
795 if (ilo_debug
& ILO_DEBUG_NOHIZ
)
798 if (info
->aux_disable
)
801 /* we want 8x4 aligned levels */
802 if (info
->type
== GEN6_SURFTYPE_1D
)
808 if (!util_format_has_depth(desc
))
812 * As can be seen in img_calculate_hiz_size(), HiZ may not be enabled
813 * for every level. This is generally fine except on GEN6, where HiZ and
814 * separate stencil are enabled and disabled at the same time. When the
815 * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
816 * can result in incompatible formats.
818 if (ilo_dev_gen(params
->dev
) == ILO_GEN(6) &&
819 info
->format
== PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
&&
820 info
->level_count
> 1)
827 img_init_aux(struct ilo_image
*img
,
828 const struct ilo_image_params
*params
)
830 if (img_want_hiz(img
, params
))
831 img
->aux
.type
= ILO_IMAGE_AUX_HIZ
;
832 else if (img_want_mcs(img
, params
))
833 img
->aux
.type
= ILO_IMAGE_AUX_MCS
;
837 img_align(struct ilo_image
*img
, struct ilo_image_params
*params
)
839 const struct ilo_image_info
*info
= params
->info
;
840 int align_w
= 1, align_h
= 1, pad_h
= 0;
843 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
845 * "To determine the necessary padding on the bottom and right side of
846 * the surface, refer to the table in Section 7.18.3.4 for the i and j
847 * parameters for the surface format in use. The surface must then be
848 * extended to the next multiple of the alignment unit size in each
849 * dimension, and all texels contained in this extended surface must
850 * have valid GTT entries."
852 * "For cube surfaces, an additional two rows of padding are required
853 * at the bottom of the surface. This must be ensured regardless of
854 * whether the surface is stored tiled or linear. This is due to the
855 * potential rotation of cache line orientation from memory to cache."
857 * "For compressed textures (BC* and FXT1 surface formats), padding at
858 * the bottom of the surface is to an even compressed row, which is
859 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
860 * purposes, these surfaces behave as if j = 8 only for surface
861 * padding purposes. The value of 4 for j still applies for mip level
862 * alignment and QPitch calculation."
864 if (info
->bind_surface_sampler
) {
865 align_w
= MAX2(align_w
, img
->align_i
);
866 align_h
= MAX2(align_h
, img
->align_j
);
868 if (info
->type
== GEN6_SURFTYPE_CUBE
)
871 if (params
->compressed
)
872 align_h
= MAX2(align_h
, img
->align_j
* 2);
876 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
878 * "If the surface contains an odd number of rows of data, a final row
879 * below the surface must be allocated."
881 if (info
->bind_surface_dp_render
)
882 align_h
= MAX2(align_h
, 2);
885 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. Pad to allow HiZ
886 * for unaligned non-mipmapped and non-array images.
888 if (img
->aux
.type
== ILO_IMAGE_AUX_HIZ
&&
889 info
->level_count
== 1 &&
890 info
->array_size
== 1 &&
892 align_w
= MAX2(align_w
, 8);
893 align_h
= MAX2(align_h
, 4);
896 params
->max_x
= align(params
->max_x
, align_w
);
897 params
->max_y
= align(params
->max_y
+ pad_h
, align_h
);
900 /* note that this may force the texture to be linear */
902 img_calculate_bo_size(struct ilo_image
*img
,
903 const struct ilo_image_params
*params
)
905 assert(params
->max_x
% img
->block_width
== 0);
906 assert(params
->max_y
% img
->block_height
== 0);
907 assert(img
->walk_layer_height
% img
->block_height
== 0);
910 (params
->max_x
/ img
->block_width
) * img
->block_size
;
911 img
->bo_height
= params
->max_y
/ img
->block_height
;
914 unsigned w
= img
->bo_stride
, h
= img
->bo_height
;
915 unsigned align_w
, align_h
;
918 * From the Haswell PRM, volume 5, page 163:
920 * "For linear surfaces, additional padding of 64 bytes is required
921 * at the bottom of the surface. This is in addition to the padding
924 if (ilo_dev_gen(params
->dev
) >= ILO_GEN(7.5) &&
925 params
->info
->bind_surface_sampler
&&
926 img
->tiling
== GEN6_TILING_NONE
)
927 h
+= (64 + img
->bo_stride
- 1) / img
->bo_stride
;
930 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
932 * "- For linear render target surfaces, the pitch must be a
933 * multiple of the element size for non-YUV surface formats.
934 * Pitch must be a multiple of 2 * element size for YUV surface
936 * - For other linear surfaces, the pitch can be any multiple of
938 * - For tiled surfaces, the pitch must be a multiple of the tile
941 * Different requirements may exist when the bo is used in different
942 * places, but our alignments here should be good enough that we do not
943 * need to check params->info->bind_x.
945 switch (img
->tiling
) {
956 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
958 * "A 4KB tile is subdivided into 8-high by 8-wide array of
959 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
966 assert(img
->tiling
== GEN6_TILING_NONE
);
967 /* some good enough values */
973 w
= align(w
, align_w
);
974 h
= align(h
, align_h
);
976 /* make sure the bo is mappable */
977 if (img
->tiling
!= GEN6_TILING_NONE
) {
979 * Usually only the first 256MB of the GTT is mappable.
981 * See also how intel_context::max_gtt_map_object_size is calculated.
983 const size_t mappable_gtt_size
= 256 * 1024 * 1024;
986 * Be conservative. We may be able to switch from VALIGN_4 to
987 * VALIGN_2 if the image was Y-tiled, but let's keep it simple.
989 if (mappable_gtt_size
/ w
/ 4 < h
) {
990 if (params
->valid_tilings
& IMAGE_TILING_NONE
) {
991 img
->tiling
= GEN6_TILING_NONE
;
992 /* MCS support for non-MSRTs is limited to tiled RTs */
993 if (img
->aux
.type
== ILO_IMAGE_AUX_MCS
&&
994 params
->info
->sample_count
== 1)
995 img
->aux
.type
= ILO_IMAGE_AUX_NONE
;
999 ilo_warn("cannot force texture to be linear\n");
1011 img_calculate_hiz_size(struct ilo_image
*img
,
1012 const struct ilo_image_params
*params
)
1014 const struct ilo_image_info
*info
= params
->info
;
1015 const unsigned hz_align_j
= 8;
1016 enum ilo_image_walk_type hz_walk
;
1017 unsigned hz_width
, hz_height
, lv
;
1018 unsigned hz_clear_w
, hz_clear_h
;
1020 assert(img
->aux
.type
== ILO_IMAGE_AUX_HIZ
);
1022 assert(img
->walk
== ILO_IMAGE_WALK_LAYER
||
1023 img
->walk
== ILO_IMAGE_WALK_3D
);
1026 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
1028 * "The hierarchical depth buffer does not support the LOD field, it is
1029 * assumed by hardware to be zero. A separate hierarachical depth
1030 * buffer is required for each LOD used, and the corresponding
1031 * buffer's state delivered to hardware each time a new depth buffer
1032 * state with modified LOD is delivered."
1034 * We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD.
1036 if (ilo_dev_gen(params
->dev
) >= ILO_GEN(7))
1037 hz_walk
= img
->walk
;
1039 hz_walk
= ILO_IMAGE_WALK_LOD
;
1042 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1043 * PRM, volume 2 part 1, page 312-313.
1045 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1049 case ILO_IMAGE_WALK_LAYER
:
1051 const unsigned h0
= align(params
->h0
, hz_align_j
);
1052 const unsigned h1
= align(params
->h1
, hz_align_j
);
1053 const unsigned htail
=
1054 ((ilo_dev_gen(params
->dev
) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j
;
1055 const unsigned hz_qpitch
= h0
+ h1
+ htail
;
1057 hz_width
= align(img
->lods
[0].slice_width
, 16);
1059 hz_height
= hz_qpitch
* info
->array_size
/ 2;
1060 if (ilo_dev_gen(params
->dev
) >= ILO_GEN(7))
1061 hz_height
= align(hz_height
, 8);
1063 img
->aux
.walk_layer_height
= hz_qpitch
;
1066 case ILO_IMAGE_WALK_LOD
:
1068 unsigned lod_tx
[PIPE_MAX_TEXTURE_LEVELS
];
1069 unsigned lod_ty
[PIPE_MAX_TEXTURE_LEVELS
];
1070 unsigned cur_tx
, cur_ty
;
1072 /* figure out the tile offsets of LODs */
1077 for (lv
= 0; lv
< info
->level_count
; lv
++) {
1080 lod_tx
[lv
] = cur_tx
;
1081 lod_ty
[lv
] = cur_ty
;
1083 tw
= align(img
->lods
[lv
].slice_width
, 16);
1084 th
= align(img
->lods
[lv
].slice_height
, hz_align_j
) *
1085 info
->array_size
/ 2;
1086 /* convert to Y-tiles */
1087 tw
= align(tw
, 128) / 128;
1088 th
= align(th
, 32) / 32;
1090 if (hz_width
< cur_tx
+ tw
)
1091 hz_width
= cur_tx
+ tw
;
1092 if (hz_height
< cur_ty
+ th
)
1093 hz_height
= cur_ty
+ th
;
1101 /* convert tile offsets to memory offsets */
1102 for (lv
= 0; lv
< info
->level_count
; lv
++) {
1103 img
->aux
.walk_lod_offsets
[lv
] =
1104 (lod_ty
[lv
] * hz_width
+ lod_tx
[lv
]) * 4096;
1110 case ILO_IMAGE_WALK_3D
:
1111 hz_width
= align(img
->lods
[0].slice_width
, 16);
1114 for (lv
= 0; lv
< info
->level_count
; lv
++) {
1115 const unsigned h
= align(img
->lods
[lv
].slice_height
, hz_align_j
);
1116 /* according to the formula, slices are packed together vertically */
1117 hz_height
+= h
* u_minify(info
->depth
, lv
);
1122 assert(!"unknown HiZ walk");
1129 * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
1130 * Experiments on Haswell show that aligning the RECTLIST primitive and
1131 * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be
1136 switch (info
->sample_count
) {
1157 for (lv
= 0; lv
< info
->level_count
; lv
++) {
1158 if (u_minify(img
->width0
, lv
) % hz_clear_w
||
1159 u_minify(img
->height0
, lv
) % hz_clear_h
)
1161 img
->aux
.enables
|= 1 << lv
;
1164 /* we padded to allow this in img_align() */
1165 if (info
->level_count
== 1 && info
->array_size
== 1 && info
->depth
== 1)
1166 img
->aux
.enables
|= 0x1;
1168 /* align to Y-tile */
1169 img
->aux
.bo_stride
= align(hz_width
, 128);
1170 img
->aux
.bo_height
= align(hz_height
, 32);
/*
 * Compute the size of the MCS auxiliary buffer for \p img.
 *
 * The image must already have aux.type set to ILO_IMAGE_AUX_MCS (asserted
 * below).  The MCS width, height and bytes-per-element are derived
 * differently depending on whether info->sample_count is greater than one,
 * and the results are stored in img->aux.bo_stride and img->aux.bo_height.
 * img->aux.enables is set to a mask with one bit per mip level.
 */
1174 img_calculate_mcs_size(struct ilo_image
*img
,
1175 const struct ilo_image_params
*params
)
1177 const struct ilo_image_info
*info
= params
->info
;
1178 int mcs_width
, mcs_height
, mcs_cpp
;
1179 int downscale_x
, downscale_y
;
1181 assert(img
->aux
.type
== ILO_IMAGE_AUX_MCS
);
1183 if (info
->sample_count
> 1) {
1185 * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1186 * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
1187 * need of scale down could be that the clear rectangle is used to clear
1188 * the MCS instead of the RT.
1190 * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
1191 * 2x2 factor could come from that the hardware writes 128 bits (an
1192 * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1193 * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
1194 * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1195 * pixel block in the RT.
1197 switch (info
->sample_count
) {
1215 assert(!"unsupported sample count");
1221 * It also appears that the 2x2 subspans generated by the scaled-down
1222 * clear rectangle cannot be masked. The scale-down clear rectangle
1223 * thus must be aligned to 2x2, and we need to pad.
1225 mcs_width
= align(img
->width0
, downscale_x
* 2);
1226 mcs_height
= align(img
->height0
, downscale_y
* 2);
1229 * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1244 * This table and the two following tables define the RT alignments, the
1245 * clear rectangle alignments, and the clear rectangle scale factors.
1246 * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1247 * that the clear rectangle alignments are 16x32 blocks, and the clear
1248 * rectangle scale factors are 8x16 blocks.
1250 * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1251 * RT. Similar to the MSAA cases, we can argue that an OWord maps to
1254 * One problem with this reasoning is that a Y-tile in MCS has 8x32
1255 * OWords and maps to 64x512 128-byte blocks. This differs from i965,
1256 * which says that a Y-tile maps to 128x256 blocks (\see
1257 * intel_get_non_msrt_mcs_alignment). It does not really change
1258 * anything except for the size of the allocated MCS. Let's see if we
1259 * hit out-of-bound access.
1261 switch (img
->tiling
) {
1263 downscale_x
= 64 / img
->block_size
;
1267 downscale_x
= 32 / img
->block_size
;
1271 assert(!"unsupported tiling mode");
1280 * From the Haswell PRM, volume 7, page 652:
1282 * "Clear rectangle must be aligned to two times the number of
1283 * pixels in the table shown below due to 16X16 hashing across the
1286 * The scaled-down clear rectangle must be aligned to 4x4 instead of
1287 * 2x2, and we need to pad.
/* pad to 4x the scale factor, then divide to get scaled-down units */
1289 mcs_width
= align(img
->width0
, downscale_x
* 4) / downscale_x
;
1290 mcs_height
= align(img
->height0
, downscale_y
* 4) / downscale_y
;
1291 mcs_cpp
= 16; /* an OWord */
/* set the low level_count bits: one enable bit for every mip level */
1294 img
->aux
.enables
= (1 << info
->level_count
) - 1;
1295 /* align to Y-tile */
1296 img
->aux
.bo_stride
= align(mcs_width
* mcs_cpp
, 128);
1297 img
->aux
.bo_height
= align(mcs_height
, 32);
/*
 * Initialize \p img from \p params by running the layout steps in a fixed
 * order, then size the auxiliary buffer that matches img->aux.type.
 *
 * The order of the calls below must not be changed; each step consumes
 * state written by the earlier ones.  ilo_image_init() treats the result as
 * a success flag (it checks !img_init(...)).
 */
1301 img_init(struct ilo_image
*img
,
1302 struct ilo_image_params
*params
)
1304 /* there are hard dependencies between every function here */
1306 img_init_aux(img
, params
);
1307 img_init_size_and_format(img
, params
);
1308 img_init_walk(img
, params
);
1309 img_init_tiling(img
, params
);
1310 img_init_alignments(img
, params
);
1311 img_init_lods(img
, params
);
1312 img_init_layer_height(img
, params
);
1314 img_align(img
, params
);
1315 img_calculate_bo_size(img
, params
);
/* remember whether the image was requested with the scanout binding */
1317 img
->scanout
= params
->info
->bind_scanout
;
/* only HiZ and MCS auxiliary buffers need their size computed here */
1319 switch (img
->aux
.type
) {
1320 case ILO_IMAGE_AUX_HIZ
:
1321 img_calculate_hiz_size(img
, params
);
1323 case ILO_IMAGE_AUX_MCS
:
1324 img_calculate_mcs_size(img
, params
);
1334 * The texture is for transfer only. We can define our own layout to save
/*
 * Fill in \p img with an untiled (GEN6_TILING_NONE), packed layout that has
 * no auxiliary buffer.  Only single-level, single-sample images are accepted
 * (asserted below).  \p dev is not referenced by the visible statements.
 */
1338 img_init_for_transfer(struct ilo_image
*img
,
1339 const struct ilo_dev
*dev
,
1340 const struct ilo_image_info
*info
)
/* 3D images count depth slices as layers; all other types use array_size */
1342 const unsigned num_layers
= (info
->type
== GEN6_SURFTYPE_3D
) ?
1343 info
->depth
: info
->array_size
;
1344 unsigned layer_width
, layer_height
;
1346 assert(info
->level_count
== 1);
1347 assert(info
->sample_count
== 1);
1349 img
->aux
.type
= ILO_IMAGE_AUX_NONE
;
/* copy the basic description straight from the creation info */
1351 img
->type
= info
->type
;
1352 img
->width0
= info
->width
;
1353 img
->height0
= info
->height
;
1354 img
->depth0
= info
->depth
;
1355 img
->array_size
= info
->array_size
;
1356 img
->level_count
= 1;
1357 img
->sample_count
= 1;
1359 img
->format
= info
->format
;
1360 img
->block_width
= util_format_get_blockwidth(info
->format
);
1361 img
->block_height
= util_format_get_blockheight(info
->format
);
1362 img
->block_size
= util_format_get_blocksize(info
->format
);
1364 img
->walk
= ILO_IMAGE_WALK_LOD
;
1366 img
->tiling
= GEN6_TILING_NONE
;
/* the only alignment requirement is a whole format block */
1368 img
->align_i
= img
->block_width
;
1369 img
->align_j
= img
->block_height
;
1371 assert(util_is_power_of_two(img
->block_width
) &&
1372 util_is_power_of_two(img
->block_height
));
1374 /* use packed layout */
1375 layer_width
= align(info
->width
, img
->align_i
);
1376 layer_height
= align(info
->height
, img
->align_j
);
1378 img
->lods
[0].slice_width
= layer_width
;
1379 img
->lods
[0].slice_height
= layer_height
;
/* stride: blocks per row times block_size, then rounded up to 64 */
1381 img
->bo_stride
= (layer_width
/ img
->block_width
) * img
->block_size
;
1382 img
->bo_stride
= align(img
->bo_stride
, 64);
/* height: rows of blocks in one layer times the number of layers */
1384 img
->bo_height
= (layer_height
/ img
->block_height
) * num_layers
;
/*
 * Report whether \p info requests at least one GPU binding.  The result is
 * the logical OR of the bind_* flags listed below.  ilo_image_init() uses it
 * to decide whether the transfer-only layout can be used.
 */
1388 img_is_bind_gpu(const struct ilo_image_info
*info
)
1390 return (info
->bind_surface_sampler
||
1391 info
->bind_surface_dp_render
||
1392 info
->bind_surface_dp_typed
||
1394 info
->bind_scanout
||
1399 * Initialize the image. Callers should zero-initialize \p img first.
1402 ilo_image_init(struct ilo_image
*img
,
1403 const struct ilo_dev
*dev
,
1404 const struct ilo_image_info
*info
)
1406 struct ilo_image_params params
;
1408 assert(ilo_is_zeroed(img
, sizeof(*img
)));
1410 /* use transfer layout when the texture is never bound to GPU */
1411 if (!img_is_bind_gpu(info
) &&
1412 info
->level_count
== 1 &&
1413 info
->sample_count
== 1) {
1414 img_init_for_transfer(img
, dev
, info
);
1418 memset(¶ms
, 0, sizeof(params
));
1421 params
.valid_tilings
= (info
->valid_tilings
) ?
1422 info
->valid_tilings
: IMAGE_TILING_ALL
;
1424 if (!img_init(img
, ¶ms
))
1427 if (info
->force_bo_stride
) {
1428 if ((img
->tiling
== GEN6_TILING_X
&& info
->force_bo_stride
% 512) ||
1429 (img
->tiling
== GEN6_TILING_Y
&& info
->force_bo_stride
% 128) ||
1430 (img
->tiling
== GEN8_TILING_W
&& info
->force_bo_stride
% 64))
1433 if (img
->bo_stride
> info
->force_bo_stride
)
1436 img
->bo_stride
= info
->force_bo_stride
;