2 * Mesa 3-D graphics library
4 * Copyright (C) 2014 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "ilo_layout.h"
31 LAYOUT_TILING_NONE
= 1 << INTEL_TILING_NONE
,
32 LAYOUT_TILING_X
= 1 << INTEL_TILING_X
,
33 LAYOUT_TILING_Y
= 1 << INTEL_TILING_Y
,
34 LAYOUT_TILING_W
= 1 << (INTEL_TILING_Y
+ 1),
36 LAYOUT_TILING_ALL
= (LAYOUT_TILING_NONE
|
42 struct ilo_layout_params
{
43 const struct ilo_dev_info
*dev
;
44 const struct pipe_resource
*templ
;
49 unsigned max_x
, max_y
;
53 layout_get_slice_size(const struct ilo_layout
*layout
,
54 const struct ilo_layout_params
*params
,
55 unsigned level
, unsigned *width
, unsigned *height
)
57 const struct pipe_resource
*templ
= params
->templ
;
60 w
= u_minify(layout
->width0
, level
);
61 h
= u_minify(layout
->height0
, level
);
64 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
66 * "The dimensions of the mip maps are first determined by applying the
67 * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
68 * if necessary, they are padded out to compression block boundaries."
70 w
= align(w
, layout
->block_width
);
71 h
= align(h
, layout
->block_height
);
74 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
76 * "If the surface is multisampled (4x), these values must be adjusted
77 * as follows before proceeding:
79 * W_L = ceiling(W_L / 2) * 4
80 * H_L = ceiling(H_L / 2) * 4"
82 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
84 * "If the surface is multisampled and it is a depth or stencil surface
85 * or Multisampled Surface StorageFormat in SURFACE_STATE is
86 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
89 * #samples W_L = H_L =
90 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
91 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
92 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
93 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
95 * For interleaved samples (4x), where pixels
100 * would be is occupied by
102 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
103 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
104 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
105 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
109 * w = align(w, 2) * 2;
110 * h = align(h, 2) * 2;
112 if (layout
->interleaved_samples
) {
113 switch (templ
->nr_samples
) {
133 assert(!"unsupported sample count");
139 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
141 * "For separate stencil buffer, the width must be multiplied by 2 and
142 * height divided by 2..."
144 * To make things easier (for transfer), we will just double the stencil
145 * stride in 3DSTATE_STENCIL_BUFFER.
147 w
= align(w
, layout
->align_i
);
148 h
= align(h
, layout
->align_j
);
155 layout_get_num_layers(const struct ilo_layout
*layout
,
156 const struct ilo_layout_params
*params
)
158 const struct pipe_resource
*templ
= params
->templ
;
159 unsigned num_layers
= templ
->array_size
;
161 /* samples of the same index are stored in a layer */
162 if (templ
->nr_samples
> 1 && !layout
->interleaved_samples
)
163 num_layers
*= templ
->nr_samples
;
169 layout_init_layer_height(struct ilo_layout
*layout
,
170 struct ilo_layout_params
*params
)
172 const struct pipe_resource
*templ
= params
->templ
;
175 if (layout
->walk
!= ILO_LAYOUT_WALK_LAYER
)
178 num_layers
= layout_get_num_layers(layout
, params
);
183 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
185 * "The following equation is used for surface formats other than
186 * compressed textures:
188 * QPitch = (h0 + h1 + 11j)"
190 * "The equation for compressed textures (BC* and FXT1 surface formats)
193 * QPitch = (h0 + h1 + 11j) / 4"
195 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
196 * value calculated in the equation above, for every other odd Surface
197 * Height starting from 1 i.e. 1,5,9,13"
199 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
201 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
202 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
204 * QPitch = (h0 + h1 + 12j)
205 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
207 * (There are many typos or missing words here...)"
209 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
210 * the base address. The PRM divides QPitch by 4 for compressed formats
211 * because the block height for those formats is 4, and it wants QPitch to
212 * mean the number of memory rows, as opposed to texel rows, between
213 * slices. Since we use texel rows everywhere, we do not need to divide
216 layout
->layer_height
= params
->h0
+ params
->h1
+
217 ((params
->dev
->gen
>= ILO_GEN(7)) ? 12 : 11) * layout
->align_j
;
219 if (params
->dev
->gen
== ILO_GEN(6) && templ
->nr_samples
> 1 &&
220 layout
->height0
% 4 == 1)
221 layout
->layer_height
+= 4;
223 params
->max_y
+= layout
->layer_height
* (num_layers
- 1);
227 layout_init_lods(struct ilo_layout
*layout
,
228 struct ilo_layout_params
*params
)
230 const struct pipe_resource
*templ
= params
->templ
;
231 unsigned cur_x
, cur_y
;
236 for (lv
= 0; lv
<= templ
->last_level
; lv
++) {
237 unsigned lod_w
, lod_h
;
239 layout_get_slice_size(layout
, params
, lv
, &lod_w
, &lod_h
);
241 layout
->lods
[lv
].x
= cur_x
;
242 layout
->lods
[lv
].y
= cur_y
;
243 layout
->lods
[lv
].slice_width
= lod_w
;
244 layout
->lods
[lv
].slice_height
= lod_h
;
246 switch (layout
->walk
) {
247 case ILO_LAYOUT_WALK_LOD
:
248 lod_h
*= layout_get_num_layers(layout
, params
);
254 /* every LOD begins at tile boundaries */
255 if (templ
->last_level
> 0) {
256 assert(layout
->format
== PIPE_FORMAT_S8_UINT
);
257 cur_x
= align(cur_x
, 64);
258 cur_y
= align(cur_y
, 64);
261 case ILO_LAYOUT_WALK_LAYER
:
262 /* MIPLAYOUT_BELOW */
268 case ILO_LAYOUT_WALK_3D
:
270 const unsigned num_slices
= u_minify(templ
->depth0
, lv
);
271 const unsigned num_slices_per_row
= 1 << lv
;
272 const unsigned num_rows
=
273 (num_slices
+ num_slices_per_row
- 1) / num_slices_per_row
;
275 lod_w
*= num_slices_per_row
;
283 if (params
->max_x
< layout
->lods
[lv
].x
+ lod_w
)
284 params
->max_x
= layout
->lods
[lv
].x
+ lod_w
;
285 if (params
->max_y
< layout
->lods
[lv
].y
+ lod_h
)
286 params
->max_y
= layout
->lods
[lv
].y
+ lod_h
;
289 if (layout
->walk
== ILO_LAYOUT_WALK_LAYER
) {
290 params
->h0
= layout
->lods
[0].slice_height
;
292 if (templ
->last_level
> 0)
293 params
->h1
= layout
->lods
[1].slice_height
;
295 layout_get_slice_size(layout
, params
, 1, &cur_x
, &params
->h1
);
300 layout_init_alignments(struct ilo_layout
*layout
,
301 struct ilo_layout_params
*params
)
303 const struct pipe_resource
*templ
= params
->templ
;
306 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
308 * "surface format align_i align_j
309 * YUV 4:2:2 formats 4 *see below
312 * all other formats 4 *see below"
314 * "- align_j = 4 for any depth buffer
315 * - align_j = 2 for separate stencil buffer
316 * - align_j = 4 for any render target surface is multisampled (4x)
317 * - align_j = 4 for any render target surface with Surface Vertical
318 * Alignment = VALIGN_4
319 * - align_j = 2 for any render target surface with Surface Vertical
320 * Alignment = VALIGN_2
321 * - align_j = 2 for all other render target surface
322 * - align_j = 2 for any sampling engine surface with Surface Vertical
323 * Alignment = VALIGN_2
324 * - align_j = 4 for any sampling engine surface with Surface Vertical
325 * Alignment = VALIGN_4"
327 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
329 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
330 * the Surface Format is 96 bits per element (BPE)."
332 * They can be rephrased as
335 * compressed formats block width block height
336 * PIPE_FORMAT_S8_UINT 4 2
337 * other depth/stencil formats 4 4
338 * 4x multisampled 4 4
344 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
346 * "surface defined by surface format align_i align_j
347 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
349 * 3DSTATE_STENCIL_BUFFER N/A 8 8
350 * SURFACE_STATE BC*, ETC*, EAC* 4 4
352 * all others (set by SURFACE_STATE)"
354 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
356 * "- This field (Surface Vertical Alignment) is intended to be set to
357 * VALIGN_4 if the surface was rendered as a depth buffer, for a
358 * multisampled (4x) render target, or for a multisampled (8x)
359 * render target, since these surfaces support only alignment of 4.
360 * - Use of VALIGN_4 for other surfaces is supported, but uses more
362 * - This field must be set to VALIGN_4 for all tiled Y Render Target
364 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
365 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
366 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
367 * must be set to VALIGN_4."
368 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
370 * "- This field (Surface Horizontal Alignment) is intended to be set to
371 * HALIGN_8 only if the surface was rendered as a depth buffer with
372 * Z16 format or a stencil buffer, since these surfaces support only
374 * - Use of HALIGN_8 for other surfaces is supported, but uses more
376 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
377 * - This field must be set to HALIGN_8 if the Surface Format is
380 * They can be rephrased as
383 * compressed formats block width block height
384 * PIPE_FORMAT_Z16_UNORM 8 4
385 * PIPE_FORMAT_S8_UINT 8 8
386 * other depth/stencil formats 4 4
387 * 2x or 4x multisampled 4 or 8 4
388 * tiled Y 4 or 8 4 (if rt)
389 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
390 * others 4 or 8 2 or 4
393 if (params
->compressed
) {
394 /* this happens to be the case */
395 layout
->align_i
= layout
->block_width
;
396 layout
->align_j
= layout
->block_height
;
397 } else if (templ
->bind
& PIPE_BIND_DEPTH_STENCIL
) {
398 if (params
->dev
->gen
>= ILO_GEN(7)) {
399 switch (layout
->format
) {
400 case PIPE_FORMAT_Z16_UNORM
:
404 case PIPE_FORMAT_S8_UINT
:
414 switch (layout
->format
) {
415 case PIPE_FORMAT_S8_UINT
:
426 const bool valign_4
= (templ
->nr_samples
> 1) ||
427 (params
->dev
->gen
>= ILO_GEN(7) &&
428 layout
->tiling
== INTEL_TILING_Y
&&
429 (templ
->bind
& PIPE_BIND_RENDER_TARGET
));
432 assert(layout
->block_size
!= 12);
435 layout
->align_j
= (valign_4
) ? 4 : 2;
439 * the fact that align i and j are multiples of block width and height
440 * respectively is what makes the size of the bo a multiple of the block
441 * size, slices start at block boundaries, and many of the computations
444 assert(layout
->align_i
% layout
->block_width
== 0);
445 assert(layout
->align_j
% layout
->block_height
== 0);
447 /* make sure align() works */
448 assert(util_is_power_of_two(layout
->align_i
) &&
449 util_is_power_of_two(layout
->align_j
));
450 assert(util_is_power_of_two(layout
->block_width
) &&
451 util_is_power_of_two(layout
->block_height
));
455 layout_get_valid_tilings(const struct ilo_layout
*layout
,
456 const struct ilo_layout_params
*params
)
458 const struct pipe_resource
*templ
= params
->templ
;
459 const enum pipe_format format
= layout
->format
;
460 unsigned valid_tilings
= LAYOUT_TILING_ALL
;
463 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
465 * "Display/Overlay Y-Major not supported.
466 * X-Major required for Async Flips"
468 if (unlikely(templ
->bind
& PIPE_BIND_SCANOUT
))
469 valid_tilings
&= LAYOUT_TILING_X
;
472 * From the Sandy Bridge PRM, volume 3 part 2, page 158:
474 * "The cursor surface address must be 4K byte aligned. The cursor must
475 * be in linear memory, it cannot be tiled."
477 if (unlikely(templ
->bind
& (PIPE_BIND_CURSOR
| PIPE_BIND_LINEAR
)))
478 valid_tilings
&= LAYOUT_TILING_NONE
;
481 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
483 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
484 * Depth Buffer is not supported."
486 * "The Depth Buffer, if tiled, must use Y-Major tiling."
488 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
490 * "W-Major Tile Format is used for separate stencil."
492 if (templ
->bind
& PIPE_BIND_DEPTH_STENCIL
) {
494 case PIPE_FORMAT_S8_UINT
:
495 valid_tilings
&= LAYOUT_TILING_W
;
498 valid_tilings
&= LAYOUT_TILING_Y
;
503 if (templ
->bind
& PIPE_BIND_RENDER_TARGET
) {
505 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
507 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
508 * either TileX or Linear."
510 if (layout
->block_size
== 16)
511 valid_tilings
&= ~LAYOUT_TILING_Y
;
514 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
516 * "This field (Surface Vertical Alignment) must be set to VALIGN_4
517 * for all tiled Y Render Target surfaces."
519 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
521 if (params
->dev
->gen
>= ILO_GEN(7) && layout
->block_size
== 12)
522 valid_tilings
&= ~LAYOUT_TILING_Y
;
525 /* no conflicting binding flags */
526 assert(valid_tilings
);
528 return valid_tilings
;
532 layout_init_tiling(struct ilo_layout
*layout
,
533 struct ilo_layout_params
*params
)
535 const struct pipe_resource
*templ
= params
->templ
;
536 unsigned valid_tilings
= layout_get_valid_tilings(layout
, params
);
538 /* no hardware support for W-tile */
539 if (valid_tilings
& LAYOUT_TILING_W
)
540 valid_tilings
= (valid_tilings
& ~LAYOUT_TILING_W
) | LAYOUT_TILING_NONE
;
542 layout
->valid_tilings
= valid_tilings
;
544 if (templ
->bind
& (PIPE_BIND_RENDER_TARGET
| PIPE_BIND_SAMPLER_VIEW
)) {
546 * heuristically set a minimum width/height for enabling tiling
548 if (layout
->width0
< 64 && (valid_tilings
& ~LAYOUT_TILING_X
))
549 valid_tilings
&= ~LAYOUT_TILING_X
;
551 if ((layout
->width0
< 32 || layout
->height0
< 16) &&
552 (layout
->width0
< 16 || layout
->height0
< 32) &&
553 (valid_tilings
& ~LAYOUT_TILING_Y
))
554 valid_tilings
&= ~LAYOUT_TILING_Y
;
556 /* force linear if we are not sure where the texture is bound to */
557 if (valid_tilings
& LAYOUT_TILING_NONE
)
558 valid_tilings
&= LAYOUT_TILING_NONE
;
561 /* prefer tiled over linear */
562 if (valid_tilings
& LAYOUT_TILING_Y
)
563 layout
->tiling
= INTEL_TILING_Y
;
564 else if (valid_tilings
& LAYOUT_TILING_X
)
565 layout
->tiling
= INTEL_TILING_X
;
567 layout
->tiling
= INTEL_TILING_NONE
;
571 layout_init_walk_gen7(struct ilo_layout
*layout
,
572 struct ilo_layout_params
*params
)
574 const struct pipe_resource
*templ
= params
->templ
;
577 * It is not explicitly stated, but render targets are expected to be
578 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
579 * to be IMS (samples interleaved).
581 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
583 if (templ
->bind
& PIPE_BIND_DEPTH_STENCIL
) {
585 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
587 * "note that the depth buffer and stencil buffer have an implied
588 * value of ARYSPC_FULL"
590 layout
->walk
= (templ
->target
== PIPE_TEXTURE_3D
) ?
591 ILO_LAYOUT_WALK_3D
: ILO_LAYOUT_WALK_LAYER
;
593 layout
->interleaved_samples
= true;
596 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
598 * "If Multisampled Surface Storage Format is MSFMT_MSS and Number
599 * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
600 * Array Spacing) must be set to ARYSPC_LOD0."
602 * As multisampled resources are not mipmapped, we never use
603 * ARYSPC_FULL for them.
605 if (templ
->nr_samples
> 1)
606 assert(templ
->last_level
== 0);
609 (templ
->target
== PIPE_TEXTURE_3D
) ? ILO_LAYOUT_WALK_3D
:
610 (templ
->last_level
> 0) ? ILO_LAYOUT_WALK_LAYER
:
613 layout
->interleaved_samples
= false;
618 layout_init_walk_gen6(struct ilo_layout
*layout
,
619 struct ilo_layout_params
*params
)
622 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
624 * "The separate stencil buffer does not support mip mapping, thus the
625 * storage for LODs other than LOD 0 is not needed. The following
626 * QPitch equation applies only to the separate stencil buffer:
630 * GEN6 does not support compact spacing otherwise.
633 (params
->templ
->target
== PIPE_TEXTURE_3D
) ? ILO_LAYOUT_WALK_3D
:
634 (layout
->format
== PIPE_FORMAT_S8_UINT
) ? ILO_LAYOUT_WALK_LOD
:
635 ILO_LAYOUT_WALK_LAYER
;
637 /* GEN6 supports only interleaved samples */
638 layout
->interleaved_samples
= true;
642 layout_init_walk(struct ilo_layout
*layout
,
643 struct ilo_layout_params
*params
)
645 if (params
->dev
->gen
>= ILO_GEN(7))
646 layout_init_walk_gen7(layout
, params
);
648 layout_init_walk_gen6(layout
, params
);
652 layout_init_size_and_format(struct ilo_layout
*layout
,
653 struct ilo_layout_params
*params
)
655 const struct pipe_resource
*templ
= params
->templ
;
656 enum pipe_format format
= templ
->format
;
657 bool require_separate_stencil
;
659 layout
->width0
= templ
->width0
;
660 layout
->height0
= templ
->height0
;
663 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
665 * "This field (Separate Stencil Buffer Enable) must be set to the same
666 * value (enabled or disabled) as Hierarchical Depth Buffer Enable."
668 * GEN7+ requires separate stencil buffers.
670 if (templ
->bind
& PIPE_BIND_DEPTH_STENCIL
) {
671 if (params
->dev
->gen
>= ILO_GEN(7))
672 require_separate_stencil
= true;
674 require_separate_stencil
= (layout
->aux
== ILO_LAYOUT_AUX_HIZ
);
678 case PIPE_FORMAT_ETC1_RGB8
:
679 format
= PIPE_FORMAT_R8G8B8X8_UNORM
;
681 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
682 if (require_separate_stencil
) {
683 format
= PIPE_FORMAT_Z24X8_UNORM
;
684 layout
->separate_stencil
= true;
687 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
688 if (require_separate_stencil
) {
689 format
= PIPE_FORMAT_Z32_FLOAT
;
690 layout
->separate_stencil
= true;
697 layout
->format
= format
;
698 layout
->block_width
= util_format_get_blockwidth(format
);
699 layout
->block_height
= util_format_get_blockheight(format
);
700 layout
->block_size
= util_format_get_blocksize(format
);
702 params
->compressed
= util_format_is_compressed(format
);
706 layout_want_mcs(struct ilo_layout
*layout
,
707 struct ilo_layout_params
*params
)
709 const struct pipe_resource
*templ
= params
->templ
;
710 bool want_mcs
= false;
712 /* MCS is for RT on GEN7+ */
713 if (params
->dev
->gen
< ILO_GEN(7))
716 if (templ
->target
!= PIPE_TEXTURE_2D
||
717 !(templ
->bind
& PIPE_BIND_RENDER_TARGET
))
721 * From the Ivy Bridge PRM, volume 4 part 1, page 77:
723 * "For Render Target and Sampling Engine Surfaces:If the surface is
724 * multisampled (Number of Multisamples any value other than
725 * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
727 * "This field must be set to 0 for all SINT MSRTs when all RT channels
730 if (templ
->nr_samples
> 1 && !layout
->interleaved_samples
&&
731 !util_format_is_pure_sint(templ
->format
)) {
733 } else if (templ
->nr_samples
<= 1) {
735 * From the Ivy Bridge PRM, volume 2 part 1, page 326:
737 * "When MCS is buffer is used for color clear of non-multisampler
738 * render target, the following restrictions apply.
739 * - Support is limited to tiled render targets.
740 * - Support is for non-mip-mapped and non-array surface types
742 * - Clear is supported only on the full RT; i.e., no partial clear
743 * or overlapping clears.
744 * - MCS buffer for non-MSRT is supported only for RT formats
745 * 32bpp, 64bpp and 128bpp.
748 if (layout
->tiling
!= INTEL_TILING_NONE
&&
749 templ
->last_level
== 0 && templ
->array_size
== 1) {
750 switch (layout
->block_size
) {
766 layout_want_hiz(const struct ilo_layout
*layout
,
767 const struct ilo_layout_params
*params
)
769 const struct pipe_resource
*templ
= params
->templ
;
770 const struct util_format_description
*desc
=
771 util_format_description(templ
->format
);
772 bool want_hiz
= false;
774 if (ilo_debug
& ILO_DEBUG_NOHIZ
)
777 if (!(templ
->bind
& PIPE_BIND_DEPTH_STENCIL
))
780 if (!util_format_has_depth(desc
))
783 /* no point in having HiZ */
784 if (templ
->usage
== PIPE_USAGE_STAGING
)
787 if (params
->dev
->gen
>= ILO_GEN(7)) {
791 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
793 * "The hierarchical depth buffer does not support the LOD field, it
794 * is assumed by hardware to be zero. A separate hierarachical
795 * depth buffer is required for each LOD used, and the
796 * corresponding buffer's state delivered to hardware each time a
797 * new depth buffer state with modified LOD is delivered."
799 * But we have a stronger requirement. Because of layer offsetting
800 * (check out the callers of ilo_layout_get_slice_tile_offset()), we
801 * already have to require the texture to be non-mipmapped and
804 if (templ
->last_level
== 0 && templ
->array_size
== 1 &&
813 layout_init_aux(struct ilo_layout
*layout
,
814 struct ilo_layout_params
*params
)
816 if (layout_want_hiz(layout
, params
))
817 layout
->aux
= ILO_LAYOUT_AUX_HIZ
;
818 else if (layout_want_mcs(layout
, params
))
819 layout
->aux
= ILO_LAYOUT_AUX_MCS
;
823 layout_align(struct ilo_layout
*layout
, struct ilo_layout_params
*params
)
825 const struct pipe_resource
*templ
= params
->templ
;
826 int align_w
= 1, align_h
= 1, pad_h
= 0;
829 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
831 * "To determine the necessary padding on the bottom and right side of
832 * the surface, refer to the table in Section 7.18.3.4 for the i and j
833 * parameters for the surface format in use. The surface must then be
834 * extended to the next multiple of the alignment unit size in each
835 * dimension, and all texels contained in this extended surface must
836 * have valid GTT entries."
838 * "For cube surfaces, an additional two rows of padding are required
839 * at the bottom of the surface. This must be ensured regardless of
840 * whether the surface is stored tiled or linear. This is due to the
841 * potential rotation of cache line orientation from memory to cache."
843 * "For compressed textures (BC* and FXT1 surface formats), padding at
844 * the bottom of the surface is to an even compressed row, which is
845 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
846 * purposes, these surfaces behave as if j = 8 only for surface
847 * padding purposes. The value of 4 for j still applies for mip level
848 * alignment and QPitch calculation."
850 if (templ
->bind
& PIPE_BIND_SAMPLER_VIEW
) {
851 align_w
= MAX2(align_w
, layout
->align_i
);
852 align_h
= MAX2(align_h
, layout
->align_j
);
854 if (templ
->target
== PIPE_TEXTURE_CUBE
)
857 if (params
->compressed
)
858 align_h
= MAX2(align_h
, layout
->align_j
* 2);
862 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
864 * "If the surface contains an odd number of rows of data, a final row
865 * below the surface must be allocated."
867 if (templ
->bind
& PIPE_BIND_RENDER_TARGET
)
868 align_h
= MAX2(align_h
, 2);
871 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
872 * ilo_texture_can_enable_hiz(), we always return true for the first slice.
873 * To avoid out-of-bound access, we have to pad.
875 if (layout
->aux
== ILO_LAYOUT_AUX_HIZ
&&
876 templ
->last_level
== 0 &&
877 templ
->array_size
== 1 &&
878 templ
->depth0
== 1) {
879 align_w
= MAX2(align_w
, 8);
880 align_h
= MAX2(align_h
, 4);
883 params
->max_x
= align(params
->max_x
, align_w
);
884 params
->max_y
= align(params
->max_y
+ pad_h
, align_h
);
887 /* note that this may force the texture to be linear */
889 layout_calculate_bo_size(struct ilo_layout
*layout
,
890 struct ilo_layout_params
*params
)
892 assert(params
->max_x
% layout
->block_width
== 0);
893 assert(params
->max_y
% layout
->block_height
== 0);
894 assert(layout
->layer_height
% layout
->block_height
== 0);
897 (params
->max_x
/ layout
->block_width
) * layout
->block_size
;
898 layout
->bo_height
= params
->max_y
/ layout
->block_height
;
901 unsigned w
= layout
->bo_stride
, h
= layout
->bo_height
;
902 unsigned align_w
, align_h
;
905 * From the Haswell PRM, volume 5, page 163:
907 * "For linear surfaces, additional padding of 64 bytes is required
908 * at the bottom of the surface. This is in addition to the padding
911 if (params
->dev
->gen
>= ILO_GEN(7.5) &&
912 (params
->templ
->bind
& PIPE_BIND_SAMPLER_VIEW
) &&
913 layout
->tiling
== INTEL_TILING_NONE
) {
915 (64 + layout
->bo_stride
- 1) / layout
->bo_stride
;
919 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
921 * "- For linear render target surfaces, the pitch must be a
922 * multiple of the element size for non-YUV surface formats.
923 * Pitch must be a multiple of 2 * element size for YUV surface
925 * - For other linear surfaces, the pitch can be any multiple of
927 * - For tiled surfaces, the pitch must be a multiple of the tile
930 * Different requirements may exist when the bo is used in different
931 * places, but our alignments here should be good enough that we do not
932 * need to check layout->templ->bind.
934 switch (layout
->tiling
) {
944 if (layout
->format
== PIPE_FORMAT_S8_UINT
) {
946 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
948 * "A 4KB tile is subdivided into 8-high by 8-wide array of
949 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
952 * Since we asked for INTEL_TILING_NONE instead of the non-existent
953 * INTEL_TILING_W, we want to align to W tiles here.
958 /* some good enough values */
965 w
= align(w
, align_w
);
966 h
= align(h
, align_h
);
968 /* make sure the bo is mappable */
969 if (layout
->tiling
!= INTEL_TILING_NONE
) {
971 * Usually only the first 256MB of the GTT is mappable.
973 * See also how intel_context::max_gtt_map_object_size is calculated.
975 const size_t mappable_gtt_size
= 256 * 1024 * 1024;
978 * Be conservative. We may be able to switch from VALIGN_4 to
979 * VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
981 if (mappable_gtt_size
/ w
/ 4 < h
) {
982 if (layout
->valid_tilings
& LAYOUT_TILING_NONE
) {
983 layout
->tiling
= INTEL_TILING_NONE
;
984 /* MCS support for non-MSRTs is limited to tiled RTs */
985 if (layout
->aux
== ILO_LAYOUT_AUX_MCS
&&
986 params
->templ
->nr_samples
<= 1)
987 layout
->aux
= ILO_LAYOUT_AUX_NONE
;
991 ilo_warn("cannot force texture to be linear\n");
996 layout
->bo_stride
= w
;
997 layout
->bo_height
= h
;
1003 layout_calculate_hiz_size(struct ilo_layout
*layout
,
1004 struct ilo_layout_params
*params
)
1006 const struct pipe_resource
*templ
= params
->templ
;
1007 const unsigned hz_align_j
= 8;
1008 enum ilo_layout_walk_type hz_walk
;
1009 unsigned hz_width
, hz_height
, lv
;
1010 unsigned hz_clear_w
, hz_clear_h
;
1012 assert(layout
->aux
== ILO_LAYOUT_AUX_HIZ
);
1014 assert(layout
->walk
== ILO_LAYOUT_WALK_LAYER
||
1015 layout
->walk
== ILO_LAYOUT_WALK_3D
);
1018 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
1020 * "The hierarchical depth buffer does not support the LOD field, it is
1021 * assumed by hardware to be zero. A separate hierarachical depth
1022 * buffer is required for each LOD used, and the corresponding
1023 * buffer's state delivered to hardware each time a new depth buffer
1024 * state with modified LOD is delivered."
1026 * We will put all LODs in a single bo with ILO_LAYOUT_WALK_LOD.
1028 if (params
->dev
->gen
>= ILO_GEN(7))
1029 hz_walk
= layout
->walk
;
1031 hz_walk
= ILO_LAYOUT_WALK_LOD
;
1034 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1035 * PRM, volume 2 part 1, page 312-313.
1037 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1041 case ILO_LAYOUT_WALK_LOD
:
1043 unsigned lod_tx
[PIPE_MAX_TEXTURE_LEVELS
];
1044 unsigned lod_ty
[PIPE_MAX_TEXTURE_LEVELS
];
1045 unsigned cur_tx
, cur_ty
;
1047 /* figure out the tile offsets of LODs */
1052 for (lv
= 0; lv
<= templ
->last_level
; lv
++) {
1055 lod_tx
[lv
] = cur_tx
;
1056 lod_ty
[lv
] = cur_ty
;
1058 tw
= align(layout
->lods
[lv
].slice_width
, 16);
1059 th
= align(layout
->lods
[lv
].slice_height
, hz_align_j
) *
1060 templ
->array_size
/ 2;
1061 /* convert to Y-tiles */
1062 tw
= align(tw
, 128) / 128;
1063 th
= align(th
, 32) / 32;
1065 if (hz_width
< cur_tx
+ tw
)
1066 hz_width
= cur_tx
+ tw
;
1067 if (hz_height
< cur_ty
+ th
)
1068 hz_height
= cur_ty
+ th
;
1076 /* convert tile offsets to memory offsets */
1077 for (lv
= 0; lv
<= templ
->last_level
; lv
++) {
1078 layout
->aux_offsets
[lv
] =
1079 (lod_ty
[lv
] * hz_width
+ lod_tx
[lv
]) * 4096;
1085 case ILO_LAYOUT_WALK_LAYER
:
1087 const unsigned h0
= align(params
->h0
, hz_align_j
);
1088 const unsigned h1
= align(params
->h1
, hz_align_j
);
1089 const unsigned htail
=
1090 ((params
->dev
->gen
>= ILO_GEN(7)) ? 12 : 11) * hz_align_j
;
1091 const unsigned hz_qpitch
= h0
+ h1
+ htail
;
1093 hz_width
= align(layout
->lods
[0].slice_width
, 16);
1095 hz_height
= hz_qpitch
* templ
->array_size
/ 2;
1096 if (params
->dev
->gen
>= ILO_GEN(7))
1097 hz_height
= align(hz_height
, 8);
1100 case ILO_LAYOUT_WALK_3D
:
1101 hz_width
= align(layout
->lods
[0].slice_width
, 16);
1104 for (lv
= 0; lv
<= templ
->last_level
; lv
++) {
1105 const unsigned h
= align(layout
->lods
[lv
].slice_height
, hz_align_j
);
1106 /* according to the formula, slices are packed together vertically */
1107 hz_height
+= h
* u_minify(templ
->depth0
, lv
);
1114 * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
1115 * Experiments on Haswell show that aligning the RECTLIST primitive and
1116 * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be
1121 switch (templ
->nr_samples
) {
1143 for (lv
= 0; lv
<= templ
->last_level
; lv
++) {
1144 if (u_minify(layout
->width0
, lv
) % hz_clear_w
||
1145 u_minify(layout
->height0
, lv
) % hz_clear_h
)
1147 layout
->aux_enables
|= 1 << lv
;
1150 /* we padded to allow this in layout_align() */
1151 if (templ
->last_level
== 0 && templ
->array_size
== 1 && templ
->depth0
== 1)
1152 layout
->aux_enables
|= 0x1;
1154 /* align to Y-tile */
1155 layout
->aux_stride
= align(hz_width
, 128);
1156 layout
->aux_height
= align(hz_height
, 32);
1160 layout_calculate_mcs_size(struct ilo_layout
*layout
,
1161 struct ilo_layout_params
*params
)
1163 const struct pipe_resource
*templ
= params
->templ
;
1164 int mcs_width
, mcs_height
, mcs_cpp
;
1165 int downscale_x
, downscale_y
;
1167 assert(layout
->aux
== ILO_LAYOUT_AUX_MCS
);
1169 if (templ
->nr_samples
> 1) {
1171 * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1172 * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
1173 * need of scale down could be that the clear rectangle is used to clear
1174 * the MCS instead of the RT.
1176 * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
1177 * 2x2 factor could come from that the hardware writes 128 bits (an
1178 * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1179 * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
1180 * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1181 * pixel block in the RT.
1183 switch (templ
->nr_samples
) {
1201 assert(!"unsupported sample count");
1207 * It also appears that the 2x2 subspans generated by the scaled-down
1208 * clear rectangle cannot be masked. The scale-down clear rectangle
1209 * thus must be aligned to 2x2, and we need to pad.
1211 mcs_width
= align(layout
->width0
, downscale_x
* 2);
1212 mcs_height
= align(layout
->height0
, downscale_y
* 2);
1215 * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1230 * This table and the two following tables define the RT alignments, the
1231 * clear rectangle alignments, and the clear rectangle scale factors.
1232 * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1233 * that the clear rectangle alignments are 16x32 blocks, and the clear
1234 * rectangle scale factors are 8x16 blocks.
1236 * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1237 * RT. Similar to the MSAA cases, we can argue that an OWord maps to
1240 * One problem with this reasoning is that a Y-tile in MCS has 8x32
1241 * OWords and maps to 64x512 128-byte blocks. This differs from i965,
1242 * which says that a Y-tile maps to 128x256 blocks (\see
1243 * intel_get_non_msrt_mcs_alignment). It does not really change
1244 * anything except for the size of the allocated MCS. Let's see if we
1245 * hit out-of-bound access.
1247 switch (layout
->tiling
) {
1248 case INTEL_TILING_X
:
1249 downscale_x
= 64 / layout
->block_size
;
1252 case INTEL_TILING_Y
:
1253 downscale_x
= 32 / layout
->block_size
;
1257 assert(!"unsupported tiling mode");
1266 * From the Haswell PRM, volume 7, page 652:
1268 * "Clear rectangle must be aligned to two times the number of
1269 * pixels in the table shown below due to 16X16 hashing across the
1272 * The scaled-down clear rectangle must be aligned to 4x4 instead of
1273 * 2x2, and we need to pad.
1275 mcs_width
= align(layout
->width0
, downscale_x
* 4) / downscale_x
;
1276 mcs_height
= align(layout
->height0
, downscale_y
* 4) / downscale_y
;
1277 mcs_cpp
= 16; /* an OWord */
1280 layout
->aux_enables
= (1 << (templ
->last_level
+ 1)) - 1;
1281 /* align to Y-tile */
1282 layout
->aux_stride
= align(mcs_width
* mcs_cpp
, 128);
1283 layout
->aux_height
= align(mcs_height
, 32);
1287 * The texutre is for transfer only. We can define our own layout to save
1291 layout_init_for_transfer(struct ilo_layout
*layout
,
1292 const struct ilo_dev_info
*dev
,
1293 const struct pipe_resource
*templ
)
1295 const unsigned num_layers
= (templ
->target
== PIPE_TEXTURE_3D
) ?
1296 templ
->depth0
: templ
->array_size
;
1297 unsigned layer_width
, layer_height
;
1299 assert(templ
->last_level
== 0);
1300 assert(templ
->nr_samples
<= 1);
1302 layout
->aux
= ILO_LAYOUT_AUX_NONE
;
1303 layout
->width0
= templ
->width0
;
1304 layout
->height0
= templ
->height0
;
1305 layout
->format
= templ
->format
;
1306 layout
->block_width
= util_format_get_blockwidth(templ
->format
);
1307 layout
->block_height
= util_format_get_blockheight(templ
->format
);
1308 layout
->block_size
= util_format_get_blocksize(templ
->format
);
1309 layout
->walk
= ILO_LAYOUT_WALK_LOD
;
1311 layout
->valid_tilings
= LAYOUT_TILING_NONE
;
1312 layout
->tiling
= INTEL_TILING_NONE
;
1314 layout
->align_i
= layout
->block_width
;
1315 layout
->align_j
= layout
->block_height
;
1317 assert(util_is_power_of_two(layout
->block_width
) &&
1318 util_is_power_of_two(layout
->block_height
));
1320 /* use packed layout */
1321 layer_width
= align(templ
->width0
, layout
->align_i
);
1322 layer_height
= align(templ
->height0
, layout
->align_j
);
1324 layout
->lods
[0].slice_width
= layer_width
;
1325 layout
->lods
[0].slice_height
= layer_height
;
1327 layout
->bo_stride
= (layer_width
/ layout
->block_width
) * layout
->block_size
;
1328 layout
->bo_stride
= align(layout
->bo_stride
, 64);
1330 layout
->bo_height
= (layer_height
/ layout
->block_height
) * num_layers
;
1334 * Initialize the layout. Callers should zero-initialize \p layout first.
1336 void ilo_layout_init(struct ilo_layout
*layout
,
1337 const struct ilo_dev_info
*dev
,
1338 const struct pipe_resource
*templ
)
1340 struct ilo_layout_params params
;
1343 /* use transfer layout when the texture is never bound to GPU */
1344 transfer_only
= !(templ
->bind
& ~(PIPE_BIND_TRANSFER_WRITE
|
1345 PIPE_BIND_TRANSFER_READ
));
1346 if (transfer_only
&& templ
->last_level
== 0 && templ
->nr_samples
<= 1) {
1347 layout_init_for_transfer(layout
, dev
, templ
);
1351 memset(¶ms
, 0, sizeof(params
));
1353 params
.templ
= templ
;
1355 /* note that there are dependencies between these functions */
1356 layout_init_aux(layout
, ¶ms
);
1357 layout_init_size_and_format(layout
, ¶ms
);
1358 layout_init_walk(layout
, ¶ms
);
1359 layout_init_tiling(layout
, ¶ms
);
1360 layout_init_alignments(layout
, ¶ms
);
1361 layout_init_lods(layout
, ¶ms
);
1362 layout_init_layer_height(layout
, ¶ms
);
1364 layout_align(layout
, ¶ms
);
1365 layout_calculate_bo_size(layout
, ¶ms
);
1367 switch (layout
->aux
) {
1368 case ILO_LAYOUT_AUX_HIZ
:
1369 layout_calculate_hiz_size(layout
, ¶ms
);
1371 case ILO_LAYOUT_AUX_MCS
:
1372 layout_calculate_mcs_size(layout
, ¶ms
);
1380 * Update the tiling mode and bo stride (for imported resources).
1383 ilo_layout_update_for_imported_bo(struct ilo_layout
*layout
,
1384 enum intel_tiling_mode tiling
,
1387 if (!(layout
->valid_tilings
& (1 << tiling
)))
1390 if ((tiling
== INTEL_TILING_X
&& bo_stride
% 512) ||
1391 (tiling
== INTEL_TILING_Y
&& bo_stride
% 128))
1394 layout
->tiling
= tiling
;
1395 layout
->bo_stride
= bo_stride
;