2 * Mesa 3-D graphics library
4 * Copyright (C) 2014 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "ilo_debug.h"
29 #include "ilo_image.h"
32 IMAGE_TILING_NONE
= 1 << GEN6_TILING_NONE
,
33 IMAGE_TILING_X
= 1 << GEN6_TILING_X
,
34 IMAGE_TILING_Y
= 1 << GEN6_TILING_Y
,
35 IMAGE_TILING_W
= 1 << GEN8_TILING_W
,
37 IMAGE_TILING_ALL
= (IMAGE_TILING_NONE
|
43 struct ilo_image_params
{
44 const struct ilo_dev
*dev
;
45 const struct pipe_resource
*templ
;
46 unsigned valid_tilings
;
51 unsigned max_x
, max_y
;
55 img_get_slice_size(const struct ilo_image
*img
,
56 const struct ilo_image_params
*params
,
57 unsigned level
, unsigned *width
, unsigned *height
)
59 const struct pipe_resource
*templ
= params
->templ
;
62 w
= u_minify(img
->width0
, level
);
63 h
= u_minify(img
->height0
, level
);
66 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
68 * "The dimensions of the mip maps are first determined by applying the
69 * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
70 * if necessary, they are padded out to compression block boundaries."
72 w
= align(w
, img
->block_width
);
73 h
= align(h
, img
->block_height
);
76 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
78 * "If the surface is multisampled (4x), these values must be adjusted
79 * as follows before proceeding:
81 * W_L = ceiling(W_L / 2) * 4
82 * H_L = ceiling(H_L / 2) * 4"
84 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
86 * "If the surface is multisampled and it is a depth or stencil surface
87 * or Multisampled Surface StorageFormat in SURFACE_STATE is
88 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
91 * #samples W_L = H_L =
92 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
93 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
94 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
95 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
97 * For interleaved samples (4x), where pixels
100 * (x, y+1) (x+1, y+1)
102 * would be, the space is occupied by
104 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
105 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
106 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
107 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
111 * w = align(w, 2) * 2;
112 * h = align(h, 2) * 2;
114 if (img
->interleaved_samples
) {
115 switch (templ
->nr_samples
) {
135 assert(!"unsupported sample count");
141 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
143 * "For separate stencil buffer, the width must be mutiplied by 2 and
144 * height divided by 2..."
146 * To make things easier (for transfer), we will just double the stencil
147 * stride in 3DSTATE_STENCIL_BUFFER.
149 w
= align(w
, img
->align_i
);
150 h
= align(h
, img
->align_j
);
157 img_get_num_layers(const struct ilo_image
*img
,
158 const struct ilo_image_params
*params
)
160 const struct pipe_resource
*templ
= params
->templ
;
161 unsigned num_layers
= templ
->array_size
;
163 /* samples of the same index are stored in a layer */
164 if (templ
->nr_samples
> 1 && !img
->interleaved_samples
)
165 num_layers
*= templ
->nr_samples
;
171 img_init_layer_height(struct ilo_image
*img
,
172 struct ilo_image_params
*params
)
174 const struct pipe_resource
*templ
= params
->templ
;
177 if (img
->walk
!= ILO_IMAGE_WALK_LAYER
)
180 num_layers
= img_get_num_layers(img
, params
);
185 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
187 * "The following equation is used for surface formats other than
188 * compressed textures:
190 * QPitch = (h0 + h1 + 11j)"
192 * "The equation for compressed textures (BC* and FXT1 surface formats)
195 * QPitch = (h0 + h1 + 11j) / 4"
197 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
198 * value calculated in the equation above, for every other odd Surface
199 * Height starting from 1 i.e. 1,5,9,13"
201 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
203 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
204 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
206 * QPitch = (h0 + h1 + 12j)
207 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
209 * (There are many typos or missing words here...)"
211 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
212 * the base address. The PRM divides QPitch by 4 for compressed formats
213 * because the block height for those formats is 4, and it wants QPitch to
214 * mean the number of memory rows, as opposed to texel rows, between
215 * slices. Since we use texel rows everywhere, we do not need to divide
218 img
->walk_layer_height
= params
->h0
+ params
->h1
+
219 ((ilo_dev_gen(params
->dev
) >= ILO_GEN(7)) ? 12 : 11) * img
->align_j
;
221 if (ilo_dev_gen(params
->dev
) == ILO_GEN(6) && templ
->nr_samples
> 1 &&
222 img
->height0
% 4 == 1)
223 img
->walk_layer_height
+= 4;
225 params
->max_y
+= img
->walk_layer_height
* (num_layers
- 1);
229 img_init_lods(struct ilo_image
*img
,
230 struct ilo_image_params
*params
)
232 const struct pipe_resource
*templ
= params
->templ
;
233 unsigned cur_x
, cur_y
;
238 for (lv
= 0; lv
<= templ
->last_level
; lv
++) {
239 unsigned lod_w
, lod_h
;
241 img_get_slice_size(img
, params
, lv
, &lod_w
, &lod_h
);
243 img
->lods
[lv
].x
= cur_x
;
244 img
->lods
[lv
].y
= cur_y
;
245 img
->lods
[lv
].slice_width
= lod_w
;
246 img
->lods
[lv
].slice_height
= lod_h
;
249 case ILO_IMAGE_WALK_LAYER
:
250 /* MIPLAYOUT_BELOW */
256 case ILO_IMAGE_WALK_LOD
:
257 lod_h
*= img_get_num_layers(img
, params
);
263 /* every LOD begins at tile boundaries */
264 if (templ
->last_level
> 0) {
265 assert(img
->format
== PIPE_FORMAT_S8_UINT
);
266 cur_x
= align(cur_x
, 64);
267 cur_y
= align(cur_y
, 64);
270 case ILO_IMAGE_WALK_3D
:
272 const unsigned num_slices
= u_minify(templ
->depth0
, lv
);
273 const unsigned num_slices_per_row
= 1 << lv
;
274 const unsigned num_rows
=
275 (num_slices
+ num_slices_per_row
- 1) / num_slices_per_row
;
277 lod_w
*= num_slices_per_row
;
285 if (params
->max_x
< img
->lods
[lv
].x
+ lod_w
)
286 params
->max_x
= img
->lods
[lv
].x
+ lod_w
;
287 if (params
->max_y
< img
->lods
[lv
].y
+ lod_h
)
288 params
->max_y
= img
->lods
[lv
].y
+ lod_h
;
291 if (img
->walk
== ILO_IMAGE_WALK_LAYER
) {
292 params
->h0
= img
->lods
[0].slice_height
;
294 if (templ
->last_level
> 0)
295 params
->h1
= img
->lods
[1].slice_height
;
297 img_get_slice_size(img
, params
, 1, &cur_x
, ¶ms
->h1
);
302 img_init_alignments(struct ilo_image
*img
,
303 const struct ilo_image_params
*params
)
305 const struct pipe_resource
*templ
= params
->templ
;
308 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
310 * "surface format align_i align_j
311 * YUV 4:2:2 formats 4 *see below
314 * all other formats 4 *see below"
316 * "- align_j = 4 for any depth buffer
317 * - align_j = 2 for separate stencil buffer
318 * - align_j = 4 for any render target surface is multisampled (4x)
319 * - align_j = 4 for any render target surface with Surface Vertical
320 * Alignment = VALIGN_4
321 * - align_j = 2 for any render target surface with Surface Vertical
322 * Alignment = VALIGN_2
323 * - align_j = 2 for all other render target surface
324 * - align_j = 2 for any sampling engine surface with Surface Vertical
325 * Alignment = VALIGN_2
326 * - align_j = 4 for any sampling engine surface with Surface Vertical
327 * Alignment = VALIGN_4"
329 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
331 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
332 * the Surface Format is 96 bits per element (BPE)."
334 * They can be rephrased as
337 * compressed formats block width block height
338 * PIPE_FORMAT_S8_UINT 4 2
339 * other depth/stencil formats 4 4
340 * 4x multisampled 4 4
346 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
348 * "surface defined by surface format align_i align_j
349 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
351 * 3DSTATE_STENCIL_BUFFER N/A 8 8
352 * SURFACE_STATE BC*, ETC*, EAC* 4 4
354 * all others (set by SURFACE_STATE)"
356 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
358 * "- This field (Surface Vertical Aligment) is intended to be set to
359 * VALIGN_4 if the surface was rendered as a depth buffer, for a
360 * multisampled (4x) render target, or for a multisampled (8x)
361 * render target, since these surfaces support only alignment of 4.
362 * - Use of VALIGN_4 for other surfaces is supported, but uses more
364 * - This field must be set to VALIGN_4 for all tiled Y Render Target
366 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
367 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
368 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
369 * must be set to VALIGN_4."
370 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
372 * "- This field (Surface Horizontal Aligment) is intended to be set to
373 * HALIGN_8 only if the surface was rendered as a depth buffer with
374 * Z16 format or a stencil buffer, since these surfaces support only
376 * - Use of HALIGN_8 for other surfaces is supported, but uses more
378 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
379 * - This field must be set to HALIGN_8 if the Surface Format is
382 * They can be rephrased as
385 * compressed formats block width block height
386 * PIPE_FORMAT_Z16_UNORM 8 4
387 * PIPE_FORMAT_S8_UINT 8 8
388 * other depth/stencil formats 4 4
389 * 2x or 4x multisampled 4 or 8 4
390 * tiled Y 4 or 8 4 (if rt)
391 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
392 * others 4 or 8 2 or 4
395 if (params
->compressed
) {
396 /* this happens to be the case */
397 img
->align_i
= img
->block_width
;
398 img
->align_j
= img
->block_height
;
399 } else if (templ
->bind
& PIPE_BIND_DEPTH_STENCIL
) {
400 if (ilo_dev_gen(params
->dev
) >= ILO_GEN(7)) {
401 switch (img
->format
) {
402 case PIPE_FORMAT_Z16_UNORM
:
406 case PIPE_FORMAT_S8_UINT
:
416 switch (img
->format
) {
417 case PIPE_FORMAT_S8_UINT
:
428 const bool valign_4
=
429 (templ
->nr_samples
> 1) ||
430 (ilo_dev_gen(params
->dev
) >= ILO_GEN(8)) ||
431 (ilo_dev_gen(params
->dev
) >= ILO_GEN(7) &&
432 img
->tiling
== GEN6_TILING_Y
&&
433 (templ
->bind
& PIPE_BIND_RENDER_TARGET
));
435 if (ilo_dev_gen(params
->dev
) >= ILO_GEN(7) &&
436 ilo_dev_gen(params
->dev
) <= ILO_GEN(7.5) && valign_4
)
437 assert(img
->format
!= PIPE_FORMAT_R32G32B32_FLOAT
);
440 img
->align_j
= (valign_4
) ? 4 : 2;
444 * the fact that align i and j are multiples of block width and height
445 * respectively is what makes the size of the bo a multiple of the block
446 * size, slices start at block boundaries, and many of the computations
449 assert(img
->align_i
% img
->block_width
== 0);
450 assert(img
->align_j
% img
->block_height
== 0);
452 /* make sure align() works */
453 assert(util_is_power_of_two(img
->align_i
) &&
454 util_is_power_of_two(img
->align_j
));
455 assert(util_is_power_of_two(img
->block_width
) &&
456 util_is_power_of_two(img
->block_height
));
460 img_init_tiling(struct ilo_image
*img
,
461 const struct ilo_image_params
*params
)
463 const struct pipe_resource
*templ
= params
->templ
;
464 unsigned preferred_tilings
= params
->valid_tilings
;
466 /* no fencing nor BLT support */
467 if (preferred_tilings
& ~IMAGE_TILING_W
)
468 preferred_tilings
&= ~IMAGE_TILING_W
;
470 if (templ
->bind
& (PIPE_BIND_RENDER_TARGET
| PIPE_BIND_SAMPLER_VIEW
)) {
472 * heuristically set a minimum width/height for enabling tiling
474 if (img
->width0
< 64 && (preferred_tilings
& ~IMAGE_TILING_X
))
475 preferred_tilings
&= ~IMAGE_TILING_X
;
477 if ((img
->width0
< 32 || img
->height0
< 16) &&
478 (img
->width0
< 16 || img
->height0
< 32) &&
479 (preferred_tilings
& ~IMAGE_TILING_Y
))
480 preferred_tilings
&= ~IMAGE_TILING_Y
;
482 /* force linear if we are not sure where the texture is bound to */
483 if (preferred_tilings
& IMAGE_TILING_NONE
)
484 preferred_tilings
&= IMAGE_TILING_NONE
;
487 /* prefer tiled over linear */
488 if (preferred_tilings
& IMAGE_TILING_Y
)
489 img
->tiling
= GEN6_TILING_Y
;
490 else if (preferred_tilings
& IMAGE_TILING_X
)
491 img
->tiling
= GEN6_TILING_X
;
492 else if (preferred_tilings
& IMAGE_TILING_W
)
493 img
->tiling
= GEN8_TILING_W
;
495 img
->tiling
= GEN6_TILING_NONE
;
499 img_init_walk_gen7(struct ilo_image
*img
,
500 const struct ilo_image_params
*params
)
502 const struct pipe_resource
*templ
= params
->templ
;
505 * It is not explicitly stated, but render targets are expected to be
506 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
507 * to be IMS (samples interleaved).
509 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
511 if (templ
->bind
& PIPE_BIND_DEPTH_STENCIL
) {
513 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
515 * "note that the depth buffer and stencil buffer have an implied
516 * value of ARYSPC_FULL"
518 img
->walk
= (templ
->target
== PIPE_TEXTURE_3D
) ?
519 ILO_IMAGE_WALK_3D
: ILO_IMAGE_WALK_LAYER
;
521 img
->interleaved_samples
= true;
524 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
526 * "If Multisampled Surface Storage Format is MSFMT_MSS and Number
527 * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
528 * Array Spacing) must be set to ARYSPC_LOD0."
530 * As multisampled resources are not mipmapped, we never use
531 * ARYSPC_FULL for them.
533 if (templ
->nr_samples
> 1)
534 assert(templ
->last_level
== 0);
537 (templ
->target
== PIPE_TEXTURE_3D
) ? ILO_IMAGE_WALK_3D
:
538 (templ
->last_level
> 0) ? ILO_IMAGE_WALK_LAYER
:
541 img
->interleaved_samples
= false;
546 img_init_walk_gen6(struct ilo_image
*img
,
547 const struct ilo_image_params
*params
)
550 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
552 * "The separate stencil buffer does not support mip mapping, thus the
553 * storage for LODs other than LOD 0 is not needed. The following
554 * QPitch equation applies only to the separate stencil buffer:
558 * GEN6 does not support compact spacing otherwise.
561 (params
->templ
->target
== PIPE_TEXTURE_3D
) ? ILO_IMAGE_WALK_3D
:
562 (img
->format
== PIPE_FORMAT_S8_UINT
) ? ILO_IMAGE_WALK_LOD
:
563 ILO_IMAGE_WALK_LAYER
;
565 /* GEN6 supports only interleaved samples */
566 img
->interleaved_samples
= true;
570 img_init_walk(struct ilo_image
*img
,
571 const struct ilo_image_params
*params
)
573 if (ilo_dev_gen(params
->dev
) >= ILO_GEN(7))
574 img_init_walk_gen7(img
, params
);
576 img_init_walk_gen6(img
, params
);
580 img_get_valid_tilings(const struct ilo_image
*img
,
581 const struct ilo_image_params
*params
)
583 const struct pipe_resource
*templ
= params
->templ
;
584 const enum pipe_format format
= img
->format
;
585 unsigned valid_tilings
= params
->valid_tilings
;
588 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
590 * "Display/Overlay Y-Major not supported.
591 * X-Major required for Async Flips"
593 if (unlikely(templ
->bind
& PIPE_BIND_SCANOUT
))
594 valid_tilings
&= IMAGE_TILING_X
;
597 * From the Sandy Bridge PRM, volume 3 part 2, page 158:
599 * "The cursor surface address must be 4K byte aligned. The cursor must
600 * be in linear memory, it cannot be tiled."
602 if (unlikely(templ
->bind
& (PIPE_BIND_CURSOR
| PIPE_BIND_LINEAR
)))
603 valid_tilings
&= IMAGE_TILING_NONE
;
606 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
608 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
609 * Depth Buffer is not supported."
611 * "The Depth Buffer, if tiled, must use Y-Major tiling."
613 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
615 * "W-Major Tile Format is used for separate stencil."
617 if (templ
->bind
& PIPE_BIND_DEPTH_STENCIL
) {
619 case PIPE_FORMAT_S8_UINT
:
620 valid_tilings
&= IMAGE_TILING_W
;
623 valid_tilings
&= IMAGE_TILING_Y
;
628 if (templ
->bind
& PIPE_BIND_RENDER_TARGET
) {
630 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
632 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
633 * either TileX or Linear."
635 * From the Haswell PRM, volume 5, page 32:
637 * "NOTE: 128 BPP format color buffer (render target) supports
638 * Linear, TiledX and TiledY."
640 if (ilo_dev_gen(params
->dev
) < ILO_GEN(7.5) && img
->block_size
== 16)
641 valid_tilings
&= ~IMAGE_TILING_Y
;
644 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
646 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
647 * for all tiled Y Render Target surfaces."
649 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
651 if (ilo_dev_gen(params
->dev
) >= ILO_GEN(7) &&
652 ilo_dev_gen(params
->dev
) <= ILO_GEN(7.5) &&
653 img
->format
== PIPE_FORMAT_R32G32B32_FLOAT
)
654 valid_tilings
&= ~IMAGE_TILING_Y
;
656 valid_tilings
&= ~IMAGE_TILING_W
;
659 if (templ
->bind
& PIPE_BIND_SAMPLER_VIEW
) {
660 if (ilo_dev_gen(params
->dev
) < ILO_GEN(8))
661 valid_tilings
&= ~IMAGE_TILING_W
;
664 /* no conflicting binding flags */
665 assert(valid_tilings
);
667 return valid_tilings
;
671 img_init_size_and_format(struct ilo_image
*img
,
672 struct ilo_image_params
*params
)
674 const struct pipe_resource
*templ
= params
->templ
;
675 enum pipe_format format
= templ
->format
;
676 bool require_separate_stencil
= false;
678 img
->target
= templ
->target
;
679 img
->width0
= templ
->width0
;
680 img
->height0
= templ
->height0
;
681 img
->depth0
= templ
->depth0
;
682 img
->array_size
= templ
->array_size
;
683 img
->level_count
= templ
->last_level
+ 1;
684 img
->sample_count
= (templ
->nr_samples
) ? templ
->nr_samples
: 1;
687 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
689 * "This field (Separate Stencil Buffer Enable) must be set to the same
690 * value (enabled or disabled) as Hierarchical Depth Buffer Enable."
692 * GEN7+ requires separate stencil buffers.
694 if (templ
->bind
& PIPE_BIND_DEPTH_STENCIL
) {
695 if (ilo_dev_gen(params
->dev
) >= ILO_GEN(7))
696 require_separate_stencil
= true;
698 require_separate_stencil
= (img
->aux
.type
== ILO_IMAGE_AUX_HIZ
);
702 case PIPE_FORMAT_ETC1_RGB8
:
703 format
= PIPE_FORMAT_R8G8B8X8_UNORM
;
705 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
706 if (require_separate_stencil
) {
707 format
= PIPE_FORMAT_Z24X8_UNORM
;
708 img
->separate_stencil
= true;
711 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
712 if (require_separate_stencil
) {
713 format
= PIPE_FORMAT_Z32_FLOAT
;
714 img
->separate_stencil
= true;
721 img
->format
= format
;
722 img
->block_width
= util_format_get_blockwidth(format
);
723 img
->block_height
= util_format_get_blockheight(format
);
724 img
->block_size
= util_format_get_blocksize(format
);
726 params
->valid_tilings
= img_get_valid_tilings(img
, params
);
727 params
->compressed
= util_format_is_compressed(img
->format
);
731 img_want_mcs(const struct ilo_image
*img
,
732 const struct ilo_image_params
*params
)
734 const struct pipe_resource
*templ
= params
->templ
;
735 bool want_mcs
= false;
737 /* MCS is for RT on GEN7+ */
738 if (ilo_dev_gen(params
->dev
) < ILO_GEN(7))
741 if (templ
->target
!= PIPE_TEXTURE_2D
||
742 !(templ
->bind
& PIPE_BIND_RENDER_TARGET
))
746 * From the Ivy Bridge PRM, volume 4 part 1, page 77:
748 * "For Render Target and Sampling Engine Surfaces:If the surface is
749 * multisampled (Number of Multisamples any value other than
750 * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
752 * "This field must be set to 0 for all SINT MSRTs when all RT channels
755 if (templ
->nr_samples
> 1 && !util_format_is_pure_sint(templ
->format
)) {
757 } else if (templ
->nr_samples
<= 1) {
759 * From the Ivy Bridge PRM, volume 2 part 1, page 326:
761 * "When MCS is buffer is used for color clear of non-multisampler
762 * render target, the following restrictions apply.
763 * - Support is limited to tiled render targets.
764 * - Support is for non-mip-mapped and non-array surface types
766 * - Clear is supported only on the full RT; i.e., no partial clear
767 * or overlapping clears.
768 * - MCS buffer for non-MSRT is supported only for RT formats
769 * 32bpp, 64bpp and 128bpp.
772 if (img
->tiling
!= GEN6_TILING_NONE
&&
773 templ
->last_level
== 0 && templ
->array_size
== 1) {
774 switch (img
->block_size
) {
790 img_want_hiz(const struct ilo_image
*img
,
791 const struct ilo_image_params
*params
)
793 const struct pipe_resource
*templ
= params
->templ
;
794 const struct util_format_description
*desc
=
795 util_format_description(templ
->format
);
797 if (ilo_debug
& ILO_DEBUG_NOHIZ
)
800 /* we want 8x4 aligned levels */
801 if (templ
->target
== PIPE_TEXTURE_1D
)
804 if (!(templ
->bind
& PIPE_BIND_DEPTH_STENCIL
))
807 if (!util_format_has_depth(desc
))
810 /* no point in having HiZ */
811 if (templ
->usage
== PIPE_USAGE_STAGING
)
815 * As can be seen in img_calculate_hiz_size(), HiZ may not be enabled
816 * for every level. This is generally fine except on GEN6, where HiZ and
817 * separate stencil are enabled and disabled at the same time. When the
818 * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
819 * can result in incompatible formats.
821 if (ilo_dev_gen(params
->dev
) == ILO_GEN(6) &&
822 templ
->format
== PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
&&
830 img_init_aux(struct ilo_image
*img
,
831 const struct ilo_image_params
*params
)
833 if (img_want_hiz(img
, params
))
834 img
->aux
.type
= ILO_IMAGE_AUX_HIZ
;
835 else if (img_want_mcs(img
, params
))
836 img
->aux
.type
= ILO_IMAGE_AUX_MCS
;
840 img_align(struct ilo_image
*img
, struct ilo_image_params
*params
)
842 const struct pipe_resource
*templ
= params
->templ
;
843 int align_w
= 1, align_h
= 1, pad_h
= 0;
846 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
848 * "To determine the necessary padding on the bottom and right side of
849 * the surface, refer to the table in Section 7.18.3.4 for the i and j
850 * parameters for the surface format in use. The surface must then be
851 * extended to the next multiple of the alignment unit size in each
852 * dimension, and all texels contained in this extended surface must
853 * have valid GTT entries."
855 * "For cube surfaces, an additional two rows of padding are required
856 * at the bottom of the surface. This must be ensured regardless of
857 * whether the surface is stored tiled or linear. This is due to the
858 * potential rotation of cache line orientation from memory to cache."
860 * "For compressed textures (BC* and FXT1 surface formats), padding at
861 * the bottom of the surface is to an even compressed row, which is
862 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
863 * purposes, these surfaces behave as if j = 8 only for surface
864 * padding purposes. The value of 4 for j still applies for mip level
865 * alignment and QPitch calculation."
867 if (templ
->bind
& PIPE_BIND_SAMPLER_VIEW
) {
868 align_w
= MAX2(align_w
, img
->align_i
);
869 align_h
= MAX2(align_h
, img
->align_j
);
871 if (templ
->target
== PIPE_TEXTURE_CUBE
)
874 if (params
->compressed
)
875 align_h
= MAX2(align_h
, img
->align_j
* 2);
879 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
881 * "If the surface contains an odd number of rows of data, a final row
882 * below the surface must be allocated."
884 if (templ
->bind
& PIPE_BIND_RENDER_TARGET
)
885 align_h
= MAX2(align_h
, 2);
888 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. Pad to allow HiZ
889 * for unaligned non-mipmapped and non-array images.
891 if (img
->aux
.type
== ILO_IMAGE_AUX_HIZ
&&
892 templ
->last_level
== 0 &&
893 templ
->array_size
== 1 &&
894 templ
->depth0
== 1) {
895 align_w
= MAX2(align_w
, 8);
896 align_h
= MAX2(align_h
, 4);
899 params
->max_x
= align(params
->max_x
, align_w
);
900 params
->max_y
= align(params
->max_y
+ pad_h
, align_h
);
903 /* note that this may force the texture to be linear */
905 img_calculate_bo_size(struct ilo_image
*img
,
906 const struct ilo_image_params
*params
)
908 assert(params
->max_x
% img
->block_width
== 0);
909 assert(params
->max_y
% img
->block_height
== 0);
910 assert(img
->walk_layer_height
% img
->block_height
== 0);
913 (params
->max_x
/ img
->block_width
) * img
->block_size
;
914 img
->bo_height
= params
->max_y
/ img
->block_height
;
917 unsigned w
= img
->bo_stride
, h
= img
->bo_height
;
918 unsigned align_w
, align_h
;
921 * From the Haswell PRM, volume 5, page 163:
923 * "For linear surfaces, additional padding of 64 bytes is required
924 * at the bottom of the surface. This is in addition to the padding
927 if (ilo_dev_gen(params
->dev
) >= ILO_GEN(7.5) &&
928 (params
->templ
->bind
& PIPE_BIND_SAMPLER_VIEW
) &&
929 img
->tiling
== GEN6_TILING_NONE
)
930 h
+= (64 + img
->bo_stride
- 1) / img
->bo_stride
;
933 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
935 * "- For linear render target surfaces, the pitch must be a
936 * multiple of the element size for non-YUV surface formats.
937 * Pitch must be a multiple of 2 * element size for YUV surface
939 * - For other linear surfaces, the pitch can be any multiple of
941 * - For tiled surfaces, the pitch must be a multiple of the tile
944 * Different requirements may exist when the bo is used in different
945 * places, but our alignments here should be good enough that we do not
946 * need to check params->templ->bind.
948 switch (img
->tiling
) {
959 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
961 * "A 4KB tile is subdivided into 8-high by 8-wide array of
962 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
969 assert(img
->tiling
== GEN6_TILING_NONE
);
970 /* some good enough values */
976 w
= align(w
, align_w
);
977 h
= align(h
, align_h
);
979 /* make sure the bo is mappable */
980 if (img
->tiling
!= GEN6_TILING_NONE
) {
982 * Usually only the first 256MB of the GTT is mappable.
984 * See also how intel_context::max_gtt_map_object_size is calculated.
986 const size_t mappable_gtt_size
= 256 * 1024 * 1024;
989 * Be conservative. We may be able to switch from VALIGN_4 to
990 * VALIGN_2 if the image was Y-tiled, but let's keep it simple.
992 if (mappable_gtt_size
/ w
/ 4 < h
) {
993 if (params
->valid_tilings
& IMAGE_TILING_NONE
) {
994 img
->tiling
= GEN6_TILING_NONE
;
995 /* MCS support for non-MSRTs is limited to tiled RTs */
996 if (img
->aux
.type
== ILO_IMAGE_AUX_MCS
&&
997 params
->templ
->nr_samples
<= 1)
998 img
->aux
.type
= ILO_IMAGE_AUX_NONE
;
1002 ilo_warn("cannot force texture to be linear\n");
1014 img_calculate_hiz_size(struct ilo_image
*img
,
1015 const struct ilo_image_params
*params
)
1017 const struct pipe_resource
*templ
= params
->templ
;
1018 const unsigned hz_align_j
= 8;
1019 enum ilo_image_walk_type hz_walk
;
1020 unsigned hz_width
, hz_height
, lv
;
1021 unsigned hz_clear_w
, hz_clear_h
;
1023 assert(img
->aux
.type
== ILO_IMAGE_AUX_HIZ
);
1025 assert(img
->walk
== ILO_IMAGE_WALK_LAYER
||
1026 img
->walk
== ILO_IMAGE_WALK_3D
);
1029 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
1031 * "The hierarchical depth buffer does not support the LOD field, it is
1032 * assumed by hardware to be zero. A separate hierarachical depth
1033 * buffer is required for each LOD used, and the corresponding
1034 * buffer's state delivered to hardware each time a new depth buffer
1035 * state with modified LOD is delivered."
1037 * We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD.
1039 if (ilo_dev_gen(params
->dev
) >= ILO_GEN(7))
1040 hz_walk
= img
->walk
;
1042 hz_walk
= ILO_IMAGE_WALK_LOD
;
1045 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1046 * PRM, volume 2 part 1, page 312-313.
1048 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1052 case ILO_IMAGE_WALK_LAYER
:
1054 const unsigned h0
= align(params
->h0
, hz_align_j
);
1055 const unsigned h1
= align(params
->h1
, hz_align_j
);
1056 const unsigned htail
=
1057 ((ilo_dev_gen(params
->dev
) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j
;
1058 const unsigned hz_qpitch
= h0
+ h1
+ htail
;
1060 hz_width
= align(img
->lods
[0].slice_width
, 16);
1062 hz_height
= hz_qpitch
* templ
->array_size
/ 2;
1063 if (ilo_dev_gen(params
->dev
) >= ILO_GEN(7))
1064 hz_height
= align(hz_height
, 8);
1066 img
->aux
.walk_layer_height
= hz_qpitch
;
1069 case ILO_IMAGE_WALK_LOD
:
1071 unsigned lod_tx
[PIPE_MAX_TEXTURE_LEVELS
];
1072 unsigned lod_ty
[PIPE_MAX_TEXTURE_LEVELS
];
1073 unsigned cur_tx
, cur_ty
;
1075 /* figure out the tile offsets of LODs */
1080 for (lv
= 0; lv
<= templ
->last_level
; lv
++) {
1083 lod_tx
[lv
] = cur_tx
;
1084 lod_ty
[lv
] = cur_ty
;
1086 tw
= align(img
->lods
[lv
].slice_width
, 16);
1087 th
= align(img
->lods
[lv
].slice_height
, hz_align_j
) *
1088 templ
->array_size
/ 2;
1089 /* convert to Y-tiles */
1090 tw
= align(tw
, 128) / 128;
1091 th
= align(th
, 32) / 32;
1093 if (hz_width
< cur_tx
+ tw
)
1094 hz_width
= cur_tx
+ tw
;
1095 if (hz_height
< cur_ty
+ th
)
1096 hz_height
= cur_ty
+ th
;
1104 /* convert tile offsets to memory offsets */
1105 for (lv
= 0; lv
<= templ
->last_level
; lv
++) {
1106 img
->aux
.walk_lod_offsets
[lv
] =
1107 (lod_ty
[lv
] * hz_width
+ lod_tx
[lv
]) * 4096;
1113 case ILO_IMAGE_WALK_3D
:
1114 hz_width
= align(img
->lods
[0].slice_width
, 16);
1117 for (lv
= 0; lv
<= templ
->last_level
; lv
++) {
1118 const unsigned h
= align(img
->lods
[lv
].slice_height
, hz_align_j
);
1119 /* according to the formula, slices are packed together vertically */
1120 hz_height
+= h
* u_minify(templ
->depth0
, lv
);
1125 assert(!"unknown HiZ walk");
1132 * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
1133 * Experiments on Haswell show that aligning the RECTLIST primitive and
1134 * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be
1139 switch (templ
->nr_samples
) {
1161 for (lv
= 0; lv
<= templ
->last_level
; lv
++) {
1162 if (u_minify(img
->width0
, lv
) % hz_clear_w
||
1163 u_minify(img
->height0
, lv
) % hz_clear_h
)
1165 img
->aux
.enables
|= 1 << lv
;
1168 /* we padded to allow this in img_align() */
1169 if (templ
->last_level
== 0 && templ
->array_size
== 1 && templ
->depth0
== 1)
1170 img
->aux
.enables
|= 0x1;
1172 /* align to Y-tile */
1173 img
->aux
.bo_stride
= align(hz_width
, 128);
1174 img
->aux
.bo_height
= align(hz_height
, 32);
/*
 * Compute the stride and height of the MCS auxiliary buffer of \p img and
 * enable MCS for every LOD of the template.
 *
 * The image must already have aux.type == ILO_IMAGE_AUX_MCS (asserted).
 * Multisampled templates (nr_samples > 1) take their scale-down factors from
 * a switch on the sample count and pad width0/height0 to twice those
 * factors.  The second half, described by its comments as the non-MSAA case,
 * takes downscale_x from img->tiling and img->block_size, pads to four times
 * the factors, divides by them, and uses 16 bytes (an OWord) per element.
 * Finally aux.enables gets one bit per level (bits 0..last_level) and the
 * buffer stride/height are aligned to 128 and 32.
 *
 * NOTE(review): this listing omits original lines (see the gaps in the
 * embedded numbering), including the case bodies of both switch statements
 * and the line that separates the two halves; confirm the per-case values
 * against the upstream file.
 */
1178 img_calculate_mcs_size(struct ilo_image
*img
,
1179 const struct ilo_image_params
*params
)
1181 const struct pipe_resource
*templ
= params
->templ
;
1182 int mcs_width
, mcs_height
, mcs_cpp
;
1183 int downscale_x
, downscale_y
;
/* callers are expected to have selected MCS as the auxiliary surface */
1185 assert(img
->aux
.type
== ILO_IMAGE_AUX_MCS
);
1187 if (templ
->nr_samples
> 1) {
1189 * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1190 * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
1191 * need of scale down could be that the clear rectangle is used to clear
1192 * the MCS instead of the RT.
1194 * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
1195 * 2x2 factor could come from that the hardware writes 128 bits (an
1196 * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1197 * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
1198 * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1199 * pixel block in the RT.
/*
 * presumably sets downscale_x/downscale_y (and mcs_cpp) per sample count;
 * the case bodies are not part of this listing -- TODO confirm
 */
1201 switch (templ
->nr_samples
) {
1219 assert(!"unsupported sample count");
1225 * It also appears that the 2x2 subspans generated by the scaled-down
1226 * clear rectangle cannot be masked. The scale-down clear rectangle
1227 * thus must be aligned to 2x2, and we need to pad.
/* pad the base level size to twice the scale-down factor in each direction */
1229 mcs_width
= align(img
->width0
, downscale_x
* 2);
1230 mcs_height
= align(img
->height0
, downscale_y
* 2);
1233 * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1248 * This table and the two following tables define the RT alignments, the
1249 * clear rectangle alignments, and the clear rectangle scale factors.
1250 * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1251 * that the clear rectangle alignments are 16x32 blocks, and the clear
1252 * rectangle scale factors are 8x16 blocks.
1254 * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1255 * RT. Similar to the MSAA cases, we can argue that an OWord maps to
1258 * One problem with this reasoning is that a Y-tile in MCS has 8x32
1259 * OWords and maps to 64x512 128-byte blocks. This differs from i965,
1260 * which says that a Y-tile maps to 128x256 blocks (\see
1261 * intel_get_non_msrt_mcs_alignment). It does not really change
1262 * anything except for the size of the allocated MCS. Let's see if we
1263 * hit out-of-bound access.
1265 switch (img
->tiling
) {
1267 downscale_x
= 64 / img
->block_size
;
1271 downscale_x
= 32 / img
->block_size
;
1275 assert(!"unsupported tiling mode");
1284 * From the Haswell PRM, volume 7, page 652:
1286 * "Clear rectangle must be aligned to two times the number of
1287 * pixels in the table shown below due to 16X16 hashing across the
1290 * The scaled-down clear rectangle must be aligned to 4x4 instead of
1291 * 2x2, and we need to pad.
1293 mcs_width
= align(img
->width0
, downscale_x
* 4) / downscale_x
;
1294 mcs_height
= align(img
->height0
, downscale_y
* 4) / downscale_y
;
1295 mcs_cpp
= 16; /* an OWord */
1298 img
->aux
.enables
= (1 << (templ
->last_level
+ 1)) - 1;
1299 /* align to Y-tile */
1300 img
->aux
.bo_stride
= align(mcs_width
* mcs_cpp
, 128);
1301 img
->aux
.bo_height
= align(mcs_height
, 32);
/*
 * Run the complete layout computation for \p img using \p params.
 *
 * The helpers are invoked in a fixed sequence; as the comment below states,
 * they have hard dependencies on each other, so the order must not be
 * changed.  After the main layout is known, img->scanout is taken from the
 * PIPE_BIND_SCANOUT bit of the template, and the size of the auxiliary
 * buffer is computed for HiZ or MCS according to img->aux.type.
 *
 * NOTE(review): lines are missing from this listing between the switch
 * cases (see the embedded numbering); what terminates each case is not
 * visible here.
 */
1305 img_init(struct ilo_image
*img
,
1306 struct ilo_image_params
*params
)
1308 /* there are hard dependencies between every function here */
1310 img_init_aux(img
, params
);
1311 img_init_size_and_format(img
, params
);
1312 img_init_walk(img
, params
);
1313 img_init_tiling(img
, params
);
1314 img_init_alignments(img
, params
);
1315 img_init_lods(img
, params
);
1316 img_init_layer_height(img
, params
);
/* final alignment and buffer size of the main surface */
1318 img_align(img
, params
);
1319 img_calculate_bo_size(img
, params
);
/* remember whether the resource was requested as a scanout buffer */
1321 img
->scanout
= (params
->templ
->bind
& PIPE_BIND_SCANOUT
);
/* size the auxiliary buffer, if img->aux.type selects one */
1323 switch (img
->aux
.type
) {
1324 case ILO_IMAGE_AUX_HIZ
:
1325 img_calculate_hiz_size(img
, params
);
1327 case ILO_IMAGE_AUX_MCS
:
1328 img_calculate_mcs_size(img
, params
);
1336 * The texture is for transfer only. We can define our own layout to save
/*
 * Fill \p img with an untiled, packed layout described by \p templ.
 *
 * The template must have a single mip level and at most one sample (both
 * asserted).  The image gets no auxiliary surface, ILO_IMAGE_WALK_LOD,
 * GEN6_TILING_NONE, and alignments equal to the format block dimensions.
 * Only lods[0] is filled in.  bo_stride is
 * (layer_width / block_width) * block_size aligned to 64, and bo_height is
 * the number of block rows of one layer multiplied by the layer count.
 *
 * \p dev is not referenced in the lines visible here.
 */
1340 img_init_for_transfer(struct ilo_image
*img
,
1341 const struct ilo_dev
*dev
,
1342 const struct pipe_resource
*templ
)
/* 3D targets use depth0 as the layer count, everything else array_size */
1344 const unsigned num_layers
= (templ
->target
== PIPE_TEXTURE_3D
) ?
1345 templ
->depth0
: templ
->array_size
;
1346 unsigned layer_width
, layer_height
;
/* the packed layout below only describes level 0 of a non-MSAA resource */
1348 assert(templ
->last_level
== 0);
1349 assert(templ
->nr_samples
<= 1);
1351 img
->aux
.type
= ILO_IMAGE_AUX_NONE
;
/* copy the dimensions from the template; level and sample counts are fixed at 1 */
1353 img
->target
= templ
->target
;
1354 img
->width0
= templ
->width0
;
1355 img
->height0
= templ
->height0
;
1356 img
->depth0
= templ
->depth0
;
1357 img
->array_size
= templ
->array_size
;
1358 img
->level_count
= 1;
1359 img
->sample_count
= 1;
/* block dimensions and block size come from the format helpers */
1361 img
->format
= templ
->format
;
1362 img
->block_width
= util_format_get_blockwidth(templ
->format
);
1363 img
->block_height
= util_format_get_blockheight(templ
->format
);
1364 img
->block_size
= util_format_get_blocksize(templ
->format
);
1366 img
->walk
= ILO_IMAGE_WALK_LOD
;
1368 img
->tiling
= GEN6_TILING_NONE
;
/* align only to whole format blocks */
1370 img
->align_i
= img
->block_width
;
1371 img
->align_j
= img
->block_height
;
/* presumably required because align() below takes these as alignments -- TODO confirm */
1373 assert(util_is_power_of_two(img
->block_width
) &&
1374 util_is_power_of_two(img
->block_height
));
1376 /* use packed layout */
1377 layer_width
= align(templ
->width0
, img
->align_i
);
1378 layer_height
= align(templ
->height0
, img
->align_j
);
1380 img
->lods
[0].slice_width
= layer_width
;
1381 img
->lods
[0].slice_height
= layer_height
;
/* blocks per row times block_size, then aligned to 64 */
1383 img
->bo_stride
= (layer_width
/ img
->block_width
) * img
->block_size
;
1384 img
->bo_stride
= align(img
->bo_stride
, 64);
/* block rows of one layer times the number of layers */
1386 img
->bo_height
= (layer_height
/ img
->block_height
) * num_layers
;
1390 * Initialize the image. Callers should zero-initialize \p img first.
/*
 * Lay out \p img for the resource described by \p templ.
 *
 * \p img must be all zeros on entry (asserted with ilo_is_zeroed()).  When
 * the template's bind flags contain nothing besides
 * PIPE_BIND_TRANSFER_WRITE/PIPE_BIND_TRANSFER_READ, and it has a single
 * level and at most one sample, img_init_for_transfer() is used.  The
 * generic path clears an ilo_image_params, sets the template, allows every
 * tiling (IMAGE_TILING_ALL) and passes the address of that block to
 * img_init().
 *
 * NOTE(review): the embedded numbering shows lines missing from this
 * listing (1397, 1406-1408, 1410, ...); the declaration of transfer_only and
 * whatever ends the transfer-only branch are not visible here -- check the
 * upstream file.
 */
1392 void ilo_image_init(struct ilo_image
*img
,
1393 const struct ilo_dev
*dev
,
1394 const struct pipe_resource
*templ
)
1396 struct ilo_image_params params
;
1399 assert(ilo_is_zeroed(img
, sizeof(*img
)));
1401 /* use transfer layout when the texture is never bound to GPU */
1402 transfer_only
= !(templ
->bind
& ~(PIPE_BIND_TRANSFER_WRITE
|
1403 PIPE_BIND_TRANSFER_READ
));
1404 if (transfer_only
&& templ
->last_level
== 0 && templ
->nr_samples
<= 1) {
1405 img_init_for_transfer(img
, dev
, templ
);
/* clear the parameter block before filling in the fields below */
1409 memset(&params
, 0, sizeof(params
));
1411 params
.templ
= templ
;
1412 params
.valid_tilings
= IMAGE_TILING_ALL
;
1414 img_init(img
, &params
);
/*
 * Lay out \p img for an imported buffer whose tiling and stride are dictated
 * by the exporter.
 *
 * \p img must be all zeros on entry (asserted).  bo_stride is first tested
 * against the per-tiling multiple (512 for GEN6_TILING_X, 128 for
 * GEN6_TILING_Y, 64 for GEN8_TILING_W).  The layout is then computed by
 * img_init() with valid_tilings restricted to the single bit for \p tiling,
 * the chosen tiling is asserted to match, the computed stride is compared
 * with bo_stride, and img->bo_stride is overwritten with bo_stride.
 *
 * NOTE(review): the embedded numbering shows lines missing from this
 * listing (1417, 1422-1423, 1431, 1434, 1442, 1447, ...); the return type,
 * the declaration of bo_stride, and the statements guarded by the two
 * stride checks and preceding the scanout assignment are not visible here --
 * check the upstream file.
 */
1418 ilo_image_init_for_imported(struct ilo_image
*img
,
1419 const struct ilo_dev
*dev
,
1420 const struct pipe_resource
*templ
,
1421 enum gen_surface_tiling tiling
,
1424 struct ilo_image_params params
;
1426 assert(ilo_is_zeroed(img
, sizeof(*img
)));
/* the imported stride must be a multiple of the value tested for its tiling */
1428 if ((tiling
== GEN6_TILING_X
&& bo_stride
% 512) ||
1429 (tiling
== GEN6_TILING_Y
&& bo_stride
% 128) ||
1430 (tiling
== GEN8_TILING_W
&& bo_stride
% 64))
/* clear the parameter block before filling in the fields below */
1433 memset(&params
, 0, sizeof(params
));
1435 params
.templ
= templ
;
/* only the tiling of the imported buffer is acceptable */
1436 params
.valid_tilings
= 1 << tiling
;
1438 img_init(img
, &params
);
1440 assert(img
->tiling
== tiling
);
/* compare the stride computed by img_init() with the imported one */
1441 if (img
->bo_stride
> bo_stride
)
/* adopt the exporter's stride */
1444 img
->bo_stride
= bo_stride
;
1446 /* assume imported RTs are also scanouts */
1448 img
->scanout
= (templ
->bind
& PIPE_BIND_RENDER_TARGET
);
/*
 * Turn off use of the auxiliary surface of \p img by clearing aux.enables.
 *
 * Two conditions are tested first: HiZ with separate stencil on Gen6, and
 * MCS on an image with more than one sample.  The existing comments say the
 * auxiliary surface is required in those cases.
 *
 * NOTE(review): the statements guarded by the two conditions, the return
 * type and the end of the function are not part of this listing (see the
 * gaps in the embedded numbering) -- check the upstream file.
 */
1454 ilo_image_disable_aux(struct ilo_image
*img
, const struct ilo_dev
*dev
)
1456 /* HiZ is required for separate stencil on Gen6 */
1457 if (ilo_dev_gen(dev
) == ILO_GEN(6) &&
1458 img
->aux
.type
== ILO_IMAGE_AUX_HIZ
&&
1459 img
->separate_stencil
)
1462 /* MCS is required for multisample images */
1463 if (img
->aux
.type
== ILO_IMAGE_AUX_MCS
&&
1464 img
->sample_count
> 1)
/* no LOD uses the auxiliary surface any more */
1467 img
->aux
.enables
= 0x0;