2 * Mesa 3-D graphics library
4 * Copyright (C) 2014 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "ilo_debug.h"
29 #include "ilo_image.h"
32 IMAGE_TILING_NONE
= 1 << GEN6_TILING_NONE
,
33 IMAGE_TILING_X
= 1 << GEN6_TILING_X
,
34 IMAGE_TILING_Y
= 1 << GEN6_TILING_Y
,
35 IMAGE_TILING_W
= 1 << GEN8_TILING_W
,
37 IMAGE_TILING_ALL
= (IMAGE_TILING_NONE
|
43 struct ilo_image_layout
{
44 enum ilo_image_walk_type walk
;
45 bool interleaved_samples
;
47 uint8_t valid_tilings
;
48 enum gen_surface_tiling tiling
;
50 enum ilo_image_aux_type aux
;
55 struct ilo_image_lod
*lods
;
58 int walk_layer_height
;
60 int monolithic_height
;
63 static enum ilo_image_walk_type
64 image_get_gen6_walk(const struct ilo_dev
*dev
,
65 const struct ilo_image_info
*info
)
67 ILO_DEV_ASSERT(dev
, 6, 6);
69 /* TODO we want LODs to be page-aligned */
70 if (info
->type
== GEN6_SURFTYPE_3D
)
71 return ILO_IMAGE_WALK_3D
;
74 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
76 * "The separate stencil buffer does not support mip mapping, thus the
77 * storage for LODs other than LOD 0 is not needed. The following
78 * QPitch equation applies only to the separate stencil buffer:
82 * Use ILO_IMAGE_WALK_LOD and manually offset to the (page-aligned) levels
85 if (info
->bind_zs
&& info
->format
== GEN6_FORMAT_R8_UINT
)
86 return ILO_IMAGE_WALK_LOD
;
88 /* compact spacing is not supported otherwise */
89 return ILO_IMAGE_WALK_LAYER
;
92 static enum ilo_image_walk_type
93 image_get_gen7_walk(const struct ilo_dev
*dev
,
94 const struct ilo_image_info
*info
)
96 ILO_DEV_ASSERT(dev
, 7, 8);
98 if (info
->type
== GEN6_SURFTYPE_3D
)
99 return ILO_IMAGE_WALK_3D
;
102 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
104 * "note that the depth buffer and stencil buffer have an implied value
107 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
109 * "If Multisampled Surface Storage Format is MSFMT_MSS and Number of
110 * Multisamples is not MULTISAMPLECOUNT_1, this field (Surface Array
111 * Spacing) must be set to ARYSPC_LOD0."
113 if (info
->sample_count
> 1)
114 assert(info
->level_count
== 1);
115 return (info
->bind_zs
|| info
->level_count
> 1) ?
116 ILO_IMAGE_WALK_LAYER
: ILO_IMAGE_WALK_LOD
;
120 image_get_gen6_interleaved_samples(const struct ilo_dev
*dev
,
121 const struct ilo_image_info
*info
)
123 ILO_DEV_ASSERT(dev
, 6, 8);
126 * Gen6 supports only interleaved samples. It is not explicitly stated,
127 * but on Gen7+, render targets are expected to be UMS/CMS (samples
128 * non-interleaved) and depth/stencil buffers are expected to be IMS
129 * (samples interleaved).
131 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
133 return (ilo_dev_gen(dev
) == ILO_GEN(6) || info
->bind_zs
);
137 image_get_gen6_valid_tilings(const struct ilo_dev
*dev
,
138 const struct ilo_image_info
*info
)
140 uint8_t valid_tilings
= IMAGE_TILING_ALL
;
142 ILO_DEV_ASSERT(dev
, 6, 8);
144 if (info
->valid_tilings
)
145 valid_tilings
&= info
->valid_tilings
;
148 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
150 * "Display/Overlay Y-Major not supported.
151 * X-Major required for Async Flips"
153 if (unlikely(info
->bind_scanout
))
154 valid_tilings
&= IMAGE_TILING_X
;
157 * From the Sandy Bridge PRM, volume 3 part 2, page 158:
159 * "The cursor surface address must be 4K byte aligned. The cursor must
160 * be in linear memory, it cannot be tiled."
162 if (unlikely(info
->bind_cursor
))
163 valid_tilings
&= IMAGE_TILING_NONE
;
166 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
168 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
169 * Depth Buffer is not supported."
171 * "The Depth Buffer, if tiled, must use Y-Major tiling."
173 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
175 * "W-Major Tile Format is used for separate stencil."
178 if (info
->format
== GEN6_FORMAT_R8_UINT
)
179 valid_tilings
&= IMAGE_TILING_W
;
181 valid_tilings
&= IMAGE_TILING_Y
;
184 if (info
->bind_surface_sampler
||
185 info
->bind_surface_dp_render
||
186 info
->bind_surface_dp_typed
) {
188 * From the Haswell PRM, volume 2d, page 233:
190 * "If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
191 * (Tiled Surface) must be TRUE."
193 if (info
->sample_count
> 1)
194 valid_tilings
&= ~IMAGE_TILING_NONE
;
196 if (ilo_dev_gen(dev
) < ILO_GEN(8))
197 valid_tilings
&= ~IMAGE_TILING_W
;
200 if (info
->bind_surface_dp_render
) {
202 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
204 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
205 * either TileX or Linear."
207 * From the Haswell PRM, volume 5, page 32:
209 * "NOTE: 128 BPP format color buffer (render target) supports
210 * Linear, TiledX and TiledY."
212 if (ilo_dev_gen(dev
) < ILO_GEN(7.5) && info
->block_size
== 16)
213 valid_tilings
&= ~IMAGE_TILING_Y
;
216 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
218 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
219 * for all tiled Y Render Target surfaces."
221 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
223 * R32G32B32_FLOAT is not renderable and we only need an assert() here.
225 if (ilo_dev_gen(dev
) >= ILO_GEN(7) && ilo_dev_gen(dev
) <= ILO_GEN(7.5))
226 assert(info
->format
!= GEN6_FORMAT_R32G32B32_FLOAT
);
229 return valid_tilings
;
233 image_get_gen6_estimated_size(const struct ilo_dev
*dev
,
234 const struct ilo_image_info
*info
)
236 /* padding not considered */
237 const uint64_t slice_size
= info
->width
* info
->height
*
238 info
->block_size
/ (info
->block_width
* info
->block_height
);
239 const uint64_t slice_count
=
240 info
->depth
* info
->array_size
* info
->sample_count
;
241 const uint64_t estimated_size
= slice_size
* slice_count
;
243 ILO_DEV_ASSERT(dev
, 6, 8);
245 if (info
->level_count
== 1)
246 return estimated_size
;
248 return estimated_size
* 4 / 3;
251 static enum gen_surface_tiling
252 image_get_gen6_tiling(const struct ilo_dev
*dev
,
253 const struct ilo_image_info
*info
,
254 uint8_t valid_tilings
)
256 ILO_DEV_ASSERT(dev
, 6, 8);
258 switch (valid_tilings
) {
259 case IMAGE_TILING_NONE
:
260 return GEN6_TILING_NONE
;
262 return GEN6_TILING_X
;
264 return GEN6_TILING_Y
;
266 return GEN8_TILING_W
;
272 * X-tiling has the property that vertically adjacent pixels are usually in
273 * the same page. When the image size is less than a page, the image
274 * height is 1, or when the image is not accessed in blocks, there is no
277 * Y-tiling is similar, where vertically adjacent pixels are usually in the
280 if (valid_tilings
& IMAGE_TILING_NONE
) {
281 const uint64_t estimated_size
=
282 image_get_gen6_estimated_size(dev
, info
);
284 if (info
->height
== 1 || !(info
->bind_surface_sampler
||
285 info
->bind_surface_dp_render
||
286 info
->bind_surface_dp_typed
))
287 return GEN6_TILING_NONE
;
289 if (estimated_size
<= 64 || (info
->prefer_linear_threshold
&&
290 estimated_size
> info
->prefer_linear_threshold
))
291 return GEN6_TILING_NONE
;
293 if (estimated_size
<= 2048)
294 valid_tilings
&= ~IMAGE_TILING_X
;
297 return (valid_tilings
& IMAGE_TILING_Y
) ? GEN6_TILING_Y
:
298 (valid_tilings
& IMAGE_TILING_X
) ? GEN6_TILING_X
:
303 image_get_gen6_hiz_enable(const struct ilo_dev
*dev
,
304 const struct ilo_image_info
*info
)
306 ILO_DEV_ASSERT(dev
, 6, 8);
309 if (!info
->bind_zs
||
310 info
->format
== GEN6_FORMAT_R8_UINT
||
311 info
->interleaved_stencil
)
314 /* we want to be able to force 8x4 alignments */
315 if (info
->type
== GEN6_SURFTYPE_1D
)
318 if (info
->aux_disable
)
321 if (ilo_debug
& ILO_DEBUG_NOHIZ
)
328 image_get_gen7_mcs_enable(const struct ilo_dev
*dev
,
329 const struct ilo_image_info
*info
,
330 enum gen_surface_tiling tiling
)
332 ILO_DEV_ASSERT(dev
, 7, 8);
334 if (!info
->bind_surface_sampler
&& !info
->bind_surface_dp_render
)
338 * From the Ivy Bridge PRM, volume 4 part 1, page 77:
340 * "For Render Target and Sampling Engine Surfaces:If the surface is
341 * multisampled (Number of Multisamples any value other than
342 * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
344 * "This field must be set to 0 for all SINT MSRTs when all RT channels
347 if (info
->sample_count
> 1) {
348 if (ilo_dev_gen(dev
) < ILO_GEN(8))
349 assert(!info
->is_integer
);
353 if (info
->aux_disable
)
357 * From the Ivy Bridge PRM, volume 2 part 1, page 326:
359 * "When MCS is buffer is used for color clear of non-multisampler
360 * render target, the following restrictions apply.
361 * - Support is limited to tiled render targets.
362 * - Support is for non-mip-mapped and non-array surface types only.
363 * - Clear is supported only on the full RT; i.e., no partial clear or
364 * overlapping clears.
365 * - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
369 * How about SURFTYPE_3D?
371 if (!info
->bind_surface_dp_render
||
372 tiling
== GEN6_TILING_NONE
||
373 info
->level_count
> 1 ||
374 info
->array_size
> 1)
377 switch (info
->block_size
) {
388 image_get_gen6_alignments(const struct ilo_dev
*dev
,
389 const struct ilo_image_info
*info
,
390 int *align_i
, int *align_j
)
392 ILO_DEV_ASSERT(dev
, 6, 6);
395 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
397 * "surface format align_i align_j
398 * YUV 4:2:2 formats 4 *see below
401 * all other formats 4 *see below"
403 * "- align_j = 4 for any depth buffer
404 * - align_j = 2 for separate stencil buffer
405 * - align_j = 4 for any render target surface is multisampled (4x)
406 * - align_j = 4 for any render target surface with Surface Vertical
407 * Alignment = VALIGN_4
408 * - align_j = 2 for any render target surface with Surface Vertical
409 * Alignment = VALIGN_2
410 * - align_j = 2 for all other render target surface
411 * - align_j = 2 for any sampling engine surface with Surface Vertical
412 * Alignment = VALIGN_2
413 * - align_j = 4 for any sampling engine surface with Surface Vertical
414 * Alignment = VALIGN_4"
416 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
418 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
419 * the Surface Format is 96 bits per element (BPE)."
421 * They can be rephrased as
424 * compressed formats block width block height
425 * GEN6_FORMAT_R8_UINT 4 2
426 * other depth/stencil formats 4 4
427 * 4x multisampled 4 4
432 *align_i
= (info
->compressed
) ? info
->block_width
: 4;
433 if (info
->compressed
) {
434 *align_j
= info
->block_height
;
435 } else if (info
->bind_zs
) {
436 *align_j
= (info
->format
== GEN6_FORMAT_R8_UINT
) ? 2 : 4;
438 *align_j
= (info
->sample_count
> 1 || info
->block_size
!= 12) ? 4 : 2;
443 image_get_gen7_alignments(const struct ilo_dev
*dev
,
444 const struct ilo_image_info
*info
,
445 enum gen_surface_tiling tiling
,
446 int *align_i
, int *align_j
)
450 ILO_DEV_ASSERT(dev
, 7, 8);
453 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
455 * "surface defined by surface format align_i align_j
456 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
458 * 3DSTATE_STENCIL_BUFFER N/A 8 8
459 * SURFACE_STATE BC*, ETC*, EAC* 4 4
461 * all others (set by SURFACE_STATE)"
463 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
465 * "- This field (Surface Vertical Aligment) is intended to be set to
466 * VALIGN_4 if the surface was rendered as a depth buffer, for a
467 * multisampled (4x) render target, or for a multisampled (8x)
468 * render target, since these surfaces support only alignment of 4.
469 * - Use of VALIGN_4 for other surfaces is supported, but uses more
471 * - This field must be set to VALIGN_4 for all tiled Y Render Target
473 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
474 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
475 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
476 * must be set to VALIGN_4."
477 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
479 * "- This field (Surface Horizontal Aligment) is intended to be set to
480 * HALIGN_8 only if the surface was rendered as a depth buffer with
481 * Z16 format or a stencil buffer, since these surfaces support only
483 * - Use of HALIGN_8 for other surfaces is supported, but uses more
485 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
486 * - This field must be set to HALIGN_8 if the Surface Format is
489 * They can be rephrased as
492 * compressed formats block width block height
493 * GEN6_FORMAT_R16_UNORM 8 4
494 * GEN6_FORMAT_R8_UINT 8 8
495 * other depth/stencil formats 4 4
496 * 2x or 4x multisampled 4 or 8 4
497 * tiled Y 4 or 8 4 (if rt)
498 * GEN6_FORMAT_R32G32B32_FLOAT 4 or 8 2
499 * others 4 or 8 2 or 4
501 if (info
->compressed
) {
502 i
= info
->block_width
;
503 j
= info
->block_height
;
504 } else if (info
->bind_zs
) {
505 switch (info
->format
) {
506 case GEN6_FORMAT_R16_UNORM
:
510 case GEN6_FORMAT_R8_UINT
:
520 const bool valign_4
=
521 (info
->sample_count
> 1 || ilo_dev_gen(dev
) >= ILO_GEN(8) ||
522 (tiling
== GEN6_TILING_Y
&& info
->bind_surface_dp_render
));
524 if (ilo_dev_gen(dev
) < ILO_GEN(8) && valign_4
)
525 assert(info
->format
!= GEN6_FORMAT_R32G32B32_FLOAT
);
528 j
= (valign_4
) ? 4 : 2;
536 image_init_gen6_hardware_layout(const struct ilo_dev
*dev
,
537 const struct ilo_image_info
*info
,
538 struct ilo_image_layout
*layout
)
540 ILO_DEV_ASSERT(dev
, 6, 8);
542 if (ilo_dev_gen(dev
) >= ILO_GEN(7))
543 layout
->walk
= image_get_gen7_walk(dev
, info
);
545 layout
->walk
= image_get_gen6_walk(dev
, info
);
547 layout
->interleaved_samples
=
548 image_get_gen6_interleaved_samples(dev
, info
);
550 layout
->valid_tilings
= image_get_gen6_valid_tilings(dev
, info
);
551 if (!layout
->valid_tilings
)
554 layout
->tiling
= image_get_gen6_tiling(dev
, info
, layout
->valid_tilings
);
556 if (image_get_gen6_hiz_enable(dev
, info
))
557 layout
->aux
= ILO_IMAGE_AUX_HIZ
;
558 else if (ilo_dev_gen(dev
) >= ILO_GEN(7) &&
559 image_get_gen7_mcs_enable(dev
, info
, layout
->tiling
))
560 layout
->aux
= ILO_IMAGE_AUX_MCS
;
562 layout
->aux
= ILO_IMAGE_AUX_NONE
;
564 if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
565 image_get_gen7_alignments(dev
, info
, layout
->tiling
,
566 &layout
->align_i
, &layout
->align_j
);
568 image_get_gen6_alignments(dev
, info
,
569 &layout
->align_i
, &layout
->align_j
);
576 image_init_gen6_transfer_layout(const struct ilo_dev
*dev
,
577 const struct ilo_image_info
*info
,
578 struct ilo_image_layout
*layout
)
580 ILO_DEV_ASSERT(dev
, 6, 8);
582 /* we can define our own layout to save space */
583 layout
->walk
= ILO_IMAGE_WALK_LOD
;
584 layout
->interleaved_samples
= false;
585 layout
->valid_tilings
= IMAGE_TILING_NONE
;
586 layout
->tiling
= GEN6_TILING_NONE
;
587 layout
->aux
= ILO_IMAGE_AUX_NONE
;
588 layout
->align_i
= info
->block_width
;
589 layout
->align_j
= info
->block_height
;
595 image_get_gen6_slice_size(const struct ilo_dev
*dev
,
596 const struct ilo_image_info
*info
,
597 const struct ilo_image_layout
*layout
,
599 int *width
, int *height
)
603 ILO_DEV_ASSERT(dev
, 6, 8);
605 w
= u_minify(info
->width
, level
);
606 h
= u_minify(info
->height
, level
);
609 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
611 * "The dimensions of the mip maps are first determined by applying the
612 * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
613 * if necessary, they are padded out to compression block boundaries."
615 w
= align(w
, info
->block_width
);
616 h
= align(h
, info
->block_height
);
619 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
621 * "If the surface is multisampled (4x), these values must be adjusted
622 * as follows before proceeding:
624 * W_L = ceiling(W_L / 2) * 4
625 * H_L = ceiling(H_L / 2) * 4"
627 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
629 * "If the surface is multisampled and it is a depth or stencil surface
630 * or Multisampled Surface StorageFormat in SURFACE_STATE is
631 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
634 * #samples W_L = H_L =
635 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
636 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
637 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
638 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
640 * For interleaved samples (4x), where pixels
643 * (x, y+1) (x+1, y+1)
645 * would be occupied by
647 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
648 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
649 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
650 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
654 * w = align(w, 2) * 2;
655 * y = align(y, 2) * 2;
657 if (layout
->interleaved_samples
) {
658 switch (info
->sample_count
) {
677 assert(!"unsupported sample count");
683 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
685 * "For separate stencil buffer, the width must be mutiplied by 2 and
686 * height divided by 2..."
688 * To make things easier (for transfer), we will just double the stencil
689 * stride in 3DSTATE_STENCIL_BUFFER.
691 w
= align(w
, layout
->align_i
);
692 h
= align(h
, layout
->align_j
);
699 image_get_gen6_layer_count(const struct ilo_dev
*dev
,
700 const struct ilo_image_info
*info
,
701 const struct ilo_image_layout
*layout
)
703 int count
= info
->array_size
;
705 ILO_DEV_ASSERT(dev
, 6, 8);
707 /* samples of the same index are stored in a layer */
708 if (!layout
->interleaved_samples
)
709 count
*= info
->sample_count
;
715 image_get_gen6_walk_layer_heights(const struct ilo_dev
*dev
,
716 const struct ilo_image_info
*info
,
717 struct ilo_image_layout
*layout
)
719 ILO_DEV_ASSERT(dev
, 6, 8);
721 layout
->walk_layer_h0
= layout
->lods
[0].slice_height
;
723 if (info
->level_count
> 1) {
724 layout
->walk_layer_h1
= layout
->lods
[1].slice_height
;
727 image_get_gen6_slice_size(dev
, info
, layout
, 1,
728 &dummy
, &layout
->walk_layer_h1
);
731 if (image_get_gen6_layer_count(dev
, info
, layout
) == 1) {
732 layout
->walk_layer_height
= 0;
737 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
739 * "The following equation is used for surface formats other than
740 * compressed textures:
742 * QPitch = (h0 + h1 + 11j)"
744 * "The equation for compressed textures (BC* and FXT1 surface formats)
747 * QPitch = (h0 + h1 + 11j) / 4"
749 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
750 * value calculated in the equation above, for every other odd Surface
751 * Height starting from 1 i.e. 1,5,9,13"
753 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
755 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
756 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
758 * QPitch = (h0 + h1 + 12j)
759 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
761 * (There are many typos or missing words here...)"
763 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
764 * the base address. The PRM divides QPitch by 4 for compressed formats
765 * because the block height for those formats are 4, and it wants QPitch to
766 * mean the number of memory rows, as opposed to texel rows, between
767 * slices. Since we use texel rows everywhere, we do not need to divide
770 layout
->walk_layer_height
= layout
->walk_layer_h0
+ layout
->walk_layer_h1
+
771 ((ilo_dev_gen(dev
) >= ILO_GEN(7)) ? 12 : 11) * layout
->align_j
;
773 if (ilo_dev_gen(dev
) == ILO_GEN(6) && info
->sample_count
> 1 &&
774 info
->height
% 4 == 1)
775 layout
->walk_layer_height
+= 4;
779 image_get_gen6_monolithic_size(const struct ilo_dev
*dev
,
780 const struct ilo_image_info
*info
,
781 struct ilo_image_layout
*layout
,
782 int max_x
, int max_y
)
784 int align_w
= 1, align_h
= 1, pad_h
= 0;
786 ILO_DEV_ASSERT(dev
, 6, 8);
789 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
791 * "To determine the necessary padding on the bottom and right side of
792 * the surface, refer to the table in Section 7.18.3.4 for the i and j
793 * parameters for the surface format in use. The surface must then be
794 * extended to the next multiple of the alignment unit size in each
795 * dimension, and all texels contained in this extended surface must
796 * have valid GTT entries."
798 * "For cube surfaces, an additional two rows of padding are required
799 * at the bottom of the surface. This must be ensured regardless of
800 * whether the surface is stored tiled or linear. This is due to the
801 * potential rotation of cache line orientation from memory to cache."
803 * "For compressed textures (BC* and FXT1 surface formats), padding at
804 * the bottom of the surface is to an even compressed row, which is
805 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
806 * purposes, these surfaces behave as if j = 8 only for surface
807 * padding purposes. The value of 4 for j still applies for mip level
808 * alignment and QPitch calculation."
810 if (info
->bind_surface_sampler
) {
811 align_w
= MAX2(align_w
, layout
->align_i
);
812 align_h
= MAX2(align_h
, layout
->align_j
);
814 if (info
->type
== GEN6_SURFTYPE_CUBE
)
817 if (info
->compressed
)
818 align_h
= MAX2(align_h
, layout
->align_j
* 2);
822 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
824 * "If the surface contains an odd number of rows of data, a final row
825 * below the surface must be allocated."
827 if (info
->bind_surface_dp_render
)
828 align_h
= MAX2(align_h
, 2);
831 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. Pad to allow HiZ
832 * for unaligned non-mipmapped and non-array images.
834 if (layout
->aux
== ILO_IMAGE_AUX_HIZ
&&
835 info
->level_count
== 1 && info
->array_size
== 1 && info
->depth
== 1) {
836 align_w
= MAX2(align_w
, 8);
837 align_h
= MAX2(align_h
, 4);
840 layout
->monolithic_width
= align(max_x
, align_w
);
841 layout
->monolithic_height
= align(max_y
+ pad_h
, align_h
);
845 image_get_gen6_lods(const struct ilo_dev
*dev
,
846 const struct ilo_image_info
*info
,
847 struct ilo_image_layout
*layout
)
849 const int layer_count
= image_get_gen6_layer_count(dev
, info
, layout
);
850 int cur_x
, cur_y
, max_x
, max_y
;
853 ILO_DEV_ASSERT(dev
, 6, 8);
859 for (lv
= 0; lv
< info
->level_count
; lv
++) {
860 int slice_w
, slice_h
, lod_w
, lod_h
;
862 image_get_gen6_slice_size(dev
, info
, layout
, lv
, &slice_w
, &slice_h
);
864 layout
->lods
[lv
].x
= cur_x
;
865 layout
->lods
[lv
].y
= cur_y
;
866 layout
->lods
[lv
].slice_width
= slice_w
;
867 layout
->lods
[lv
].slice_height
= slice_h
;
869 switch (layout
->walk
) {
870 case ILO_IMAGE_WALK_LAYER
:
874 /* MIPLAYOUT_BELOW */
880 case ILO_IMAGE_WALK_LOD
:
882 lod_h
= slice_h
* layer_count
;
889 /* every LOD begins at tile boundaries */
890 if (info
->level_count
> 1) {
891 assert(info
->format
== GEN6_FORMAT_R8_UINT
);
892 cur_x
= align(cur_x
, 64);
893 cur_y
= align(cur_y
, 64);
896 case ILO_IMAGE_WALK_3D
:
898 const int slice_count
= u_minify(info
->depth
, lv
);
899 const int slice_count_per_row
= 1 << lv
;
900 const int row_count
=
901 (slice_count
+ slice_count_per_row
- 1) / slice_count_per_row
;
903 lod_w
= slice_w
* slice_count_per_row
;
904 lod_h
= slice_h
* row_count
;
910 assert(!"unknown walk type");
916 if (max_x
< layout
->lods
[lv
].x
+ lod_w
)
917 max_x
= layout
->lods
[lv
].x
+ lod_w
;
918 if (max_y
< layout
->lods
[lv
].y
+ lod_h
)
919 max_y
= layout
->lods
[lv
].y
+ lod_h
;
922 if (layout
->walk
== ILO_IMAGE_WALK_LAYER
) {
923 image_get_gen6_walk_layer_heights(dev
, info
, layout
);
925 max_y
+= layout
->walk_layer_height
* (layer_count
- 1);
927 layout
->walk_layer_h0
= 0;
928 layout
->walk_layer_h1
= 0;
929 layout
->walk_layer_height
= 0;
932 image_get_gen6_monolithic_size(dev
, info
, layout
, max_x
, max_y
);
936 image_bind_gpu(const struct ilo_image_info
*info
)
938 return (info
->bind_surface_sampler
||
939 info
->bind_surface_dp_render
||
940 info
->bind_surface_dp_typed
||
942 info
->bind_scanout
||
947 image_validate_gen6(const struct ilo_dev
*dev
,
948 const struct ilo_image_info
*info
)
950 ILO_DEV_ASSERT(dev
, 6, 8);
953 * From the Ivy Bridge PRM, volume 2 part 1, page 314:
955 * "The separate stencil buffer is always enabled, thus the field in
956 * 3DSTATE_DEPTH_BUFFER to explicitly enable the separate stencil
957 * buffer has been removed Surface formats with interleaved depth and
958 * stencil are no longer supported"
960 if (ilo_dev_gen(dev
) >= ILO_GEN(7) && info
->bind_zs
)
961 assert(!info
->interleaved_stencil
);
967 image_get_gen6_layout(const struct ilo_dev
*dev
,
968 const struct ilo_image_info
*info
,
969 struct ilo_image_layout
*layout
)
971 ILO_DEV_ASSERT(dev
, 6, 8);
973 if (!image_validate_gen6(dev
, info
))
976 if (image_bind_gpu(info
) || info
->level_count
> 1) {
977 if (!image_init_gen6_hardware_layout(dev
, info
, layout
))
980 if (!image_init_gen6_transfer_layout(dev
, info
, layout
))
985 * the fact that align i and j are multiples of block width and height
986 * respectively is what makes the size of the bo a multiple of the block
987 * size, slices start at block boundaries, and many of the computations
990 assert(layout
->align_i
% info
->block_width
== 0);
991 assert(layout
->align_j
% info
->block_height
== 0);
993 /* make sure align() works */
994 assert(util_is_power_of_two(layout
->align_i
) &&
995 util_is_power_of_two(layout
->align_j
));
996 assert(util_is_power_of_two(info
->block_width
) &&
997 util_is_power_of_two(info
->block_height
));
999 image_get_gen6_lods(dev
, info
, layout
);
1001 assert(layout
->walk_layer_height
% info
->block_height
== 0);
1002 assert(layout
->monolithic_width
% info
->block_width
== 0);
1003 assert(layout
->monolithic_height
% info
->block_height
== 0);
1009 image_set_gen6_bo_size(struct ilo_image
*img
,
1010 const struct ilo_dev
*dev
,
1011 const struct ilo_image_info
*info
,
1012 const struct ilo_image_layout
*layout
)
1015 int align_w
, align_h
;
1017 ILO_DEV_ASSERT(dev
, 6, 8);
1019 stride
= (layout
->monolithic_width
/ info
->block_width
) * info
->block_size
;
1020 height
= layout
->monolithic_height
/ info
->block_height
;
1023 * From the Haswell PRM, volume 5, page 163:
1025 * "For linear surfaces, additional padding of 64 bytes is required
1026 * at the bottom of the surface. This is in addition to the padding
1029 if (ilo_dev_gen(dev
) >= ILO_GEN(7.5) && info
->bind_surface_sampler
&&
1030 layout
->tiling
== GEN6_TILING_NONE
)
1031 height
+= (64 + stride
- 1) / stride
;
1034 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
1036 * "- For linear render target surfaces, the pitch must be a multiple
1037 * of the element size for non-YUV surface formats. Pitch must be a
1038 * multiple of 2 * element size for YUV surface formats.
1040 * - For other linear surfaces, the pitch can be any multiple of
1042 * - For tiled surfaces, the pitch must be a multiple of the tile
1045 * Different requirements may exist when the image is used in different
1046 * places, but our alignments here should be good enough that we do not
1047 * need to check info->bind_x.
1049 switch (layout
->tiling
) {
1060 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
1062 * "A 4KB tile is subdivided into 8-high by 8-wide array of
1063 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
1070 assert(layout
->tiling
== GEN6_TILING_NONE
);
1071 /* some good enough values */
1077 if (info
->force_bo_stride
) {
1078 if (info
->force_bo_stride
% align_w
|| info
->force_bo_stride
< stride
)
1081 img
->bo_stride
= info
->force_bo_stride
;
1083 img
->bo_stride
= align(stride
, align_w
);
1086 img
->bo_height
= align(height
, align_h
);
1092 image_set_gen6_hiz(struct ilo_image
*img
,
1093 const struct ilo_dev
*dev
,
1094 const struct ilo_image_info
*info
,
1095 const struct ilo_image_layout
*layout
)
1097 const int hz_align_j
= 8;
1098 enum ilo_image_walk_type hz_walk
;
1099 int hz_width
, hz_height
;
1100 int hz_clear_w
, hz_clear_h
;
1103 ILO_DEV_ASSERT(dev
, 6, 8);
1105 assert(layout
->aux
== ILO_IMAGE_AUX_HIZ
);
1107 assert(layout
->walk
== ILO_IMAGE_WALK_LAYER
||
1108 layout
->walk
== ILO_IMAGE_WALK_3D
);
1111 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
1113 * "The hierarchical depth buffer does not support the LOD field, it is
1114 * assumed by hardware to be zero. A separate hierarachical depth
1115 * buffer is required for each LOD used, and the corresponding
1116 * buffer's state delivered to hardware each time a new depth buffer
1117 * state with modified LOD is delivered."
1119 * We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD.
1121 if (ilo_dev_gen(dev
) >= ILO_GEN(7))
1122 hz_walk
= layout
->walk
;
1124 hz_walk
= ILO_IMAGE_WALK_LOD
;
1127 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1128 * PRM, volume 2 part 1, page 312-313.
1130 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1134 case ILO_IMAGE_WALK_LAYER
:
1136 const int h0
= align(layout
->walk_layer_h0
, hz_align_j
);
1137 const int h1
= align(layout
->walk_layer_h1
, hz_align_j
);
1139 ((ilo_dev_gen(dev
) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j
;
1140 const int hz_qpitch
= h0
+ h1
+ htail
;
1142 hz_width
= align(layout
->lods
[0].slice_width
, 16);
1144 hz_height
= hz_qpitch
* info
->array_size
/ 2;
1145 if (ilo_dev_gen(dev
) >= ILO_GEN(7))
1146 hz_height
= align(hz_height
, 8);
1148 img
->aux
.walk_layer_height
= hz_qpitch
;
1151 case ILO_IMAGE_WALK_LOD
:
1153 int lod_tx
[ILO_IMAGE_MAX_LEVEL_COUNT
];
1154 int lod_ty
[ILO_IMAGE_MAX_LEVEL_COUNT
];
1157 /* figure out the tile offsets of LODs */
1162 for (lv
= 0; lv
< info
->level_count
; lv
++) {
1165 lod_tx
[lv
] = cur_tx
;
1166 lod_ty
[lv
] = cur_ty
;
1168 tw
= align(layout
->lods
[lv
].slice_width
, 16);
1169 th
= align(layout
->lods
[lv
].slice_height
, hz_align_j
) *
1170 info
->array_size
/ 2;
1171 /* convert to Y-tiles */
1172 tw
= (tw
+ 127) / 128;
1173 th
= (th
+ 31) / 32;
1175 if (hz_width
< cur_tx
+ tw
)
1176 hz_width
= cur_tx
+ tw
;
1177 if (hz_height
< cur_ty
+ th
)
1178 hz_height
= cur_ty
+ th
;
1186 /* convert tile offsets to memory offsets */
1187 for (lv
= 0; lv
< info
->level_count
; lv
++) {
1188 img
->aux
.walk_lod_offsets
[lv
] =
1189 (lod_ty
[lv
] * hz_width
+ lod_tx
[lv
]) * 4096;
1196 case ILO_IMAGE_WALK_3D
:
1197 hz_width
= align(layout
->lods
[0].slice_width
, 16);
1200 for (lv
= 0; lv
< info
->level_count
; lv
++) {
1201 const int h
= align(layout
->lods
[lv
].slice_height
, hz_align_j
);
1202 /* according to the formula, slices are packed together vertically */
1203 hz_height
+= h
* u_minify(info
->depth
, lv
);
1208 assert(!"unknown HiZ walk");
1215 * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
1216 * Experiments on Haswell show that aligning the RECTLIST primitive and
1217 * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be
1222 switch (info
->sample_count
) {
1243 for (lv
= 0; lv
< info
->level_count
; lv
++) {
1244 if (u_minify(info
->width
, lv
) % hz_clear_w
||
1245 u_minify(info
->height
, lv
) % hz_clear_h
)
1247 img
->aux
.enables
|= 1 << lv
;
1250 /* we padded to allow this in image_get_gen6_monolithic_size() */
1251 if (info
->level_count
== 1 && info
->array_size
== 1 && info
->depth
== 1)
1252 img
->aux
.enables
|= 0x1;
1254 /* align to Y-tile */
1255 img
->aux
.bo_stride
= align(hz_width
, 128);
1256 img
->aux
.bo_height
= align(hz_height
, 32);
/*
 * Size the MCS auxiliary buffer of img and record which levels may use it.
 *
 * layout->aux must be ILO_IMAGE_AUX_MCS (asserted).  ILO_DEV_ASSERT(dev, 7, 8)
 * presumably restricts dev to Gen7..Gen8 -- confirm against the macro.
 *
 * When info->sample_count is greater than 1, the MCS width and height are
 * info->width and info->height padded to twice the downscale factors picked
 * for the sample count.  In the other path (apparently the non-MSAA one, per
 * the comments below), downscale_x is derived from layout->tiling and
 * info->block_size, the image size is padded to four times the downscale
 * factors and then divided by them, and mcs_cpp is 16.
 *
 * Writes img->aux.enables, img->aux.bo_stride and img->aux.bo_height.
 */
1262 image_set_gen7_mcs(struct ilo_image
*img
,
1263 const struct ilo_dev
*dev
,
1264 const struct ilo_image_info
*info
,
1265 const struct ilo_image_layout
*layout
)
1267 int mcs_width
, mcs_height
, mcs_cpp
;
1268 int downscale_x
, downscale_y
;
1270 ILO_DEV_ASSERT(dev
, 7, 8);
1272 assert(layout
->aux
== ILO_IMAGE_AUX_MCS
);
1274 if (info
->sample_count
> 1) {
1276 * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1277 * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
1278 * need of scale down could be that the clear rectangle is used to clear
1279 * the MCS instead of the RT.
1281 * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
1282 * 2x2 factor could come from that the hardware writes 128 bits (an
1283 * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1284 * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
1285 * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1286 * pixel block in the RT.
1288 switch (info
->sample_count
) {
1306 assert(!"unsupported sample count");
1312 * It also appears that the 2x2 subspans generated by the scaled-down
1313 * clear rectangle cannot be masked. The scale-down clear rectangle
1314 * thus must be aligned to 2x2, and we need to pad.
/* MSAA path: pad only; the padded size is not divided by the downscale */
1316 mcs_width
= align(info
->width
, downscale_x
* 2);
1317 mcs_height
= align(info
->height
, downscale_y
* 2);
1320 * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1335 * This table and the two following tables define the RT alignments, the
1336 * clear rectangle alignments, and the clear rectangle scale factors.
1337 * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1338 * that the clear rectangle alignments are 16x32 blocks, and the clear
1339 * rectangle scale factors are 8x16 blocks.
1341 * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1342 * RT. Similar to the MSAA cases, we can argue that an OWord maps to
1345 * One problem with this reasoning is that a Y-tile in MCS has 8x32
1346 * OWords and maps to 64x512 128-byte blocks. This differs from i965,
1347 * which says that a Y-tile maps to 128x256 blocks (\see
1348 * intel_get_non_msrt_mcs_alignment). It does not really change
1349 * anything except for the size of the allocated MCS. Let's see if we
1350 * hit out-of-bound access.
1352 switch (layout
->tiling
) {
1354 downscale_x
= 64 / info
->block_size
;
1358 downscale_x
= 32 / info
->block_size
;
1362 assert(!"unsupported tiling mode");
1371 * From the Haswell PRM, volume 7, page 652:
1373 * "Clear rectangle must be aligned to two times the number of
1374 * pixels in the table shown below due to 16X16 hashing across the
1377 * The scaled-down clear rectangle must be aligned to 4x4 instead of
1378 * 2x2, and we need to pad.
/* pad to four times the downscale factors, then convert to MCS elements */
1380 mcs_width
= align(info
->width
, downscale_x
* 4) / downscale_x
;
1381 mcs_height
= align(info
->height
, downscale_y
* 4) / downscale_y
;
1382 mcs_cpp
= 16; /* an OWord */
/* one enable bit for each of the info->level_count levels */
1385 img
->aux
.enables
= (1 << info
->level_count
) - 1;
1386 /* align to Y-tile */
1387 img
->aux
.bo_stride
= align(mcs_width
* mcs_cpp
, 128);
1388 img
->aux
.bo_height
= align(mcs_height
, 32);
1394 ilo_image_init(struct ilo_image
*img
,
1395 const struct ilo_dev
*dev
,
1396 const struct ilo_image_info
*info
)
1398 struct ilo_image_layout layout
;
1400 assert(ilo_is_zeroed(img
, sizeof(*img
)));
1402 memset(&layout
, 0, sizeof(layout
));
1403 layout
.lods
= img
->lods
;
1405 if (!image_get_gen6_layout(dev
, info
, &layout
))
1408 img
->type
= info
->type
;
1410 img
->format
= info
->format
;
1411 img
->block_width
= info
->block_width
;
1412 img
->block_height
= info
->block_height
;
1413 img
->block_size
= info
->block_size
;
1415 img
->width0
= info
->width
;
1416 img
->height0
= info
->height
;
1417 img
->depth0
= info
->depth
;
1418 img
->array_size
= info
->array_size
;
1419 img
->level_count
= info
->level_count
;
1420 img
->sample_count
= info
->sample_count
;
1422 img
->walk
= layout
.walk
;
1423 img
->interleaved_samples
= layout
.interleaved_samples
;
1425 img
->tiling
= layout
.tiling
;
1427 img
->aux
.type
= layout
.aux
;
1429 img
->align_i
= layout
.align_i
;
1430 img
->align_j
= layout
.align_j
;
1432 img
->walk_layer_height
= layout
.walk_layer_height
;
1434 if (!image_set_gen6_bo_size(img
, dev
, info
, &layout
))
1437 img
->scanout
= info
->bind_scanout
;
1439 switch (layout
.aux
) {
1440 case ILO_IMAGE_AUX_HIZ
:
1441 image_set_gen6_hiz(img
, dev
, info
, &layout
);
1443 case ILO_IMAGE_AUX_MCS
:
1444 image_set_gen7_mcs(img
, dev
, info
, &layout
);