2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "ilo_screen.h"
29 #include "ilo_resource.h"
31 /* use PIPE_BIND_CUSTOM to indicate MCS */
32 #define ILO_BIND_MCS PIPE_BIND_CUSTOM
35 const struct ilo_dev_info
*dev
;
36 const struct pipe_resource
*templ
;
38 bool has_depth
, has_stencil
;
39 bool hiz
, separate_stencil
;
41 enum pipe_format format
;
42 unsigned block_width
, block_height
, block_size
;
45 enum intel_tiling_mode tiling
;
48 bool array_spacing_full
;
53 struct ilo_texture_slice
*slices
;
54 } levels
[PIPE_MAX_TEXTURE_LEVELS
];
/*
 * Compute layout->qpitch, which the comment text below describes as the
 * number of texel rows between consecutive array slices.
 *
 * Visible logic: the function tests templ->array_size <= 1 and
 * !layout->array_spacing_full before reaching the general formula.  The
 * general formula adds the align_j-aligned heights of levels 0 and 1 (h0 and
 * h1) to a tail of 12 * align_j rows on GEN7+ or 11 * align_j rows otherwise.
 * A last test singles out GEN6 multisampled resources whose height0 % 4 == 1
 * (see the DevSNB errata quoted below).
 *
 * NOTE(review): the statements guarded by the three tests, and the local
 * declarations, are not present in this listing; confirm them against the
 * full source before relying on this summary.
 */
63 tex_layout_init_qpitch(struct tex_layout
*layout
)
65 const struct pipe_resource
*templ
= layout
->templ
;
68 if (templ
->array_size
<= 1)
71 h0
= align(layout
->levels
[0].h
, layout
->align_j
);
73 if (!layout
->array_spacing_full
) {
78 h1
= align(layout
->levels
[1].h
, layout
->align_j
);
81 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
83 * "The following equation is used for surface formats other than
84 * compressed textures:
86 * QPitch = (h0 + h1 + 11j)"
88 * "The equation for compressed textures (BC* and FXT1 surface formats)
91 * QPitch = (h0 + h1 + 11j) / 4"
93 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
94 * value calculated in the equation above, for every other odd Surface
95 * Height starting from 1 i.e. 1,5,9,13"
97 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
99 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
100 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
102 * QPitch = (h0 + h1 + 12j)
103 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
105 * (There are many typos or missing words here...)"
107 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
108 * the base address. The PRM divides QPitch by 4 for compressed formats
109 * because the block height for those formats are 4, and it wants QPitch to
110 * mean the number of memory rows, as opposed to texel rows, between
111 * slices. Since we use texel rows in tex->slice_offsets, we do not need
112 * to divide QPitch by 4.
114 layout
->qpitch
= h0
+ h1
+
115 ((layout
->dev
->gen
>= ILO_GEN(7)) ? 12 : 11) * layout
->align_j
;
117 if (layout
->dev
->gen
== ILO_GEN(6) && templ
->nr_samples
> 1 &&
118 templ
->height0
% 4 == 1)
/*
 * Choose the horizontal and vertical alignment units of the surface
 * (layout->align_i and layout->align_j).
 *
 * Visible logic:
 *  - compressed formats use the format block width and height;
 *  - layouts with depth or stencil go through a per-format switch, one for
 *    GEN7+ (with cases for PIPE_FORMAT_Z16_UNORM and PIPE_FORMAT_S8_UINT) and
 *    one for older parts (with a case for PIPE_FORMAT_S8_UINT);
 *  - for everything else align_j is 4 when valign_4 holds and 2 otherwise,
 *    where valign_4 means the resource is multisampled, or it is a Y-tiled
 *    render target on GEN7+.  An assert about block_size != 12 is also
 *    visible in this branch.
 * The trailing asserts check that the alignments are multiples of the block
 * dimensions and that alignments and block dimensions are powers of two, as
 * needed by align().
 *
 * NOTE(review): the case bodies, the align_i assignment of the last branch
 * and the condition around the block_size assert are not present in this
 * listing; the PRM tables quoted below give the intended values -- confirm
 * against the full source.
 */
123 tex_layout_init_alignments(struct tex_layout
*layout
)
125 const struct pipe_resource
*templ
= layout
->templ
;
128 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
130 * "surface format align_i align_j
131 * YUV 4:2:2 formats 4 *see below
134 * all other formats 4 *see below"
136 * "- align_j = 4 for any depth buffer
137 * - align_j = 2 for separate stencil buffer
138 * - align_j = 4 for any render target surface is multisampled (4x)
139 * - align_j = 4 for any render target surface with Surface Vertical
140 * Alignment = VALIGN_4
141 * - align_j = 2 for any render target surface with Surface Vertical
142 * Alignment = VALIGN_2
143 * - align_j = 2 for all other render target surface
144 * - align_j = 2 for any sampling engine surface with Surface Vertical
145 * Alignment = VALIGN_2
146 * - align_j = 4 for any sampling engine surface with Surface Vertical
147 * Alignment = VALIGN_4"
149 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
151 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
152 * the Surface Format is 96 bits per element (BPE)."
154 * They can be rephrased as
157 * compressed formats block width block height
158 * PIPE_FORMAT_S8_UINT 4 2
159 * other depth/stencil formats 4 4
160 * 4x multisampled 4 4
166 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
168 * "surface defined by surface format align_i align_j
169 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
171 * 3DSTATE_STENCIL_BUFFER N/A 8 8
172 * SURFACE_STATE BC*, ETC*, EAC* 4 4
174 * all others (set by SURFACE_STATE)"
176 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
178 * "- This field (Surface Vertical Aligment) is intended to be set to
179 * VALIGN_4 if the surface was rendered as a depth buffer, for a
180 * multisampled (4x) render target, or for a multisampled (8x)
181 * render target, since these surfaces support only alignment of 4.
182 * - Use of VALIGN_4 for other surfaces is supported, but uses more
184 * - This field must be set to VALIGN_4 for all tiled Y Render Target
186 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
187 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
188 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
189 * must be set to VALIGN_4."
190 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
192 * "- This field (Surface Horizontal Aligment) is intended to be set to
193 * HALIGN_8 only if the surface was rendered as a depth buffer with
194 * Z16 format or a stencil buffer, since these surfaces support only
196 * - Use of HALIGN_8 for other surfaces is supported, but uses more
198 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
199 * - This field must be set to HALIGN_8 if the Surface Format is
202 * They can be rephrased as
205 * compressed formats block width block height
206 * PIPE_FORMAT_Z16_UNORM 8 4
207 * PIPE_FORMAT_S8_UINT 8 8
208 * other depth/stencil formats 4 or 8 4
209 * 2x or 4x multisampled 4 or 8 4
210 * tiled Y 4 or 8 4 (if rt)
211 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
212 * others 4 or 8 2 or 4
215 if (layout
->compressed
) {
216 /* this happens to be the case */
217 layout
->align_i
= layout
->block_width
;
218 layout
->align_j
= layout
->block_height
;
220 else if (layout
->has_depth
|| layout
->has_stencil
) {
221 if (layout
->dev
->gen
>= ILO_GEN(7)) {
222 switch (layout
->format
) {
223 case PIPE_FORMAT_Z16_UNORM
:
227 case PIPE_FORMAT_S8_UINT
:
238 switch (layout
->format
) {
239 case PIPE_FORMAT_S8_UINT
:
251 const bool valign_4
= (templ
->nr_samples
> 1) ||
252 (layout
->dev
->gen
>= ILO_GEN(7) &&
253 layout
->tiling
== INTEL_TILING_Y
&&
254 (templ
->bind
& PIPE_BIND_RENDER_TARGET
));
257 assert(layout
->block_size
!= 12);
260 layout
->align_j
= (valign_4
) ? 4 : 2;
264 * the fact that align i and j are multiples of block width and height
265 * respectively is what makes the size of the bo a multiple of the block
266 * size, slices start at block boundaries, and many of the computations
269 assert(layout
->align_i
% layout
->block_width
== 0);
270 assert(layout
->align_j
% layout
->block_height
== 0);
272 /* make sure align() works */
273 assert(util_is_power_of_two(layout
->align_i
) &&
274 util_is_power_of_two(layout
->align_j
));
275 assert(util_is_power_of_two(layout
->block_width
) &&
276 util_is_power_of_two(layout
->block_height
));
/*
 * Fill layout->levels[lv].w, .h and .d with the padded dimensions of every
 * mip level.
 *
 * For each level from 0 to last_level the template size is minified with
 * u_minify(), then width and height are rounded up to the format block
 * dimensions.  When the layout stores samples interleaved, a switch on
 * templ->nr_samples adjusts the size further; its cases are not present in
 * this listing, only the assert for an unsupported sample count is.
 *
 * last_level starts as templ->last_level.  The test on last_level == 0 with
 * array_size > 1 and array_spacing_full exists because the full qpitch needs
 * at least two levels (NOTE(review): presumably it raises last_level to 1;
 * the guarded statement is not shown -- confirm).
 */
280 tex_layout_init_levels(struct tex_layout
*layout
)
282 const struct pipe_resource
*templ
= layout
->templ
;
285 last_level
= templ
->last_level
;
287 /* need at least 2 levels to compute full qpitch */
288 if (last_level
== 0 && templ
->array_size
> 1 && layout
->array_spacing_full
)
291 /* compute mip level sizes */
292 for (lv
= 0; lv
<= last_level
; lv
++) {
295 w
= u_minify(templ
->width0
, lv
);
296 h
= u_minify(templ
->height0
, lv
);
297 d
= u_minify(templ
->depth0
, lv
);
300 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
302 * "The dimensions of the mip maps are first determined by applying
303 * the sizing algorithm presented in Non-Power-of-Two Mipmaps
304 * above. Then, if necessary, they are padded out to compression
307 w
= align(w
, layout
->block_width
);
308 h
= align(h
, layout
->block_height
);
311 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
313 * "If the surface is multisampled (4x), these values must be
314 * adjusted as follows before proceeding:
316 * W_L = ceiling(W_L / 2) * 4
317 * H_L = ceiling(H_L / 2) * 4"
319 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
321 * "If the surface is multisampled and it is a depth or stencil
322 * surface or Multisampled Surface StorageFormat in SURFACE_STATE
323 * is MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows
326 * #samples W_L = H_L =
327 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
328 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
329 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
330 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
332 * For interleaved samples (4x), where pixels
335 * (x, y+1) (x+1, y+1)
337 * would be is occupied by
339 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
340 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
341 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
342 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
346 * w = align(w, 2) * 2;
347 * y = align(y, 2) * 2;
349 if (layout
->interleaved
) {
350 switch (templ
->nr_samples
) {
370 assert(!"unsupported sample count");
375 layout
->levels
[lv
].w
= w
;
376 layout
->levels
[lv
].h
= h
;
377 layout
->levels
[lv
].d
= d
;
/*
 * Decide how samples and array slices are arranged by setting
 * layout->interleaved and layout->array_spacing_full.
 *
 * GEN7+: layouts with depth or stencil get interleaved samples and full array
 * spacing.  Other layouts get non-interleaved samples and use full array
 * spacing only when the resource is mipmapped (last_level > 0); an assert
 * records that multisampled resources are expected to have a single level.
 *
 * Older parts (the GEN6 path per the comment below): samples are interleaved
 * and full array spacing is used for every format except
 * PIPE_FORMAT_S8_UINT.
 */
382 tex_layout_init_spacing(struct tex_layout
*layout
)
384 const struct pipe_resource
*templ
= layout
->templ
;
386 if (layout
->dev
->gen
>= ILO_GEN(7)) {
388 * It is not explicitly states, but render targets are expected to be
389 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are
390 * expected to be IMS (samples interleaved).
392 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
394 if (layout
->has_depth
|| layout
->has_stencil
) {
395 layout
->interleaved
= true;
398 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
400 * "note that the depth buffer and stencil buffer have an implied
401 * value of ARYSPC_FULL"
403 layout
->array_spacing_full
= true;
406 layout
->interleaved
= false;
409 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
411 * "If Multisampled Surface Storage Format is MSFMT_MSS and
412 * Number of Multisamples is not MULTISAMPLECOUNT_1, this field
413 * (Surface Array Spacing) must be set to ARYSPC_LOD0."
415 * As multisampled resources are not mipmapped, we never use
416 * ARYSPC_FULL for them.
418 if (templ
->nr_samples
> 1)
419 assert(templ
->last_level
== 0);
420 layout
->array_spacing_full
= (templ
->last_level
> 0);
424 /* GEN6 supports only interleaved samples */
425 layout
->interleaved
= true;
428 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
430 * "The separate stencil buffer does not support mip mapping, thus
431 * the storage for LODs other than LOD 0 is not needed. The
432 * following QPitch equation applies only to the separate stencil
437 * GEN6 does not support compact spacing otherwise.
439 layout
->array_spacing_full
= (layout
->format
!= PIPE_FORMAT_S8_UINT
);
/*
 * Pick the tiling mode of the bo (layout->tiling) and record in
 * layout->can_be_linear whether linear remained a valid choice.
 *
 * valid_tilings starts as a bitmask allowing linear, X and Y tiling and is
 * narrowed step by step:
 *  - PIPE_BIND_SCANOUT keeps only X tiling;
 *  - PIPE_BIND_CURSOR or PIPE_BIND_LINEAR keeps only linear;
 *  - ILO_BIND_MCS keeps only Y tiling;
 *  - PIPE_BIND_DEPTH_STENCIL keeps only linear for PIPE_FORMAT_S8_UINT and
 *    only Y tiling in the other visible case;
 *  - 16-byte blocks, and 12-byte blocks on GEN7+, lose Y tiling in the
 *    render target handling;
 *  - narrow or small surfaces lose X and/or Y tiling as a heuristic, but only
 *    while another mode is still allowed;
 *  - a rule forcing linear when the binding is unknown is also visible.
 * The assert requires at least one mode to survive.  Y tiling is preferred,
 * then X, then linear.
 *
 * NOTE(review): the switch expression, several braces and else branches are
 * not present in this listing, so the exact nesting of the rules above
 * should be confirmed against the full source.
 */
444 tex_layout_init_tiling(struct tex_layout
*layout
)
446 const struct pipe_resource
*templ
= layout
->templ
;
447 const enum pipe_format format
= layout
->format
;
448 const unsigned tile_none
= 1 << INTEL_TILING_NONE
;
449 const unsigned tile_x
= 1 << INTEL_TILING_X
;
450 const unsigned tile_y
= 1 << INTEL_TILING_Y
;
451 unsigned valid_tilings
= tile_none
| tile_x
| tile_y
;
454 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
456 * "Display/Overlay Y-Major not supported.
457 * X-Major required for Async Flips"
459 if (unlikely(templ
->bind
& PIPE_BIND_SCANOUT
))
460 valid_tilings
&= tile_x
;
463 * From the Sandy Bridge PRM, volume 3 part 2, page 158:
465 * "The cursor surface address must be 4K byte aligned. The cursor must
466 * be in linear memory, it cannot be tiled."
468 if (unlikely(templ
->bind
& (PIPE_BIND_CURSOR
| PIPE_BIND_LINEAR
)))
469 valid_tilings
&= tile_none
;
472 * From the Ivy Bridge PRM, volume 4 part 1, page 76:
474 * "The MCS surface must be stored as Tile Y."
476 if (templ
->bind
& ILO_BIND_MCS
)
477 valid_tilings
&= tile_y
;
480 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
482 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
483 * Depth Buffer is not supported."
485 * "The Depth Buffer, if tiled, must use Y-Major tiling."
487 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
489 * "W-Major Tile Format is used for separate stencil."
491 * Since the HW does not support W-tiled fencing, we have to do it in the
494 if (templ
->bind
& PIPE_BIND_DEPTH_STENCIL
) {
496 case PIPE_FORMAT_S8_UINT
:
497 valid_tilings
&= tile_none
;
500 valid_tilings
&= tile_y
;
505 if (templ
->bind
& (PIPE_BIND_RENDER_TARGET
| PIPE_BIND_SAMPLER_VIEW
)) {
506 if (templ
->bind
& PIPE_BIND_RENDER_TARGET
) {
508 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
510 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
511 * either TileX or Linear."
513 if (layout
->block_size
== 16)
514 valid_tilings
&= ~tile_y
;
517 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
519 * "This field (Surface Vertical Aligment) must be set to
520 * VALIGN_4 for all tiled Y Render Target surfaces."
522 * "VALIGN_4 is not supported for surface format
525 if (layout
->dev
->gen
>= ILO_GEN(7) && layout
->block_size
== 12)
526 valid_tilings
&= ~tile_y
;
530 * Also, heuristically set a minimum width/height for enabling tiling.
532 if (templ
->width0
< 64 && (valid_tilings
& ~tile_x
))
533 valid_tilings
&= ~tile_x
;
535 if ((templ
->width0
< 32 || templ
->height0
< 16) &&
536 (templ
->width0
< 16 || templ
->height0
< 32) &&
537 (valid_tilings
& ~tile_y
))
538 valid_tilings
&= ~tile_y
;
541 /* force linear if we are not sure where the texture is bound to */
542 if (valid_tilings
& tile_none
)
543 valid_tilings
&= tile_none
;
546 /* no conflicting binding flags */
547 assert(valid_tilings
);
549 /* prefer tiled than linear */
550 if (valid_tilings
& tile_y
)
551 layout
->tiling
= INTEL_TILING_Y
;
552 else if (valid_tilings
& tile_x
)
553 layout
->tiling
= INTEL_TILING_X
;
555 layout
->tiling
= INTEL_TILING_NONE
;
557 layout
->can_be_linear
= valid_tilings
& tile_none
;
/*
 * Select the format actually used for the bo and cache its block properties.
 *
 * PIPE_FORMAT_ETC1_RGB8 is replaced by PIPE_FORMAT_R8G8B8X8_UNORM.  When
 * layout->separate_stencil is set, the packed depth/stencil formats are
 * replaced by depth-only ones (Z24_UNORM_S8_UINT by Z24X8_UNORM,
 * Z32_FLOAT_S8X24_UINT by Z32_FLOAT).  The remaining visible assignments
 * keep templ->format.
 *
 * Writes layout->format, block_width, block_height, block_size and
 * compressed.  Reads layout->separate_stencil, so that field has to be set
 * before this function runs.
 *
 * NOTE(review): the else/break/default lines of the switch are not present
 * in this listing.
 */
561 tex_layout_init_format(struct tex_layout
*layout
)
563 const struct pipe_resource
*templ
= layout
->templ
;
564 enum pipe_format format
;
566 switch (templ
->format
) {
567 case PIPE_FORMAT_ETC1_RGB8
:
568 format
= PIPE_FORMAT_R8G8B8X8_UNORM
;
570 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
571 if (layout
->separate_stencil
)
572 format
= PIPE_FORMAT_Z24X8_UNORM
;
574 format
= templ
->format
;
576 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
577 if (layout
->separate_stencil
)
578 format
= PIPE_FORMAT_Z32_FLOAT
;
580 format
= templ
->format
;
583 format
= templ
->format
;
587 layout
->format
= format
;
589 layout
->block_width
= util_format_get_blockwidth(format
);
590 layout
->block_height
= util_format_get_blockheight(format
);
591 layout
->block_size
= util_format_get_blocksize(format
);
592 layout
->compressed
= util_format_is_compressed(format
);
/*
 * Determine the depth/stencil properties of the layout: has_depth,
 * has_stencil and separate_stencil (and, per the function name and the
 * comments below, whether HiZ is used).
 *
 * has_depth and has_stencil come from the format description of
 * templ->format.  The visible tests are: no depth component,
 * PIPE_USAGE_STAGING, on GEN6 any mipmapped, array or 3D resource, and the
 * ILO_DEBUG_NOHIZ debug flag.
 *
 * For formats with stencil, GEN7+ always sets separate_stencil, while the
 * other visible assignment makes it follow layout->hiz.  When
 * separate_stencil ends up set, has_stencil is cleared.
 *
 * NOTE(review): the statements guarded by the four tests and the assignment
 * of layout->hiz are not present in this listing (presumably the tests leave
 * HiZ disabled -- confirm against the full source).
 */
596 tex_layout_init_hiz(struct tex_layout
*layout
)
598 const struct pipe_resource
*templ
= layout
->templ
;
599 const struct util_format_description
*desc
;
601 desc
= util_format_description(templ
->format
);
602 layout
->has_depth
= util_format_has_depth(desc
);
603 layout
->has_stencil
= util_format_has_stencil(desc
);
605 if (!layout
->has_depth
)
610 /* no point in having HiZ */
611 if (templ
->usage
== PIPE_USAGE_STAGING
)
614 if (layout
->dev
->gen
== ILO_GEN(6)) {
616 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
618 * "The hierarchical depth buffer does not support the LOD field, it
619 * is assumed by hardware to be zero. A separate hierarachical
620 * depth buffer is required for each LOD used, and the
621 * corresponding buffer's state delivered to hardware each time a
622 * new depth buffer state with modified LOD is delivered."
624 * But we have a stronger requirement. Because of layer offsetting
625 * (check out the callers of ilo_texture_get_slice_offset()), we already
626 * have to require the texture to be non-mipmapped and non-array.
628 if (templ
->last_level
> 0 || templ
->array_size
> 1 || templ
->depth0
> 1)
632 if (ilo_debug
& ILO_DEBUG_NOHIZ
)
635 if (layout
->has_stencil
) {
637 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
639 * "This field (Separate Stencil Buffer Enable) must be set to the
640 * same value (enabled or disabled) as Hierarchical Depth Buffer
643 * GEN7+ requires separate stencil buffers.
645 if (layout
->dev
->gen
>= ILO_GEN(7))
646 layout
->separate_stencil
= true;
648 layout
->separate_stencil
= layout
->hiz
;
650 if (layout
->separate_stencil
)
651 layout
->has_stencil
= false;
/*
 * Initialize a layout for the given resource template.
 *
 * layout is zeroed, pointed at the device info of the screen and at templ,
 * and then filled in by the tex_layout_init_*() helpers.  The helpers depend
 * on each other, so the call order (hiz, format, tiling, spacing, levels,
 * alignments, qpitch) matters.
 *
 * slices: per-level slice arrays that later receive the computed offsets;
 * levels[lv].slices is pointed at slices[lv] for every level.  One caller in
 * this file passes NULL here (NOTE(review): the guard around the final loop
 * for that case is not present in this listing).
 */
656 tex_layout_init(struct tex_layout
*layout
,
657 struct pipe_screen
*screen
,
658 const struct pipe_resource
*templ
,
659 struct ilo_texture_slice
**slices
)
661 struct ilo_screen
*is
= ilo_screen(screen
);
663 memset(layout
, 0, sizeof(*layout
));
665 layout
->dev
= &is
->dev
;
666 layout
->templ
= templ
;
668 /* note that there are dependencies between these functions */
669 tex_layout_init_hiz(layout
);
670 tex_layout_init_format(layout
);
671 tex_layout_init_tiling(layout
);
672 tex_layout_init_spacing(layout
);
673 tex_layout_init_levels(layout
);
674 tex_layout_init_alignments(layout
);
675 tex_layout_init_qpitch(layout
);
680 for (lv
= 0; lv
<= templ
->last_level
; lv
++)
681 layout
->levels
[lv
].slices
= slices
[lv
];
/*
 * Switch the layout to linear tiling (INTEL_TILING_NONE).
 *
 * layout->can_be_linear is tested first.  NOTE(review): the statement guarded
 * by that test and any return value are not present in this listing
 * (presumably an early-out reporting failure -- confirm against the full
 * source).
 */
686 tex_layout_force_linear(struct tex_layout
*layout
)
688 if (!layout
->can_be_linear
)
692 * we may be able to switch from VALIGN_4 to VALIGN_2 when the layout was
693 * Y-tiled, but let's keep it simple
695 layout
->tiling
= INTEL_TILING_NONE
;
701 * Layout a 2D texture.
/*
 * Lay out the mip levels of a non-3D texture and accumulate the overall size
 * in layout->width and layout->height.
 *
 * For each level, when a slice array was supplied, every array slice is
 * placed at x = level_x and y = level_y + qpitch * slice.  The layout extent
 * is then grown to cover the level, and level_x or level_y advances by the
 * aligned level size following the MIPLAYOUT_BELOW arrangement.
 *
 * Finally the height is extended by qpitch for every slice after the first.
 * With non-interleaved multisampling the slice count is multiplied by the
 * sample count, since samples of the same index are stored in a slice.
 *
 * NOTE(review): the initialisation of level_x / level_y and the condition
 * choosing between the two advances are not present in this listing.
 */
704 tex_layout_2d(struct tex_layout
*layout
)
706 const struct pipe_resource
*templ
= layout
->templ
;
707 unsigned int level_x
, level_y
, num_slices
;
712 for (lv
= 0; lv
<= templ
->last_level
; lv
++) {
713 const unsigned int level_w
= layout
->levels
[lv
].w
;
714 const unsigned int level_h
= layout
->levels
[lv
].h
;
717 /* set slice offsets */
718 if (layout
->levels
[lv
].slices
) {
719 for (slice
= 0; slice
< templ
->array_size
; slice
++) {
720 layout
->levels
[lv
].slices
[slice
].x
= level_x
;
721 /* slices are qpitch apart in Y-direction */
722 layout
->levels
[lv
].slices
[slice
].y
=
723 level_y
+ layout
->qpitch
* slice
;
727 /* extend the size of the monolithic bo to cover this mip level */
728 if (layout
->width
< level_x
+ level_w
)
729 layout
->width
= level_x
+ level_w
;
730 if (layout
->height
< level_y
+ level_h
)
731 layout
->height
= level_y
+ level_h
;
733 /* MIPLAYOUT_BELOW */
735 level_x
+= align(level_w
, layout
->align_i
);
737 level_y
+= align(level_h
, layout
->align_j
);
740 num_slices
= templ
->array_size
;
741 /* samples of the same index are stored in a slice */
742 if (templ
->nr_samples
> 1 && !layout
->interleaved
)
743 num_slices
*= templ
->nr_samples
;
745 /* we did not take slices into consideration in the computation above */
746 layout
->height
+= layout
->qpitch
* (num_slices
- 1);
750 * Layout a 3D texture.
/*
 * Lay out the mip levels of a 3D texture and accumulate the overall size in
 * layout->width and layout->height.
 *
 * At level lv the depth slices are packed (1 << lv) to a row.  Slice i of a
 * row sits at x = i * slice_pitch (the level width aligned to align_i), and
 * each row of slices advances level_y by slice_qpitch (the level height
 * aligned to align_j).  The width is grown to cover the rightmost slice of a
 * row, and at the last level the height is set to the bottom of the final
 * slice row.
 *
 * NOTE(review): the initialisation of level_y and the local declarations are
 * not present in this listing.
 */
753 tex_layout_3d(struct tex_layout
*layout
)
755 const struct pipe_resource
*templ
= layout
->templ
;
756 unsigned int level_y
;
760 for (lv
= 0; lv
<= templ
->last_level
; lv
++) {
761 const unsigned int level_w
= layout
->levels
[lv
].w
;
762 const unsigned int level_h
= layout
->levels
[lv
].h
;
763 const unsigned int level_d
= layout
->levels
[lv
].d
;
764 const unsigned int slice_pitch
= align(level_w
, layout
->align_i
);
765 const unsigned int slice_qpitch
= align(level_h
, layout
->align_j
);
766 const unsigned int num_slices_per_row
= 1 << lv
;
769 for (slice
= 0; slice
< level_d
; slice
+= num_slices_per_row
) {
772 /* set slice offsets */
773 if (layout
->levels
[lv
].slices
) {
774 for (i
= 0; i
< num_slices_per_row
&& slice
+ i
< level_d
; i
++) {
775 layout
->levels
[lv
].slices
[slice
+ i
].x
= slice_pitch
* i
;
776 layout
->levels
[lv
].slices
[slice
+ i
].y
= level_y
;
780 /* move on to the next slice row */
781 level_y
+= slice_qpitch
;
784 /* rightmost slice */
785 slice
= MIN2(num_slices_per_row
, level_d
) - 1;
787 /* extend the size of the monolithic bo to cover this slice */
788 if (layout
->width
< slice_pitch
* slice
+ level_w
)
789 layout
->width
= slice_pitch
* slice
+ level_w
;
790 if (lv
== templ
->last_level
)
791 layout
->height
= (level_y
- slice_qpitch
) + level_h
;
/*
 * Apply the padding rules to the final layout size.
 *
 * Visible adjustments to layout->width and layout->height:
 *  - PIPE_BIND_SAMPLER_VIEW: round both up to the alignment units; a test
 *    for PIPE_TEXTURE_CUBE follows, and compressed formats round the height
 *    up to 2 * align_j;
 *  - PIPE_BIND_RENDER_TARGET: round the height up to an even number of rows;
 *  - PIPE_FORMAT_S8_UINT templates: round both up to multiples of 64, which
 *    the comment below ties to requesting a linear bo in place of W tiling;
 *  - a round-up of the width to 8 and the height to 4 for depth buffer
 *    clear/resolve.
 * The closing asserts check that width, height and qpitch are whole numbers
 * of format blocks.
 *
 * NOTE(review): the statement guarded by the cube test and the condition
 * around the 8x4 round-up are not present in this listing.
 */
796 tex_layout_validate(struct tex_layout
*layout
)
799 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
801 * "To determine the necessary padding on the bottom and right side of
802 * the surface, refer to the table in Section 7.18.3.4 for the i and j
803 * parameters for the surface format in use. The surface must then be
804 * extended to the next multiple of the alignment unit size in each
805 * dimension, and all texels contained in this extended surface must
806 * have valid GTT entries."
808 * "For cube surfaces, an additional two rows of padding are required
809 * at the bottom of the surface. This must be ensured regardless of
810 * whether the surface is stored tiled or linear. This is due to the
811 * potential rotation of cache line orientation from memory to cache."
813 * "For compressed textures (BC* and FXT1 surface formats), padding at
814 * the bottom of the surface is to an even compressed row, which is
815 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
816 * purposes, these surfaces behave as if j = 8 only for surface
817 * padding purposes. The value of 4 for j still applies for mip level
818 * alignment and QPitch calculation."
820 if (layout
->templ
->bind
& PIPE_BIND_SAMPLER_VIEW
) {
821 layout
->width
= align(layout
->width
, layout
->align_i
);
822 layout
->height
= align(layout
->height
, layout
->align_j
);
824 if (layout
->templ
->target
== PIPE_TEXTURE_CUBE
)
827 if (layout
->compressed
)
828 layout
->height
= align(layout
->height
, layout
->align_j
* 2);
832 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
834 * "If the surface contains an odd number of rows of data, a final row
835 * below the surface must be allocated."
837 if (layout
->templ
->bind
& PIPE_BIND_RENDER_TARGET
)
838 layout
->height
= align(layout
->height
, 2);
841 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
843 * "A 4KB tile is subdivided into 8-high by 8-wide array of Blocks for
844 * W-Major Tiles (W Tiles). Each Block is 8 rows by 8 bytes."
846 * Since we ask for INTEL_TILING_NONE instead of the non-existent
847 * INTEL_TILING_W, we need to manually align the width and height to the
850 if (layout
->templ
->format
== PIPE_FORMAT_S8_UINT
) {
851 layout
->width
= align(layout
->width
, 64);
852 layout
->height
= align(layout
->height
, 64);
856 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
857 * ilo_texture_can_enable_hiz(), we always return true for the first slice.
858 * To avoid out-of-bound access, we have to pad.
861 layout
->width
= align(layout
->width
, 8);
862 layout
->height
= align(layout
->height
, 4);
865 assert(layout
->width
% layout
->block_width
== 0);
866 assert(layout
->height
% layout
->block_height
== 0);
867 assert(layout
->qpitch
% layout
->block_height
== 0);
/*
 * Estimate the size of the bo needed for the layout and return it as
 * stride * height.
 *
 * stride is the layout width in format blocks times the block size, and
 * height is the layout height in format blocks.  A switch on layout->tiling
 * then rounds them up: stride to 512 and height to 8 in one case, stride to
 * 128 and height to 32 in another, and height to 2 in the last.
 *
 * NOTE(review): the case labels are not present in this listing, so which
 * tiling mode each rounding belongs to should be confirmed against the full
 * source.
 */
871 tex_layout_estimate_size(const struct tex_layout
*layout
)
873 unsigned stride
, height
;
875 stride
= (layout
->width
/ layout
->block_width
) * layout
->block_size
;
876 height
= layout
->height
/ layout
->block_height
;
878 switch (layout
->tiling
) {
880 stride
= align(stride
, 512);
881 height
= align(height
, 8);
884 stride
= align(stride
, 128);
885 height
= align(height
, 32);
888 height
= align(height
, 2);
892 return stride
* height
;
/*
 * Copy the results of the layout computation into the texture object.
 *
 * bo_width and bo_height are stored in units of format blocks (the layout
 * size divided by the block dimensions) and bo_cpp is the block size.
 * halign_8 and valign_4 record whether align_i equals 8 and align_j equals
 * 4.  The format, tiling, compressed flag, block dimensions, array spacing
 * and interleaved flag are copied as they are.
 */
896 tex_layout_apply(const struct tex_layout
*layout
, struct ilo_texture
*tex
)
898 tex
->bo_format
= layout
->format
;
901 tex
->bo_width
= layout
->width
/ layout
->block_width
;
902 tex
->bo_height
= layout
->height
/ layout
->block_height
;
903 tex
->bo_cpp
= layout
->block_size
;
904 tex
->tiling
= layout
->tiling
;
906 tex
->compressed
= layout
->compressed
;
907 tex
->block_width
= layout
->block_width
;
908 tex
->block_height
= layout
->block_height
;
910 tex
->halign_8
= (layout
->align_i
== 8);
911 tex
->valign_4
= (layout
->align_j
== 4);
912 tex
->array_spacing_full
= layout
->array_spacing_full
;
913 tex
->interleaved
= layout
->interleaved
;
/*
 * Release the slice storage of the texture.  Only tex->slices[0] is freed:
 * tex_alloc_slices() makes a single allocation, stores it in slices[0] and
 * points the entries of the other levels into that same buffer.
 */
917 tex_free_slices(struct ilo_texture
*tex
)
919 FREE(tex
->slices
[0]);
/*
 * Allocate the per-level slice arrays of the texture.
 *
 * The minified depths of all levels are summed, and one zeroed buffer of
 * depth * array_size entries is allocated and stored in tex->slices[0].
 * For each level lv > 0, tex->slices[lv] is set to tex->slices[lv - 1]
 * advanced by u_minify(depth0, lv - 1) * array_size entries, so every level
 * points at its own region of that buffer.
 *
 * NOTE(review): the initialisation of depth, the allocation-failure check and
 * the return statements are not present in this listing; the caller in this
 * file tests the result as a boolean.
 */
923 tex_alloc_slices(struct ilo_texture
*tex
)
925 const struct pipe_resource
*templ
= &tex
->base
;
926 struct ilo_texture_slice
*slices
;
929 /* sum the depths of all levels */
931 for (lv
= 0; lv
<= templ
->last_level
; lv
++)
932 depth
+= u_minify(templ
->depth0
, lv
);
935 * There are (depth * tex->base.array_size) slices in total. Either depth
936 * is one (non-3D) or templ->array_size is one (non-array), but it does
939 slices
= CALLOC(depth
* templ
->array_size
, sizeof(*slices
));
943 tex
->slices
[0] = slices
;
945 /* point to the respective positions in the buffer */
946 for (lv
= 1; lv
<= templ
->last_level
; lv
++) {
947 tex
->slices
[lv
] = tex
->slices
[lv
- 1] +
948 u_minify(templ
->depth0
, lv
- 1) * templ
->array_size
;
/*
 * Create the bo backing the texture, either from a winsys handle or by a new
 * allocation.
 *
 * A debug name is chosen from the texture target.  Two creation calls are
 * visible: intel_winsys_import_handle(), which receives the handle, and
 * intel_winsys_alloc_texture(), which receives the bo width, height, cpp,
 * tiling and flags stored on the texture and reports the pitch.  A call to
 * intel_bo_unreference(tex->bo) is visible before tex->tiling and
 * tex->bo_stride are updated from the resulting tiling and pitch.
 *
 * NOTE(review): the condition selecting between the two creation calls, some
 * of the name assignments, the error handling, the assignment of tex->bo and
 * the return value are not present in this listing; the caller in this file
 * tests the result as a boolean.
 */
955 tex_create_bo(struct ilo_texture
*tex
,
956 const struct winsys_handle
*handle
)
958 struct ilo_screen
*is
= ilo_screen(tex
->base
.screen
);
961 enum intel_tiling_mode tiling
;
964 switch (tex
->base
.target
) {
965 case PIPE_TEXTURE_1D
:
968 case PIPE_TEXTURE_2D
:
971 case PIPE_TEXTURE_3D
:
974 case PIPE_TEXTURE_CUBE
:
975 name
= "cube texture";
977 case PIPE_TEXTURE_RECT
:
978 name
= "rectangle texture";
980 case PIPE_TEXTURE_1D_ARRAY
:
981 name
= "1D array texture";
983 case PIPE_TEXTURE_2D_ARRAY
:
984 name
= "2D array texture";
986 case PIPE_TEXTURE_CUBE_ARRAY
:
987 name
= "cube array texture";
990 name
="unknown texture";
995 bo
= intel_winsys_import_handle(is
->winsys
, name
, handle
,
996 tex
->bo_width
, tex
->bo_height
, tex
->bo_cpp
,
1000 bo
= intel_winsys_alloc_texture(is
->winsys
, name
,
1001 tex
->bo_width
, tex
->bo_height
, tex
->bo_cpp
,
1002 tex
->tiling
, tex
->bo_flags
, &pitch
);
1004 tiling
= tex
->tiling
;
1011 intel_bo_unreference(tex
->bo
);
1014 tex
->tiling
= tiling
;
1015 tex
->bo_stride
= pitch
;
/*
 * Create the separate stencil resource of a depth/stencil texture.
 *
 * A copy of the texture template is made with its format replaced by
 * PIPE_FORMAT_S8_UINT, a resource is created from it through the
 * resource_create hook of the screen, and the result is stored in
 * tex->separate_s8.  The assert checks that the new texture really uses
 * PIPE_FORMAT_S8_UINT as its bo format.
 *
 * NOTE(review): the failure check on the created resource and the return
 * value are not present in this listing; the caller in this file tests the
 * result as a boolean.
 */
1021 tex_create_separate_stencil(struct ilo_texture
*tex
)
1023 struct pipe_resource templ
= tex
->base
;
1024 struct pipe_resource
*s8
;
1027 * Unless PIPE_BIND_DEPTH_STENCIL is set, the resource may have other
1028 * tilings. But that should be fine since it will never be bound as the
1029 * stencil buffer, and our transfer code can handle all tilings.
1031 templ
.format
= PIPE_FORMAT_S8_UINT
;
1033 s8
= tex
->base
.screen
->resource_create(tex
->base
.screen
, &templ
);
1037 tex
->separate_s8
= ilo_texture(s8
);
1039 assert(tex
->separate_s8
->bo_format
== PIPE_FORMAT_S8_UINT
);
/*
 * Allocate the HiZ bo of the texture and flag the slices accordingly.
 *
 * Size computation visible here: hz_width is the level-0 width aligned to
 * 16.  For PIPE_TEXTURE_3D, hz_height accumulates over the levels the level
 * height aligned to 8 times the level depth.  Otherwise a HiZ qpitch is
 * built from the 8-aligned level-0 height, to which the 8-aligned level-1
 * height and a tail of (12 on GEN7+, else 11) * 8 rows are added when array
 * spacing is full; hz_height is that qpitch times the array size, divided
 * by 2.  On GEN7+ hz_height is aligned to 8.  The bo is allocated Y-tiled
 * with INTEL_ALLOC_FOR_RENDER and its pitch stored in tex->hiz.bo_stride.
 *
 * Afterwards every level is checked against an alignment that starts as 8x4
 * and is subject to a switch on templ->nr_samples.  Levels whose minified
 * width and height are multiples of the alignment get ILO_TEXTURE_HIZ;
 * ILO_TEXTURE_CPU_WRITE can be added as well.  The flags are applied with
 * ilo_texture_set_slice_flags() to all slices of the level (the minified
 * depth for 3D textures, the array size otherwise).
 *
 * NOTE(review): the initialisation of hz_height, any further scaling on the
 * 3D path, the sample-count cases, the condition around
 * ILO_TEXTURE_CPU_WRITE, the allocation-failure check and the return value
 * are not present in this listing.
 */
1045 tex_create_hiz(struct ilo_texture
*tex
, const struct tex_layout
*layout
)
1047 struct ilo_screen
*is
= ilo_screen(tex
->base
.screen
);
1048 const struct pipe_resource
*templ
= layout
->templ
;
1049 const int hz_align_j
= 8;
1050 unsigned hz_width
, hz_height
, lv
;
1051 unsigned long pitch
;
1054 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1055 * PRM, volume 2 part 1, page 312-313.
1057 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1061 hz_width
= align(layout
->levels
[0].w
, 16);
1063 if (templ
->target
== PIPE_TEXTURE_3D
) {
1066 for (lv
= 0; lv
<= templ
->last_level
; lv
++) {
1067 const unsigned h
= align(layout
->levels
[lv
].h
, hz_align_j
);
1068 hz_height
+= h
* layout
->levels
[lv
].d
;
1074 const unsigned h0
= align(layout
->levels
[0].h
, hz_align_j
);
1075 unsigned hz_qpitch
= h0
;
1077 if (layout
->array_spacing_full
) {
1078 const unsigned h1
= align(layout
->levels
[1].h
, hz_align_j
);
1079 const unsigned htail
=
1080 ((layout
->dev
->gen
>= ILO_GEN(7)) ? 12 : 11) * hz_align_j
;
1082 hz_qpitch
+= h1
+ htail
;
1085 hz_height
= hz_qpitch
* templ
->array_size
/ 2;
1087 if (layout
->dev
->gen
>= ILO_GEN(7))
1088 hz_height
= align(hz_height
, 8);
1091 tex
->hiz
.bo
= intel_winsys_alloc_texture(is
->winsys
,
1092 "hiz texture", hz_width
, hz_height
, 1,
1093 INTEL_TILING_Y
, INTEL_ALLOC_FOR_RENDER
, &pitch
);
1097 tex
->hiz
.bo_stride
= pitch
;
1100 * From the Sandy Bridge PRM, volume 2 part 1, page 313-314:
1102 * "A rectangle primitive representing the clear area is delivered. The
1103 * primitive must adhere to the following restrictions on size:
1105 * - If Number of Multisamples is NUMSAMPLES_1, the rectangle must be
1106 * aligned to an 8x4 pixel block relative to the upper left corner
1107 * of the depth buffer, and contain an integer number of these pixel
1108 * blocks, and all 8x4 pixels must be lit.
1110 * - If Number of Multisamples is NUMSAMPLES_4, the rectangle must be
1111 * aligned to a 4x2 pixel block (8x4 sample block) relative to the
1112 * upper left corner of the depth buffer, and contain an integer
1113 * number of these pixel blocks, and all samples of the 4x2 pixels
1116 * - If Number of Multisamples is NUMSAMPLES_8, the rectangle must be
1117 * aligned to a 2x2 pixel block (8x4 sample block) relative to the
1118 * upper left corner of the depth buffer, and contain an integer
1119 * number of these pixel blocks, and all samples of the 2x2 pixels
1122 * "The following is required when performing a depth buffer resolve:
1124 * - A rectangle primitive of the same size as the previous depth
1125 * buffer clear operation must be delivered, and depth buffer state
1126 * cannot have changed since the previous depth buffer clear
1129 * Experiments on Haswell show that depth buffer resolves have the same
1130 * alignment requirements, and aligning the RECTLIST primitive and
1131 * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The mipmap size must be
1134 for (lv
= 0; lv
<= templ
->last_level
; lv
++) {
1135 unsigned align_w
= 8, align_h
= 4;
1138 switch (templ
->nr_samples
) {
1156 if (u_minify(templ
->width0
, lv
) % align_w
== 0 &&
1157 u_minify(templ
->height0
, lv
) % align_h
== 0) {
1158 flags
|= ILO_TEXTURE_HIZ
;
1160 /* this will trigger a HiZ resolve */
1162 flags
|= ILO_TEXTURE_CPU_WRITE
;
1166 const unsigned num_slices
= (templ
->target
== PIPE_TEXTURE_3D
) ?
1167 u_minify(templ
->depth0
, lv
) : templ
->array_size
;
1168 ilo_texture_set_slice_flags(tex
, lv
, 0, num_slices
, flags
, flags
);
/*
 * Destroy a texture and what it owns: the reference on the HiZ bo is
 * dropped, the separate stencil texture (if any) is destroyed recursively,
 * the reference on the main bo is dropped and the slice storage is freed.
 *
 * NOTE(review): any guard around the HiZ unreference and the final release
 * of the texture object itself are not present in this listing.
 */
1176 tex_destroy(struct ilo_texture
*tex
)
1179 intel_bo_unreference(tex
->hiz
.bo
);
1181 if (tex
->separate_s8
)
1182 tex_destroy(tex
->separate_s8
);
1184 intel_bo_unreference(tex
->bo
);
1185 tex_free_slices(tex
);
/*
 * Create a texture resource from a template, optionally wrapping an existing
 * winsys handle.
 *
 * Steps visible in this listing, in order:
 *  - allocate the ilo_texture, set its screen and reference count, and
 *    allocate its slice arrays;
 *  - record whether it is imported (handle != NULL) and request
 *    INTEL_ALLOC_FOR_RENDER for depth/stencil and render target bindings;
 *  - compute the layout: tex_layout_init(), then tex_layout_2d() or
 *    tex_layout_3d() depending on the target, then tex_layout_validate();
 *  - for tiled layouts, estimate the bo size and fall back to linear when it
 *    exceeds a quarter of the 256MB taken as the mappable GTT size;
 *  - apply the layout to the texture and create the bo;
 *  - create the separate stencil resource and the HiZ bo when the layout
 *    asks for them.  On GEN6 a HiZ failure with separate stencil is handled
 *    specially because separate stencil requires HiZ there.
 *
 * NOTE(review): the copy of templ into tex->base, most error paths and the
 * return statement are not present in this listing.
 */
1189 static struct pipe_resource
*
1190 tex_create(struct pipe_screen
*screen
,
1191 const struct pipe_resource
*templ
,
1192 const struct winsys_handle
*handle
)
1194 struct tex_layout layout
;
1195 struct ilo_texture
*tex
;
1197 tex
= CALLOC_STRUCT(ilo_texture
);
1202 tex
->base
.screen
= screen
;
1203 pipe_reference_init(&tex
->base
.reference
, 1);
1205 if (!tex_alloc_slices(tex
)) {
1210 tex
->imported
= (handle
!= NULL
);
1212 if (tex
->base
.bind
& (PIPE_BIND_DEPTH_STENCIL
|
1213 PIPE_BIND_RENDER_TARGET
))
1214 tex
->bo_flags
|= INTEL_ALLOC_FOR_RENDER
;
1216 tex_layout_init(&layout
, screen
, templ
, tex
->slices
);
1218 switch (templ
->target
) {
1219 case PIPE_TEXTURE_1D
:
1220 case PIPE_TEXTURE_2D
:
1221 case PIPE_TEXTURE_CUBE
:
1222 case PIPE_TEXTURE_RECT
:
1223 case PIPE_TEXTURE_1D_ARRAY
:
1224 case PIPE_TEXTURE_2D_ARRAY
:
1225 case PIPE_TEXTURE_CUBE_ARRAY
:
1226 tex_layout_2d(&layout
);
1228 case PIPE_TEXTURE_3D
:
1229 tex_layout_3d(&layout
);
1232 assert(!"unknown resource target");
1236 tex_layout_validate(&layout
);
1238 /* make sure the bo can be mapped through GTT if tiled */
1239 if (layout
.tiling
!= INTEL_TILING_NONE
) {
1241 * Usually only the first 256MB of the GTT is mappable.
1243 * See also how intel_context::max_gtt_map_object_size is calculated.
1245 const size_t mappable_gtt_size
= 256 * 1024 * 1024;
1246 const size_t size
= tex_layout_estimate_size(&layout
);
1248 /* be conservative */
1249 if (size
> mappable_gtt_size
/ 4)
1250 tex_layout_force_linear(&layout
);
1253 tex_layout_apply(&layout
, tex
);
1255 if (!tex_create_bo(tex
, handle
)) {
1256 tex_free_slices(tex
);
1261 /* allocate separate stencil resource */
1262 if (layout
.separate_stencil
&& !tex_create_separate_stencil(tex
)) {
1267 if (layout
.hiz
&& !tex_create_hiz(tex
, &layout
)) {
1268 /* Separate Stencil Buffer requires HiZ to be enabled */
1269 if (layout
.dev
->gen
== ILO_GEN(6) && layout
.separate_stencil
) {
1279 tex_get_handle(struct ilo_texture
*tex
, struct winsys_handle
*handle
)
1281 struct ilo_screen
*is
= ilo_screen(tex
->base
.screen
);
1284 err
= intel_winsys_export_handle(is
->winsys
, tex
->bo
,
1285 tex
->tiling
, tex
->bo_stride
, handle
);
1291 * Estimate the texture size. For large textures, the errors should be pretty
1295 tex_estimate_size(struct pipe_screen
*screen
,
1296 const struct pipe_resource
*templ
)
1298 struct tex_layout layout
;
1300 tex_layout_init(&layout
, screen
, templ
, NULL
);
1302 switch (templ
->target
) {
1303 case PIPE_TEXTURE_3D
:
1304 tex_layout_3d(&layout
);
1307 tex_layout_2d(&layout
);
1311 tex_layout_validate(&layout
);
1313 return tex_layout_estimate_size(&layout
);
1317 buf_create_bo(struct ilo_buffer
*buf
)
1319 struct ilo_screen
*is
= ilo_screen(buf
->base
.screen
);
1321 struct intel_bo
*bo
;
1323 switch (buf
->base
.bind
) {
1324 case PIPE_BIND_VERTEX_BUFFER
:
1325 name
= "vertex buffer";
1327 case PIPE_BIND_INDEX_BUFFER
:
1328 name
= "index buffer";
1330 case PIPE_BIND_CONSTANT_BUFFER
:
1331 name
= "constant buffer";
1333 case PIPE_BIND_STREAM_OUTPUT
:
1334 name
= "stream output";
1337 name
= "unknown buffer";
1341 bo
= intel_winsys_alloc_buffer(is
->winsys
,
1342 name
, buf
->bo_size
, buf
->bo_flags
);
1347 intel_bo_unreference(buf
->bo
);
1355 buf_destroy(struct ilo_buffer
*buf
)
1357 intel_bo_unreference(buf
->bo
);
1361 static struct pipe_resource
*
1362 buf_create(struct pipe_screen
*screen
, const struct pipe_resource
*templ
)
1364 struct ilo_buffer
*buf
;
1366 buf
= CALLOC_STRUCT(ilo_buffer
);
1371 buf
->base
.screen
= screen
;
1372 pipe_reference_init(&buf
->base
.reference
, 1);
1374 buf
->bo_size
= templ
->width0
;
1378 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
1380 * "For buffers, which have no inherent "height," padding requirements
1381 * are different. A buffer must be padded to the next multiple of 256
1382 * array elements, with an additional 16 bytes added beyond that to
1383 * account for the L1 cache line."
1385 if (templ
->bind
& PIPE_BIND_SAMPLER_VIEW
)
1386 buf
->bo_size
= align(buf
->bo_size
, 256) + 16;
1388 if (!buf_create_bo(buf
)) {
1397 ilo_can_create_resource(struct pipe_screen
*screen
,
1398 const struct pipe_resource
*templ
)
1401 * We do not know if we will fail until we try to allocate the bo.
1402 * So just set a limit on the texture size.
1404 const size_t max_size
= 1 * 1024 * 1024 * 1024;
1407 if (templ
->target
== PIPE_BUFFER
)
1408 size
= templ
->width0
;
1410 size
= tex_estimate_size(screen
, templ
);
1412 return (size
<= max_size
);
1415 static struct pipe_resource
*
1416 ilo_resource_create(struct pipe_screen
*screen
,
1417 const struct pipe_resource
*templ
)
1419 if (templ
->target
== PIPE_BUFFER
)
1420 return buf_create(screen
, templ
);
1422 return tex_create(screen
, templ
, NULL
);
1425 static struct pipe_resource
*
1426 ilo_resource_from_handle(struct pipe_screen
*screen
,
1427 const struct pipe_resource
*templ
,
1428 struct winsys_handle
*handle
)
1430 if (templ
->target
== PIPE_BUFFER
)
1433 return tex_create(screen
, templ
, handle
);
1437 ilo_resource_get_handle(struct pipe_screen
*screen
,
1438 struct pipe_resource
*res
,
1439 struct winsys_handle
*handle
)
1441 if (res
->target
== PIPE_BUFFER
)
1444 return tex_get_handle(ilo_texture(res
), handle
);
1449 ilo_resource_destroy(struct pipe_screen
*screen
,
1450 struct pipe_resource
*res
)
1452 if (res
->target
== PIPE_BUFFER
)
1453 buf_destroy(ilo_buffer(res
));
1455 tex_destroy(ilo_texture(res
));
1459 * Initialize resource-related functions.
1462 ilo_init_resource_functions(struct ilo_screen
*is
)
1464 is
->base
.can_create_resource
= ilo_can_create_resource
;
1465 is
->base
.resource_create
= ilo_resource_create
;
1466 is
->base
.resource_from_handle
= ilo_resource_from_handle
;
1467 is
->base
.resource_get_handle
= ilo_resource_get_handle
;
1468 is
->base
.resource_destroy
= ilo_resource_destroy
;
/**
 * Allocate a bo for the buffer; a thin public wrapper around
 * buf_create_bo().
 *
 * \return the result of buf_create_bo()
 */
bool
ilo_buffer_alloc_bo(struct ilo_buffer *buf)
{
   const bool allocated = buf_create_bo(buf);

   return allocated;
}
1478 ilo_texture_alloc_bo(struct ilo_texture
*tex
)
1480 /* a shared bo cannot be reallocated */
1484 return tex_create_bo(tex
, NULL
);
1488 * Return the offset (in bytes) to a slice within the bo.
1490 * The returned offset is aligned to tile size. Since slices are not
1491 * guaranteed to start at tile boundaries, the X and Y offsets (in pixels)
1492 * from the tile origin to the slice are also returned. X offset is always a
1493 * multiple of 4 and Y offset is always a multiple of 2.
1496 ilo_texture_get_slice_offset(const struct ilo_texture
*tex
,
1497 unsigned level
, unsigned slice
,
1498 unsigned *x_offset
, unsigned *y_offset
)
1500 const struct ilo_texture_slice
*s
=
1501 ilo_texture_get_slice(tex
, level
, slice
);
1502 unsigned tile_w
, tile_h
, tile_size
, row_size
;
1503 unsigned x
, y
, slice_offset
;
1505 /* see the Sandy Bridge PRM, volume 1 part 2, page 24 */
1507 switch (tex
->tiling
) {
1508 case INTEL_TILING_NONE
:
1510 if (tex
->bo_format
== PIPE_FORMAT_S8_UINT
) {
1519 case INTEL_TILING_X
:
1523 case INTEL_TILING_Y
:
1528 assert(!"unknown tiling");
1534 tile_size
= tile_w
* tile_h
;
1535 row_size
= tex
->bo_stride
* tile_h
;
1538 x
= s
->x
/ tex
->block_width
* tex
->bo_cpp
;
1539 y
= s
->y
/ tex
->block_height
;
1540 slice_offset
= row_size
* (y
/ tile_h
) + tile_size
* (x
/ tile_w
);
1543 * Since tex->bo_stride is a multiple of tile_w, slice_offset should be
1544 * aligned at this point.
1546 assert(slice_offset
% tile_size
== 0);
1549 * because of the possible values of align_i and align_j in
1550 * tex_layout_init_alignments(), x_offset is guaranteed to be a multiple of
1551 * 4 and y_offset is guaranteed to be a multiple of 2.
1555 x
= (x
% tile_w
) / tex
->bo_cpp
* tex
->block_width
;
1563 y
= (y
% tile_h
) * tex
->block_height
;
1569 return slice_offset
;