b6e958585a59a3af0bf5b16805956528a43d6e03
[mesa.git] / src / gallium / drivers / ilo / ilo_layout.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2014 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "ilo_layout.h"
29
30 enum {
31 LAYOUT_TILING_NONE = 1 << INTEL_TILING_NONE,
32 LAYOUT_TILING_X = 1 << INTEL_TILING_X,
33 LAYOUT_TILING_Y = 1 << INTEL_TILING_Y,
34 LAYOUT_TILING_W = 1 << (INTEL_TILING_Y + 1),
35
36 LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE |
37 LAYOUT_TILING_X |
38 LAYOUT_TILING_Y |
39 LAYOUT_TILING_W)
40 };
41
/*
 * Scratch state shared by the layout_*() helpers while a layout is being
 * computed.
 */
struct ilo_layout_params {
   /* device info; the helpers branch on dev->gen */
   const struct ilo_dev_info *dev;
   /* template describing the resource being laid out */
   const struct pipe_resource *templ;

   /* result of util_format_is_compressed() on the layout format */
   bool compressed;

   /* slice heights of LOD 0 and LOD 1, used for the layer height (QPitch) */
   unsigned h0;
   unsigned h1;

   /* running extent of the layout, in texels */
   unsigned max_x;
   unsigned max_y;
};
51
52 static void
53 layout_get_slice_size(const struct ilo_layout *layout,
54 const struct ilo_layout_params *params,
55 unsigned level, unsigned *width, unsigned *height)
56 {
57 const struct pipe_resource *templ = params->templ;
58 unsigned w, h;
59
60 w = u_minify(layout->width0, level);
61 h = u_minify(layout->height0, level);
62
63 /*
64 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
65 *
66 * "The dimensions of the mip maps are first determined by applying the
67 * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
68 * if necessary, they are padded out to compression block boundaries."
69 */
70 w = align(w, layout->block_width);
71 h = align(h, layout->block_height);
72
73 /*
74 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
75 *
76 * "If the surface is multisampled (4x), these values must be adjusted
77 * as follows before proceeding:
78 *
79 * W_L = ceiling(W_L / 2) * 4
80 * H_L = ceiling(H_L / 2) * 4"
81 *
82 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
83 *
84 * "If the surface is multisampled and it is a depth or stencil surface
85 * or Multisampled Surface StorageFormat in SURFACE_STATE is
86 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
87 * proceeding:
88 *
89 * #samples W_L = H_L =
90 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
91 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
92 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
93 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
94 *
95 * For interleaved samples (4x), where pixels
96 *
97 * (x, y ) (x+1, y )
98 * (x, y+1) (x+1, y+1)
99 *
100 * would be is occupied by
101 *
102 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
103 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
104 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
105 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
106 *
107 * Thus the need to
108 *
109 * w = align(w, 2) * 2;
110 * y = align(y, 2) * 2;
111 */
112 if (layout->interleaved_samples) {
113 switch (templ->nr_samples) {
114 case 0:
115 case 1:
116 break;
117 case 2:
118 w = align(w, 2) * 2;
119 break;
120 case 4:
121 w = align(w, 2) * 2;
122 h = align(h, 2) * 2;
123 break;
124 case 8:
125 w = align(w, 2) * 4;
126 h = align(h, 2) * 2;
127 break;
128 case 16:
129 w = align(w, 2) * 4;
130 h = align(h, 2) * 4;
131 break;
132 default:
133 assert(!"unsupported sample count");
134 break;
135 }
136 }
137
138 /*
139 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
140 *
141 * "For separate stencil buffer, the width must be mutiplied by 2 and
142 * height divided by 2..."
143 *
144 * To make things easier (for transfer), we will just double the stencil
145 * stride in 3DSTATE_STENCIL_BUFFER.
146 */
147 w = align(w, layout->align_i);
148 h = align(h, layout->align_j);
149
150 *width = w;
151 *height = h;
152 }
153
154 static unsigned
155 layout_get_num_layers(const struct ilo_layout *layout,
156 const struct ilo_layout_params *params)
157 {
158 const struct pipe_resource *templ = params->templ;
159 unsigned num_layers = templ->array_size;
160
161 /* samples of the same index are stored in a layer */
162 if (templ->nr_samples > 1 && !layout->interleaved_samples)
163 num_layers *= templ->nr_samples;
164
165 return num_layers;
166 }
167
168 static void
169 layout_init_layer_height(struct ilo_layout *layout,
170 struct ilo_layout_params *params)
171 {
172 const struct pipe_resource *templ = params->templ;
173 unsigned num_layers;
174
175 if (layout->walk != ILO_LAYOUT_WALK_LAYER)
176 return;
177
178 num_layers = layout_get_num_layers(layout, params);
179 if (num_layers <= 1)
180 return;
181
182 /*
183 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
184 *
185 * "The following equation is used for surface formats other than
186 * compressed textures:
187 *
188 * QPitch = (h0 + h1 + 11j)"
189 *
190 * "The equation for compressed textures (BC* and FXT1 surface formats)
191 * follows:
192 *
193 * QPitch = (h0 + h1 + 11j) / 4"
194 *
195 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
196 * value calculated in the equation above, for every other odd Surface
197 * Height starting from 1 i.e. 1,5,9,13"
198 *
199 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
200 *
201 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
202 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
203 *
204 * QPitch = (h0 + h1 + 12j)
205 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
206 *
207 * (There are many typos or missing words here...)"
208 *
209 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
210 * the base address. The PRM divides QPitch by 4 for compressed formats
211 * because the block height for those formats are 4, and it wants QPitch to
212 * mean the number of memory rows, as opposed to texel rows, between
213 * slices. Since we use texel rows everywhere, we do not need to divide
214 * QPitch by 4.
215 */
216 layout->layer_height = params->h0 + params->h1 +
217 ((params->dev->gen >= ILO_GEN(7)) ? 12 : 11) * layout->align_j;
218
219 if (params->dev->gen == ILO_GEN(6) && templ->nr_samples > 1 &&
220 layout->height0 % 4 == 1)
221 layout->layer_height += 4;
222
223 params->max_y += layout->layer_height * (num_layers - 1);
224 }
225
226 static void
227 layout_init_lods(struct ilo_layout *layout,
228 struct ilo_layout_params *params)
229 {
230 const struct pipe_resource *templ = params->templ;
231 unsigned cur_x, cur_y;
232 unsigned lv;
233
234 cur_x = 0;
235 cur_y = 0;
236 for (lv = 0; lv <= templ->last_level; lv++) {
237 unsigned lod_w, lod_h;
238
239 layout_get_slice_size(layout, params, lv, &lod_w, &lod_h);
240
241 layout->lods[lv].x = cur_x;
242 layout->lods[lv].y = cur_y;
243 layout->lods[lv].slice_width = lod_w;
244 layout->lods[lv].slice_height = lod_h;
245
246 switch (layout->walk) {
247 case ILO_LAYOUT_WALK_LOD:
248 lod_h *= layout_get_num_layers(layout, params);
249 if (lv == 1)
250 cur_x += lod_w;
251 else
252 cur_y += lod_h;
253
254 /* every LOD begins at tile boundaries */
255 if (templ->last_level > 0) {
256 assert(layout->format == PIPE_FORMAT_S8_UINT);
257 cur_x = align(cur_x, 64);
258 cur_y = align(cur_y, 64);
259 }
260 break;
261 case ILO_LAYOUT_WALK_LAYER:
262 /* MIPLAYOUT_BELOW */
263 if (lv == 1)
264 cur_x += lod_w;
265 else
266 cur_y += lod_h;
267 break;
268 case ILO_LAYOUT_WALK_3D:
269 {
270 const unsigned num_slices = u_minify(templ->depth0, lv);
271 const unsigned num_slices_per_row = 1 << lv;
272 const unsigned num_rows =
273 (num_slices + num_slices_per_row - 1) / num_slices_per_row;
274
275 lod_w *= num_slices_per_row;
276 lod_h *= num_rows;
277
278 cur_y += lod_h;
279 }
280 break;
281 }
282
283 if (params->max_x < layout->lods[lv].x + lod_w)
284 params->max_x = layout->lods[lv].x + lod_w;
285 if (params->max_y < layout->lods[lv].y + lod_h)
286 params->max_y = layout->lods[lv].y + lod_h;
287 }
288
289 if (layout->walk == ILO_LAYOUT_WALK_LAYER) {
290 params->h0 = layout->lods[0].slice_height;
291
292 if (templ->last_level > 0)
293 params->h1 = layout->lods[1].slice_height;
294 else
295 layout_get_slice_size(layout, params, 1, &cur_x, &params->h1);
296 }
297 }
298
299 static void
300 layout_init_alignments(struct ilo_layout *layout,
301 struct ilo_layout_params *params)
302 {
303 const struct pipe_resource *templ = params->templ;
304
305 /*
306 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
307 *
308 * "surface format align_i align_j
309 * YUV 4:2:2 formats 4 *see below
310 * BC1-5 4 4
311 * FXT1 8 4
312 * all other formats 4 *see below"
313 *
314 * "- align_j = 4 for any depth buffer
315 * - align_j = 2 for separate stencil buffer
316 * - align_j = 4 for any render target surface is multisampled (4x)
317 * - align_j = 4 for any render target surface with Surface Vertical
318 * Alignment = VALIGN_4
319 * - align_j = 2 for any render target surface with Surface Vertical
320 * Alignment = VALIGN_2
321 * - align_j = 2 for all other render target surface
322 * - align_j = 2 for any sampling engine surface with Surface Vertical
323 * Alignment = VALIGN_2
324 * - align_j = 4 for any sampling engine surface with Surface Vertical
325 * Alignment = VALIGN_4"
326 *
327 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
328 *
329 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
330 * the Surface Format is 96 bits per element (BPE)."
331 *
332 * They can be rephrased as
333 *
334 * align_i align_j
335 * compressed formats block width block height
336 * PIPE_FORMAT_S8_UINT 4 2
337 * other depth/stencil formats 4 4
338 * 4x multisampled 4 4
339 * bpp 96 4 2
340 * others 4 2 or 4
341 */
342
343 /*
344 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
345 *
346 * "surface defined by surface format align_i align_j
347 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
348 * not D16_UNORM 4 4
349 * 3DSTATE_STENCIL_BUFFER N/A 8 8
350 * SURFACE_STATE BC*, ETC*, EAC* 4 4
351 * FXT1 8 4
352 * all others (set by SURFACE_STATE)"
353 *
354 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
355 *
356 * "- This field (Surface Vertical Aligment) is intended to be set to
357 * VALIGN_4 if the surface was rendered as a depth buffer, for a
358 * multisampled (4x) render target, or for a multisampled (8x)
359 * render target, since these surfaces support only alignment of 4.
360 * - Use of VALIGN_4 for other surfaces is supported, but uses more
361 * memory.
362 * - This field must be set to VALIGN_4 for all tiled Y Render Target
363 * surfaces.
364 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
365 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
366 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
367 * must be set to VALIGN_4."
368 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
369 *
370 * "- This field (Surface Horizontal Aligment) is intended to be set to
371 * HALIGN_8 only if the surface was rendered as a depth buffer with
372 * Z16 format or a stencil buffer, since these surfaces support only
373 * alignment of 8.
374 * - Use of HALIGN_8 for other surfaces is supported, but uses more
375 * memory.
376 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
377 * - This field must be set to HALIGN_8 if the Surface Format is
378 * FXT1."
379 *
380 * They can be rephrased as
381 *
382 * align_i align_j
383 * compressed formats block width block height
384 * PIPE_FORMAT_Z16_UNORM 8 4
385 * PIPE_FORMAT_S8_UINT 8 8
386 * other depth/stencil formats 4 4
387 * 2x or 4x multisampled 4 or 8 4
388 * tiled Y 4 or 8 4 (if rt)
389 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
390 * others 4 or 8 2 or 4
391 */
392
393 if (params->compressed) {
394 /* this happens to be the case */
395 layout->align_i = layout->block_width;
396 layout->align_j = layout->block_height;
397 } else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
398 if (params->dev->gen >= ILO_GEN(7)) {
399 switch (layout->format) {
400 case PIPE_FORMAT_Z16_UNORM:
401 layout->align_i = 8;
402 layout->align_j = 4;
403 break;
404 case PIPE_FORMAT_S8_UINT:
405 layout->align_i = 8;
406 layout->align_j = 8;
407 break;
408 default:
409 layout->align_i = 4;
410 layout->align_j = 4;
411 break;
412 }
413 } else {
414 switch (layout->format) {
415 case PIPE_FORMAT_S8_UINT:
416 layout->align_i = 4;
417 layout->align_j = 2;
418 break;
419 default:
420 layout->align_i = 4;
421 layout->align_j = 4;
422 break;
423 }
424 }
425 } else {
426 const bool valign_4 = (templ->nr_samples > 1) ||
427 (params->dev->gen >= ILO_GEN(7) &&
428 layout->tiling == INTEL_TILING_Y &&
429 (templ->bind & PIPE_BIND_RENDER_TARGET));
430
431 if (valign_4)
432 assert(layout->block_size != 12);
433
434 layout->align_i = 4;
435 layout->align_j = (valign_4) ? 4 : 2;
436 }
437
438 /*
439 * the fact that align i and j are multiples of block width and height
440 * respectively is what makes the size of the bo a multiple of the block
441 * size, slices start at block boundaries, and many of the computations
442 * work.
443 */
444 assert(layout->align_i % layout->block_width == 0);
445 assert(layout->align_j % layout->block_height == 0);
446
447 /* make sure align() works */
448 assert(util_is_power_of_two(layout->align_i) &&
449 util_is_power_of_two(layout->align_j));
450 assert(util_is_power_of_two(layout->block_width) &&
451 util_is_power_of_two(layout->block_height));
452 }
453
454 static unsigned
455 layout_get_valid_tilings(const struct ilo_layout *layout,
456 const struct ilo_layout_params *params)
457 {
458 const struct pipe_resource *templ = params->templ;
459 const enum pipe_format format = layout->format;
460 unsigned valid_tilings = LAYOUT_TILING_ALL;
461
462 /*
463 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
464 *
465 * "Display/Overlay Y-Major not supported.
466 * X-Major required for Async Flips"
467 */
468 if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
469 valid_tilings &= LAYOUT_TILING_X;
470
471 /*
472 * From the Sandy Bridge PRM, volume 3 part 2, page 158:
473 *
474 * "The cursor surface address must be 4K byte aligned. The cursor must
475 * be in linear memory, it cannot be tiled."
476 */
477 if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)))
478 valid_tilings &= LAYOUT_TILING_NONE;
479
480 /*
481 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
482 *
483 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
484 * Depth Buffer is not supported."
485 *
486 * "The Depth Buffer, if tiled, must use Y-Major tiling."
487 *
488 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
489 *
490 * "W-Major Tile Format is used for separate stencil."
491 */
492 if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
493 switch (format) {
494 case PIPE_FORMAT_S8_UINT:
495 valid_tilings &= LAYOUT_TILING_W;
496 break;
497 default:
498 valid_tilings &= LAYOUT_TILING_Y;
499 break;
500 }
501 }
502
503 if (templ->bind & PIPE_BIND_RENDER_TARGET) {
504 /*
505 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
506 *
507 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
508 * either TileX or Linear."
509 */
510 if (layout->block_size == 16)
511 valid_tilings &= ~LAYOUT_TILING_Y;
512
513 /*
514 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
515 *
516 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
517 * for all tiled Y Render Target surfaces."
518 *
519 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
520 */
521 if (params->dev->gen >= ILO_GEN(7) && layout->block_size == 12)
522 valid_tilings &= ~LAYOUT_TILING_Y;
523 }
524
525 /* no conflicting binding flags */
526 assert(valid_tilings);
527
528 return valid_tilings;
529 }
530
531 static void
532 layout_init_tiling(struct ilo_layout *layout,
533 struct ilo_layout_params *params)
534 {
535 const struct pipe_resource *templ = params->templ;
536 unsigned valid_tilings = layout_get_valid_tilings(layout, params);
537
538 /* no hardware support for W-tile */
539 if (valid_tilings & LAYOUT_TILING_W)
540 valid_tilings = (valid_tilings & ~LAYOUT_TILING_W) | LAYOUT_TILING_NONE;
541
542 layout->valid_tilings = valid_tilings;
543
544 if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
545 /*
546 * heuristically set a minimum width/height for enabling tiling
547 */
548 if (layout->width0 < 64 && (valid_tilings & ~LAYOUT_TILING_X))
549 valid_tilings &= ~LAYOUT_TILING_X;
550
551 if ((layout->width0 < 32 || layout->height0 < 16) &&
552 (layout->width0 < 16 || layout->height0 < 32) &&
553 (valid_tilings & ~LAYOUT_TILING_Y))
554 valid_tilings &= ~LAYOUT_TILING_Y;
555 } else {
556 /* force linear if we are not sure where the texture is bound to */
557 if (valid_tilings & LAYOUT_TILING_NONE)
558 valid_tilings &= LAYOUT_TILING_NONE;
559 }
560
561 /* prefer tiled over linear */
562 if (valid_tilings & LAYOUT_TILING_Y)
563 layout->tiling = INTEL_TILING_Y;
564 else if (valid_tilings & LAYOUT_TILING_X)
565 layout->tiling = INTEL_TILING_X;
566 else
567 layout->tiling = INTEL_TILING_NONE;
568 }
569
570 static void
571 layout_init_walk_gen7(struct ilo_layout *layout,
572 struct ilo_layout_params *params)
573 {
574 const struct pipe_resource *templ = params->templ;
575
576 /*
577 * It is not explicitly states, but render targets are expected to be
578 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
579 * to be IMS (samples interleaved).
580 *
581 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
582 */
583 if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
584 /*
585 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
586 *
587 * "note that the depth buffer and stencil buffer have an implied
588 * value of ARYSPC_FULL"
589 */
590 layout->walk = (templ->target == PIPE_TEXTURE_3D) ?
591 ILO_LAYOUT_WALK_3D : ILO_LAYOUT_WALK_LAYER;
592
593 layout->interleaved_samples = true;
594 } else {
595 /*
596 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
597 *
598 * "If Multisampled Surface Storage Format is MSFMT_MSS and Number
599 * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
600 * Array Spacing) must be set to ARYSPC_LOD0."
601 *
602 * As multisampled resources are not mipmapped, we never use
603 * ARYSPC_FULL for them.
604 */
605 if (templ->nr_samples > 1)
606 assert(templ->last_level == 0);
607
608 layout->walk =
609 (templ->target == PIPE_TEXTURE_3D) ? ILO_LAYOUT_WALK_3D :
610 (templ->last_level > 0) ? ILO_LAYOUT_WALK_LAYER :
611 ILO_LAYOUT_WALK_LOD;
612
613 layout->interleaved_samples = false;
614 }
615 }
616
617 static void
618 layout_init_walk_gen6(struct ilo_layout *layout,
619 struct ilo_layout_params *params)
620 {
621 /*
622 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
623 *
624 * "The separate stencil buffer does not support mip mapping, thus the
625 * storage for LODs other than LOD 0 is not needed. The following
626 * QPitch equation applies only to the separate stencil buffer:
627 *
628 * QPitch = h_0"
629 *
630 * GEN6 does not support compact spacing otherwise.
631 */
632 layout->walk =
633 (params->templ->target == PIPE_TEXTURE_3D) ? ILO_LAYOUT_WALK_3D :
634 (layout->format == PIPE_FORMAT_S8_UINT) ? ILO_LAYOUT_WALK_LOD :
635 ILO_LAYOUT_WALK_LAYER;
636
637 /* GEN6 supports only interleaved samples */
638 layout->interleaved_samples = true;
639 }
640
641 static void
642 layout_init_walk(struct ilo_layout *layout,
643 struct ilo_layout_params *params)
644 {
645 if (params->dev->gen >= ILO_GEN(7))
646 layout_init_walk_gen7(layout, params);
647 else
648 layout_init_walk_gen6(layout, params);
649 }
650
651 static void
652 layout_init_size_and_format(struct ilo_layout *layout,
653 struct ilo_layout_params *params)
654 {
655 const struct pipe_resource *templ = params->templ;
656 enum pipe_format format = templ->format;
657 bool require_separate_stencil;
658
659 layout->width0 = templ->width0;
660 layout->height0 = templ->height0;
661
662 /*
663 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
664 *
665 * "This field (Separate Stencil Buffer Enable) must be set to the same
666 * value (enabled or disabled) as Hierarchical Depth Buffer Enable."
667 *
668 * GEN7+ requires separate stencil buffers.
669 */
670 if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
671 if (params->dev->gen >= ILO_GEN(7))
672 require_separate_stencil = true;
673 else
674 require_separate_stencil = (layout->aux == ILO_LAYOUT_AUX_HIZ);
675 }
676
677 switch (format) {
678 case PIPE_FORMAT_ETC1_RGB8:
679 format = PIPE_FORMAT_R8G8B8X8_UNORM;
680 break;
681 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
682 if (require_separate_stencil) {
683 format = PIPE_FORMAT_Z24X8_UNORM;
684 layout->separate_stencil = true;
685 }
686 break;
687 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
688 if (require_separate_stencil) {
689 format = PIPE_FORMAT_Z32_FLOAT;
690 layout->separate_stencil = true;
691 }
692 break;
693 default:
694 break;
695 }
696
697 layout->format = format;
698 layout->block_width = util_format_get_blockwidth(format);
699 layout->block_height = util_format_get_blockheight(format);
700 layout->block_size = util_format_get_blocksize(format);
701
702 params->compressed = util_format_is_compressed(format);
703 }
704
705 static bool
706 layout_want_mcs(struct ilo_layout *layout,
707 struct ilo_layout_params *params)
708 {
709 const struct pipe_resource *templ = params->templ;
710 bool want_mcs = false;
711
712 /* MCS is for RT on GEN7+ */
713 if (params->dev->gen < ILO_GEN(7))
714 return false;
715
716 if (templ->target != PIPE_TEXTURE_2D ||
717 !(templ->bind & PIPE_BIND_RENDER_TARGET))
718 return false;
719
720 /*
721 * From the Ivy Bridge PRM, volume 4 part 1, page 77:
722 *
723 * "For Render Target and Sampling Engine Surfaces:If the surface is
724 * multisampled (Number of Multisamples any value other than
725 * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
726 *
727 * "This field must be set to 0 for all SINT MSRTs when all RT channels
728 * are not written"
729 */
730 if (templ->nr_samples > 1 && !layout->interleaved_samples &&
731 !util_format_is_pure_sint(templ->format)) {
732 want_mcs = true;
733 } else if (templ->nr_samples <= 1) {
734 /*
735 * From the Ivy Bridge PRM, volume 2 part 1, page 326:
736 *
737 * "When MCS is buffer is used for color clear of non-multisampler
738 * render target, the following restrictions apply.
739 * - Support is limited to tiled render targets.
740 * - Support is for non-mip-mapped and non-array surface types
741 * only.
742 * - Clear is supported only on the full RT; i.e., no partial clear
743 * or overlapping clears.
744 * - MCS buffer for non-MSRT is supported only for RT formats
745 * 32bpp, 64bpp and 128bpp.
746 * ..."
747 */
748 if (layout->tiling != INTEL_TILING_NONE &&
749 templ->last_level == 0 && templ->array_size == 1) {
750 switch (layout->block_size) {
751 case 4:
752 case 8:
753 case 16:
754 want_mcs = true;
755 break;
756 default:
757 break;
758 }
759 }
760 }
761
762 return want_mcs;
763 }
764
765 static bool
766 layout_want_hiz(const struct ilo_layout *layout,
767 const struct ilo_layout_params *params)
768 {
769 const struct pipe_resource *templ = params->templ;
770 const struct util_format_description *desc =
771 util_format_description(templ->format);
772 bool want_hiz = false;
773
774 if (ilo_debug & ILO_DEBUG_NOHIZ)
775 return false;
776
777 if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL))
778 return false;
779
780 if (!util_format_has_depth(desc))
781 return false;
782
783 /* no point in having HiZ */
784 if (templ->usage == PIPE_USAGE_STAGING)
785 return false;
786
787 if (params->dev->gen >= ILO_GEN(7)) {
788 want_hiz = true;
789 } else {
790 /*
791 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
792 *
793 * "The hierarchical depth buffer does not support the LOD field, it
794 * is assumed by hardware to be zero. A separate hierarachical
795 * depth buffer is required for each LOD used, and the
796 * corresponding buffer's state delivered to hardware each time a
797 * new depth buffer state with modified LOD is delivered."
798 *
799 * But we have a stronger requirement. Because of layer offsetting
800 * (check out the callers of ilo_layout_get_slice_tile_offset()), we
801 * already have to require the texture to be non-mipmapped and
802 * non-array.
803 */
804 if (templ->last_level == 0 && templ->array_size == 1 &&
805 templ->depth0 == 1)
806 want_hiz = true;
807 }
808
809 return want_hiz;
810 }
811
812 static void
813 layout_init_aux(struct ilo_layout *layout,
814 struct ilo_layout_params *params)
815 {
816 if (layout_want_hiz(layout, params))
817 layout->aux = ILO_LAYOUT_AUX_HIZ;
818 else if (layout_want_mcs(layout, params))
819 layout->aux = ILO_LAYOUT_AUX_MCS;
820 }
821
822 static void
823 layout_align(struct ilo_layout *layout, struct ilo_layout_params *params)
824 {
825 const struct pipe_resource *templ = params->templ;
826 int align_w = 1, align_h = 1, pad_h = 0;
827
828 /*
829 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
830 *
831 * "To determine the necessary padding on the bottom and right side of
832 * the surface, refer to the table in Section 7.18.3.4 for the i and j
833 * parameters for the surface format in use. The surface must then be
834 * extended to the next multiple of the alignment unit size in each
835 * dimension, and all texels contained in this extended surface must
836 * have valid GTT entries."
837 *
838 * "For cube surfaces, an additional two rows of padding are required
839 * at the bottom of the surface. This must be ensured regardless of
840 * whether the surface is stored tiled or linear. This is due to the
841 * potential rotation of cache line orientation from memory to cache."
842 *
843 * "For compressed textures (BC* and FXT1 surface formats), padding at
844 * the bottom of the surface is to an even compressed row, which is
845 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
846 * purposes, these surfaces behave as if j = 8 only for surface
847 * padding purposes. The value of 4 for j still applies for mip level
848 * alignment and QPitch calculation."
849 */
850 if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
851 align_w = MAX2(align_w, layout->align_i);
852 align_h = MAX2(align_h, layout->align_j);
853
854 if (templ->target == PIPE_TEXTURE_CUBE)
855 pad_h += 2;
856
857 if (params->compressed)
858 align_h = MAX2(align_h, layout->align_j * 2);
859 }
860
861 /*
862 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
863 *
864 * "If the surface contains an odd number of rows of data, a final row
865 * below the surface must be allocated."
866 */
867 if (templ->bind & PIPE_BIND_RENDER_TARGET)
868 align_h = MAX2(align_h, 2);
869
870 /*
871 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
872 * ilo_texture_can_enable_hiz(), we always return true for the first slice.
873 * To avoid out-of-bound access, we have to pad.
874 */
875 if (layout->aux == ILO_LAYOUT_AUX_HIZ &&
876 templ->last_level == 0 &&
877 templ->array_size == 1 &&
878 templ->depth0 == 1) {
879 align_w = MAX2(align_w, 8);
880 align_h = MAX2(align_h, 4);
881 }
882
883 params->max_x = align(params->max_x, align_w);
884 params->max_y = align(params->max_y + pad_h, align_h);
885 }
886
887 /* note that this may force the texture to be linear */
888 static void
889 layout_calculate_bo_size(struct ilo_layout *layout,
890 struct ilo_layout_params *params)
891 {
892 assert(params->max_x % layout->block_width == 0);
893 assert(params->max_y % layout->block_height == 0);
894 assert(layout->layer_height % layout->block_height == 0);
895
896 layout->bo_stride =
897 (params->max_x / layout->block_width) * layout->block_size;
898 layout->bo_height = params->max_y / layout->block_height;
899
900 while (true) {
901 unsigned w = layout->bo_stride, h = layout->bo_height;
902 unsigned align_w, align_h;
903
904 /*
905 * From the Haswell PRM, volume 5, page 163:
906 *
907 * "For linear surfaces, additional padding of 64 bytes is required
908 * at the bottom of the surface. This is in addition to the padding
909 * required above."
910 */
911 if (params->dev->gen >= ILO_GEN(7.5) &&
912 (params->templ->bind & PIPE_BIND_SAMPLER_VIEW) &&
913 layout->tiling == INTEL_TILING_NONE) {
914 layout->bo_height +=
915 (64 + layout->bo_stride - 1) / layout->bo_stride;
916 }
917
918 /*
919 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
920 *
921 * "- For linear render target surfaces, the pitch must be a
922 * multiple of the element size for non-YUV surface formats.
923 * Pitch must be a multiple of 2 * element size for YUV surface
924 * formats.
925 * - For other linear surfaces, the pitch can be any multiple of
926 * bytes.
927 * - For tiled surfaces, the pitch must be a multiple of the tile
928 * width."
929 *
930 * Different requirements may exist when the bo is used in different
931 * places, but our alignments here should be good enough that we do not
932 * need to check layout->templ->bind.
933 */
934 switch (layout->tiling) {
935 case INTEL_TILING_X:
936 align_w = 512;
937 align_h = 8;
938 break;
939 case INTEL_TILING_Y:
940 align_w = 128;
941 align_h = 32;
942 break;
943 default:
944 if (layout->format == PIPE_FORMAT_S8_UINT) {
945 /*
946 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
947 *
948 * "A 4KB tile is subdivided into 8-high by 8-wide array of
949 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
950 * bytes."
951 *
952 * Since we asked for INTEL_TILING_NONE instead of the non-existent
953 * INTEL_TILING_W, we want to align to W tiles here.
954 */
955 align_w = 64;
956 align_h = 64;
957 } else {
958 /* some good enough values */
959 align_w = 64;
960 align_h = 2;
961 }
962 break;
963 }
964
965 w = align(w, align_w);
966 h = align(h, align_h);
967
968 /* make sure the bo is mappable */
969 if (layout->tiling != INTEL_TILING_NONE) {
970 /*
971 * Usually only the first 256MB of the GTT is mappable.
972 *
973 * See also how intel_context::max_gtt_map_object_size is calculated.
974 */
975 const size_t mappable_gtt_size = 256 * 1024 * 1024;
976
977 /*
978 * Be conservative. We may be able to switch from VALIGN_4 to
979 * VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
980 */
981 if (mappable_gtt_size / w / 4 < h) {
982 if (layout->valid_tilings & LAYOUT_TILING_NONE) {
983 layout->tiling = INTEL_TILING_NONE;
984 /* MCS support for non-MSRTs is limited to tiled RTs */
985 if (layout->aux == ILO_LAYOUT_AUX_MCS &&
986 params->templ->nr_samples <= 1)
987 layout->aux = ILO_LAYOUT_AUX_NONE;
988
989 continue;
990 } else {
991 ilo_warn("cannot force texture to be linear\n");
992 }
993 }
994 }
995
996 layout->bo_stride = w;
997 layout->bo_height = h;
998 break;
999 }
1000 }
1001
1002 static void
1003 layout_calculate_hiz_size(struct ilo_layout *layout,
1004 struct ilo_layout_params *params)
1005 {
1006 const struct pipe_resource *templ = params->templ;
1007 const unsigned hz_align_j = 8;
1008 enum ilo_layout_walk_type hz_walk;
1009 unsigned hz_width, hz_height, lv;
1010 unsigned hz_clear_w, hz_clear_h;
1011
1012 assert(layout->aux == ILO_LAYOUT_AUX_HIZ);
1013
1014 assert(layout->walk == ILO_LAYOUT_WALK_LAYER ||
1015 layout->walk == ILO_LAYOUT_WALK_3D);
1016
1017 /*
1018 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
1019 *
1020 * "The hierarchical depth buffer does not support the LOD field, it is
1021 * assumed by hardware to be zero. A separate hierarachical depth
1022 * buffer is required for each LOD used, and the corresponding
1023 * buffer's state delivered to hardware each time a new depth buffer
1024 * state with modified LOD is delivered."
1025 *
1026 * We will put all LODs in a single bo with ILO_LAYOUT_WALK_LOD.
1027 */
1028 if (params->dev->gen >= ILO_GEN(7))
1029 hz_walk = layout->walk;
1030 else
1031 hz_walk = ILO_LAYOUT_WALK_LOD;
1032
1033 /*
1034 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1035 * PRM, volume 2 part 1, page 312-313.
1036 *
1037 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1038 * memory row.
1039 */
1040 switch (hz_walk) {
1041 case ILO_LAYOUT_WALK_LOD:
1042 {
1043 unsigned lod_tx[PIPE_MAX_TEXTURE_LEVELS];
1044 unsigned lod_ty[PIPE_MAX_TEXTURE_LEVELS];
1045 unsigned cur_tx, cur_ty;
1046
1047 /* figure out the tile offsets of LODs */
1048 hz_width = 0;
1049 hz_height = 0;
1050 cur_tx = 0;
1051 cur_ty = 0;
1052 for (lv = 0; lv <= templ->last_level; lv++) {
1053 unsigned tw, th;
1054
1055 lod_tx[lv] = cur_tx;
1056 lod_ty[lv] = cur_ty;
1057
1058 tw = align(layout->lods[lv].slice_width, 16);
1059 th = align(layout->lods[lv].slice_height, hz_align_j) *
1060 templ->array_size / 2;
1061 /* convert to Y-tiles */
1062 tw = align(tw, 128) / 128;
1063 th = align(th, 32) / 32;
1064
1065 if (hz_width < cur_tx + tw)
1066 hz_width = cur_tx + tw;
1067 if (hz_height < cur_ty + th)
1068 hz_height = cur_ty + th;
1069
1070 if (lv == 1)
1071 cur_tx += tw;
1072 else
1073 cur_ty += th;
1074 }
1075
1076 /* convert tile offsets to memory offsets */
1077 for (lv = 0; lv <= templ->last_level; lv++) {
1078 layout->aux_offsets[lv] =
1079 (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
1080 }
1081 hz_width *= 128;
1082 hz_height *= 32;
1083 }
1084 break;
1085 case ILO_LAYOUT_WALK_LAYER:
1086 {
1087 const unsigned h0 = align(params->h0, hz_align_j);
1088 const unsigned h1 = align(params->h1, hz_align_j);
1089 const unsigned htail =
1090 ((params->dev->gen >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
1091 const unsigned hz_qpitch = h0 + h1 + htail;
1092
1093 hz_width = align(layout->lods[0].slice_width, 16);
1094
1095 hz_height = hz_qpitch * templ->array_size / 2;
1096 if (params->dev->gen >= ILO_GEN(7))
1097 hz_height = align(hz_height, 8);
1098 }
1099 break;
1100 case ILO_LAYOUT_WALK_3D:
1101 hz_width = align(layout->lods[0].slice_width, 16);
1102
1103 hz_height = 0;
1104 for (lv = 0; lv <= templ->last_level; lv++) {
1105 const unsigned h = align(layout->lods[lv].slice_height, hz_align_j);
1106 /* according to the formula, slices are packed together vertically */
1107 hz_height += h * u_minify(templ->depth0, lv);
1108 }
1109 hz_height /= 2;
1110 break;
1111 }
1112
1113 /*
1114 * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
1115 * Experiments on Haswell show that aligning the RECTLIST primitive and
1116 * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be
1117 * aligned.
1118 */
1119 hz_clear_w = 8;
1120 hz_clear_h = 4;
1121 switch (templ->nr_samples) {
1122 case 0:
1123 case 1:
1124 default:
1125 break;
1126 case 2:
1127 hz_clear_w /= 2;
1128 break;
1129 case 4:
1130 hz_clear_w /= 2;
1131 hz_clear_h /= 2;
1132 break;
1133 case 8:
1134 hz_clear_w /= 4;
1135 hz_clear_h /= 2;
1136 break;
1137 case 16:
1138 hz_clear_w /= 4;
1139 hz_clear_h /= 4;
1140 break;
1141 }
1142
1143 for (lv = 0; lv <= templ->last_level; lv++) {
1144 if (u_minify(layout->width0, lv) % hz_clear_w ||
1145 u_minify(layout->height0, lv) % hz_clear_h)
1146 break;
1147 layout->aux_enables |= 1 << lv;
1148 }
1149
1150 /* we padded to allow this in layout_align() */
1151 if (templ->last_level == 0 && templ->array_size == 1 && templ->depth0 == 1)
1152 layout->aux_enables |= 0x1;
1153
1154 /* align to Y-tile */
1155 layout->aux_stride = align(hz_width, 128);
1156 layout->aux_height = align(hz_height, 32);
1157 }
1158
1159 static void
1160 layout_calculate_mcs_size(struct ilo_layout *layout,
1161 struct ilo_layout_params *params)
1162 {
1163 const struct pipe_resource *templ = params->templ;
1164 int mcs_width, mcs_height, mcs_cpp;
1165 int downscale_x, downscale_y;
1166
1167 assert(layout->aux == ILO_LAYOUT_AUX_MCS);
1168
1169 if (templ->nr_samples > 1) {
1170 /*
1171 * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1172 * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
1173 * need of scale down could be that the clear rectangle is used to clear
1174 * the MCS instead of the RT.
1175 *
1176 * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
1177 * 2x2 factor could come from that the hardware writes 128 bits (an
1178 * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1179 * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
1180 * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1181 * pixel block in the RT.
1182 */
1183 switch (templ->nr_samples) {
1184 case 2:
1185 case 4:
1186 downscale_x = 8;
1187 downscale_y = 2;
1188 mcs_cpp = 1;
1189 break;
1190 case 8:
1191 downscale_x = 2;
1192 downscale_y = 2;
1193 mcs_cpp = 4;
1194 break;
1195 case 16:
1196 downscale_x = 2;
1197 downscale_y = 1;
1198 mcs_cpp = 8;
1199 break;
1200 default:
1201 assert(!"unsupported sample count");
1202 return;
1203 break;
1204 }
1205
1206 /*
1207 * It also appears that the 2x2 subspans generated by the scaled-down
1208 * clear rectangle cannot be masked. The scale-down clear rectangle
1209 * thus must be aligned to 2x2, and we need to pad.
1210 */
1211 mcs_width = align(layout->width0, downscale_x * 2);
1212 mcs_height = align(layout->height0, downscale_y * 2);
1213 } else {
1214 /*
1215 * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1216 *
1217 * " Pixels Lines
1218 * TiledY RT CL
1219 * bpp
1220 * 32 8 4
1221 * 64 4 4
1222 * 128 2 4
1223 *
1224 * TiledX RT CL
1225 * bpp
1226 * 32 16 2
1227 * 64 8 2
1228 * 128 4 2"
1229 *
1230 * This table and the two following tables define the RT alignments, the
1231 * clear rectangle alignments, and the clear rectangle scale factors.
1232 * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1233 * that the clear rectangle alignments are 16x32 blocks, and the clear
1234 * rectangle scale factors are 8x16 blocks.
1235 *
1236 * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1237 * RT. Similar to the MSAA cases, we can argue that an OWord maps to
1238 * 8x16 blocks.
1239 *
1240 * One problem with this reasoning is that a Y-tile in MCS has 8x32
1241 * OWords and maps to 64x512 128-byte blocks. This differs from i965,
1242 * which says that a Y-tile maps to 128x256 blocks (\see
1243 * intel_get_non_msrt_mcs_alignment). It does not really change
1244 * anything except for the size of the allocated MCS. Let's see if we
1245 * hit out-of-bound access.
1246 */
1247 switch (layout->tiling) {
1248 case INTEL_TILING_X:
1249 downscale_x = 64 / layout->block_size;
1250 downscale_y = 2;
1251 break;
1252 case INTEL_TILING_Y:
1253 downscale_x = 32 / layout->block_size;
1254 downscale_y = 4;
1255 break;
1256 default:
1257 assert(!"unsupported tiling mode");
1258 return;
1259 break;
1260 }
1261
1262 downscale_x *= 8;
1263 downscale_y *= 16;
1264
1265 /*
1266 * From the Haswell PRM, volume 7, page 652:
1267 *
1268 * "Clear rectangle must be aligned to two times the number of
1269 * pixels in the table shown below due to 16X16 hashing across the
1270 * slice."
1271 *
1272 * The scaled-down clear rectangle must be aligned to 4x4 instead of
1273 * 2x2, and we need to pad.
1274 */
1275 mcs_width = align(layout->width0, downscale_x * 4) / downscale_x;
1276 mcs_height = align(layout->height0, downscale_y * 4) / downscale_y;
1277 mcs_cpp = 16; /* an OWord */
1278 }
1279
1280 layout->aux_enables = (1 << (templ->last_level + 1)) - 1;
1281 /* align to Y-tile */
1282 layout->aux_stride = align(mcs_width * mcs_cpp, 128);
1283 layout->aux_height = align(mcs_height, 32);
1284 }
1285
1286 /**
1287 * The texutre is for transfer only. We can define our own layout to save
1288 * space.
1289 */
1290 static void
1291 layout_init_for_transfer(struct ilo_layout *layout,
1292 const struct ilo_dev_info *dev,
1293 const struct pipe_resource *templ)
1294 {
1295 const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ?
1296 templ->depth0 : templ->array_size;
1297 unsigned layer_width, layer_height;
1298
1299 assert(templ->last_level == 0);
1300 assert(templ->nr_samples <= 1);
1301
1302 layout->aux = ILO_LAYOUT_AUX_NONE;
1303 layout->width0 = templ->width0;
1304 layout->height0 = templ->height0;
1305 layout->format = templ->format;
1306 layout->block_width = util_format_get_blockwidth(templ->format);
1307 layout->block_height = util_format_get_blockheight(templ->format);
1308 layout->block_size = util_format_get_blocksize(templ->format);
1309 layout->walk = ILO_LAYOUT_WALK_LOD;
1310
1311 layout->valid_tilings = LAYOUT_TILING_NONE;
1312 layout->tiling = INTEL_TILING_NONE;
1313
1314 layout->align_i = layout->block_width;
1315 layout->align_j = layout->block_height;
1316
1317 assert(util_is_power_of_two(layout->block_width) &&
1318 util_is_power_of_two(layout->block_height));
1319
1320 /* use packed layout */
1321 layer_width = align(templ->width0, layout->align_i);
1322 layer_height = align(templ->height0, layout->align_j);
1323
1324 layout->lods[0].slice_width = layer_width;
1325 layout->lods[0].slice_height = layer_height;
1326
1327 layout->bo_stride = (layer_width / layout->block_width) * layout->block_size;
1328 layout->bo_stride = align(layout->bo_stride, 64);
1329
1330 layout->bo_height = (layer_height / layout->block_height) * num_layers;
1331 }
1332
1333 /**
1334 * Initialize the layout. Callers should zero-initialize \p layout first.
1335 */
1336 void ilo_layout_init(struct ilo_layout *layout,
1337 const struct ilo_dev_info *dev,
1338 const struct pipe_resource *templ)
1339 {
1340 struct ilo_layout_params params;
1341 bool transfer_only;
1342
1343 /* use transfer layout when the texture is never bound to GPU */
1344 transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE |
1345 PIPE_BIND_TRANSFER_READ));
1346 if (transfer_only && templ->last_level == 0 && templ->nr_samples <= 1) {
1347 layout_init_for_transfer(layout, dev, templ);
1348 return;
1349 }
1350
1351 memset(&params, 0, sizeof(params));
1352 params.dev = dev;
1353 params.templ = templ;
1354
1355 /* note that there are dependencies between these functions */
1356 layout_init_aux(layout, &params);
1357 layout_init_size_and_format(layout, &params);
1358 layout_init_walk(layout, &params);
1359 layout_init_tiling(layout, &params);
1360 layout_init_alignments(layout, &params);
1361 layout_init_lods(layout, &params);
1362 layout_init_layer_height(layout, &params);
1363
1364 layout_align(layout, &params);
1365 layout_calculate_bo_size(layout, &params);
1366
1367 switch (layout->aux) {
1368 case ILO_LAYOUT_AUX_HIZ:
1369 layout_calculate_hiz_size(layout, &params);
1370 break;
1371 case ILO_LAYOUT_AUX_MCS:
1372 layout_calculate_mcs_size(layout, &params);
1373 break;
1374 default:
1375 break;
1376 }
1377 }
1378
1379 /**
1380 * Update the tiling mode and bo stride (for imported resources).
1381 */
1382 bool
1383 ilo_layout_update_for_imported_bo(struct ilo_layout *layout,
1384 enum intel_tiling_mode tiling,
1385 unsigned bo_stride)
1386 {
1387 if (!(layout->valid_tilings & (1 << tiling)))
1388 return false;
1389
1390 if ((tiling == INTEL_TILING_X && bo_stride % 512) ||
1391 (tiling == INTEL_TILING_Y && bo_stride % 128))
1392 return false;
1393
1394 layout->tiling = tiling;
1395 layout->bo_stride = bo_stride;
1396
1397 return true;
1398 }