ilo: replace ilo_zs_surface with ilo_state_zs
[mesa.git] / src / gallium / drivers / ilo / core / ilo_image.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2014 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "ilo_debug.h"
29 #include "ilo_image.h"
30
31 enum {
32 IMAGE_TILING_NONE = 1 << GEN6_TILING_NONE,
33 IMAGE_TILING_X = 1 << GEN6_TILING_X,
34 IMAGE_TILING_Y = 1 << GEN6_TILING_Y,
35 IMAGE_TILING_W = 1 << GEN8_TILING_W,
36
37 IMAGE_TILING_ALL = (IMAGE_TILING_NONE |
38 IMAGE_TILING_X |
39 IMAGE_TILING_Y |
40 IMAGE_TILING_W)
41 };
42
/*
 * Scratch state handed between the img_*() helpers of this file while the
 * layout of an image is being computed.
 */
struct ilo_image_params {
   /* device whose generation is queried with ilo_dev_gen() */
   const struct ilo_dev *dev;
   /* resource template the image is derived from */
   const struct pipe_resource *templ;
   /* mask of IMAGE_TILING_x flags still allowed for the image */
   unsigned valid_tilings;

   /* result of util_format_is_compressed() on the image format */
   bool compressed;

   /* slice heights of LOD 0 and LOD 1, set for ILO_IMAGE_WALK_LAYER */
   unsigned h0;
   unsigned h1;
   /* furthest extent reached by the layout so far */
   unsigned max_x;
   unsigned max_y;
};
53
54 static void
55 img_get_slice_size(const struct ilo_image *img,
56 const struct ilo_image_params *params,
57 unsigned level, unsigned *width, unsigned *height)
58 {
59 const struct pipe_resource *templ = params->templ;
60 unsigned w, h;
61
62 w = u_minify(img->width0, level);
63 h = u_minify(img->height0, level);
64
65 /*
66 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
67 *
68 * "The dimensions of the mip maps are first determined by applying the
69 * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
70 * if necessary, they are padded out to compression block boundaries."
71 */
72 w = align(w, img->block_width);
73 h = align(h, img->block_height);
74
75 /*
76 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
77 *
78 * "If the surface is multisampled (4x), these values must be adjusted
79 * as follows before proceeding:
80 *
81 * W_L = ceiling(W_L / 2) * 4
82 * H_L = ceiling(H_L / 2) * 4"
83 *
84 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
85 *
86 * "If the surface is multisampled and it is a depth or stencil surface
87 * or Multisampled Surface StorageFormat in SURFACE_STATE is
88 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
89 * proceeding:
90 *
91 * #samples W_L = H_L =
92 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
93 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
94 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
95 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
96 *
97 * For interleaved samples (4x), where pixels
98 *
99 * (x, y ) (x+1, y )
100 * (x, y+1) (x+1, y+1)
101 *
102 * would be is occupied by
103 *
104 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
105 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
106 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
107 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
108 *
109 * Thus the need to
110 *
111 * w = align(w, 2) * 2;
112 * y = align(y, 2) * 2;
113 */
114 if (img->interleaved_samples) {
115 switch (templ->nr_samples) {
116 case 0:
117 case 1:
118 break;
119 case 2:
120 w = align(w, 2) * 2;
121 break;
122 case 4:
123 w = align(w, 2) * 2;
124 h = align(h, 2) * 2;
125 break;
126 case 8:
127 w = align(w, 2) * 4;
128 h = align(h, 2) * 2;
129 break;
130 case 16:
131 w = align(w, 2) * 4;
132 h = align(h, 2) * 4;
133 break;
134 default:
135 assert(!"unsupported sample count");
136 break;
137 }
138 }
139
140 /*
141 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
142 *
143 * "For separate stencil buffer, the width must be mutiplied by 2 and
144 * height divided by 2..."
145 *
146 * To make things easier (for transfer), we will just double the stencil
147 * stride in 3DSTATE_STENCIL_BUFFER.
148 */
149 w = align(w, img->align_i);
150 h = align(h, img->align_j);
151
152 *width = w;
153 *height = h;
154 }
155
156 static unsigned
157 img_get_num_layers(const struct ilo_image *img,
158 const struct ilo_image_params *params)
159 {
160 const struct pipe_resource *templ = params->templ;
161 unsigned num_layers = templ->array_size;
162
163 /* samples of the same index are stored in a layer */
164 if (templ->nr_samples > 1 && !img->interleaved_samples)
165 num_layers *= templ->nr_samples;
166
167 return num_layers;
168 }
169
170 static void
171 img_init_layer_height(struct ilo_image *img,
172 struct ilo_image_params *params)
173 {
174 const struct pipe_resource *templ = params->templ;
175 unsigned num_layers;
176
177 if (img->walk != ILO_IMAGE_WALK_LAYER)
178 return;
179
180 num_layers = img_get_num_layers(img, params);
181 if (num_layers <= 1)
182 return;
183
184 /*
185 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
186 *
187 * "The following equation is used for surface formats other than
188 * compressed textures:
189 *
190 * QPitch = (h0 + h1 + 11j)"
191 *
192 * "The equation for compressed textures (BC* and FXT1 surface formats)
193 * follows:
194 *
195 * QPitch = (h0 + h1 + 11j) / 4"
196 *
197 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
198 * value calculated in the equation above, for every other odd Surface
199 * Height starting from 1 i.e. 1,5,9,13"
200 *
201 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
202 *
203 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
204 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
205 *
206 * QPitch = (h0 + h1 + 12j)
207 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
208 *
209 * (There are many typos or missing words here...)"
210 *
211 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
212 * the base address. The PRM divides QPitch by 4 for compressed formats
213 * because the block height for those formats are 4, and it wants QPitch to
214 * mean the number of memory rows, as opposed to texel rows, between
215 * slices. Since we use texel rows everywhere, we do not need to divide
216 * QPitch by 4.
217 */
218 img->walk_layer_height = params->h0 + params->h1 +
219 ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * img->align_j;
220
221 if (ilo_dev_gen(params->dev) == ILO_GEN(6) && templ->nr_samples > 1 &&
222 img->height0 % 4 == 1)
223 img->walk_layer_height += 4;
224
225 params->max_y += img->walk_layer_height * (num_layers - 1);
226 }
227
228 static void
229 img_init_lods(struct ilo_image *img,
230 struct ilo_image_params *params)
231 {
232 const struct pipe_resource *templ = params->templ;
233 unsigned cur_x, cur_y;
234 unsigned lv;
235
236 cur_x = 0;
237 cur_y = 0;
238 for (lv = 0; lv <= templ->last_level; lv++) {
239 unsigned lod_w, lod_h;
240
241 img_get_slice_size(img, params, lv, &lod_w, &lod_h);
242
243 img->lods[lv].x = cur_x;
244 img->lods[lv].y = cur_y;
245 img->lods[lv].slice_width = lod_w;
246 img->lods[lv].slice_height = lod_h;
247
248 switch (img->walk) {
249 case ILO_IMAGE_WALK_LAYER:
250 /* MIPLAYOUT_BELOW */
251 if (lv == 1)
252 cur_x += lod_w;
253 else
254 cur_y += lod_h;
255 break;
256 case ILO_IMAGE_WALK_LOD:
257 lod_h *= img_get_num_layers(img, params);
258 if (lv == 1)
259 cur_x += lod_w;
260 else
261 cur_y += lod_h;
262
263 /* every LOD begins at tile boundaries */
264 if (templ->last_level > 0) {
265 assert(img->format == PIPE_FORMAT_S8_UINT);
266 cur_x = align(cur_x, 64);
267 cur_y = align(cur_y, 64);
268 }
269 break;
270 case ILO_IMAGE_WALK_3D:
271 {
272 const unsigned num_slices = u_minify(templ->depth0, lv);
273 const unsigned num_slices_per_row = 1 << lv;
274 const unsigned num_rows =
275 (num_slices + num_slices_per_row - 1) / num_slices_per_row;
276
277 lod_w *= num_slices_per_row;
278 lod_h *= num_rows;
279
280 cur_y += lod_h;
281 }
282 break;
283 }
284
285 if (params->max_x < img->lods[lv].x + lod_w)
286 params->max_x = img->lods[lv].x + lod_w;
287 if (params->max_y < img->lods[lv].y + lod_h)
288 params->max_y = img->lods[lv].y + lod_h;
289 }
290
291 if (img->walk == ILO_IMAGE_WALK_LAYER) {
292 params->h0 = img->lods[0].slice_height;
293
294 if (templ->last_level > 0)
295 params->h1 = img->lods[1].slice_height;
296 else
297 img_get_slice_size(img, params, 1, &cur_x, &params->h1);
298 }
299 }
300
301 static void
302 img_init_alignments(struct ilo_image *img,
303 const struct ilo_image_params *params)
304 {
305 const struct pipe_resource *templ = params->templ;
306
307 /*
308 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
309 *
310 * "surface format align_i align_j
311 * YUV 4:2:2 formats 4 *see below
312 * BC1-5 4 4
313 * FXT1 8 4
314 * all other formats 4 *see below"
315 *
316 * "- align_j = 4 for any depth buffer
317 * - align_j = 2 for separate stencil buffer
318 * - align_j = 4 for any render target surface is multisampled (4x)
319 * - align_j = 4 for any render target surface with Surface Vertical
320 * Alignment = VALIGN_4
321 * - align_j = 2 for any render target surface with Surface Vertical
322 * Alignment = VALIGN_2
323 * - align_j = 2 for all other render target surface
324 * - align_j = 2 for any sampling engine surface with Surface Vertical
325 * Alignment = VALIGN_2
326 * - align_j = 4 for any sampling engine surface with Surface Vertical
327 * Alignment = VALIGN_4"
328 *
329 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
330 *
331 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
332 * the Surface Format is 96 bits per element (BPE)."
333 *
334 * They can be rephrased as
335 *
336 * align_i align_j
337 * compressed formats block width block height
338 * PIPE_FORMAT_S8_UINT 4 2
339 * other depth/stencil formats 4 4
340 * 4x multisampled 4 4
341 * bpp 96 4 2
342 * others 4 2 or 4
343 */
344
345 /*
346 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
347 *
348 * "surface defined by surface format align_i align_j
349 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
350 * not D16_UNORM 4 4
351 * 3DSTATE_STENCIL_BUFFER N/A 8 8
352 * SURFACE_STATE BC*, ETC*, EAC* 4 4
353 * FXT1 8 4
354 * all others (set by SURFACE_STATE)"
355 *
356 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
357 *
358 * "- This field (Surface Vertical Aligment) is intended to be set to
359 * VALIGN_4 if the surface was rendered as a depth buffer, for a
360 * multisampled (4x) render target, or for a multisampled (8x)
361 * render target, since these surfaces support only alignment of 4.
362 * - Use of VALIGN_4 for other surfaces is supported, but uses more
363 * memory.
364 * - This field must be set to VALIGN_4 for all tiled Y Render Target
365 * surfaces.
366 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
367 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
368 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
369 * must be set to VALIGN_4."
370 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
371 *
372 * "- This field (Surface Horizontal Aligment) is intended to be set to
373 * HALIGN_8 only if the surface was rendered as a depth buffer with
374 * Z16 format or a stencil buffer, since these surfaces support only
375 * alignment of 8.
376 * - Use of HALIGN_8 for other surfaces is supported, but uses more
377 * memory.
378 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
379 * - This field must be set to HALIGN_8 if the Surface Format is
380 * FXT1."
381 *
382 * They can be rephrased as
383 *
384 * align_i align_j
385 * compressed formats block width block height
386 * PIPE_FORMAT_Z16_UNORM 8 4
387 * PIPE_FORMAT_S8_UINT 8 8
388 * other depth/stencil formats 4 4
389 * 2x or 4x multisampled 4 or 8 4
390 * tiled Y 4 or 8 4 (if rt)
391 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
392 * others 4 or 8 2 or 4
393 */
394
395 if (params->compressed) {
396 /* this happens to be the case */
397 img->align_i = img->block_width;
398 img->align_j = img->block_height;
399 } else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
400 if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) {
401 switch (img->format) {
402 case PIPE_FORMAT_Z16_UNORM:
403 img->align_i = 8;
404 img->align_j = 4;
405 break;
406 case PIPE_FORMAT_S8_UINT:
407 img->align_i = 8;
408 img->align_j = 8;
409 break;
410 default:
411 img->align_i = 4;
412 img->align_j = 4;
413 break;
414 }
415 } else {
416 switch (img->format) {
417 case PIPE_FORMAT_S8_UINT:
418 img->align_i = 4;
419 img->align_j = 2;
420 break;
421 default:
422 img->align_i = 4;
423 img->align_j = 4;
424 break;
425 }
426 }
427 } else {
428 const bool valign_4 =
429 (templ->nr_samples > 1) ||
430 (ilo_dev_gen(params->dev) >= ILO_GEN(8)) ||
431 (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
432 img->tiling == GEN6_TILING_Y &&
433 (templ->bind & PIPE_BIND_RENDER_TARGET));
434
435 if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
436 ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && valign_4)
437 assert(img->format != PIPE_FORMAT_R32G32B32_FLOAT);
438
439 img->align_i = 4;
440 img->align_j = (valign_4) ? 4 : 2;
441 }
442
443 /*
444 * the fact that align i and j are multiples of block width and height
445 * respectively is what makes the size of the bo a multiple of the block
446 * size, slices start at block boundaries, and many of the computations
447 * work.
448 */
449 assert(img->align_i % img->block_width == 0);
450 assert(img->align_j % img->block_height == 0);
451
452 /* make sure align() works */
453 assert(util_is_power_of_two(img->align_i) &&
454 util_is_power_of_two(img->align_j));
455 assert(util_is_power_of_two(img->block_width) &&
456 util_is_power_of_two(img->block_height));
457 }
458
459 static void
460 img_init_tiling(struct ilo_image *img,
461 const struct ilo_image_params *params)
462 {
463 const struct pipe_resource *templ = params->templ;
464 unsigned preferred_tilings = params->valid_tilings;
465
466 /* no fencing nor BLT support */
467 if (preferred_tilings & ~IMAGE_TILING_W)
468 preferred_tilings &= ~IMAGE_TILING_W;
469
470 if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
471 /*
472 * heuristically set a minimum width/height for enabling tiling
473 */
474 if (img->width0 < 64 && (preferred_tilings & ~IMAGE_TILING_X))
475 preferred_tilings &= ~IMAGE_TILING_X;
476
477 if ((img->width0 < 32 || img->height0 < 16) &&
478 (img->width0 < 16 || img->height0 < 32) &&
479 (preferred_tilings & ~IMAGE_TILING_Y))
480 preferred_tilings &= ~IMAGE_TILING_Y;
481 } else {
482 /* force linear if we are not sure where the texture is bound to */
483 if (preferred_tilings & IMAGE_TILING_NONE)
484 preferred_tilings &= IMAGE_TILING_NONE;
485 }
486
487 /* prefer tiled over linear */
488 if (preferred_tilings & IMAGE_TILING_Y)
489 img->tiling = GEN6_TILING_Y;
490 else if (preferred_tilings & IMAGE_TILING_X)
491 img->tiling = GEN6_TILING_X;
492 else if (preferred_tilings & IMAGE_TILING_W)
493 img->tiling = GEN8_TILING_W;
494 else
495 img->tiling = GEN6_TILING_NONE;
496 }
497
498 static void
499 img_init_walk_gen7(struct ilo_image *img,
500 const struct ilo_image_params *params)
501 {
502 const struct pipe_resource *templ = params->templ;
503
504 /*
505 * It is not explicitly states, but render targets are expected to be
506 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
507 * to be IMS (samples interleaved).
508 *
509 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
510 */
511 if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
512 /*
513 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
514 *
515 * "note that the depth buffer and stencil buffer have an implied
516 * value of ARYSPC_FULL"
517 */
518 img->walk = (templ->target == PIPE_TEXTURE_3D) ?
519 ILO_IMAGE_WALK_3D : ILO_IMAGE_WALK_LAYER;
520
521 img->interleaved_samples = true;
522 } else {
523 /*
524 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
525 *
526 * "If Multisampled Surface Storage Format is MSFMT_MSS and Number
527 * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
528 * Array Spacing) must be set to ARYSPC_LOD0."
529 *
530 * As multisampled resources are not mipmapped, we never use
531 * ARYSPC_FULL for them.
532 */
533 if (templ->nr_samples > 1)
534 assert(templ->last_level == 0);
535
536 img->walk =
537 (templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
538 (templ->last_level > 0) ? ILO_IMAGE_WALK_LAYER :
539 ILO_IMAGE_WALK_LOD;
540
541 img->interleaved_samples = false;
542 }
543 }
544
545 static void
546 img_init_walk_gen6(struct ilo_image *img,
547 const struct ilo_image_params *params)
548 {
549 /*
550 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
551 *
552 * "The separate stencil buffer does not support mip mapping, thus the
553 * storage for LODs other than LOD 0 is not needed. The following
554 * QPitch equation applies only to the separate stencil buffer:
555 *
556 * QPitch = h_0"
557 *
558 * GEN6 does not support compact spacing otherwise.
559 */
560 img->walk =
561 (params->templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
562 (img->format == PIPE_FORMAT_S8_UINT) ? ILO_IMAGE_WALK_LOD :
563 ILO_IMAGE_WALK_LAYER;
564
565 /* GEN6 supports only interleaved samples */
566 img->interleaved_samples = true;
567 }
568
569 static void
570 img_init_walk(struct ilo_image *img,
571 const struct ilo_image_params *params)
572 {
573 if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
574 img_init_walk_gen7(img, params);
575 else
576 img_init_walk_gen6(img, params);
577 }
578
579 static unsigned
580 img_get_valid_tilings(const struct ilo_image *img,
581 const struct ilo_image_params *params)
582 {
583 const struct pipe_resource *templ = params->templ;
584 const enum pipe_format format = img->format;
585 unsigned valid_tilings = params->valid_tilings;
586
587 /*
588 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
589 *
590 * "Display/Overlay Y-Major not supported.
591 * X-Major required for Async Flips"
592 */
593 if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
594 valid_tilings &= IMAGE_TILING_X;
595
596 /*
597 * From the Sandy Bridge PRM, volume 3 part 2, page 158:
598 *
599 * "The cursor surface address must be 4K byte aligned. The cursor must
600 * be in linear memory, it cannot be tiled."
601 */
602 if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)))
603 valid_tilings &= IMAGE_TILING_NONE;
604
605 /*
606 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
607 *
608 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
609 * Depth Buffer is not supported."
610 *
611 * "The Depth Buffer, if tiled, must use Y-Major tiling."
612 *
613 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
614 *
615 * "W-Major Tile Format is used for separate stencil."
616 */
617 if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
618 switch (format) {
619 case PIPE_FORMAT_S8_UINT:
620 valid_tilings &= IMAGE_TILING_W;
621 break;
622 default:
623 valid_tilings &= IMAGE_TILING_Y;
624 break;
625 }
626 }
627
628 if (templ->bind & PIPE_BIND_RENDER_TARGET) {
629 /*
630 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
631 *
632 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
633 * either TileX or Linear."
634 *
635 * From the Haswell PRM, volume 5, page 32:
636 *
637 * "NOTE: 128 BPP format color buffer (render target) supports
638 * Linear, TiledX and TiledY."
639 */
640 if (ilo_dev_gen(params->dev) < ILO_GEN(7.5) && img->block_size == 16)
641 valid_tilings &= ~IMAGE_TILING_Y;
642
643 /*
644 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
645 *
646 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
647 * for all tiled Y Render Target surfaces."
648 *
649 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
650 */
651 if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
652 ilo_dev_gen(params->dev) <= ILO_GEN(7.5) &&
653 img->format == PIPE_FORMAT_R32G32B32_FLOAT)
654 valid_tilings &= ~IMAGE_TILING_Y;
655
656 valid_tilings &= ~IMAGE_TILING_W;
657 }
658
659 if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
660 if (ilo_dev_gen(params->dev) < ILO_GEN(8))
661 valid_tilings &= ~IMAGE_TILING_W;
662 }
663
664 /* no conflicting binding flags */
665 assert(valid_tilings);
666
667 return valid_tilings;
668 }
669
670 static void
671 img_init_size_and_format(struct ilo_image *img,
672 struct ilo_image_params *params)
673 {
674 const struct pipe_resource *templ = params->templ;
675 enum pipe_format format = templ->format;
676 bool require_separate_stencil = false;
677
678 img->target = templ->target;
679 img->width0 = templ->width0;
680 img->height0 = templ->height0;
681 img->depth0 = templ->depth0;
682 img->array_size = templ->array_size;
683 img->level_count = templ->last_level + 1;
684 img->sample_count = (templ->nr_samples) ? templ->nr_samples : 1;
685
686 /*
687 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
688 *
689 * "This field (Separate Stencil Buffer Enable) must be set to the same
690 * value (enabled or disabled) as Hierarchical Depth Buffer Enable."
691 *
692 * GEN7+ requires separate stencil buffers.
693 */
694 if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
695 if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
696 require_separate_stencil = true;
697 else
698 require_separate_stencil = (img->aux.type == ILO_IMAGE_AUX_HIZ);
699 }
700
701 switch (format) {
702 case PIPE_FORMAT_ETC1_RGB8:
703 format = PIPE_FORMAT_R8G8B8X8_UNORM;
704 break;
705 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
706 if (require_separate_stencil) {
707 format = PIPE_FORMAT_Z24X8_UNORM;
708 img->separate_stencil = true;
709 }
710 break;
711 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
712 if (require_separate_stencil) {
713 format = PIPE_FORMAT_Z32_FLOAT;
714 img->separate_stencil = true;
715 }
716 break;
717 default:
718 break;
719 }
720
721 img->format = format;
722 img->block_width = util_format_get_blockwidth(format);
723 img->block_height = util_format_get_blockheight(format);
724 img->block_size = util_format_get_blocksize(format);
725
726 params->valid_tilings = img_get_valid_tilings(img, params);
727 params->compressed = util_format_is_compressed(img->format);
728 }
729
730 static bool
731 img_want_mcs(const struct ilo_image *img,
732 const struct ilo_image_params *params)
733 {
734 const struct pipe_resource *templ = params->templ;
735 bool want_mcs = false;
736
737 /* MCS is for RT on GEN7+ */
738 if (ilo_dev_gen(params->dev) < ILO_GEN(7))
739 return false;
740
741 if (templ->target != PIPE_TEXTURE_2D ||
742 !(templ->bind & PIPE_BIND_RENDER_TARGET))
743 return false;
744
745 /*
746 * From the Ivy Bridge PRM, volume 4 part 1, page 77:
747 *
748 * "For Render Target and Sampling Engine Surfaces:If the surface is
749 * multisampled (Number of Multisamples any value other than
750 * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
751 *
752 * "This field must be set to 0 for all SINT MSRTs when all RT channels
753 * are not written"
754 */
755 if (templ->nr_samples > 1 && !util_format_is_pure_sint(templ->format)) {
756 want_mcs = true;
757 } else if (templ->nr_samples <= 1) {
758 /*
759 * From the Ivy Bridge PRM, volume 2 part 1, page 326:
760 *
761 * "When MCS is buffer is used for color clear of non-multisampler
762 * render target, the following restrictions apply.
763 * - Support is limited to tiled render targets.
764 * - Support is for non-mip-mapped and non-array surface types
765 * only.
766 * - Clear is supported only on the full RT; i.e., no partial clear
767 * or overlapping clears.
768 * - MCS buffer for non-MSRT is supported only for RT formats
769 * 32bpp, 64bpp and 128bpp.
770 * ..."
771 */
772 if (img->tiling != GEN6_TILING_NONE &&
773 templ->last_level == 0 && templ->array_size == 1) {
774 switch (img->block_size) {
775 case 4:
776 case 8:
777 case 16:
778 want_mcs = true;
779 break;
780 default:
781 break;
782 }
783 }
784 }
785
786 return want_mcs;
787 }
788
789 static bool
790 img_want_hiz(const struct ilo_image *img,
791 const struct ilo_image_params *params)
792 {
793 const struct pipe_resource *templ = params->templ;
794 const struct util_format_description *desc =
795 util_format_description(templ->format);
796
797 if (ilo_debug & ILO_DEBUG_NOHIZ)
798 return false;
799
800 /* we want 8x4 aligned levels */
801 if (templ->target == PIPE_TEXTURE_1D)
802 return false;
803
804 if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL))
805 return false;
806
807 if (!util_format_has_depth(desc))
808 return false;
809
810 /* no point in having HiZ */
811 if (templ->usage == PIPE_USAGE_STAGING)
812 return false;
813
814 /*
815 * As can be seen in img_calculate_hiz_size(), HiZ may not be enabled
816 * for every level. This is generally fine except on GEN6, where HiZ and
817 * separate stencil are enabled and disabled at the same time. When the
818 * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
819 * can result in incompatible formats.
820 */
821 if (ilo_dev_gen(params->dev) == ILO_GEN(6) &&
822 templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
823 templ->last_level)
824 return false;
825
826 return true;
827 }
828
829 static void
830 img_init_aux(struct ilo_image *img,
831 const struct ilo_image_params *params)
832 {
833 if (img_want_hiz(img, params))
834 img->aux.type = ILO_IMAGE_AUX_HIZ;
835 else if (img_want_mcs(img, params))
836 img->aux.type = ILO_IMAGE_AUX_MCS;
837 }
838
839 static void
840 img_align(struct ilo_image *img, struct ilo_image_params *params)
841 {
842 const struct pipe_resource *templ = params->templ;
843 int align_w = 1, align_h = 1, pad_h = 0;
844
845 /*
846 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
847 *
848 * "To determine the necessary padding on the bottom and right side of
849 * the surface, refer to the table in Section 7.18.3.4 for the i and j
850 * parameters for the surface format in use. The surface must then be
851 * extended to the next multiple of the alignment unit size in each
852 * dimension, and all texels contained in this extended surface must
853 * have valid GTT entries."
854 *
855 * "For cube surfaces, an additional two rows of padding are required
856 * at the bottom of the surface. This must be ensured regardless of
857 * whether the surface is stored tiled or linear. This is due to the
858 * potential rotation of cache line orientation from memory to cache."
859 *
860 * "For compressed textures (BC* and FXT1 surface formats), padding at
861 * the bottom of the surface is to an even compressed row, which is
862 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
863 * purposes, these surfaces behave as if j = 8 only for surface
864 * padding purposes. The value of 4 for j still applies for mip level
865 * alignment and QPitch calculation."
866 */
867 if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
868 align_w = MAX2(align_w, img->align_i);
869 align_h = MAX2(align_h, img->align_j);
870
871 if (templ->target == PIPE_TEXTURE_CUBE)
872 pad_h += 2;
873
874 if (params->compressed)
875 align_h = MAX2(align_h, img->align_j * 2);
876 }
877
878 /*
879 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
880 *
881 * "If the surface contains an odd number of rows of data, a final row
882 * below the surface must be allocated."
883 */
884 if (templ->bind & PIPE_BIND_RENDER_TARGET)
885 align_h = MAX2(align_h, 2);
886
887 /*
888 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. Pad to allow HiZ
889 * for unaligned non-mipmapped and non-array images.
890 */
891 if (img->aux.type == ILO_IMAGE_AUX_HIZ &&
892 templ->last_level == 0 &&
893 templ->array_size == 1 &&
894 templ->depth0 == 1) {
895 align_w = MAX2(align_w, 8);
896 align_h = MAX2(align_h, 4);
897 }
898
899 params->max_x = align(params->max_x, align_w);
900 params->max_y = align(params->max_y + pad_h, align_h);
901 }
902
903 /* note that this may force the texture to be linear */
904 static void
905 img_calculate_bo_size(struct ilo_image *img,
906 const struct ilo_image_params *params)
907 {
908 assert(params->max_x % img->block_width == 0);
909 assert(params->max_y % img->block_height == 0);
910 assert(img->walk_layer_height % img->block_height == 0);
911
912 img->bo_stride =
913 (params->max_x / img->block_width) * img->block_size;
914 img->bo_height = params->max_y / img->block_height;
915
916 while (true) {
917 unsigned w = img->bo_stride, h = img->bo_height;
918 unsigned align_w, align_h;
919
920 /*
921 * From the Haswell PRM, volume 5, page 163:
922 *
923 * "For linear surfaces, additional padding of 64 bytes is required
924 * at the bottom of the surface. This is in addition to the padding
925 * required above."
926 */
927 if (ilo_dev_gen(params->dev) >= ILO_GEN(7.5) &&
928 (params->templ->bind & PIPE_BIND_SAMPLER_VIEW) &&
929 img->tiling == GEN6_TILING_NONE)
930 h += (64 + img->bo_stride - 1) / img->bo_stride;
931
932 /*
933 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
934 *
935 * "- For linear render target surfaces, the pitch must be a
936 * multiple of the element size for non-YUV surface formats.
937 * Pitch must be a multiple of 2 * element size for YUV surface
938 * formats.
939 * - For other linear surfaces, the pitch can be any multiple of
940 * bytes.
941 * - For tiled surfaces, the pitch must be a multiple of the tile
942 * width."
943 *
944 * Different requirements may exist when the bo is used in different
945 * places, but our alignments here should be good enough that we do not
946 * need to check params->templ->bind.
947 */
948 switch (img->tiling) {
949 case GEN6_TILING_X:
950 align_w = 512;
951 align_h = 8;
952 break;
953 case GEN6_TILING_Y:
954 align_w = 128;
955 align_h = 32;
956 break;
957 case GEN8_TILING_W:
958 /*
959 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
960 *
961 * "A 4KB tile is subdivided into 8-high by 8-wide array of
962 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
963 * bytes."
964 */
965 align_w = 64;
966 align_h = 64;
967 break;
968 default:
969 assert(img->tiling == GEN6_TILING_NONE);
970 /* some good enough values */
971 align_w = 64;
972 align_h = 2;
973 break;
974 }
975
976 w = align(w, align_w);
977 h = align(h, align_h);
978
979 /* make sure the bo is mappable */
980 if (img->tiling != GEN6_TILING_NONE) {
981 /*
982 * Usually only the first 256MB of the GTT is mappable.
983 *
984 * See also how intel_context::max_gtt_map_object_size is calculated.
985 */
986 const size_t mappable_gtt_size = 256 * 1024 * 1024;
987
988 /*
989 * Be conservative. We may be able to switch from VALIGN_4 to
990 * VALIGN_2 if the image was Y-tiled, but let's keep it simple.
991 */
992 if (mappable_gtt_size / w / 4 < h) {
993 if (params->valid_tilings & IMAGE_TILING_NONE) {
994 img->tiling = GEN6_TILING_NONE;
995 /* MCS support for non-MSRTs is limited to tiled RTs */
996 if (img->aux.type == ILO_IMAGE_AUX_MCS &&
997 params->templ->nr_samples <= 1)
998 img->aux.type = ILO_IMAGE_AUX_NONE;
999
1000 continue;
1001 } else {
1002 ilo_warn("cannot force texture to be linear\n");
1003 }
1004 }
1005 }
1006
1007 img->bo_stride = w;
1008 img->bo_height = h;
1009 break;
1010 }
1011 }
1012
1013 static void
1014 img_calculate_hiz_size(struct ilo_image *img,
1015 const struct ilo_image_params *params)
1016 {
1017 const struct pipe_resource *templ = params->templ;
1018 const unsigned hz_align_j = 8;
1019 enum ilo_image_walk_type hz_walk;
1020 unsigned hz_width, hz_height, lv;
1021 unsigned hz_clear_w, hz_clear_h;
1022
1023 assert(img->aux.type == ILO_IMAGE_AUX_HIZ);
1024
1025 assert(img->walk == ILO_IMAGE_WALK_LAYER ||
1026 img->walk == ILO_IMAGE_WALK_3D);
1027
1028 /*
1029 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
1030 *
1031 * "The hierarchical depth buffer does not support the LOD field, it is
1032 * assumed by hardware to be zero. A separate hierarachical depth
1033 * buffer is required for each LOD used, and the corresponding
1034 * buffer's state delivered to hardware each time a new depth buffer
1035 * state with modified LOD is delivered."
1036 *
1037 * We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD.
1038 */
1039 if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
1040 hz_walk = img->walk;
1041 else
1042 hz_walk = ILO_IMAGE_WALK_LOD;
1043
1044 /*
1045 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1046 * PRM, volume 2 part 1, page 312-313.
1047 *
1048 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1049 * memory row.
1050 */
1051 switch (hz_walk) {
1052 case ILO_IMAGE_WALK_LAYER:
1053 {
1054 const unsigned h0 = align(params->h0, hz_align_j);
1055 const unsigned h1 = align(params->h1, hz_align_j);
1056 const unsigned htail =
1057 ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
1058 const unsigned hz_qpitch = h0 + h1 + htail;
1059
1060 hz_width = align(img->lods[0].slice_width, 16);
1061
1062 hz_height = hz_qpitch * templ->array_size / 2;
1063 if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
1064 hz_height = align(hz_height, 8);
1065
1066 img->aux.walk_layer_height = hz_qpitch;
1067 }
1068 break;
1069 case ILO_IMAGE_WALK_LOD:
1070 {
1071 unsigned lod_tx[PIPE_MAX_TEXTURE_LEVELS];
1072 unsigned lod_ty[PIPE_MAX_TEXTURE_LEVELS];
1073 unsigned cur_tx, cur_ty;
1074
1075 /* figure out the tile offsets of LODs */
1076 hz_width = 0;
1077 hz_height = 0;
1078 cur_tx = 0;
1079 cur_ty = 0;
1080 for (lv = 0; lv <= templ->last_level; lv++) {
1081 unsigned tw, th;
1082
1083 lod_tx[lv] = cur_tx;
1084 lod_ty[lv] = cur_ty;
1085
1086 tw = align(img->lods[lv].slice_width, 16);
1087 th = align(img->lods[lv].slice_height, hz_align_j) *
1088 templ->array_size / 2;
1089 /* convert to Y-tiles */
1090 tw = align(tw, 128) / 128;
1091 th = align(th, 32) / 32;
1092
1093 if (hz_width < cur_tx + tw)
1094 hz_width = cur_tx + tw;
1095 if (hz_height < cur_ty + th)
1096 hz_height = cur_ty + th;
1097
1098 if (lv == 1)
1099 cur_tx += tw;
1100 else
1101 cur_ty += th;
1102 }
1103
1104 /* convert tile offsets to memory offsets */
1105 for (lv = 0; lv <= templ->last_level; lv++) {
1106 img->aux.walk_lod_offsets[lv] =
1107 (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
1108 }
1109 hz_width *= 128;
1110 hz_height *= 32;
1111 }
1112 break;
1113 case ILO_IMAGE_WALK_3D:
1114 hz_width = align(img->lods[0].slice_width, 16);
1115
1116 hz_height = 0;
1117 for (lv = 0; lv <= templ->last_level; lv++) {
1118 const unsigned h = align(img->lods[lv].slice_height, hz_align_j);
1119 /* according to the formula, slices are packed together vertically */
1120 hz_height += h * u_minify(templ->depth0, lv);
1121 }
1122 hz_height /= 2;
1123 break;
1124 default:
1125 assert(!"unknown HiZ walk");
1126 hz_width = 0;
1127 hz_height = 0;
1128 break;
1129 }
1130
1131 /*
1132 * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
1133 * Experiments on Haswell show that aligning the RECTLIST primitive and
1134 * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be
1135 * aligned.
1136 */
1137 hz_clear_w = 8;
1138 hz_clear_h = 4;
1139 switch (templ->nr_samples) {
1140 case 0:
1141 case 1:
1142 default:
1143 break;
1144 case 2:
1145 hz_clear_w /= 2;
1146 break;
1147 case 4:
1148 hz_clear_w /= 2;
1149 hz_clear_h /= 2;
1150 break;
1151 case 8:
1152 hz_clear_w /= 4;
1153 hz_clear_h /= 2;
1154 break;
1155 case 16:
1156 hz_clear_w /= 4;
1157 hz_clear_h /= 4;
1158 break;
1159 }
1160
1161 for (lv = 0; lv <= templ->last_level; lv++) {
1162 if (u_minify(img->width0, lv) % hz_clear_w ||
1163 u_minify(img->height0, lv) % hz_clear_h)
1164 break;
1165 img->aux.enables |= 1 << lv;
1166 }
1167
1168 /* we padded to allow this in img_align() */
1169 if (templ->last_level == 0 && templ->array_size == 1 && templ->depth0 == 1)
1170 img->aux.enables |= 0x1;
1171
1172 /* align to Y-tile */
1173 img->aux.bo_stride = align(hz_width, 128);
1174 img->aux.bo_height = align(hz_height, 32);
1175 }
1176
1177 static void
1178 img_calculate_mcs_size(struct ilo_image *img,
1179 const struct ilo_image_params *params)
1180 {
1181 const struct pipe_resource *templ = params->templ;
1182 int mcs_width, mcs_height, mcs_cpp;
1183 int downscale_x, downscale_y;
1184
1185 assert(img->aux.type == ILO_IMAGE_AUX_MCS);
1186
1187 if (templ->nr_samples > 1) {
1188 /*
1189 * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1190 * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
1191 * need of scale down could be that the clear rectangle is used to clear
1192 * the MCS instead of the RT.
1193 *
1194 * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
1195 * 2x2 factor could come from that the hardware writes 128 bits (an
1196 * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1197 * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
1198 * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1199 * pixel block in the RT.
1200 */
1201 switch (templ->nr_samples) {
1202 case 2:
1203 case 4:
1204 downscale_x = 8;
1205 downscale_y = 2;
1206 mcs_cpp = 1;
1207 break;
1208 case 8:
1209 downscale_x = 2;
1210 downscale_y = 2;
1211 mcs_cpp = 4;
1212 break;
1213 case 16:
1214 downscale_x = 2;
1215 downscale_y = 1;
1216 mcs_cpp = 8;
1217 break;
1218 default:
1219 assert(!"unsupported sample count");
1220 return;
1221 break;
1222 }
1223
1224 /*
1225 * It also appears that the 2x2 subspans generated by the scaled-down
1226 * clear rectangle cannot be masked. The scale-down clear rectangle
1227 * thus must be aligned to 2x2, and we need to pad.
1228 */
1229 mcs_width = align(img->width0, downscale_x * 2);
1230 mcs_height = align(img->height0, downscale_y * 2);
1231 } else {
1232 /*
1233 * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1234 *
1235 * " Pixels Lines
1236 * TiledY RT CL
1237 * bpp
1238 * 32 8 4
1239 * 64 4 4
1240 * 128 2 4
1241 *
1242 * TiledX RT CL
1243 * bpp
1244 * 32 16 2
1245 * 64 8 2
1246 * 128 4 2"
1247 *
1248 * This table and the two following tables define the RT alignments, the
1249 * clear rectangle alignments, and the clear rectangle scale factors.
1250 * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1251 * that the clear rectangle alignments are 16x32 blocks, and the clear
1252 * rectangle scale factors are 8x16 blocks.
1253 *
1254 * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1255 * RT. Similar to the MSAA cases, we can argue that an OWord maps to
1256 * 8x16 blocks.
1257 *
1258 * One problem with this reasoning is that a Y-tile in MCS has 8x32
1259 * OWords and maps to 64x512 128-byte blocks. This differs from i965,
1260 * which says that a Y-tile maps to 128x256 blocks (\see
1261 * intel_get_non_msrt_mcs_alignment). It does not really change
1262 * anything except for the size of the allocated MCS. Let's see if we
1263 * hit out-of-bound access.
1264 */
1265 switch (img->tiling) {
1266 case GEN6_TILING_X:
1267 downscale_x = 64 / img->block_size;
1268 downscale_y = 2;
1269 break;
1270 case GEN6_TILING_Y:
1271 downscale_x = 32 / img->block_size;
1272 downscale_y = 4;
1273 break;
1274 default:
1275 assert(!"unsupported tiling mode");
1276 return;
1277 break;
1278 }
1279
1280 downscale_x *= 8;
1281 downscale_y *= 16;
1282
1283 /*
1284 * From the Haswell PRM, volume 7, page 652:
1285 *
1286 * "Clear rectangle must be aligned to two times the number of
1287 * pixels in the table shown below due to 16X16 hashing across the
1288 * slice."
1289 *
1290 * The scaled-down clear rectangle must be aligned to 4x4 instead of
1291 * 2x2, and we need to pad.
1292 */
1293 mcs_width = align(img->width0, downscale_x * 4) / downscale_x;
1294 mcs_height = align(img->height0, downscale_y * 4) / downscale_y;
1295 mcs_cpp = 16; /* an OWord */
1296 }
1297
1298 img->aux.enables = (1 << (templ->last_level + 1)) - 1;
1299 /* align to Y-tile */
1300 img->aux.bo_stride = align(mcs_width * mcs_cpp, 128);
1301 img->aux.bo_height = align(mcs_height, 32);
1302 }
1303
1304 static void
1305 img_init(struct ilo_image *img,
1306 struct ilo_image_params *params)
1307 {
1308 /* there are hard dependencies between every function here */
1309
1310 img_init_aux(img, params);
1311 img_init_size_and_format(img, params);
1312 img_init_walk(img, params);
1313 img_init_tiling(img, params);
1314 img_init_alignments(img, params);
1315 img_init_lods(img, params);
1316 img_init_layer_height(img, params);
1317
1318 img_align(img, params);
1319 img_calculate_bo_size(img, params);
1320
1321 img->scanout = (params->templ->bind & PIPE_BIND_SCANOUT);
1322
1323 switch (img->aux.type) {
1324 case ILO_IMAGE_AUX_HIZ:
1325 img_calculate_hiz_size(img, params);
1326 break;
1327 case ILO_IMAGE_AUX_MCS:
1328 img_calculate_mcs_size(img, params);
1329 break;
1330 default:
1331 break;
1332 }
1333 }
1334
1335 /**
1336 * The texutre is for transfer only. We can define our own layout to save
1337 * space.
1338 */
1339 static void
1340 img_init_for_transfer(struct ilo_image *img,
1341 const struct ilo_dev *dev,
1342 const struct pipe_resource *templ)
1343 {
1344 const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ?
1345 templ->depth0 : templ->array_size;
1346 unsigned layer_width, layer_height;
1347
1348 assert(templ->last_level == 0);
1349 assert(templ->nr_samples <= 1);
1350
1351 img->aux.type = ILO_IMAGE_AUX_NONE;
1352
1353 img->target = templ->target;
1354 img->width0 = templ->width0;
1355 img->height0 = templ->height0;
1356 img->depth0 = templ->depth0;
1357 img->array_size = templ->array_size;
1358 img->level_count = 1;
1359 img->sample_count = 1;
1360
1361 img->format = templ->format;
1362 img->block_width = util_format_get_blockwidth(templ->format);
1363 img->block_height = util_format_get_blockheight(templ->format);
1364 img->block_size = util_format_get_blocksize(templ->format);
1365
1366 img->walk = ILO_IMAGE_WALK_LOD;
1367
1368 img->tiling = GEN6_TILING_NONE;
1369
1370 img->align_i = img->block_width;
1371 img->align_j = img->block_height;
1372
1373 assert(util_is_power_of_two(img->block_width) &&
1374 util_is_power_of_two(img->block_height));
1375
1376 /* use packed layout */
1377 layer_width = align(templ->width0, img->align_i);
1378 layer_height = align(templ->height0, img->align_j);
1379
1380 img->lods[0].slice_width = layer_width;
1381 img->lods[0].slice_height = layer_height;
1382
1383 img->bo_stride = (layer_width / img->block_width) * img->block_size;
1384 img->bo_stride = align(img->bo_stride, 64);
1385
1386 img->bo_height = (layer_height / img->block_height) * num_layers;
1387 }
1388
1389 /**
1390 * Initialize the image. Callers should zero-initialize \p img first.
1391 */
1392 void ilo_image_init(struct ilo_image *img,
1393 const struct ilo_dev *dev,
1394 const struct pipe_resource *templ)
1395 {
1396 struct ilo_image_params params;
1397 bool transfer_only;
1398
1399 assert(ilo_is_zeroed(img, sizeof(*img)));
1400
1401 /* use transfer layout when the texture is never bound to GPU */
1402 transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE |
1403 PIPE_BIND_TRANSFER_READ));
1404 if (transfer_only && templ->last_level == 0 && templ->nr_samples <= 1) {
1405 img_init_for_transfer(img, dev, templ);
1406 return;
1407 }
1408
1409 memset(&params, 0, sizeof(params));
1410 params.dev = dev;
1411 params.templ = templ;
1412 params.valid_tilings = IMAGE_TILING_ALL;
1413
1414 img_init(img, &params);
1415 }
1416
1417 bool
1418 ilo_image_init_for_imported(struct ilo_image *img,
1419 const struct ilo_dev *dev,
1420 const struct pipe_resource *templ,
1421 enum gen_surface_tiling tiling,
1422 unsigned bo_stride)
1423 {
1424 struct ilo_image_params params;
1425
1426 assert(ilo_is_zeroed(img, sizeof(*img)));
1427
1428 if ((tiling == GEN6_TILING_X && bo_stride % 512) ||
1429 (tiling == GEN6_TILING_Y && bo_stride % 128) ||
1430 (tiling == GEN8_TILING_W && bo_stride % 64))
1431 return false;
1432
1433 memset(&params, 0, sizeof(params));
1434 params.dev = dev;
1435 params.templ = templ;
1436 params.valid_tilings = 1 << tiling;
1437
1438 img_init(img, &params);
1439
1440 assert(img->tiling == tiling);
1441 if (img->bo_stride > bo_stride)
1442 return false;
1443
1444 img->bo_stride = bo_stride;
1445
1446 /* assume imported RTs are also scanouts */
1447 if (!img->scanout)
1448 img->scanout = (templ->bind & PIPE_BIND_RENDER_TARGET);
1449
1450 return true;
1451 }
1452
1453 bool
1454 ilo_image_disable_aux(struct ilo_image *img, const struct ilo_dev *dev)
1455 {
1456 /* HiZ is required for separate stencil on Gen6 */
1457 if (ilo_dev_gen(dev) == ILO_GEN(6) &&
1458 img->aux.type == ILO_IMAGE_AUX_HIZ &&
1459 img->separate_stencil)
1460 return false;
1461
1462 /* MCS is required for multisample images */
1463 if (img->aux.type == ILO_IMAGE_AUX_MCS &&
1464 img->sample_count > 1)
1465 return false;
1466
1467 img->aux.enables = 0x0;
1468
1469 return true;
1470 }