9ec6792146f38913e59015e13dcc728984434a72
[mesa.git] / src / gallium / drivers / ilo / core / ilo_image.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2014 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "ilo_debug.h"
29 #include "ilo_image.h"
30
31 enum {
32 IMAGE_TILING_NONE = 1 << GEN6_TILING_NONE,
33 IMAGE_TILING_X = 1 << GEN6_TILING_X,
34 IMAGE_TILING_Y = 1 << GEN6_TILING_Y,
35 IMAGE_TILING_W = 1 << GEN8_TILING_W,
36
37 IMAGE_TILING_ALL = (IMAGE_TILING_NONE |
38 IMAGE_TILING_X |
39 IMAGE_TILING_Y |
40 IMAGE_TILING_W)
41 };
42
/*
 * Working state handed from one img_*() helper to the next while the layout
 * of an image is being computed.
 */
struct ilo_image_params {
   const struct ilo_dev *dev;
   const struct ilo_image_info *info;

   /* mask of IMAGE_TILING_x flags */
   unsigned valid_tilings;

   /* result of util_format_is_compressed() for the image format */
   bool compressed;

   /* padded slice heights of LOD 0 and LOD 1 (ILO_IMAGE_WALK_LAYER only) */
   unsigned h0;
   unsigned h1;

   /* right and bottom extents of everything placed so far */
   unsigned max_x;
   unsigned max_y;
};
53
54 static void
55 img_get_slice_size(const struct ilo_image *img,
56 const struct ilo_image_params *params,
57 unsigned level, unsigned *width, unsigned *height)
58 {
59 const struct ilo_image_info *info = params->info;
60 unsigned w, h;
61
62 w = u_minify(img->width0, level);
63 h = u_minify(img->height0, level);
64
65 /*
66 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
67 *
68 * "The dimensions of the mip maps are first determined by applying the
69 * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
70 * if necessary, they are padded out to compression block boundaries."
71 */
72 w = align(w, img->block_width);
73 h = align(h, img->block_height);
74
75 /*
76 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
77 *
78 * "If the surface is multisampled (4x), these values must be adjusted
79 * as follows before proceeding:
80 *
81 * W_L = ceiling(W_L / 2) * 4
82 * H_L = ceiling(H_L / 2) * 4"
83 *
84 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
85 *
86 * "If the surface is multisampled and it is a depth or stencil surface
87 * or Multisampled Surface StorageFormat in SURFACE_STATE is
88 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
89 * proceeding:
90 *
91 * #samples W_L = H_L =
92 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
93 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
94 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
95 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
96 *
97 * For interleaved samples (4x), where pixels
98 *
99 * (x, y ) (x+1, y )
100 * (x, y+1) (x+1, y+1)
101 *
102 * would be is occupied by
103 *
104 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
105 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
106 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
107 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
108 *
109 * Thus the need to
110 *
111 * w = align(w, 2) * 2;
112 * y = align(y, 2) * 2;
113 */
114 if (img->interleaved_samples) {
115 switch (info->sample_count) {
116 case 1:
117 break;
118 case 2:
119 w = align(w, 2) * 2;
120 break;
121 case 4:
122 w = align(w, 2) * 2;
123 h = align(h, 2) * 2;
124 break;
125 case 8:
126 w = align(w, 2) * 4;
127 h = align(h, 2) * 2;
128 break;
129 case 16:
130 w = align(w, 2) * 4;
131 h = align(h, 2) * 4;
132 break;
133 default:
134 assert(!"unsupported sample count");
135 break;
136 }
137 }
138
139 /*
140 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
141 *
142 * "For separate stencil buffer, the width must be mutiplied by 2 and
143 * height divided by 2..."
144 *
145 * To make things easier (for transfer), we will just double the stencil
146 * stride in 3DSTATE_STENCIL_BUFFER.
147 */
148 w = align(w, img->align_i);
149 h = align(h, img->align_j);
150
151 *width = w;
152 *height = h;
153 }
154
155 static unsigned
156 img_get_num_layers(const struct ilo_image *img,
157 const struct ilo_image_params *params)
158 {
159 const struct ilo_image_info *info = params->info;
160 unsigned num_layers = info->array_size;
161
162 /* samples of the same index are stored in a layer */
163 if (info->sample_count > 1 && !img->interleaved_samples)
164 num_layers *= info->sample_count;
165
166 return num_layers;
167 }
168
169 static void
170 img_init_layer_height(struct ilo_image *img,
171 struct ilo_image_params *params)
172 {
173 const struct ilo_image_info *info = params->info;
174 unsigned num_layers;
175
176 if (img->walk != ILO_IMAGE_WALK_LAYER)
177 return;
178
179 num_layers = img_get_num_layers(img, params);
180 if (num_layers <= 1)
181 return;
182
183 /*
184 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
185 *
186 * "The following equation is used for surface formats other than
187 * compressed textures:
188 *
189 * QPitch = (h0 + h1 + 11j)"
190 *
191 * "The equation for compressed textures (BC* and FXT1 surface formats)
192 * follows:
193 *
194 * QPitch = (h0 + h1 + 11j) / 4"
195 *
196 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
197 * value calculated in the equation above, for every other odd Surface
198 * Height starting from 1 i.e. 1,5,9,13"
199 *
200 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
201 *
202 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
203 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
204 *
205 * QPitch = (h0 + h1 + 12j)
206 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
207 *
208 * (There are many typos or missing words here...)"
209 *
210 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
211 * the base address. The PRM divides QPitch by 4 for compressed formats
212 * because the block height for those formats are 4, and it wants QPitch to
213 * mean the number of memory rows, as opposed to texel rows, between
214 * slices. Since we use texel rows everywhere, we do not need to divide
215 * QPitch by 4.
216 */
217 img->walk_layer_height = params->h0 + params->h1 +
218 ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * img->align_j;
219
220 if (ilo_dev_gen(params->dev) == ILO_GEN(6) && info->sample_count > 1 &&
221 img->height0 % 4 == 1)
222 img->walk_layer_height += 4;
223
224 params->max_y += img->walk_layer_height * (num_layers - 1);
225 }
226
227 static void
228 img_init_lods(struct ilo_image *img,
229 struct ilo_image_params *params)
230 {
231 const struct ilo_image_info *info = params->info;
232 unsigned cur_x, cur_y;
233 unsigned lv;
234
235 cur_x = 0;
236 cur_y = 0;
237 for (lv = 0; lv < info->level_count; lv++) {
238 unsigned lod_w, lod_h;
239
240 img_get_slice_size(img, params, lv, &lod_w, &lod_h);
241
242 img->lods[lv].x = cur_x;
243 img->lods[lv].y = cur_y;
244 img->lods[lv].slice_width = lod_w;
245 img->lods[lv].slice_height = lod_h;
246
247 switch (img->walk) {
248 case ILO_IMAGE_WALK_LAYER:
249 /* MIPLAYOUT_BELOW */
250 if (lv == 1)
251 cur_x += lod_w;
252 else
253 cur_y += lod_h;
254 break;
255 case ILO_IMAGE_WALK_LOD:
256 lod_h *= img_get_num_layers(img, params);
257 if (lv == 1)
258 cur_x += lod_w;
259 else
260 cur_y += lod_h;
261
262 /* every LOD begins at tile boundaries */
263 if (info->level_count > 1) {
264 assert(img->format == PIPE_FORMAT_S8_UINT);
265 cur_x = align(cur_x, 64);
266 cur_y = align(cur_y, 64);
267 }
268 break;
269 case ILO_IMAGE_WALK_3D:
270 {
271 const unsigned num_slices = u_minify(info->depth, lv);
272 const unsigned num_slices_per_row = 1 << lv;
273 const unsigned num_rows =
274 (num_slices + num_slices_per_row - 1) / num_slices_per_row;
275
276 lod_w *= num_slices_per_row;
277 lod_h *= num_rows;
278
279 cur_y += lod_h;
280 }
281 break;
282 }
283
284 if (params->max_x < img->lods[lv].x + lod_w)
285 params->max_x = img->lods[lv].x + lod_w;
286 if (params->max_y < img->lods[lv].y + lod_h)
287 params->max_y = img->lods[lv].y + lod_h;
288 }
289
290 if (img->walk == ILO_IMAGE_WALK_LAYER) {
291 params->h0 = img->lods[0].slice_height;
292
293 if (info->level_count > 1)
294 params->h1 = img->lods[1].slice_height;
295 else
296 img_get_slice_size(img, params, 1, &cur_x, &params->h1);
297 }
298 }
299
300 static void
301 img_init_alignments(struct ilo_image *img,
302 const struct ilo_image_params *params)
303 {
304 const struct ilo_image_info *info = params->info;
305
306 /*
307 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
308 *
309 * "surface format align_i align_j
310 * YUV 4:2:2 formats 4 *see below
311 * BC1-5 4 4
312 * FXT1 8 4
313 * all other formats 4 *see below"
314 *
315 * "- align_j = 4 for any depth buffer
316 * - align_j = 2 for separate stencil buffer
317 * - align_j = 4 for any render target surface is multisampled (4x)
318 * - align_j = 4 for any render target surface with Surface Vertical
319 * Alignment = VALIGN_4
320 * - align_j = 2 for any render target surface with Surface Vertical
321 * Alignment = VALIGN_2
322 * - align_j = 2 for all other render target surface
323 * - align_j = 2 for any sampling engine surface with Surface Vertical
324 * Alignment = VALIGN_2
325 * - align_j = 4 for any sampling engine surface with Surface Vertical
326 * Alignment = VALIGN_4"
327 *
328 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
329 *
330 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
331 * the Surface Format is 96 bits per element (BPE)."
332 *
333 * They can be rephrased as
334 *
335 * align_i align_j
336 * compressed formats block width block height
337 * PIPE_FORMAT_S8_UINT 4 2
338 * other depth/stencil formats 4 4
339 * 4x multisampled 4 4
340 * bpp 96 4 2
341 * others 4 2 or 4
342 */
343
344 /*
345 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
346 *
347 * "surface defined by surface format align_i align_j
348 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
349 * not D16_UNORM 4 4
350 * 3DSTATE_STENCIL_BUFFER N/A 8 8
351 * SURFACE_STATE BC*, ETC*, EAC* 4 4
352 * FXT1 8 4
353 * all others (set by SURFACE_STATE)"
354 *
355 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
356 *
357 * "- This field (Surface Vertical Aligment) is intended to be set to
358 * VALIGN_4 if the surface was rendered as a depth buffer, for a
359 * multisampled (4x) render target, or for a multisampled (8x)
360 * render target, since these surfaces support only alignment of 4.
361 * - Use of VALIGN_4 for other surfaces is supported, but uses more
362 * memory.
363 * - This field must be set to VALIGN_4 for all tiled Y Render Target
364 * surfaces.
365 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
366 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
367 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
368 * must be set to VALIGN_4."
369 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
370 *
371 * "- This field (Surface Horizontal Aligment) is intended to be set to
372 * HALIGN_8 only if the surface was rendered as a depth buffer with
373 * Z16 format or a stencil buffer, since these surfaces support only
374 * alignment of 8.
375 * - Use of HALIGN_8 for other surfaces is supported, but uses more
376 * memory.
377 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
378 * - This field must be set to HALIGN_8 if the Surface Format is
379 * FXT1."
380 *
381 * They can be rephrased as
382 *
383 * align_i align_j
384 * compressed formats block width block height
385 * PIPE_FORMAT_Z16_UNORM 8 4
386 * PIPE_FORMAT_S8_UINT 8 8
387 * other depth/stencil formats 4 4
388 * 2x or 4x multisampled 4 or 8 4
389 * tiled Y 4 or 8 4 (if rt)
390 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
391 * others 4 or 8 2 or 4
392 */
393
394 if (params->compressed) {
395 /* this happens to be the case */
396 img->align_i = img->block_width;
397 img->align_j = img->block_height;
398 } else if (info->bind_zs) {
399 if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) {
400 switch (img->format) {
401 case PIPE_FORMAT_Z16_UNORM:
402 img->align_i = 8;
403 img->align_j = 4;
404 break;
405 case PIPE_FORMAT_S8_UINT:
406 img->align_i = 8;
407 img->align_j = 8;
408 break;
409 default:
410 img->align_i = 4;
411 img->align_j = 4;
412 break;
413 }
414 } else {
415 switch (img->format) {
416 case PIPE_FORMAT_S8_UINT:
417 img->align_i = 4;
418 img->align_j = 2;
419 break;
420 default:
421 img->align_i = 4;
422 img->align_j = 4;
423 break;
424 }
425 }
426 } else {
427 const bool valign_4 =
428 (info->sample_count > 1) ||
429 (ilo_dev_gen(params->dev) >= ILO_GEN(8)) ||
430 (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
431 img->tiling == GEN6_TILING_Y &&
432 info->bind_surface_dp_render);
433
434 if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
435 ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && valign_4)
436 assert(img->format != PIPE_FORMAT_R32G32B32_FLOAT);
437
438 img->align_i = 4;
439 img->align_j = (valign_4) ? 4 : 2;
440 }
441
442 /*
443 * the fact that align i and j are multiples of block width and height
444 * respectively is what makes the size of the bo a multiple of the block
445 * size, slices start at block boundaries, and many of the computations
446 * work.
447 */
448 assert(img->align_i % img->block_width == 0);
449 assert(img->align_j % img->block_height == 0);
450
451 /* make sure align() works */
452 assert(util_is_power_of_two(img->align_i) &&
453 util_is_power_of_two(img->align_j));
454 assert(util_is_power_of_two(img->block_width) &&
455 util_is_power_of_two(img->block_height));
456 }
457
458 static void
459 img_init_tiling(struct ilo_image *img,
460 const struct ilo_image_params *params)
461 {
462 const struct ilo_image_info *info = params->info;
463 unsigned preferred_tilings = params->valid_tilings;
464
465 /* no fencing nor BLT support */
466 if (preferred_tilings & ~IMAGE_TILING_W)
467 preferred_tilings &= ~IMAGE_TILING_W;
468
469 if (info->bind_surface_dp_render || info->bind_surface_sampler) {
470 /*
471 * heuristically set a minimum width/height for enabling tiling
472 */
473 if (img->width0 < 64 && (preferred_tilings & ~IMAGE_TILING_X))
474 preferred_tilings &= ~IMAGE_TILING_X;
475
476 if ((img->width0 < 32 || img->height0 < 16) &&
477 (img->width0 < 16 || img->height0 < 32) &&
478 (preferred_tilings & ~IMAGE_TILING_Y))
479 preferred_tilings &= ~IMAGE_TILING_Y;
480 } else {
481 /* force linear if we are not sure where the texture is bound to */
482 if (preferred_tilings & IMAGE_TILING_NONE)
483 preferred_tilings &= IMAGE_TILING_NONE;
484 }
485
486 /* prefer tiled over linear */
487 if (preferred_tilings & IMAGE_TILING_Y)
488 img->tiling = GEN6_TILING_Y;
489 else if (preferred_tilings & IMAGE_TILING_X)
490 img->tiling = GEN6_TILING_X;
491 else if (preferred_tilings & IMAGE_TILING_W)
492 img->tiling = GEN8_TILING_W;
493 else
494 img->tiling = GEN6_TILING_NONE;
495 }
496
497 static void
498 img_init_walk_gen7(struct ilo_image *img,
499 const struct ilo_image_params *params)
500 {
501 const struct ilo_image_info *info = params->info;
502
503 /*
504 * It is not explicitly states, but render targets are expected to be
505 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
506 * to be IMS (samples interleaved).
507 *
508 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
509 */
510 if (info->bind_zs) {
511 /*
512 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
513 *
514 * "note that the depth buffer and stencil buffer have an implied
515 * value of ARYSPC_FULL"
516 */
517 img->walk = (info->type == GEN6_SURFTYPE_3D) ?
518 ILO_IMAGE_WALK_3D : ILO_IMAGE_WALK_LAYER;
519
520 img->interleaved_samples = true;
521 } else {
522 /*
523 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
524 *
525 * "If Multisampled Surface Storage Format is MSFMT_MSS and Number
526 * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
527 * Array Spacing) must be set to ARYSPC_LOD0."
528 *
529 * As multisampled resources are not mipmapped, we never use
530 * ARYSPC_FULL for them.
531 */
532 if (info->sample_count > 1)
533 assert(info->level_count == 1);
534
535 img->walk =
536 (info->type == GEN6_SURFTYPE_3D) ? ILO_IMAGE_WALK_3D :
537 (info->level_count > 1) ? ILO_IMAGE_WALK_LAYER :
538 ILO_IMAGE_WALK_LOD;
539
540 img->interleaved_samples = false;
541 }
542 }
543
544 static void
545 img_init_walk_gen6(struct ilo_image *img,
546 const struct ilo_image_params *params)
547 {
548 /*
549 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
550 *
551 * "The separate stencil buffer does not support mip mapping, thus the
552 * storage for LODs other than LOD 0 is not needed. The following
553 * QPitch equation applies only to the separate stencil buffer:
554 *
555 * QPitch = h_0"
556 *
557 * GEN6 does not support compact spacing otherwise.
558 */
559 img->walk =
560 (params->info->type == GEN6_SURFTYPE_3D) ? ILO_IMAGE_WALK_3D :
561 (img->format == PIPE_FORMAT_S8_UINT) ? ILO_IMAGE_WALK_LOD :
562 ILO_IMAGE_WALK_LAYER;
563
564 /* GEN6 supports only interleaved samples */
565 img->interleaved_samples = true;
566 }
567
568 static void
569 img_init_walk(struct ilo_image *img,
570 const struct ilo_image_params *params)
571 {
572 if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
573 img_init_walk_gen7(img, params);
574 else
575 img_init_walk_gen6(img, params);
576 }
577
578 static unsigned
579 img_get_valid_tilings(const struct ilo_image *img,
580 const struct ilo_image_params *params)
581 {
582 const struct ilo_image_info *info = params->info;
583 const enum pipe_format format = img->format;
584 unsigned valid_tilings = params->valid_tilings;
585
586 /*
587 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
588 *
589 * "Display/Overlay Y-Major not supported.
590 * X-Major required for Async Flips"
591 */
592 if (unlikely(info->bind_scanout))
593 valid_tilings &= IMAGE_TILING_X;
594
595 /*
596 * From the Sandy Bridge PRM, volume 3 part 2, page 158:
597 *
598 * "The cursor surface address must be 4K byte aligned. The cursor must
599 * be in linear memory, it cannot be tiled."
600 */
601 if (unlikely(info->bind_cursor))
602 valid_tilings &= IMAGE_TILING_NONE;
603
604 /*
605 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
606 *
607 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
608 * Depth Buffer is not supported."
609 *
610 * "The Depth Buffer, if tiled, must use Y-Major tiling."
611 *
612 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
613 *
614 * "W-Major Tile Format is used for separate stencil."
615 */
616 if (info->bind_zs) {
617 switch (format) {
618 case PIPE_FORMAT_S8_UINT:
619 valid_tilings &= IMAGE_TILING_W;
620 break;
621 default:
622 valid_tilings &= IMAGE_TILING_Y;
623 break;
624 }
625 }
626
627 if (info->bind_surface_dp_render) {
628 /*
629 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
630 *
631 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
632 * either TileX or Linear."
633 *
634 * From the Haswell PRM, volume 5, page 32:
635 *
636 * "NOTE: 128 BPP format color buffer (render target) supports
637 * Linear, TiledX and TiledY."
638 */
639 if (ilo_dev_gen(params->dev) < ILO_GEN(7.5) && img->block_size == 16)
640 valid_tilings &= ~IMAGE_TILING_Y;
641
642 /*
643 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
644 *
645 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
646 * for all tiled Y Render Target surfaces."
647 *
648 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
649 */
650 if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
651 ilo_dev_gen(params->dev) <= ILO_GEN(7.5) &&
652 img->format == PIPE_FORMAT_R32G32B32_FLOAT)
653 valid_tilings &= ~IMAGE_TILING_Y;
654
655 valid_tilings &= ~IMAGE_TILING_W;
656 }
657
658 if (info->bind_surface_sampler) {
659 if (ilo_dev_gen(params->dev) < ILO_GEN(8))
660 valid_tilings &= ~IMAGE_TILING_W;
661 }
662
663 /* no conflicting binding flags */
664 assert(valid_tilings);
665
666 return valid_tilings;
667 }
668
669 static void
670 img_init_size_and_format(struct ilo_image *img,
671 struct ilo_image_params *params)
672 {
673 const struct ilo_image_info *info = params->info;
674 enum pipe_format format = info->format;
675 bool require_separate_stencil = false;
676
677 img->type = info->type;
678 img->width0 = info->width;
679 img->height0 = info->height;
680 img->depth0 = info->depth;
681 img->array_size = info->array_size;
682 img->level_count = info->level_count;
683 img->sample_count = info->sample_count;
684
685 /*
686 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
687 *
688 * "This field (Separate Stencil Buffer Enable) must be set to the same
689 * value (enabled or disabled) as Hierarchical Depth Buffer Enable."
690 *
691 * GEN7+ requires separate stencil buffers.
692 */
693 if (info->bind_zs) {
694 if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
695 require_separate_stencil = true;
696 else
697 require_separate_stencil = (img->aux.type == ILO_IMAGE_AUX_HIZ);
698 }
699
700 switch (format) {
701 case PIPE_FORMAT_ETC1_RGB8:
702 format = PIPE_FORMAT_R8G8B8X8_UNORM;
703 break;
704 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
705 if (require_separate_stencil) {
706 format = PIPE_FORMAT_Z24X8_UNORM;
707 img->separate_stencil = true;
708 }
709 break;
710 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
711 if (require_separate_stencil) {
712 format = PIPE_FORMAT_Z32_FLOAT;
713 img->separate_stencil = true;
714 }
715 break;
716 default:
717 break;
718 }
719
720 img->format = format;
721 img->block_width = util_format_get_blockwidth(format);
722 img->block_height = util_format_get_blockheight(format);
723 img->block_size = util_format_get_blocksize(format);
724
725 params->valid_tilings = img_get_valid_tilings(img, params);
726 params->compressed = util_format_is_compressed(img->format);
727 }
728
729 static bool
730 img_want_mcs(const struct ilo_image *img,
731 const struct ilo_image_params *params)
732 {
733 const struct ilo_image_info *info = params->info;
734 bool want_mcs = false;
735
736 /* MCS is for RT on GEN7+ */
737 if (ilo_dev_gen(params->dev) < ILO_GEN(7))
738 return false;
739
740 if (info->type != GEN6_SURFTYPE_2D || !info->bind_surface_dp_render)
741 return false;
742
743 /*
744 * From the Ivy Bridge PRM, volume 4 part 1, page 77:
745 *
746 * "For Render Target and Sampling Engine Surfaces:If the surface is
747 * multisampled (Number of Multisamples any value other than
748 * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
749 *
750 * "This field must be set to 0 for all SINT MSRTs when all RT channels
751 * are not written"
752 */
753 if (info->sample_count > 1 && !util_format_is_pure_sint(info->format)) {
754 want_mcs = true;
755 } else if (info->sample_count == 1 && !info->aux_disable) {
756 /*
757 * From the Ivy Bridge PRM, volume 2 part 1, page 326:
758 *
759 * "When MCS is buffer is used for color clear of non-multisampler
760 * render target, the following restrictions apply.
761 * - Support is limited to tiled render targets.
762 * - Support is for non-mip-mapped and non-array surface types
763 * only.
764 * - Clear is supported only on the full RT; i.e., no partial clear
765 * or overlapping clears.
766 * - MCS buffer for non-MSRT is supported only for RT formats
767 * 32bpp, 64bpp and 128bpp.
768 * ..."
769 */
770 if (img->tiling != GEN6_TILING_NONE &&
771 info->level_count == 1 && info->array_size == 1) {
772 switch (img->block_size) {
773 case 4:
774 case 8:
775 case 16:
776 want_mcs = true;
777 break;
778 default:
779 break;
780 }
781 }
782 }
783
784 return want_mcs;
785 }
786
787 static bool
788 img_want_hiz(const struct ilo_image *img,
789 const struct ilo_image_params *params)
790 {
791 const struct ilo_image_info *info = params->info;
792 const struct util_format_description *desc =
793 util_format_description(info->format);
794
795 if (ilo_debug & ILO_DEBUG_NOHIZ)
796 return false;
797
798 if (info->aux_disable)
799 return false;
800
801 /* we want 8x4 aligned levels */
802 if (info->type == GEN6_SURFTYPE_1D)
803 return false;
804
805 if (!info->bind_zs)
806 return false;
807
808 if (!util_format_has_depth(desc))
809 return false;
810
811 /*
812 * As can be seen in img_calculate_hiz_size(), HiZ may not be enabled
813 * for every level. This is generally fine except on GEN6, where HiZ and
814 * separate stencil are enabled and disabled at the same time. When the
815 * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
816 * can result in incompatible formats.
817 */
818 if (ilo_dev_gen(params->dev) == ILO_GEN(6) &&
819 info->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
820 info->level_count > 1)
821 return false;
822
823 return true;
824 }
825
826 static void
827 img_init_aux(struct ilo_image *img,
828 const struct ilo_image_params *params)
829 {
830 if (img_want_hiz(img, params))
831 img->aux.type = ILO_IMAGE_AUX_HIZ;
832 else if (img_want_mcs(img, params))
833 img->aux.type = ILO_IMAGE_AUX_MCS;
834 }
835
836 static void
837 img_align(struct ilo_image *img, struct ilo_image_params *params)
838 {
839 const struct ilo_image_info *info = params->info;
840 int align_w = 1, align_h = 1, pad_h = 0;
841
842 /*
843 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
844 *
845 * "To determine the necessary padding on the bottom and right side of
846 * the surface, refer to the table in Section 7.18.3.4 for the i and j
847 * parameters for the surface format in use. The surface must then be
848 * extended to the next multiple of the alignment unit size in each
849 * dimension, and all texels contained in this extended surface must
850 * have valid GTT entries."
851 *
852 * "For cube surfaces, an additional two rows of padding are required
853 * at the bottom of the surface. This must be ensured regardless of
854 * whether the surface is stored tiled or linear. This is due to the
855 * potential rotation of cache line orientation from memory to cache."
856 *
857 * "For compressed textures (BC* and FXT1 surface formats), padding at
858 * the bottom of the surface is to an even compressed row, which is
859 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
860 * purposes, these surfaces behave as if j = 8 only for surface
861 * padding purposes. The value of 4 for j still applies for mip level
862 * alignment and QPitch calculation."
863 */
864 if (info->bind_surface_sampler) {
865 align_w = MAX2(align_w, img->align_i);
866 align_h = MAX2(align_h, img->align_j);
867
868 if (info->type == GEN6_SURFTYPE_CUBE)
869 pad_h += 2;
870
871 if (params->compressed)
872 align_h = MAX2(align_h, img->align_j * 2);
873 }
874
875 /*
876 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
877 *
878 * "If the surface contains an odd number of rows of data, a final row
879 * below the surface must be allocated."
880 */
881 if (info->bind_surface_dp_render)
882 align_h = MAX2(align_h, 2);
883
884 /*
885 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. Pad to allow HiZ
886 * for unaligned non-mipmapped and non-array images.
887 */
888 if (img->aux.type == ILO_IMAGE_AUX_HIZ &&
889 info->level_count == 1 &&
890 info->array_size == 1 &&
891 info->depth == 1) {
892 align_w = MAX2(align_w, 8);
893 align_h = MAX2(align_h, 4);
894 }
895
896 params->max_x = align(params->max_x, align_w);
897 params->max_y = align(params->max_y + pad_h, align_h);
898 }
899
900 /* note that this may force the texture to be linear */
901 static void
902 img_calculate_bo_size(struct ilo_image *img,
903 const struct ilo_image_params *params)
904 {
905 assert(params->max_x % img->block_width == 0);
906 assert(params->max_y % img->block_height == 0);
907 assert(img->walk_layer_height % img->block_height == 0);
908
909 img->bo_stride =
910 (params->max_x / img->block_width) * img->block_size;
911 img->bo_height = params->max_y / img->block_height;
912
913 while (true) {
914 unsigned w = img->bo_stride, h = img->bo_height;
915 unsigned align_w, align_h;
916
917 /*
918 * From the Haswell PRM, volume 5, page 163:
919 *
920 * "For linear surfaces, additional padding of 64 bytes is required
921 * at the bottom of the surface. This is in addition to the padding
922 * required above."
923 */
924 if (ilo_dev_gen(params->dev) >= ILO_GEN(7.5) &&
925 params->info->bind_surface_sampler &&
926 img->tiling == GEN6_TILING_NONE)
927 h += (64 + img->bo_stride - 1) / img->bo_stride;
928
929 /*
930 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
931 *
932 * "- For linear render target surfaces, the pitch must be a
933 * multiple of the element size for non-YUV surface formats.
934 * Pitch must be a multiple of 2 * element size for YUV surface
935 * formats.
936 * - For other linear surfaces, the pitch can be any multiple of
937 * bytes.
938 * - For tiled surfaces, the pitch must be a multiple of the tile
939 * width."
940 *
941 * Different requirements may exist when the bo is used in different
942 * places, but our alignments here should be good enough that we do not
943 * need to check params->info->bind_x.
944 */
945 switch (img->tiling) {
946 case GEN6_TILING_X:
947 align_w = 512;
948 align_h = 8;
949 break;
950 case GEN6_TILING_Y:
951 align_w = 128;
952 align_h = 32;
953 break;
954 case GEN8_TILING_W:
955 /*
956 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
957 *
958 * "A 4KB tile is subdivided into 8-high by 8-wide array of
959 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
960 * bytes."
961 */
962 align_w = 64;
963 align_h = 64;
964 break;
965 default:
966 assert(img->tiling == GEN6_TILING_NONE);
967 /* some good enough values */
968 align_w = 64;
969 align_h = 2;
970 break;
971 }
972
973 w = align(w, align_w);
974 h = align(h, align_h);
975
976 /* make sure the bo is mappable */
977 if (img->tiling != GEN6_TILING_NONE) {
978 /*
979 * Usually only the first 256MB of the GTT is mappable.
980 *
981 * See also how intel_context::max_gtt_map_object_size is calculated.
982 */
983 const size_t mappable_gtt_size = 256 * 1024 * 1024;
984
985 /*
986 * Be conservative. We may be able to switch from VALIGN_4 to
987 * VALIGN_2 if the image was Y-tiled, but let's keep it simple.
988 */
989 if (mappable_gtt_size / w / 4 < h) {
990 if (params->valid_tilings & IMAGE_TILING_NONE) {
991 img->tiling = GEN6_TILING_NONE;
992 /* MCS support for non-MSRTs is limited to tiled RTs */
993 if (img->aux.type == ILO_IMAGE_AUX_MCS &&
994 params->info->sample_count == 1)
995 img->aux.type = ILO_IMAGE_AUX_NONE;
996
997 continue;
998 } else {
999 ilo_warn("cannot force texture to be linear\n");
1000 }
1001 }
1002 }
1003
1004 img->bo_stride = w;
1005 img->bo_height = h;
1006 break;
1007 }
1008 }
1009
1010 static void
1011 img_calculate_hiz_size(struct ilo_image *img,
1012 const struct ilo_image_params *params)
1013 {
1014 const struct ilo_image_info *info = params->info;
1015 const unsigned hz_align_j = 8;
1016 enum ilo_image_walk_type hz_walk;
1017 unsigned hz_width, hz_height, lv;
1018 unsigned hz_clear_w, hz_clear_h;
1019
1020 assert(img->aux.type == ILO_IMAGE_AUX_HIZ);
1021
1022 assert(img->walk == ILO_IMAGE_WALK_LAYER ||
1023 img->walk == ILO_IMAGE_WALK_3D);
1024
1025 /*
1026 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
1027 *
1028 * "The hierarchical depth buffer does not support the LOD field, it is
1029 * assumed by hardware to be zero. A separate hierarachical depth
1030 * buffer is required for each LOD used, and the corresponding
1031 * buffer's state delivered to hardware each time a new depth buffer
1032 * state with modified LOD is delivered."
1033 *
1034 * We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD.
1035 */
1036 if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
1037 hz_walk = img->walk;
1038 else
1039 hz_walk = ILO_IMAGE_WALK_LOD;
1040
1041 /*
1042 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1043 * PRM, volume 2 part 1, page 312-313.
1044 *
1045 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1046 * memory row.
1047 */
1048 switch (hz_walk) {
1049 case ILO_IMAGE_WALK_LAYER:
1050 {
1051 const unsigned h0 = align(params->h0, hz_align_j);
1052 const unsigned h1 = align(params->h1, hz_align_j);
1053 const unsigned htail =
1054 ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
1055 const unsigned hz_qpitch = h0 + h1 + htail;
1056
1057 hz_width = align(img->lods[0].slice_width, 16);
1058
1059 hz_height = hz_qpitch * info->array_size / 2;
1060 if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
1061 hz_height = align(hz_height, 8);
1062
1063 img->aux.walk_layer_height = hz_qpitch;
1064 }
1065 break;
1066 case ILO_IMAGE_WALK_LOD:
1067 {
1068 unsigned lod_tx[PIPE_MAX_TEXTURE_LEVELS];
1069 unsigned lod_ty[PIPE_MAX_TEXTURE_LEVELS];
1070 unsigned cur_tx, cur_ty;
1071
1072 /* figure out the tile offsets of LODs */
1073 hz_width = 0;
1074 hz_height = 0;
1075 cur_tx = 0;
1076 cur_ty = 0;
1077 for (lv = 0; lv < info->level_count; lv++) {
1078 unsigned tw, th;
1079
1080 lod_tx[lv] = cur_tx;
1081 lod_ty[lv] = cur_ty;
1082
1083 tw = align(img->lods[lv].slice_width, 16);
1084 th = align(img->lods[lv].slice_height, hz_align_j) *
1085 info->array_size / 2;
1086 /* convert to Y-tiles */
1087 tw = align(tw, 128) / 128;
1088 th = align(th, 32) / 32;
1089
1090 if (hz_width < cur_tx + tw)
1091 hz_width = cur_tx + tw;
1092 if (hz_height < cur_ty + th)
1093 hz_height = cur_ty + th;
1094
1095 if (lv == 1)
1096 cur_tx += tw;
1097 else
1098 cur_ty += th;
1099 }
1100
1101 /* convert tile offsets to memory offsets */
1102 for (lv = 0; lv < info->level_count; lv++) {
1103 img->aux.walk_lod_offsets[lv] =
1104 (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
1105 }
1106 hz_width *= 128;
1107 hz_height *= 32;
1108 }
1109 break;
1110 case ILO_IMAGE_WALK_3D:
1111 hz_width = align(img->lods[0].slice_width, 16);
1112
1113 hz_height = 0;
1114 for (lv = 0; lv < info->level_count; lv++) {
1115 const unsigned h = align(img->lods[lv].slice_height, hz_align_j);
1116 /* according to the formula, slices are packed together vertically */
1117 hz_height += h * u_minify(info->depth, lv);
1118 }
1119 hz_height /= 2;
1120 break;
1121 default:
1122 assert(!"unknown HiZ walk");
1123 hz_width = 0;
1124 hz_height = 0;
1125 break;
1126 }
1127
1128 /*
1129 * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
1130 * Experiments on Haswell show that aligning the RECTLIST primitive and
1131 * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be
1132 * aligned.
1133 */
1134 hz_clear_w = 8;
1135 hz_clear_h = 4;
1136 switch (info->sample_count) {
1137 case 1:
1138 default:
1139 break;
1140 case 2:
1141 hz_clear_w /= 2;
1142 break;
1143 case 4:
1144 hz_clear_w /= 2;
1145 hz_clear_h /= 2;
1146 break;
1147 case 8:
1148 hz_clear_w /= 4;
1149 hz_clear_h /= 2;
1150 break;
1151 case 16:
1152 hz_clear_w /= 4;
1153 hz_clear_h /= 4;
1154 break;
1155 }
1156
1157 for (lv = 0; lv < info->level_count; lv++) {
1158 if (u_minify(img->width0, lv) % hz_clear_w ||
1159 u_minify(img->height0, lv) % hz_clear_h)
1160 break;
1161 img->aux.enables |= 1 << lv;
1162 }
1163
1164 /* we padded to allow this in img_align() */
1165 if (info->level_count == 1 && info->array_size == 1 && info->depth == 1)
1166 img->aux.enables |= 0x1;
1167
1168 /* align to Y-tile */
1169 img->aux.bo_stride = align(hz_width, 128);
1170 img->aux.bo_height = align(hz_height, 32);
1171 }
1172
1173 static void
1174 img_calculate_mcs_size(struct ilo_image *img,
1175 const struct ilo_image_params *params)
1176 {
1177 const struct ilo_image_info *info = params->info;
1178 int mcs_width, mcs_height, mcs_cpp;
1179 int downscale_x, downscale_y;
1180
1181 assert(img->aux.type == ILO_IMAGE_AUX_MCS);
1182
1183 if (info->sample_count > 1) {
1184 /*
1185 * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1186 * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
1187 * need of scale down could be that the clear rectangle is used to clear
1188 * the MCS instead of the RT.
1189 *
1190 * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
1191 * 2x2 factor could come from that the hardware writes 128 bits (an
1192 * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1193 * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
1194 * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1195 * pixel block in the RT.
1196 */
1197 switch (info->sample_count) {
1198 case 2:
1199 case 4:
1200 downscale_x = 8;
1201 downscale_y = 2;
1202 mcs_cpp = 1;
1203 break;
1204 case 8:
1205 downscale_x = 2;
1206 downscale_y = 2;
1207 mcs_cpp = 4;
1208 break;
1209 case 16:
1210 downscale_x = 2;
1211 downscale_y = 1;
1212 mcs_cpp = 8;
1213 break;
1214 default:
1215 assert(!"unsupported sample count");
1216 return;
1217 break;
1218 }
1219
1220 /*
1221 * It also appears that the 2x2 subspans generated by the scaled-down
1222 * clear rectangle cannot be masked. The scale-down clear rectangle
1223 * thus must be aligned to 2x2, and we need to pad.
1224 */
1225 mcs_width = align(img->width0, downscale_x * 2);
1226 mcs_height = align(img->height0, downscale_y * 2);
1227 } else {
1228 /*
1229 * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1230 *
1231 * " Pixels Lines
1232 * TiledY RT CL
1233 * bpp
1234 * 32 8 4
1235 * 64 4 4
1236 * 128 2 4
1237 *
1238 * TiledX RT CL
1239 * bpp
1240 * 32 16 2
1241 * 64 8 2
1242 * 128 4 2"
1243 *
1244 * This table and the two following tables define the RT alignments, the
1245 * clear rectangle alignments, and the clear rectangle scale factors.
1246 * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1247 * that the clear rectangle alignments are 16x32 blocks, and the clear
1248 * rectangle scale factors are 8x16 blocks.
1249 *
1250 * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1251 * RT. Similar to the MSAA cases, we can argue that an OWord maps to
1252 * 8x16 blocks.
1253 *
1254 * One problem with this reasoning is that a Y-tile in MCS has 8x32
1255 * OWords and maps to 64x512 128-byte blocks. This differs from i965,
1256 * which says that a Y-tile maps to 128x256 blocks (\see
1257 * intel_get_non_msrt_mcs_alignment). It does not really change
1258 * anything except for the size of the allocated MCS. Let's see if we
1259 * hit out-of-bound access.
1260 */
1261 switch (img->tiling) {
1262 case GEN6_TILING_X:
1263 downscale_x = 64 / img->block_size;
1264 downscale_y = 2;
1265 break;
1266 case GEN6_TILING_Y:
1267 downscale_x = 32 / img->block_size;
1268 downscale_y = 4;
1269 break;
1270 default:
1271 assert(!"unsupported tiling mode");
1272 return;
1273 break;
1274 }
1275
1276 downscale_x *= 8;
1277 downscale_y *= 16;
1278
1279 /*
1280 * From the Haswell PRM, volume 7, page 652:
1281 *
1282 * "Clear rectangle must be aligned to two times the number of
1283 * pixels in the table shown below due to 16X16 hashing across the
1284 * slice."
1285 *
1286 * The scaled-down clear rectangle must be aligned to 4x4 instead of
1287 * 2x2, and we need to pad.
1288 */
1289 mcs_width = align(img->width0, downscale_x * 4) / downscale_x;
1290 mcs_height = align(img->height0, downscale_y * 4) / downscale_y;
1291 mcs_cpp = 16; /* an OWord */
1292 }
1293
1294 img->aux.enables = (1 << info->level_count) - 1;
1295 /* align to Y-tile */
1296 img->aux.bo_stride = align(mcs_width * mcs_cpp, 128);
1297 img->aux.bo_height = align(mcs_height, 32);
1298 }
1299
1300 static bool
1301 img_init(struct ilo_image *img,
1302 struct ilo_image_params *params)
1303 {
1304 /* there are hard dependencies between every function here */
1305
1306 img_init_aux(img, params);
1307 img_init_size_and_format(img, params);
1308 img_init_walk(img, params);
1309 img_init_tiling(img, params);
1310 img_init_alignments(img, params);
1311 img_init_lods(img, params);
1312 img_init_layer_height(img, params);
1313
1314 img_align(img, params);
1315 img_calculate_bo_size(img, params);
1316
1317 img->scanout = params->info->bind_scanout;
1318
1319 switch (img->aux.type) {
1320 case ILO_IMAGE_AUX_HIZ:
1321 img_calculate_hiz_size(img, params);
1322 break;
1323 case ILO_IMAGE_AUX_MCS:
1324 img_calculate_mcs_size(img, params);
1325 break;
1326 default:
1327 break;
1328 }
1329
1330 return true;
1331 }
1332
1333 /**
1334 * The texutre is for transfer only. We can define our own layout to save
1335 * space.
1336 */
1337 static void
1338 img_init_for_transfer(struct ilo_image *img,
1339 const struct ilo_dev *dev,
1340 const struct ilo_image_info *info)
1341 {
1342 const unsigned num_layers = (info->type == GEN6_SURFTYPE_3D) ?
1343 info->depth : info->array_size;
1344 unsigned layer_width, layer_height;
1345
1346 assert(info->level_count == 1);
1347 assert(info->sample_count == 1);
1348
1349 img->aux.type = ILO_IMAGE_AUX_NONE;
1350
1351 img->type = info->type;
1352 img->width0 = info->width;
1353 img->height0 = info->height;
1354 img->depth0 = info->depth;
1355 img->array_size = info->array_size;
1356 img->level_count = 1;
1357 img->sample_count = 1;
1358
1359 img->format = info->format;
1360 img->block_width = util_format_get_blockwidth(info->format);
1361 img->block_height = util_format_get_blockheight(info->format);
1362 img->block_size = util_format_get_blocksize(info->format);
1363
1364 img->walk = ILO_IMAGE_WALK_LOD;
1365
1366 img->tiling = GEN6_TILING_NONE;
1367
1368 img->align_i = img->block_width;
1369 img->align_j = img->block_height;
1370
1371 assert(util_is_power_of_two(img->block_width) &&
1372 util_is_power_of_two(img->block_height));
1373
1374 /* use packed layout */
1375 layer_width = align(info->width, img->align_i);
1376 layer_height = align(info->height, img->align_j);
1377
1378 img->lods[0].slice_width = layer_width;
1379 img->lods[0].slice_height = layer_height;
1380
1381 img->bo_stride = (layer_width / img->block_width) * img->block_size;
1382 img->bo_stride = align(img->bo_stride, 64);
1383
1384 img->bo_height = (layer_height / img->block_height) * num_layers;
1385 }
1386
1387 static bool
1388 img_is_bind_gpu(const struct ilo_image_info *info)
1389 {
1390 return (info->bind_surface_sampler ||
1391 info->bind_surface_dp_render ||
1392 info->bind_surface_dp_typed ||
1393 info->bind_zs ||
1394 info->bind_scanout ||
1395 info->bind_cursor);
1396 }
1397
1398 /**
1399 * Initialize the image. Callers should zero-initialize \p img first.
1400 */
1401 bool
1402 ilo_image_init(struct ilo_image *img,
1403 const struct ilo_dev *dev,
1404 const struct ilo_image_info *info)
1405 {
1406 struct ilo_image_params params;
1407
1408 assert(ilo_is_zeroed(img, sizeof(*img)));
1409
1410 /* use transfer layout when the texture is never bound to GPU */
1411 if (!img_is_bind_gpu(info) &&
1412 info->level_count == 1 &&
1413 info->sample_count == 1) {
1414 img_init_for_transfer(img, dev, info);
1415 return true;
1416 }
1417
1418 memset(&params, 0, sizeof(params));
1419 params.dev = dev;
1420 params.info = info;
1421 params.valid_tilings = (info->valid_tilings) ?
1422 info->valid_tilings : IMAGE_TILING_ALL;
1423
1424 if (!img_init(img, &params))
1425 return false;
1426
1427 if (info->force_bo_stride) {
1428 if ((img->tiling == GEN6_TILING_X && info->force_bo_stride % 512) ||
1429 (img->tiling == GEN6_TILING_Y && info->force_bo_stride % 128) ||
1430 (img->tiling == GEN8_TILING_W && info->force_bo_stride % 64))
1431 return false;
1432
1433 if (img->bo_stride > info->force_bo_stride)
1434 return false;
1435
1436 img->bo_stride = info->force_bo_stride;
1437 }
1438
1439 return true;
1440 }