vc4: Move job-submit skip cases to vc4_job_submit().
[mesa.git] / src / gallium / drivers / ilo / core / ilo_image.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2014 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "ilo_debug.h"
29 #include "ilo_image.h"
30
31 enum {
32 IMAGE_TILING_NONE = 1 << GEN6_TILING_NONE,
33 IMAGE_TILING_X = 1 << GEN6_TILING_X,
34 IMAGE_TILING_Y = 1 << GEN6_TILING_Y,
35 IMAGE_TILING_W = 1 << GEN8_TILING_W,
36
37 IMAGE_TILING_ALL = (IMAGE_TILING_NONE |
38 IMAGE_TILING_X |
39 IMAGE_TILING_Y |
40 IMAGE_TILING_W)
41 };
42
43 struct ilo_image_layout {
44 enum ilo_image_walk_type walk;
45 bool interleaved_samples;
46
47 uint8_t valid_tilings;
48 enum gen_surface_tiling tiling;
49
50 enum ilo_image_aux_type aux;
51
52 int align_i;
53 int align_j;
54
55 struct ilo_image_lod *lods;
56 int walk_layer_h0;
57 int walk_layer_h1;
58 int walk_layer_height;
59 int monolithic_width;
60 int monolithic_height;
61 };
62
63 static enum ilo_image_walk_type
64 image_get_gen6_walk(const struct ilo_dev *dev,
65 const struct ilo_image_info *info)
66 {
67 ILO_DEV_ASSERT(dev, 6, 6);
68
69 /* TODO we want LODs to be page-aligned */
70 if (info->type == GEN6_SURFTYPE_3D)
71 return ILO_IMAGE_WALK_3D;
72
73 /*
74 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
75 *
76 * "The separate stencil buffer does not support mip mapping, thus the
77 * storage for LODs other than LOD 0 is not needed. The following
78 * QPitch equation applies only to the separate stencil buffer:
79 *
80 * QPitch = h_0"
81 *
82 * Use ILO_IMAGE_WALK_LOD and manually offset to the (page-aligned) levels
83 * when bound.
84 */
85 if (info->bind_zs && info->format == GEN6_FORMAT_R8_UINT)
86 return ILO_IMAGE_WALK_LOD;
87
88 /* compact spacing is not supported otherwise */
89 return ILO_IMAGE_WALK_LAYER;
90 }
91
92 static enum ilo_image_walk_type
93 image_get_gen7_walk(const struct ilo_dev *dev,
94 const struct ilo_image_info *info)
95 {
96 ILO_DEV_ASSERT(dev, 7, 8);
97
98 if (info->type == GEN6_SURFTYPE_3D)
99 return ILO_IMAGE_WALK_3D;
100
101 /*
102 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
103 *
104 * "note that the depth buffer and stencil buffer have an implied value
105 * of ARYSPC_FULL"
106 *
107 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
108 *
109 * "If Multisampled Surface Storage Format is MSFMT_MSS and Number of
110 * Multisamples is not MULTISAMPLECOUNT_1, this field (Surface Array
111 * Spacing) must be set to ARYSPC_LOD0."
112 */
113 if (info->sample_count > 1)
114 assert(info->level_count == 1);
115 return (info->bind_zs || info->level_count > 1) ?
116 ILO_IMAGE_WALK_LAYER : ILO_IMAGE_WALK_LOD;
117 }
118
119 static bool
120 image_get_gen6_interleaved_samples(const struct ilo_dev *dev,
121 const struct ilo_image_info *info)
122 {
123 ILO_DEV_ASSERT(dev, 6, 8);
124
125 /*
126 * Gen6 supports only interleaved samples. It is not explicitly stated,
127 * but on Gen7+, render targets are expected to be UMS/CMS (samples
128 * non-interleaved) and depth/stencil buffers are expected to be IMS
129 * (samples interleaved).
130 *
131 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
132 */
133 return (ilo_dev_gen(dev) == ILO_GEN(6) || info->bind_zs);
134 }
135
136 static uint8_t
137 image_get_gen6_valid_tilings(const struct ilo_dev *dev,
138 const struct ilo_image_info *info)
139 {
140 uint8_t valid_tilings = IMAGE_TILING_ALL;
141
142 ILO_DEV_ASSERT(dev, 6, 8);
143
144 if (info->valid_tilings)
145 valid_tilings &= info->valid_tilings;
146
147 /*
148 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
149 *
150 * "Display/Overlay Y-Major not supported.
151 * X-Major required for Async Flips"
152 */
153 if (unlikely(info->bind_scanout))
154 valid_tilings &= IMAGE_TILING_X;
155
156 /*
157 * From the Sandy Bridge PRM, volume 3 part 2, page 158:
158 *
159 * "The cursor surface address must be 4K byte aligned. The cursor must
160 * be in linear memory, it cannot be tiled."
161 */
162 if (unlikely(info->bind_cursor))
163 valid_tilings &= IMAGE_TILING_NONE;
164
165 /*
166 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
167 *
168 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
169 * Depth Buffer is not supported."
170 *
171 * "The Depth Buffer, if tiled, must use Y-Major tiling."
172 *
173 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
174 *
175 * "W-Major Tile Format is used for separate stencil."
176 */
177 if (info->bind_zs) {
178 if (info->format == GEN6_FORMAT_R8_UINT)
179 valid_tilings &= IMAGE_TILING_W;
180 else
181 valid_tilings &= IMAGE_TILING_Y;
182 }
183
184 if (info->bind_surface_sampler ||
185 info->bind_surface_dp_render ||
186 info->bind_surface_dp_typed) {
187 /*
188 * From the Haswell PRM, volume 2d, page 233:
189 *
190 * "If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
191 * (Tiled Surface) must be TRUE."
192 */
193 if (info->sample_count > 1)
194 valid_tilings &= ~IMAGE_TILING_NONE;
195
196 if (ilo_dev_gen(dev) < ILO_GEN(8))
197 valid_tilings &= ~IMAGE_TILING_W;
198 }
199
200 if (info->bind_surface_dp_render) {
201 /*
202 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
203 *
204 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
205 * either TileX or Linear."
206 *
207 * From the Haswell PRM, volume 5, page 32:
208 *
209 * "NOTE: 128 BPP format color buffer (render target) supports
210 * Linear, TiledX and TiledY."
211 */
212 if (ilo_dev_gen(dev) < ILO_GEN(7.5) && info->block_size == 16)
213 valid_tilings &= ~IMAGE_TILING_Y;
214
215 /*
216 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
217 *
218 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
219 * for all tiled Y Render Target surfaces."
220 *
221 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
222 *
223 * R32G32B32_FLOAT is not renderable and we only need an assert() here.
224 */
225 if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) <= ILO_GEN(7.5))
226 assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT);
227 }
228
229 return valid_tilings;
230 }
231
232 static uint64_t
233 image_get_gen6_estimated_size(const struct ilo_dev *dev,
234 const struct ilo_image_info *info)
235 {
236 /* padding not considered */
237 const uint64_t slice_size = info->width * info->height *
238 info->block_size / (info->block_width * info->block_height);
239 const uint64_t slice_count =
240 info->depth * info->array_size * info->sample_count;
241 const uint64_t estimated_size = slice_size * slice_count;
242
243 ILO_DEV_ASSERT(dev, 6, 8);
244
245 if (info->level_count == 1)
246 return estimated_size;
247 else
248 return estimated_size * 4 / 3;
249 }
250
251 static enum gen_surface_tiling
252 image_get_gen6_tiling(const struct ilo_dev *dev,
253 const struct ilo_image_info *info,
254 uint8_t valid_tilings)
255 {
256 ILO_DEV_ASSERT(dev, 6, 8);
257
258 switch (valid_tilings) {
259 case IMAGE_TILING_NONE:
260 return GEN6_TILING_NONE;
261 case IMAGE_TILING_X:
262 return GEN6_TILING_X;
263 case IMAGE_TILING_Y:
264 return GEN6_TILING_Y;
265 case IMAGE_TILING_W:
266 return GEN8_TILING_W;
267 default:
268 break;
269 }
270
271 /*
272 * X-tiling has the property that vertically adjacent pixels are usually in
273 * the same page. When the image size is less than a page, the image
274 * height is 1, or when the image is not accessed in blocks, there is no
275 * reason to tile.
276 *
277 * Y-tiling is similar, where vertically adjacent pixels are usually in the
278 * same cacheline.
279 */
280 if (valid_tilings & IMAGE_TILING_NONE) {
281 const uint64_t estimated_size =
282 image_get_gen6_estimated_size(dev, info);
283
284 if (info->height == 1 || !(info->bind_surface_sampler ||
285 info->bind_surface_dp_render ||
286 info->bind_surface_dp_typed))
287 return GEN6_TILING_NONE;
288
289 if (estimated_size <= 64 || (info->prefer_linear_threshold &&
290 estimated_size > info->prefer_linear_threshold))
291 return GEN6_TILING_NONE;
292
293 if (estimated_size <= 2048)
294 valid_tilings &= ~IMAGE_TILING_X;
295 }
296
297 return (valid_tilings & IMAGE_TILING_Y) ? GEN6_TILING_Y :
298 (valid_tilings & IMAGE_TILING_X) ? GEN6_TILING_X :
299 GEN6_TILING_NONE;
300 }
301
302 static bool
303 image_get_gen6_hiz_enable(const struct ilo_dev *dev,
304 const struct ilo_image_info *info)
305 {
306 ILO_DEV_ASSERT(dev, 6, 8);
307
308 /* depth buffer? */
309 if (!info->bind_zs ||
310 info->format == GEN6_FORMAT_R8_UINT ||
311 info->interleaved_stencil)
312 return false;
313
314 /* we want to be able to force 8x4 alignments */
315 if (info->type == GEN6_SURFTYPE_1D)
316 return false;
317
318 if (info->aux_disable)
319 return false;
320
321 if (ilo_debug & ILO_DEBUG_NOHIZ)
322 return false;
323
324 return true;
325 }
326
327 static bool
328 image_get_gen7_mcs_enable(const struct ilo_dev *dev,
329 const struct ilo_image_info *info,
330 enum gen_surface_tiling tiling)
331 {
332 ILO_DEV_ASSERT(dev, 7, 8);
333
334 if (!info->bind_surface_sampler && !info->bind_surface_dp_render)
335 return false;
336
337 /*
338 * From the Ivy Bridge PRM, volume 4 part 1, page 77:
339 *
340 * "For Render Target and Sampling Engine Surfaces:If the surface is
341 * multisampled (Number of Multisamples any value other than
342 * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
343 *
344 * "This field must be set to 0 for all SINT MSRTs when all RT channels
345 * are not written"
346 */
347 if (info->sample_count > 1) {
348 if (ilo_dev_gen(dev) < ILO_GEN(8))
349 assert(!info->is_integer);
350 return true;
351 }
352
353 if (info->aux_disable)
354 return false;
355
356 /*
357 * From the Ivy Bridge PRM, volume 2 part 1, page 326:
358 *
359 * "When MCS is buffer is used for color clear of non-multisampler
360 * render target, the following restrictions apply.
361 * - Support is limited to tiled render targets.
362 * - Support is for non-mip-mapped and non-array surface types only.
363 * - Clear is supported only on the full RT; i.e., no partial clear or
364 * overlapping clears.
365 * - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
366 * 64bpp and 128bpp.
367 * ..."
368 *
369 * How about SURFTYPE_3D?
370 */
371 if (!info->bind_surface_dp_render ||
372 tiling == GEN6_TILING_NONE ||
373 info->level_count > 1 ||
374 info->array_size > 1)
375 return false;
376
377 switch (info->block_size) {
378 case 4:
379 case 8:
380 case 16:
381 return true;
382 default:
383 return false;
384 }
385 }
386
387 static void
388 image_get_gen6_alignments(const struct ilo_dev *dev,
389 const struct ilo_image_info *info,
390 int *align_i, int *align_j)
391 {
392 ILO_DEV_ASSERT(dev, 6, 6);
393
394 /*
395 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
396 *
397 * "surface format align_i align_j
398 * YUV 4:2:2 formats 4 *see below
399 * BC1-5 4 4
400 * FXT1 8 4
401 * all other formats 4 *see below"
402 *
403 * "- align_j = 4 for any depth buffer
404 * - align_j = 2 for separate stencil buffer
405 * - align_j = 4 for any render target surface is multisampled (4x)
406 * - align_j = 4 for any render target surface with Surface Vertical
407 * Alignment = VALIGN_4
408 * - align_j = 2 for any render target surface with Surface Vertical
409 * Alignment = VALIGN_2
410 * - align_j = 2 for all other render target surface
411 * - align_j = 2 for any sampling engine surface with Surface Vertical
412 * Alignment = VALIGN_2
413 * - align_j = 4 for any sampling engine surface with Surface Vertical
414 * Alignment = VALIGN_4"
415 *
416 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
417 *
418 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
419 * the Surface Format is 96 bits per element (BPE)."
420 *
421 * They can be rephrased as
422 *
423 * align_i align_j
424 * compressed formats block width block height
425 * GEN6_FORMAT_R8_UINT 4 2
426 * other depth/stencil formats 4 4
427 * 4x multisampled 4 4
428 * bpp 96 4 2
429 * others 4 2 or 4
430 */
431
432 *align_i = (info->compressed) ? info->block_width : 4;
433 if (info->compressed) {
434 *align_j = info->block_height;
435 } else if (info->bind_zs) {
436 *align_j = (info->format == GEN6_FORMAT_R8_UINT) ? 2 : 4;
437 } else {
438 *align_j = (info->sample_count > 1 || info->block_size != 12) ? 4 : 2;
439 }
440 }
441
442 static void
443 image_get_gen7_alignments(const struct ilo_dev *dev,
444 const struct ilo_image_info *info,
445 enum gen_surface_tiling tiling,
446 int *align_i, int *align_j)
447 {
448 int i, j;
449
450 ILO_DEV_ASSERT(dev, 7, 8);
451
452 /*
453 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
454 *
455 * "surface defined by surface format align_i align_j
456 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
457 * not D16_UNORM 4 4
458 * 3DSTATE_STENCIL_BUFFER N/A 8 8
459 * SURFACE_STATE BC*, ETC*, EAC* 4 4
460 * FXT1 8 4
461 * all others (set by SURFACE_STATE)"
462 *
463 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
464 *
465 * "- This field (Surface Vertical Aligment) is intended to be set to
466 * VALIGN_4 if the surface was rendered as a depth buffer, for a
467 * multisampled (4x) render target, or for a multisampled (8x)
468 * render target, since these surfaces support only alignment of 4.
469 * - Use of VALIGN_4 for other surfaces is supported, but uses more
470 * memory.
471 * - This field must be set to VALIGN_4 for all tiled Y Render Target
472 * surfaces.
473 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
474 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
475 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
476 * must be set to VALIGN_4."
477 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
478 *
479 * "- This field (Surface Horizontal Aligment) is intended to be set to
480 * HALIGN_8 only if the surface was rendered as a depth buffer with
481 * Z16 format or a stencil buffer, since these surfaces support only
482 * alignment of 8.
483 * - Use of HALIGN_8 for other surfaces is supported, but uses more
484 * memory.
485 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
486 * - This field must be set to HALIGN_8 if the Surface Format is
487 * FXT1."
488 *
489 * They can be rephrased as
490 *
491 * align_i align_j
492 * compressed formats block width block height
493 * GEN6_FORMAT_R16_UNORM 8 4
494 * GEN6_FORMAT_R8_UINT 8 8
495 * other depth/stencil formats 4 4
496 * 2x or 4x multisampled 4 or 8 4
497 * tiled Y 4 or 8 4 (if rt)
498 * GEN6_FORMAT_R32G32B32_FLOAT 4 or 8 2
499 * others 4 or 8 2 or 4
500 */
501 if (info->compressed) {
502 i = info->block_width;
503 j = info->block_height;
504 } else if (info->bind_zs) {
505 switch (info->format) {
506 case GEN6_FORMAT_R16_UNORM:
507 i = 8;
508 j = 4;
509 break;
510 case GEN6_FORMAT_R8_UINT:
511 i = 8;
512 j = 8;
513 break;
514 default:
515 i = 4;
516 j = 4;
517 break;
518 }
519 } else {
520 const bool valign_4 =
521 (info->sample_count > 1 || ilo_dev_gen(dev) >= ILO_GEN(8) ||
522 (tiling == GEN6_TILING_Y && info->bind_surface_dp_render));
523
524 if (ilo_dev_gen(dev) < ILO_GEN(8) && valign_4)
525 assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT);
526
527 i = 4;
528 j = (valign_4) ? 4 : 2;
529 }
530
531 *align_i = i;
532 *align_j = j;
533 }
534
535 static bool
536 image_init_gen6_hardware_layout(const struct ilo_dev *dev,
537 const struct ilo_image_info *info,
538 struct ilo_image_layout *layout)
539 {
540 ILO_DEV_ASSERT(dev, 6, 8);
541
542 if (ilo_dev_gen(dev) >= ILO_GEN(7))
543 layout->walk = image_get_gen7_walk(dev, info);
544 else
545 layout->walk = image_get_gen6_walk(dev, info);
546
547 layout->interleaved_samples =
548 image_get_gen6_interleaved_samples(dev, info);
549
550 layout->valid_tilings = image_get_gen6_valid_tilings(dev, info);
551 if (!layout->valid_tilings)
552 return false;
553
554 layout->tiling = image_get_gen6_tiling(dev, info, layout->valid_tilings);
555
556 if (image_get_gen6_hiz_enable(dev, info))
557 layout->aux = ILO_IMAGE_AUX_HIZ;
558 else if (ilo_dev_gen(dev) >= ILO_GEN(7) &&
559 image_get_gen7_mcs_enable(dev, info, layout->tiling))
560 layout->aux = ILO_IMAGE_AUX_MCS;
561 else
562 layout->aux = ILO_IMAGE_AUX_NONE;
563
564 if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
565 image_get_gen7_alignments(dev, info, layout->tiling,
566 &layout->align_i, &layout->align_j);
567 } else {
568 image_get_gen6_alignments(dev, info,
569 &layout->align_i, &layout->align_j);
570 }
571
572 return true;
573 }
574
575 static bool
576 image_init_gen6_transfer_layout(const struct ilo_dev *dev,
577 const struct ilo_image_info *info,
578 struct ilo_image_layout *layout)
579 {
580 ILO_DEV_ASSERT(dev, 6, 8);
581
582 /* we can define our own layout to save space */
583 layout->walk = ILO_IMAGE_WALK_LOD;
584 layout->interleaved_samples = false;
585 layout->valid_tilings = IMAGE_TILING_NONE;
586 layout->tiling = GEN6_TILING_NONE;
587 layout->aux = ILO_IMAGE_AUX_NONE;
588 layout->align_i = info->block_width;
589 layout->align_j = info->block_height;
590
591 return true;
592 }
593
594 static void
595 image_get_gen6_slice_size(const struct ilo_dev *dev,
596 const struct ilo_image_info *info,
597 const struct ilo_image_layout *layout,
598 uint8_t level,
599 int *width, int *height)
600 {
601 int w, h;
602
603 ILO_DEV_ASSERT(dev, 6, 8);
604
605 w = u_minify(info->width, level);
606 h = u_minify(info->height, level);
607
608 /*
609 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
610 *
611 * "The dimensions of the mip maps are first determined by applying the
612 * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
613 * if necessary, they are padded out to compression block boundaries."
614 */
615 w = align(w, info->block_width);
616 h = align(h, info->block_height);
617
618 /*
619 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
620 *
621 * "If the surface is multisampled (4x), these values must be adjusted
622 * as follows before proceeding:
623 *
624 * W_L = ceiling(W_L / 2) * 4
625 * H_L = ceiling(H_L / 2) * 4"
626 *
627 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
628 *
629 * "If the surface is multisampled and it is a depth or stencil surface
630 * or Multisampled Surface StorageFormat in SURFACE_STATE is
631 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
632 * proceeding:
633 *
634 * #samples W_L = H_L =
635 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
636 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
637 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
638 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
639 *
640 * For interleaved samples (4x), where pixels
641 *
642 * (x, y ) (x+1, y )
643 * (x, y+1) (x+1, y+1)
644 *
645 * would be is occupied by
646 *
647 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
648 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
649 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
650 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
651 *
652 * Thus the need to
653 *
654 * w = align(w, 2) * 2;
655 * y = align(y, 2) * 2;
656 */
657 if (layout->interleaved_samples) {
658 switch (info->sample_count) {
659 case 1:
660 break;
661 case 2:
662 w = align(w, 2) * 2;
663 break;
664 case 4:
665 w = align(w, 2) * 2;
666 h = align(h, 2) * 2;
667 break;
668 case 8:
669 w = align(w, 2) * 4;
670 h = align(h, 2) * 2;
671 break;
672 case 16:
673 w = align(w, 2) * 4;
674 h = align(h, 2) * 4;
675 break;
676 default:
677 assert(!"unsupported sample count");
678 break;
679 }
680 }
681
682 /*
683 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
684 *
685 * "For separate stencil buffer, the width must be mutiplied by 2 and
686 * height divided by 2..."
687 *
688 * To make things easier (for transfer), we will just double the stencil
689 * stride in 3DSTATE_STENCIL_BUFFER.
690 */
691 w = align(w, layout->align_i);
692 h = align(h, layout->align_j);
693
694 *width = w;
695 *height = h;
696 }
697
698 static int
699 image_get_gen6_layer_count(const struct ilo_dev *dev,
700 const struct ilo_image_info *info,
701 const struct ilo_image_layout *layout)
702 {
703 int count = info->array_size;
704
705 ILO_DEV_ASSERT(dev, 6, 8);
706
707 /* samples of the same index are stored in a layer */
708 if (!layout->interleaved_samples)
709 count *= info->sample_count;
710
711 return count;
712 }
713
714 static void
715 image_get_gen6_walk_layer_heights(const struct ilo_dev *dev,
716 const struct ilo_image_info *info,
717 struct ilo_image_layout *layout)
718 {
719 ILO_DEV_ASSERT(dev, 6, 8);
720
721 layout->walk_layer_h0 = layout->lods[0].slice_height;
722
723 if (info->level_count > 1) {
724 layout->walk_layer_h1 = layout->lods[1].slice_height;
725 } else {
726 int dummy;
727 image_get_gen6_slice_size(dev, info, layout, 1,
728 &dummy, &layout->walk_layer_h1);
729 }
730
731 if (image_get_gen6_layer_count(dev, info, layout) == 1) {
732 layout->walk_layer_height = 0;
733 return;
734 }
735
736 /*
737 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
738 *
739 * "The following equation is used for surface formats other than
740 * compressed textures:
741 *
742 * QPitch = (h0 + h1 + 11j)"
743 *
744 * "The equation for compressed textures (BC* and FXT1 surface formats)
745 * follows:
746 *
747 * QPitch = (h0 + h1 + 11j) / 4"
748 *
749 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
750 * value calculated in the equation above, for every other odd Surface
751 * Height starting from 1 i.e. 1,5,9,13"
752 *
753 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
754 *
755 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
756 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
757 *
758 * QPitch = (h0 + h1 + 12j)
759 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
760 *
761 * (There are many typos or missing words here...)"
762 *
763 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
764 * the base address. The PRM divides QPitch by 4 for compressed formats
765 * because the block height for those formats are 4, and it wants QPitch to
766 * mean the number of memory rows, as opposed to texel rows, between
767 * slices. Since we use texel rows everywhere, we do not need to divide
768 * QPitch by 4.
769 */
770 layout->walk_layer_height = layout->walk_layer_h0 + layout->walk_layer_h1 +
771 ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * layout->align_j;
772
773 if (ilo_dev_gen(dev) == ILO_GEN(6) && info->sample_count > 1 &&
774 info->height % 4 == 1)
775 layout->walk_layer_height += 4;
776 }
777
778 static void
779 image_get_gen6_monolithic_size(const struct ilo_dev *dev,
780 const struct ilo_image_info *info,
781 struct ilo_image_layout *layout,
782 int max_x, int max_y)
783 {
784 int align_w = 1, align_h = 1, pad_h = 0;
785
786 ILO_DEV_ASSERT(dev, 6, 8);
787
788 /*
789 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
790 *
791 * "To determine the necessary padding on the bottom and right side of
792 * the surface, refer to the table in Section 7.18.3.4 for the i and j
793 * parameters for the surface format in use. The surface must then be
794 * extended to the next multiple of the alignment unit size in each
795 * dimension, and all texels contained in this extended surface must
796 * have valid GTT entries."
797 *
798 * "For cube surfaces, an additional two rows of padding are required
799 * at the bottom of the surface. This must be ensured regardless of
800 * whether the surface is stored tiled or linear. This is due to the
801 * potential rotation of cache line orientation from memory to cache."
802 *
803 * "For compressed textures (BC* and FXT1 surface formats), padding at
804 * the bottom of the surface is to an even compressed row, which is
805 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
806 * purposes, these surfaces behave as if j = 8 only for surface
807 * padding purposes. The value of 4 for j still applies for mip level
808 * alignment and QPitch calculation."
809 */
810 if (info->bind_surface_sampler) {
811 align_w = MAX2(align_w, layout->align_i);
812 align_h = MAX2(align_h, layout->align_j);
813
814 if (info->type == GEN6_SURFTYPE_CUBE)
815 pad_h += 2;
816
817 if (info->compressed)
818 align_h = MAX2(align_h, layout->align_j * 2);
819 }
820
821 /*
822 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
823 *
824 * "If the surface contains an odd number of rows of data, a final row
825 * below the surface must be allocated."
826 */
827 if (info->bind_surface_dp_render)
828 align_h = MAX2(align_h, 2);
829
830 /*
831 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. Pad to allow HiZ
832 * for unaligned non-mipmapped and non-array images.
833 */
834 if (layout->aux == ILO_IMAGE_AUX_HIZ &&
835 info->level_count == 1 && info->array_size == 1 && info->depth == 1) {
836 align_w = MAX2(align_w, 8);
837 align_h = MAX2(align_h, 4);
838 }
839
840 layout->monolithic_width = align(max_x, align_w);
841 layout->monolithic_height = align(max_y + pad_h, align_h);
842 }
843
844 static void
845 image_get_gen6_lods(const struct ilo_dev *dev,
846 const struct ilo_image_info *info,
847 struct ilo_image_layout *layout)
848 {
849 const int layer_count = image_get_gen6_layer_count(dev, info, layout);
850 int cur_x, cur_y, max_x, max_y;
851 uint8_t lv;
852
853 ILO_DEV_ASSERT(dev, 6, 8);
854
855 cur_x = 0;
856 cur_y = 0;
857 max_x = 0;
858 max_y = 0;
859 for (lv = 0; lv < info->level_count; lv++) {
860 int slice_w, slice_h, lod_w, lod_h;
861
862 image_get_gen6_slice_size(dev, info, layout, lv, &slice_w, &slice_h);
863
864 layout->lods[lv].x = cur_x;
865 layout->lods[lv].y = cur_y;
866 layout->lods[lv].slice_width = slice_w;
867 layout->lods[lv].slice_height = slice_h;
868
869 switch (layout->walk) {
870 case ILO_IMAGE_WALK_LAYER:
871 lod_w = slice_w;
872 lod_h = slice_h;
873
874 /* MIPLAYOUT_BELOW */
875 if (lv == 1)
876 cur_x += lod_w;
877 else
878 cur_y += lod_h;
879 break;
880 case ILO_IMAGE_WALK_LOD:
881 lod_w = slice_w;
882 lod_h = slice_h * layer_count;
883
884 if (lv == 1)
885 cur_x += lod_w;
886 else
887 cur_y += lod_h;
888
889 /* every LOD begins at tile boundaries */
890 if (info->level_count > 1) {
891 assert(info->format == GEN6_FORMAT_R8_UINT);
892 cur_x = align(cur_x, 64);
893 cur_y = align(cur_y, 64);
894 }
895 break;
896 case ILO_IMAGE_WALK_3D:
897 {
898 const int slice_count = u_minify(info->depth, lv);
899 const int slice_count_per_row = 1 << lv;
900 const int row_count =
901 (slice_count + slice_count_per_row - 1) / slice_count_per_row;
902
903 lod_w = slice_w * slice_count_per_row;
904 lod_h = slice_h * row_count;
905 }
906
907 cur_y += lod_h;
908 break;
909 default:
910 assert(!"unknown walk type");
911 lod_w = 0;
912 lod_h = 0;
913 break;
914 }
915
916 if (max_x < layout->lods[lv].x + lod_w)
917 max_x = layout->lods[lv].x + lod_w;
918 if (max_y < layout->lods[lv].y + lod_h)
919 max_y = layout->lods[lv].y + lod_h;
920 }
921
922 if (layout->walk == ILO_IMAGE_WALK_LAYER) {
923 image_get_gen6_walk_layer_heights(dev, info, layout);
924 if (layer_count > 1)
925 max_y += layout->walk_layer_height * (layer_count - 1);
926 } else {
927 layout->walk_layer_h0 = 0;
928 layout->walk_layer_h1 = 0;
929 layout->walk_layer_height = 0;
930 }
931
932 image_get_gen6_monolithic_size(dev, info, layout, max_x, max_y);
933 }
934
935 static bool
936 image_bind_gpu(const struct ilo_image_info *info)
937 {
938 return (info->bind_surface_sampler ||
939 info->bind_surface_dp_render ||
940 info->bind_surface_dp_typed ||
941 info->bind_zs ||
942 info->bind_scanout ||
943 info->bind_cursor);
944 }
945
946 static bool
947 image_validate_gen6(const struct ilo_dev *dev,
948 const struct ilo_image_info *info)
949 {
950 ILO_DEV_ASSERT(dev, 6, 8);
951
952 /*
953 * From the Ivy Bridge PRM, volume 2 part 1, page 314:
954 *
955 * "The separate stencil buffer is always enabled, thus the field in
956 * 3DSTATE_DEPTH_BUFFER to explicitly enable the separate stencil
957 * buffer has been removed Surface formats with interleaved depth and
958 * stencil are no longer supported"
959 */
960 if (ilo_dev_gen(dev) >= ILO_GEN(7) && info->bind_zs)
961 assert(!info->interleaved_stencil);
962
963 return true;
964 }
965
966 static bool
967 image_get_gen6_layout(const struct ilo_dev *dev,
968 const struct ilo_image_info *info,
969 struct ilo_image_layout *layout)
970 {
971 ILO_DEV_ASSERT(dev, 6, 8);
972
973 if (!image_validate_gen6(dev, info))
974 return false;
975
976 if (image_bind_gpu(info) || info->level_count > 1) {
977 if (!image_init_gen6_hardware_layout(dev, info, layout))
978 return false;
979 } else {
980 if (!image_init_gen6_transfer_layout(dev, info, layout))
981 return false;
982 }
983
984 /*
985 * the fact that align i and j are multiples of block width and height
986 * respectively is what makes the size of the bo a multiple of the block
987 * size, slices start at block boundaries, and many of the computations
988 * work.
989 */
990 assert(layout->align_i % info->block_width == 0);
991 assert(layout->align_j % info->block_height == 0);
992
993 /* make sure align() works */
994 assert(util_is_power_of_two(layout->align_i) &&
995 util_is_power_of_two(layout->align_j));
996 assert(util_is_power_of_two(info->block_width) &&
997 util_is_power_of_two(info->block_height));
998
999 image_get_gen6_lods(dev, info, layout);
1000
1001 assert(layout->walk_layer_height % info->block_height == 0);
1002 assert(layout->monolithic_width % info->block_width == 0);
1003 assert(layout->monolithic_height % info->block_height == 0);
1004
1005 return true;
1006 }
1007
1008 static bool
1009 image_set_gen6_bo_size(struct ilo_image *img,
1010 const struct ilo_dev *dev,
1011 const struct ilo_image_info *info,
1012 const struct ilo_image_layout *layout)
1013 {
1014 int stride, height;
1015 int align_w, align_h;
1016
1017 ILO_DEV_ASSERT(dev, 6, 8);
1018
1019 stride = (layout->monolithic_width / info->block_width) * info->block_size;
1020 height = layout->monolithic_height / info->block_height;
1021
1022 /*
1023 * From the Haswell PRM, volume 5, page 163:
1024 *
1025 * "For linear surfaces, additional padding of 64 bytes is required
1026 * at the bottom of the surface. This is in addition to the padding
1027 * required above."
1028 */
1029 if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && info->bind_surface_sampler &&
1030 layout->tiling == GEN6_TILING_NONE)
1031 height += (64 + stride - 1) / stride;
1032
1033 /*
1034 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
1035 *
1036 * "- For linear render target surfaces, the pitch must be a multiple
1037 * of the element size for non-YUV surface formats. Pitch must be a
1038 * multiple of 2 * element size for YUV surface formats.
1039 *
1040 * - For other linear surfaces, the pitch can be any multiple of
1041 * bytes.
1042 * - For tiled surfaces, the pitch must be a multiple of the tile
1043 * width."
1044 *
1045 * Different requirements may exist when the image is used in different
1046 * places, but our alignments here should be good enough that we do not
1047 * need to check info->bind_x.
1048 */
1049 switch (layout->tiling) {
1050 case GEN6_TILING_X:
1051 align_w = 512;
1052 align_h = 8;
1053 break;
1054 case GEN6_TILING_Y:
1055 align_w = 128;
1056 align_h = 32;
1057 break;
1058 case GEN8_TILING_W:
1059 /*
1060 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
1061 *
1062 * "A 4KB tile is subdivided into 8-high by 8-wide array of
1063 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
1064 * bytes."
1065 */
1066 align_w = 64;
1067 align_h = 64;
1068 break;
1069 default:
1070 assert(layout->tiling == GEN6_TILING_NONE);
1071 /* some good enough values */
1072 align_w = 64;
1073 align_h = 2;
1074 break;
1075 }
1076
1077 if (info->force_bo_stride) {
1078 if (info->force_bo_stride % align_w || info->force_bo_stride < stride)
1079 return false;
1080
1081 img->bo_stride = info->force_bo_stride;
1082 } else {
1083 img->bo_stride = align(stride, align_w);
1084 }
1085
1086 img->bo_height = align(height, align_h);
1087
1088 return true;
1089 }
1090
1091 static bool
1092 image_set_gen6_hiz(struct ilo_image *img,
1093 const struct ilo_dev *dev,
1094 const struct ilo_image_info *info,
1095 const struct ilo_image_layout *layout)
1096 {
1097 const int hz_align_j = 8;
1098 enum ilo_image_walk_type hz_walk;
1099 int hz_width, hz_height;
1100 int hz_clear_w, hz_clear_h;
1101 uint8_t lv;
1102
1103 ILO_DEV_ASSERT(dev, 6, 8);
1104
1105 assert(layout->aux == ILO_IMAGE_AUX_HIZ);
1106
1107 assert(layout->walk == ILO_IMAGE_WALK_LAYER ||
1108 layout->walk == ILO_IMAGE_WALK_3D);
1109
1110 /*
1111 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
1112 *
1113 * "The hierarchical depth buffer does not support the LOD field, it is
1114 * assumed by hardware to be zero. A separate hierarachical depth
1115 * buffer is required for each LOD used, and the corresponding
1116 * buffer's state delivered to hardware each time a new depth buffer
1117 * state with modified LOD is delivered."
1118 *
1119 * We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD.
1120 */
1121 if (ilo_dev_gen(dev) >= ILO_GEN(7))
1122 hz_walk = layout->walk;
1123 else
1124 hz_walk = ILO_IMAGE_WALK_LOD;
1125
1126 /*
1127 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1128 * PRM, volume 2 part 1, page 312-313.
1129 *
1130 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1131 * memory row.
1132 */
1133 switch (hz_walk) {
1134 case ILO_IMAGE_WALK_LAYER:
1135 {
1136 const int h0 = align(layout->walk_layer_h0, hz_align_j);
1137 const int h1 = align(layout->walk_layer_h1, hz_align_j);
1138 const int htail =
1139 ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
1140 const int hz_qpitch = h0 + h1 + htail;
1141
1142 hz_width = align(layout->lods[0].slice_width, 16);
1143
1144 hz_height = hz_qpitch * info->array_size / 2;
1145 if (ilo_dev_gen(dev) >= ILO_GEN(7))
1146 hz_height = align(hz_height, 8);
1147
1148 img->aux.walk_layer_height = hz_qpitch;
1149 }
1150 break;
1151 case ILO_IMAGE_WALK_LOD:
1152 {
1153 int lod_tx[ILO_IMAGE_MAX_LEVEL_COUNT];
1154 int lod_ty[ILO_IMAGE_MAX_LEVEL_COUNT];
1155 int cur_tx, cur_ty;
1156
1157 /* figure out the tile offsets of LODs */
1158 hz_width = 0;
1159 hz_height = 0;
1160 cur_tx = 0;
1161 cur_ty = 0;
1162 for (lv = 0; lv < info->level_count; lv++) {
1163 int tw, th;
1164
1165 lod_tx[lv] = cur_tx;
1166 lod_ty[lv] = cur_ty;
1167
1168 tw = align(layout->lods[lv].slice_width, 16);
1169 th = align(layout->lods[lv].slice_height, hz_align_j) *
1170 info->array_size / 2;
1171 /* convert to Y-tiles */
1172 tw = (tw + 127) / 128;
1173 th = (th + 31) / 32;
1174
1175 if (hz_width < cur_tx + tw)
1176 hz_width = cur_tx + tw;
1177 if (hz_height < cur_ty + th)
1178 hz_height = cur_ty + th;
1179
1180 if (lv == 1)
1181 cur_tx += tw;
1182 else
1183 cur_ty += th;
1184 }
1185
1186 /* convert tile offsets to memory offsets */
1187 for (lv = 0; lv < info->level_count; lv++) {
1188 img->aux.walk_lod_offsets[lv] =
1189 (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
1190 }
1191
1192 hz_width *= 128;
1193 hz_height *= 32;
1194 }
1195 break;
1196 case ILO_IMAGE_WALK_3D:
1197 hz_width = align(layout->lods[0].slice_width, 16);
1198
1199 hz_height = 0;
1200 for (lv = 0; lv < info->level_count; lv++) {
1201 const int h = align(layout->lods[lv].slice_height, hz_align_j);
1202 /* according to the formula, slices are packed together vertically */
1203 hz_height += h * u_minify(info->depth, lv);
1204 }
1205 hz_height /= 2;
1206 break;
1207 default:
1208 assert(!"unknown HiZ walk");
1209 hz_width = 0;
1210 hz_height = 0;
1211 break;
1212 }
1213
1214 /*
1215 * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
1216 * Experiments on Haswell show that aligning the RECTLIST primitive and
1217 * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be
1218 * aligned.
1219 */
1220 hz_clear_w = 8;
1221 hz_clear_h = 4;
1222 switch (info->sample_count) {
1223 case 1:
1224 default:
1225 break;
1226 case 2:
1227 hz_clear_w /= 2;
1228 break;
1229 case 4:
1230 hz_clear_w /= 2;
1231 hz_clear_h /= 2;
1232 break;
1233 case 8:
1234 hz_clear_w /= 4;
1235 hz_clear_h /= 2;
1236 break;
1237 case 16:
1238 hz_clear_w /= 4;
1239 hz_clear_h /= 4;
1240 break;
1241 }
1242
1243 for (lv = 0; lv < info->level_count; lv++) {
1244 if (u_minify(info->width, lv) % hz_clear_w ||
1245 u_minify(info->height, lv) % hz_clear_h)
1246 break;
1247 img->aux.enables |= 1 << lv;
1248 }
1249
1250 /* we padded to allow this in image_get_gen6_monolithic_size() */
1251 if (info->level_count == 1 && info->array_size == 1 && info->depth == 1)
1252 img->aux.enables |= 0x1;
1253
1254 /* align to Y-tile */
1255 img->aux.bo_stride = align(hz_width, 128);
1256 img->aux.bo_height = align(hz_height, 32);
1257
1258 return true;
1259 }
1260
1261 static bool
1262 image_set_gen7_mcs(struct ilo_image *img,
1263 const struct ilo_dev *dev,
1264 const struct ilo_image_info *info,
1265 const struct ilo_image_layout *layout)
1266 {
1267 int mcs_width, mcs_height, mcs_cpp;
1268 int downscale_x, downscale_y;
1269
1270 ILO_DEV_ASSERT(dev, 7, 8);
1271
1272 assert(layout->aux == ILO_IMAGE_AUX_MCS);
1273
1274 if (info->sample_count > 1) {
1275 /*
1276 * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1277 * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
1278 * need of scale down could be that the clear rectangle is used to clear
1279 * the MCS instead of the RT.
1280 *
1281 * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The
1282 * 2x2 factor could come from that the hardware writes 128 bits (an
1283 * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1284 * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the
1285 * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1286 * pixel block in the RT.
1287 */
1288 switch (info->sample_count) {
1289 case 2:
1290 case 4:
1291 downscale_x = 8;
1292 downscale_y = 2;
1293 mcs_cpp = 1;
1294 break;
1295 case 8:
1296 downscale_x = 2;
1297 downscale_y = 2;
1298 mcs_cpp = 4;
1299 break;
1300 case 16:
1301 downscale_x = 2;
1302 downscale_y = 1;
1303 mcs_cpp = 8;
1304 break;
1305 default:
1306 assert(!"unsupported sample count");
1307 return false;
1308 break;
1309 }
1310
1311 /*
1312 * It also appears that the 2x2 subspans generated by the scaled-down
1313 * clear rectangle cannot be masked. The scale-down clear rectangle
1314 * thus must be aligned to 2x2, and we need to pad.
1315 */
1316 mcs_width = align(info->width, downscale_x * 2);
1317 mcs_height = align(info->height, downscale_y * 2);
1318 } else {
1319 /*
1320 * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1321 *
1322 * " Pixels Lines
1323 * TiledY RT CL
1324 * bpp
1325 * 32 8 4
1326 * 64 4 4
1327 * 128 2 4
1328 *
1329 * TiledX RT CL
1330 * bpp
1331 * 32 16 2
1332 * 64 8 2
1333 * 128 4 2"
1334 *
1335 * This table and the two following tables define the RT alignments, the
1336 * clear rectangle alignments, and the clear rectangle scale factors.
1337 * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1338 * that the clear rectangle alignments are 16x32 blocks, and the clear
1339 * rectangle scale factors are 8x16 blocks.
1340 *
1341 * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1342 * RT. Similar to the MSAA cases, we can argue that an OWord maps to
1343 * 8x16 blocks.
1344 *
1345 * One problem with this reasoning is that a Y-tile in MCS has 8x32
1346 * OWords and maps to 64x512 128-byte blocks. This differs from i965,
1347 * which says that a Y-tile maps to 128x256 blocks (\see
1348 * intel_get_non_msrt_mcs_alignment). It does not really change
1349 * anything except for the size of the allocated MCS. Let's see if we
1350 * hit out-of-bound access.
1351 */
1352 switch (layout->tiling) {
1353 case GEN6_TILING_X:
1354 downscale_x = 64 / info->block_size;
1355 downscale_y = 2;
1356 break;
1357 case GEN6_TILING_Y:
1358 downscale_x = 32 / info->block_size;
1359 downscale_y = 4;
1360 break;
1361 default:
1362 assert(!"unsupported tiling mode");
1363 return false;
1364 break;
1365 }
1366
1367 downscale_x *= 8;
1368 downscale_y *= 16;
1369
1370 /*
1371 * From the Haswell PRM, volume 7, page 652:
1372 *
1373 * "Clear rectangle must be aligned to two times the number of
1374 * pixels in the table shown below due to 16X16 hashing across the
1375 * slice."
1376 *
1377 * The scaled-down clear rectangle must be aligned to 4x4 instead of
1378 * 2x2, and we need to pad.
1379 */
1380 mcs_width = align(info->width, downscale_x * 4) / downscale_x;
1381 mcs_height = align(info->height, downscale_y * 4) / downscale_y;
1382 mcs_cpp = 16; /* an OWord */
1383 }
1384
1385 img->aux.enables = (1 << info->level_count) - 1;
1386 /* align to Y-tile */
1387 img->aux.bo_stride = align(mcs_width * mcs_cpp, 128);
1388 img->aux.bo_height = align(mcs_height, 32);
1389
1390 return true;
1391 }
1392
1393 bool
1394 ilo_image_init(struct ilo_image *img,
1395 const struct ilo_dev *dev,
1396 const struct ilo_image_info *info)
1397 {
1398 struct ilo_image_layout layout;
1399
1400 assert(ilo_is_zeroed(img, sizeof(*img)));
1401
1402 memset(&layout, 0, sizeof(layout));
1403 layout.lods = img->lods;
1404
1405 if (!image_get_gen6_layout(dev, info, &layout))
1406 return false;
1407
1408 img->type = info->type;
1409
1410 img->format = info->format;
1411 img->block_width = info->block_width;
1412 img->block_height = info->block_height;
1413 img->block_size = info->block_size;
1414
1415 img->width0 = info->width;
1416 img->height0 = info->height;
1417 img->depth0 = info->depth;
1418 img->array_size = info->array_size;
1419 img->level_count = info->level_count;
1420 img->sample_count = info->sample_count;
1421
1422 img->walk = layout.walk;
1423 img->interleaved_samples = layout.interleaved_samples;
1424
1425 img->tiling = layout.tiling;
1426
1427 img->aux.type = layout.aux;
1428
1429 img->align_i = layout.align_i;
1430 img->align_j = layout.align_j;
1431
1432 img->walk_layer_height = layout.walk_layer_height;
1433
1434 if (!image_set_gen6_bo_size(img, dev, info, &layout))
1435 return false;
1436
1437 img->scanout = info->bind_scanout;
1438
1439 switch (layout.aux) {
1440 case ILO_IMAGE_AUX_HIZ:
1441 image_set_gen6_hiz(img, dev, info, &layout);
1442 break;
1443 case ILO_IMAGE_AUX_MCS:
1444 image_set_gen7_mcs(img, dev, info, &layout);
1445 break;
1446 default:
1447 break;
1448 }
1449
1450 return true;
1451 }