isl: Add 2D ASTC format layouts and enums
[mesa.git] / src / intel / isl / isl.c
1 /*
2 * Copyright 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27
28 #include "isl.h"
29 #include "isl_gen4.h"
30 #include "isl_gen6.h"
31 #include "isl_gen7.h"
32 #include "isl_gen8.h"
33 #include "isl_gen9.h"
34 #include "isl_priv.h"
35
36 void PRINTFLIKE(3, 4) UNUSED
37 __isl_finishme(const char *file, int line, const char *fmt, ...)
38 {
39 va_list ap;
40 char buf[512];
41
42 va_start(ap, fmt);
43 vsnprintf(buf, sizeof(buf), fmt, ap);
44 va_end(ap);
45
46 fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf);
47 }
48
49 void
50 isl_device_init(struct isl_device *dev,
51 const struct brw_device_info *info,
52 bool has_bit6_swizzling)
53 {
54 dev->info = info;
55 dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6;
56 dev->has_bit6_swizzling = has_bit6_swizzling;
57
58 /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some
59 * device properties at buildtime. Verify that the macros with the device
60 * properties chosen during runtime.
61 */
62 assert(ISL_DEV_GEN(dev) == dev->info->gen);
63 assert(ISL_DEV_USE_SEPARATE_STENCIL(dev) == dev->use_separate_stencil);
64
65 /* Did we break hiz or stencil? */
66 if (ISL_DEV_USE_SEPARATE_STENCIL(dev))
67 assert(info->has_hiz_and_separate_stencil);
68 if (info->must_use_separate_stencil)
69 assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));
70 }
71
72 /**
73 * @brief Query the set of multisamples supported by the device.
74 *
75 * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always
76 * supported.
77 */
78 isl_sample_count_mask_t ATTRIBUTE_CONST
79 isl_device_get_sample_counts(struct isl_device *dev)
80 {
81 if (ISL_DEV_GEN(dev) >= 9) {
82 return ISL_SAMPLE_COUNT_1_BIT |
83 ISL_SAMPLE_COUNT_2_BIT |
84 ISL_SAMPLE_COUNT_4_BIT |
85 ISL_SAMPLE_COUNT_8_BIT |
86 ISL_SAMPLE_COUNT_16_BIT;
87 } else if (ISL_DEV_GEN(dev) >= 8) {
88 return ISL_SAMPLE_COUNT_1_BIT |
89 ISL_SAMPLE_COUNT_2_BIT |
90 ISL_SAMPLE_COUNT_4_BIT |
91 ISL_SAMPLE_COUNT_8_BIT;
92 } else if (ISL_DEV_GEN(dev) >= 7) {
93 return ISL_SAMPLE_COUNT_1_BIT |
94 ISL_SAMPLE_COUNT_4_BIT |
95 ISL_SAMPLE_COUNT_8_BIT;
96 } else if (ISL_DEV_GEN(dev) >= 6) {
97 return ISL_SAMPLE_COUNT_1_BIT |
98 ISL_SAMPLE_COUNT_4_BIT;
99 } else {
100 return ISL_SAMPLE_COUNT_1_BIT;
101 }
102 }
103
104 /**
105 * @param[out] info is written only on success
106 */
107 bool
108 isl_tiling_get_info(const struct isl_device *dev,
109 enum isl_tiling tiling,
110 uint32_t format_block_size,
111 struct isl_tile_info *tile_info)
112 {
113 const uint32_t bs = format_block_size;
114 uint32_t width, height;
115
116 assert(bs > 0);
117
118 switch (tiling) {
119 case ISL_TILING_LINEAR:
120 width = 1;
121 height = 1;
122 break;
123
124 case ISL_TILING_X:
125 width = 1 << 9;
126 height = 1 << 3;
127 break;
128
129 case ISL_TILING_Y0:
130 width = 1 << 7;
131 height = 1 << 5;
132 break;
133
134 case ISL_TILING_W:
135 /* XXX: Should W tile be same as Y? */
136 width = 1 << 6;
137 height = 1 << 6;
138 break;
139
140 case ISL_TILING_Yf:
141 case ISL_TILING_Ys: {
142 if (ISL_DEV_GEN(dev) < 9)
143 return false;
144
145 if (!isl_is_pow2(bs))
146 return false;
147
148 bool is_Ys = tiling == ISL_TILING_Ys;
149
150 width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys));
151 height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys));
152 break;
153 }
154
155 default:
156 unreachable("not reached");
157 } /* end switch */
158
159 *tile_info = (struct isl_tile_info) {
160 .tiling = tiling,
161 .width = width,
162 .height = height,
163 .size = width * height,
164 };
165
166 return true;
167 }
168
169 void
170 isl_tiling_get_extent(const struct isl_device *dev,
171 enum isl_tiling tiling,
172 uint32_t format_block_size,
173 struct isl_extent2d *e)
174 {
175 struct isl_tile_info tile_info;
176 isl_tiling_get_info(dev, tiling, format_block_size, &tile_info);
177 *e = isl_extent2d(tile_info.width, tile_info.height);
178 }
179
180 /**
181 * @param[out] tiling is set only on success
182 */
183 bool
184 isl_surf_choose_tiling(const struct isl_device *dev,
185 const struct isl_surf_init_info *restrict info,
186 enum isl_tiling *tiling)
187 {
188 isl_tiling_flags_t tiling_flags = info->tiling_flags;
189
190 /* Filter if multiple tiling options are given */
191 if (!isl_is_pow2(tiling_flags)) {
192 if (ISL_DEV_GEN(dev) >= 7) {
193 gen7_filter_tiling(dev, info, &tiling_flags);
194 } else {
195 isl_finishme("%s: gen%u", __func__, ISL_DEV_GEN(dev));
196 gen7_filter_tiling(dev, info, &tiling_flags);
197 }
198 }
199
200 #define CHOOSE(__tiling) \
201 do { \
202 if (tiling_flags & (1u << (__tiling))) { \
203 *tiling = (__tiling); \
204 return true; \
205 } \
206 } while (0)
207
208 /* Of the tiling modes remaining, choose the one that offers the best
209 * performance.
210 */
211
212 if (info->dim == ISL_SURF_DIM_1D) {
213 /* Prefer linear for 1D surfaces because they do not benefit from
214 * tiling. To the contrary, tiling leads to wasted memory and poor
215 * memory locality due to the swizzling and alignment restrictions
216 * required in tiled surfaces.
217 */
218 CHOOSE(ISL_TILING_LINEAR);
219 }
220
221 CHOOSE(ISL_TILING_Ys);
222 CHOOSE(ISL_TILING_Yf);
223 CHOOSE(ISL_TILING_Y0);
224 CHOOSE(ISL_TILING_X);
225 CHOOSE(ISL_TILING_W);
226 CHOOSE(ISL_TILING_LINEAR);
227
228 #undef CHOOSE
229
230 /* No tiling mode accomodates the inputs. */
231 return false;
232 }
233
234 static bool
235 isl_choose_msaa_layout(const struct isl_device *dev,
236 const struct isl_surf_init_info *info,
237 enum isl_tiling tiling,
238 enum isl_msaa_layout *msaa_layout)
239 {
240 if (ISL_DEV_GEN(dev) >= 8) {
241 return gen8_choose_msaa_layout(dev, info, tiling, msaa_layout);
242 } else if (ISL_DEV_GEN(dev) >= 7) {
243 return gen7_choose_msaa_layout(dev, info, tiling, msaa_layout);
244 } else if (ISL_DEV_GEN(dev) >= 6) {
245 return gen6_choose_msaa_layout(dev, info, tiling, msaa_layout);
246 } else {
247 return gen4_choose_msaa_layout(dev, info, tiling, msaa_layout);
248 }
249 }
250
/**
 * Scale a pixel extent in place to the sample extent of an interleaved
 * multisampled surface.
 *
 * Each dimension is first aligned to 2 and then shifted left: the width by
 * ffs(samples) / 2 and the height by (ffs(samples) - 1) / 2. Either pointer
 * may be NULL to skip that dimension.
 */
static void
isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,
                                    uint32_t *width, uint32_t *height)
{
   assert(isl_is_pow2(samples));

   /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level
    * Sizes (p133):
    *
    *    If the surface is multisampled and it is a depth or stencil surface
    *    or Multisampled Surface StorageFormat in SURFACE_STATE is
    *    MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
    *    proceeding: [...]
    */
   const int first_set_bit = ffs(samples);
   const int width_shift = first_set_bit / 2;
   const int height_shift = (first_set_bit - 1) / 2;

   if (width)
      *width = isl_align(*width, 2) << width_shift;

   if (height)
      *height = isl_align(*height, 2) << height_shift;
}
270
271 static enum isl_array_pitch_span
272 isl_choose_array_pitch_span(const struct isl_device *dev,
273 const struct isl_surf_init_info *restrict info,
274 enum isl_dim_layout dim_layout,
275 const struct isl_extent4d *phys_level0_sa)
276 {
277 switch (dim_layout) {
278 case ISL_DIM_LAYOUT_GEN9_1D:
279 case ISL_DIM_LAYOUT_GEN4_2D:
280 if (ISL_DEV_GEN(dev) >= 8) {
281 /* QPitch becomes programmable in Broadwell. So choose the
282 * most compact QPitch possible in order to conserve memory.
283 *
284 * From the Broadwell PRM >> Volume 2d: Command Reference: Structures
285 * >> RENDER_SURFACE_STATE Surface QPitch (p325):
286 *
287 * - Software must ensure that this field is set to a value
288 * sufficiently large such that the array slices in the surface
289 * do not overlap. Refer to the Memory Data Formats section for
290 * information on how surfaces are stored in memory.
291 *
292 * - This field specifies the distance in rows between array
293 * slices. It is used only in the following cases:
294 *
295 * - Surface Array is enabled OR
296 * - Number of Mulitsamples is not NUMSAMPLES_1 and
297 * Multisampled Surface Storage Format set to MSFMT_MSS OR
298 * - Surface Type is SURFTYPE_CUBE
299 */
300 return ISL_ARRAY_PITCH_SPAN_COMPACT;
301 } else if (ISL_DEV_GEN(dev) >= 7) {
302 /* Note that Ivybridge introduces
303 * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the
304 * driver more control over the QPitch.
305 */
306
307 if (phys_level0_sa->array_len == 1) {
308 /* The hardware will never use the QPitch. So choose the most
309 * compact QPitch possible in order to conserve memory.
310 */
311 return ISL_ARRAY_PITCH_SPAN_COMPACT;
312 }
313
314 if (isl_surf_usage_is_depth_or_stencil(info->usage)) {
315 /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >>
316 * Section 6.18.4.7: Surface Arrays (p112):
317 *
318 * If Surface Array Spacing is set to ARYSPC_FULL (note that
319 * the depth buffer and stencil buffer have an implied value of
320 * ARYSPC_FULL):
321 */
322 return ISL_ARRAY_PITCH_SPAN_COMPACT;
323 }
324
325 if (info->levels == 1) {
326 /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing
327 * to ARYSPC_LOD0.
328 */
329 return ISL_ARRAY_PITCH_SPAN_COMPACT;
330 }
331
332 return ISL_ARRAY_PITCH_SPAN_FULL;
333 } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
334 ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
335 isl_surf_usage_is_stencil(info->usage)) {
336 /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
337 * Graphics Core >> Section 7.18.3.7: Surface Arrays:
338 *
339 * The separate stencil buffer does not support mip mapping, thus
340 * the storage for LODs other than LOD 0 is not needed.
341 */
342 assert(info->levels == 1);
343 assert(phys_level0_sa->array_len == 1);
344 return ISL_ARRAY_PITCH_SPAN_COMPACT;
345 } else {
346 if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
347 ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
348 isl_surf_usage_is_stencil(info->usage)) {
349 /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
350 * Graphics Core >> Section 7.18.3.7: Surface Arrays:
351 *
352 * The separate stencil buffer does not support mip mapping,
353 * thus the storage for LODs other than LOD 0 is not needed.
354 */
355 assert(info->levels == 1);
356 assert(phys_level0_sa->array_len == 1);
357 return ISL_ARRAY_PITCH_SPAN_COMPACT;
358 }
359
360 if (phys_level0_sa->array_len == 1) {
361 /* The hardware will never use the QPitch. So choose the most
362 * compact QPitch possible in order to conserve memory.
363 */
364 return ISL_ARRAY_PITCH_SPAN_COMPACT;
365 }
366
367 return ISL_ARRAY_PITCH_SPAN_FULL;
368 }
369
370 case ISL_DIM_LAYOUT_GEN4_3D:
371 /* The hardware will never use the QPitch. So choose the most
372 * compact QPitch possible in order to conserve memory.
373 */
374 return ISL_ARRAY_PITCH_SPAN_COMPACT;
375 }
376
377 unreachable("bad isl_dim_layout");
378 return ISL_ARRAY_PITCH_SPAN_FULL;
379 }
380
381 static void
382 isl_choose_image_alignment_el(const struct isl_device *dev,
383 const struct isl_surf_init_info *restrict info,
384 enum isl_tiling tiling,
385 enum isl_msaa_layout msaa_layout,
386 struct isl_extent3d *image_align_el)
387 {
388 if (ISL_DEV_GEN(dev) >= 9) {
389 gen9_choose_image_alignment_el(dev, info, tiling, msaa_layout,
390 image_align_el);
391 } else if (ISL_DEV_GEN(dev) >= 8) {
392 gen8_choose_image_alignment_el(dev, info, tiling, msaa_layout,
393 image_align_el);
394 } else if (ISL_DEV_GEN(dev) >= 7) {
395 gen7_choose_image_alignment_el(dev, info, tiling, msaa_layout,
396 image_align_el);
397 } else if (ISL_DEV_GEN(dev) >= 6) {
398 gen6_choose_image_alignment_el(dev, info, tiling, msaa_layout,
399 image_align_el);
400 } else {
401 gen4_choose_image_alignment_el(dev, info, tiling, msaa_layout,
402 image_align_el);
403 }
404 }
405
406 static enum isl_dim_layout
407 isl_surf_choose_dim_layout(const struct isl_device *dev,
408 enum isl_surf_dim logical_dim)
409 {
410 if (ISL_DEV_GEN(dev) >= 9) {
411 switch (logical_dim) {
412 case ISL_SURF_DIM_1D:
413 return ISL_DIM_LAYOUT_GEN9_1D;
414 case ISL_SURF_DIM_2D:
415 case ISL_SURF_DIM_3D:
416 return ISL_DIM_LAYOUT_GEN4_2D;
417 }
418 } else {
419 switch (logical_dim) {
420 case ISL_SURF_DIM_1D:
421 case ISL_SURF_DIM_2D:
422 return ISL_DIM_LAYOUT_GEN4_2D;
423 case ISL_SURF_DIM_3D:
424 return ISL_DIM_LAYOUT_GEN4_3D;
425 }
426 }
427
428 unreachable("bad isl_surf_dim");
429 return ISL_DIM_LAYOUT_GEN4_2D;
430 }
431
432 /**
433 * Calculate the physical extent of the surface's first level, in units of
434 * surface samples. The result is aligned to the format's compression block.
435 */
436 static void
437 isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
438 const struct isl_surf_init_info *restrict info,
439 enum isl_dim_layout dim_layout,
440 enum isl_tiling tiling,
441 enum isl_msaa_layout msaa_layout,
442 struct isl_extent4d *phys_level0_sa)
443 {
444 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
445
446 if (isl_format_is_yuv(info->format))
447 isl_finishme("%s:%s: YUV format", __FILE__, __func__);
448
449 switch (info->dim) {
450 case ISL_SURF_DIM_1D:
451 assert(info->height == 1);
452 assert(info->depth == 1);
453 assert(info->samples == 1);
454 assert(!isl_format_is_compressed(info->format));
455
456 switch (dim_layout) {
457 case ISL_DIM_LAYOUT_GEN4_3D:
458 unreachable("bad isl_dim_layout");
459
460 case ISL_DIM_LAYOUT_GEN9_1D:
461 case ISL_DIM_LAYOUT_GEN4_2D:
462 *phys_level0_sa = (struct isl_extent4d) {
463 .w = info->width,
464 .h = 1,
465 .d = 1,
466 .a = info->array_len,
467 };
468 break;
469 }
470 break;
471
472 case ISL_SURF_DIM_2D:
473 assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D);
474
475 if (tiling == ISL_TILING_Ys && info->samples > 1)
476 isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__);
477
478 switch (msaa_layout) {
479 case ISL_MSAA_LAYOUT_NONE:
480 assert(info->depth == 1);
481 assert(info->samples == 1);
482
483 *phys_level0_sa = (struct isl_extent4d) {
484 .w = isl_align(info->width, fmtl->bw),
485 .h = isl_align(info->height, fmtl->bh),
486 .d = 1,
487 .a = info->array_len,
488 };
489 break;
490
491 case ISL_MSAA_LAYOUT_ARRAY:
492 assert(info->depth == 1);
493 assert(info->array_len == 1);
494 assert(!isl_format_is_compressed(info->format));
495
496 *phys_level0_sa = (struct isl_extent4d) {
497 .w = info->width,
498 .h = info->height,
499 .d = 1,
500 .a = info->samples,
501 };
502 break;
503
504 case ISL_MSAA_LAYOUT_INTERLEAVED:
505 assert(info->depth == 1);
506 assert(info->array_len == 1);
507 assert(!isl_format_is_compressed(info->format));
508
509 *phys_level0_sa = (struct isl_extent4d) {
510 .w = info->width,
511 .h = info->height,
512 .d = 1,
513 .a = 1,
514 };
515
516 isl_msaa_interleaved_scale_px_to_sa(info->samples,
517 &phys_level0_sa->w,
518 &phys_level0_sa->h);
519 break;
520 }
521 break;
522
523 case ISL_SURF_DIM_3D:
524 assert(info->array_len == 1);
525 assert(info->samples == 1);
526
527 if (fmtl->bd > 1) {
528 isl_finishme("%s:%s: compression block with depth > 1",
529 __FILE__, __func__);
530 }
531
532 switch (dim_layout) {
533 case ISL_DIM_LAYOUT_GEN9_1D:
534 unreachable("bad isl_dim_layout");
535
536 case ISL_DIM_LAYOUT_GEN4_2D:
537 assert(ISL_DEV_GEN(dev) >= 9);
538
539 *phys_level0_sa = (struct isl_extent4d) {
540 .w = isl_align(info->width, fmtl->bw),
541 .h = isl_align(info->height, fmtl->bh),
542 .d = 1,
543 .a = info->depth,
544 };
545 break;
546
547 case ISL_DIM_LAYOUT_GEN4_3D:
548 assert(ISL_DEV_GEN(dev) < 9);
549 *phys_level0_sa = (struct isl_extent4d) {
550 .w = isl_align(info->width, fmtl->bw),
551 .h = isl_align(info->height, fmtl->bh),
552 .d = info->depth,
553 .a = 1,
554 };
555 break;
556 }
557 break;
558 }
559 }
560
561 /**
562 * A variant of isl_calc_phys_slice0_extent_sa() specific to
563 * ISL_DIM_LAYOUT_GEN4_2D.
564 */
565 static void
566 isl_calc_phys_slice0_extent_sa_gen4_2d(
567 const struct isl_device *dev,
568 const struct isl_surf_init_info *restrict info,
569 enum isl_msaa_layout msaa_layout,
570 const struct isl_extent3d *image_align_sa,
571 const struct isl_extent4d *phys_level0_sa,
572 struct isl_extent2d *phys_slice0_sa)
573 {
574 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
575
576 assert(phys_level0_sa->depth == 1);
577
578 if (info->levels == 1 && msaa_layout != ISL_MSAA_LAYOUT_INTERLEAVED) {
579 /* Do not pad the surface to the image alignment. Instead, pad it only
580 * to the pixel format's block alignment.
581 *
582 * For tiled surfaces, using a reduced alignment here avoids wasting CPU
583 * cycles on the below mipmap layout caluclations. Reducing the
584 * alignment here is safe because we later align the row pitch and array
585 * pitch to the tile boundary. It is safe even for
586 * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled
587 * to accomodate the interleaved samples.
588 *
589 * For linear surfaces, reducing the alignment here permits us to later
590 * choose an arbitrary, non-aligned row pitch. If the surface backs
591 * a VkBuffer, then an arbitrary pitch may be needed to accomodate
592 * VkBufferImageCopy::bufferRowLength.
593 */
594 *phys_slice0_sa = (struct isl_extent2d) {
595 .w = isl_align_npot(phys_level0_sa->w, fmtl->bw),
596 .h = isl_align_npot(phys_level0_sa->h, fmtl->bh),
597 };
598 return;
599 }
600
601 uint32_t slice_top_w = 0;
602 uint32_t slice_bottom_w = 0;
603 uint32_t slice_left_h = 0;
604 uint32_t slice_right_h = 0;
605
606 uint32_t W0 = phys_level0_sa->w;
607 uint32_t H0 = phys_level0_sa->h;
608
609 for (uint32_t l = 0; l < info->levels; ++l) {
610 uint32_t W = isl_minify(W0, l);
611 uint32_t H = isl_minify(H0, l);
612
613 if (msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
614 /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level
615 * Sizes (p133):
616 *
617 * If the surface is multisampled and it is a depth or stencil
618 * surface or Multisampled Surface StorageFormat in
619 * SURFACE_STATE is MSFMT_DEPTH_STENCIL, W_L and H_L must be
620 * adjusted as follows before proceeding: [...]
621 */
622 isl_msaa_interleaved_scale_px_to_sa(info->samples, &W, &H);
623 }
624
625 uint32_t w = isl_align_npot(W, image_align_sa->w);
626 uint32_t h = isl_align_npot(H, image_align_sa->h);
627
628 if (l == 0) {
629 slice_top_w = w;
630 slice_left_h = h;
631 slice_right_h = h;
632 } else if (l == 1) {
633 slice_bottom_w = w;
634 slice_left_h += h;
635 } else if (l == 2) {
636 slice_bottom_w += w;
637 slice_right_h += h;
638 } else {
639 slice_right_h += h;
640 }
641 }
642
643 *phys_slice0_sa = (struct isl_extent2d) {
644 .w = MAX(slice_top_w, slice_bottom_w),
645 .h = MAX(slice_left_h, slice_right_h),
646 };
647 }
648
649 /**
650 * A variant of isl_calc_phys_slice0_extent_sa() specific to
651 * ISL_DIM_LAYOUT_GEN4_3D.
652 */
653 static void
654 isl_calc_phys_slice0_extent_sa_gen4_3d(
655 const struct isl_device *dev,
656 const struct isl_surf_init_info *restrict info,
657 const struct isl_extent3d *image_align_sa,
658 const struct isl_extent4d *phys_level0_sa,
659 struct isl_extent2d *phys_slice0_sa)
660 {
661 assert(info->samples == 1);
662 assert(phys_level0_sa->array_len == 1);
663
664 uint32_t slice_w = 0;
665 uint32_t slice_h = 0;
666
667 uint32_t W0 = phys_level0_sa->w;
668 uint32_t H0 = phys_level0_sa->h;
669 uint32_t D0 = phys_level0_sa->d;
670
671 for (uint32_t l = 0; l < info->levels; ++l) {
672 uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w);
673 uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h);
674 uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa->d);
675
676 uint32_t max_layers_horiz = MIN(level_d, 1u << l);
677 uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
678
679 slice_w = MAX(slice_w, level_w * max_layers_horiz);
680 slice_h += level_h * max_layers_vert;
681 }
682
683 *phys_slice0_sa = (struct isl_extent2d) {
684 .w = slice_w,
685 .h = slice_h,
686 };
687 }
688
689 /**
690 * A variant of isl_calc_phys_slice0_extent_sa() specific to
691 * ISL_DIM_LAYOUT_GEN9_1D.
692 */
693 static void
694 isl_calc_phys_slice0_extent_sa_gen9_1d(
695 const struct isl_device *dev,
696 const struct isl_surf_init_info *restrict info,
697 const struct isl_extent3d *image_align_sa,
698 const struct isl_extent4d *phys_level0_sa,
699 struct isl_extent2d *phys_slice0_sa)
700 {
701 MAYBE_UNUSED const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
702
703 assert(phys_level0_sa->height == 1);
704 assert(phys_level0_sa->depth == 1);
705 assert(info->samples == 1);
706 assert(image_align_sa->w >= fmtl->bw);
707
708 uint32_t slice_w = 0;
709 const uint32_t W0 = phys_level0_sa->w;
710
711 for (uint32_t l = 0; l < info->levels; ++l) {
712 uint32_t W = isl_minify(W0, l);
713 uint32_t w = isl_align_npot(W, image_align_sa->w);
714
715 slice_w += w;
716 }
717
718 *phys_slice0_sa = isl_extent2d(slice_w, 1);
719 }
720
721 /**
722 * Calculate the physical extent of the surface's first array slice, in units
723 * of surface samples. If the surface is multi-leveled, then the result will
724 * be aligned to \a image_align_sa.
725 */
726 static void
727 isl_calc_phys_slice0_extent_sa(const struct isl_device *dev,
728 const struct isl_surf_init_info *restrict info,
729 enum isl_dim_layout dim_layout,
730 enum isl_msaa_layout msaa_layout,
731 const struct isl_extent3d *image_align_sa,
732 const struct isl_extent4d *phys_level0_sa,
733 struct isl_extent2d *phys_slice0_sa)
734 {
735 switch (dim_layout) {
736 case ISL_DIM_LAYOUT_GEN9_1D:
737 isl_calc_phys_slice0_extent_sa_gen9_1d(dev, info,
738 image_align_sa, phys_level0_sa,
739 phys_slice0_sa);
740 return;
741 case ISL_DIM_LAYOUT_GEN4_2D:
742 isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout,
743 image_align_sa, phys_level0_sa,
744 phys_slice0_sa);
745 return;
746 case ISL_DIM_LAYOUT_GEN4_3D:
747 isl_calc_phys_slice0_extent_sa_gen4_3d(dev, info, image_align_sa,
748 phys_level0_sa, phys_slice0_sa);
749 return;
750 }
751 }
752
753 /**
754 * Calculate the pitch between physical array slices, in units of rows of
755 * surface elements.
756 */
757 static uint32_t
758 isl_calc_array_pitch_el_rows(const struct isl_device *dev,
759 const struct isl_surf_init_info *restrict info,
760 const struct isl_tile_info *tile_info,
761 enum isl_dim_layout dim_layout,
762 enum isl_array_pitch_span array_pitch_span,
763 const struct isl_extent3d *image_align_sa,
764 const struct isl_extent4d *phys_level0_sa,
765 const struct isl_extent2d *phys_slice0_sa)
766 {
767 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
768 uint32_t pitch_sa_rows = 0;
769
770 switch (dim_layout) {
771 case ISL_DIM_LAYOUT_GEN9_1D:
772 /* Each row is an array slice */
773 pitch_sa_rows = 1;
774 break;
775 case ISL_DIM_LAYOUT_GEN4_2D:
776 switch (array_pitch_span) {
777 case ISL_ARRAY_PITCH_SPAN_COMPACT:
778 pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
779 break;
780 case ISL_ARRAY_PITCH_SPAN_FULL: {
781 /* The QPitch equation is found in the Broadwell PRM >> Volume 5:
782 * Memory Views >> Common Surface Formats >> Surface Layout >> 2D
783 * Surfaces >> Surface Arrays.
784 */
785 uint32_t H0_sa = phys_level0_sa->h;
786 uint32_t H1_sa = isl_minify(H0_sa, 1);
787
788 uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h);
789 uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h);
790
791 uint32_t m;
792 if (ISL_DEV_GEN(dev) >= 7) {
793 /* The QPitch equation changed slightly in Ivybridge. */
794 m = 12;
795 } else {
796 m = 11;
797 }
798
799 pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h);
800
801 if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 &&
802 (info->height % 4 == 1)) {
803 /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
804 * Graphics Core >> Section 7.18.3.7: Surface Arrays:
805 *
806 * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than
807 * the value calculated in the equation above , for every
808 * other odd Surface Height starting from 1 i.e. 1,5,9,13.
809 *
810 * XXX(chadv): Is the errata natural corollary of the physical
811 * layout of interleaved samples?
812 */
813 pitch_sa_rows += 4;
814 }
815
816 pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh);
817 } /* end case */
818 break;
819 }
820 break;
821 case ISL_DIM_LAYOUT_GEN4_3D:
822 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
823 pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
824 break;
825 default:
826 unreachable("bad isl_dim_layout");
827 break;
828 }
829
830 assert(pitch_sa_rows % fmtl->bh == 0);
831 uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh;
832
833 if (ISL_DEV_GEN(dev) >= 9 &&
834 info->dim == ISL_SURF_DIM_3D &&
835 tile_info->tiling != ISL_TILING_LINEAR) {
836 /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch:
837 *
838 * Tile Mode != Linear: This field must be set to an integer multiple
839 * of the tile height
840 */
841 pitch_el_rows = isl_align(pitch_el_rows, tile_info->height);
842 }
843
844 return pitch_el_rows;
845 }
846
847 /**
848 * Calculate the pitch of each surface row, in bytes.
849 */
850 static uint32_t
851 isl_calc_row_pitch(const struct isl_device *dev,
852 const struct isl_surf_init_info *restrict info,
853 const struct isl_tile_info *tile_info,
854 const struct isl_extent3d *image_align_sa,
855 const struct isl_extent2d *phys_slice0_sa)
856 {
857 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
858
859 uint32_t row_pitch = info->min_pitch;
860
861 /* First, align the surface to a cache line boundary, as the PRM explains
862 * below.
863 *
864 * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
865 * Formats >> Surface Padding Requirements >> Render Target and Media
866 * Surfaces:
867 *
868 * The data port accesses data (pixels) outside of the surface if they
869 * are contained in the same cache request as pixels that are within the
870 * surface. These pixels will not be returned by the requesting message,
871 * however if these pixels lie outside of defined pages in the GTT,
872 * a GTT error will result when the cache request is processed. In order
873 * to avoid these GTT errors, “padding” at the bottom of the surface is
874 * sometimes necessary.
875 *
876 * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
877 * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces:
878 *
879 * The sampling engine accesses texels outside of the surface if they
880 * are contained in the same cache line as texels that are within the
881 * surface. These texels will not participate in any calculation
882 * performed by the sampling engine and will not affect the result of
883 * any sampling engine operation, however if these texels lie outside of
884 * defined pages in the GTT, a GTT error will result when the cache line
885 * is accessed. In order to avoid these GTT errors, “padding” at the
886 * bottom and right side of a sampling engine surface is sometimes
887 * necessary.
888 *
889 * It is possible that a cache line will straddle a page boundary if the
890 * base address or pitch is not aligned. All pages included in the cache
891 * lines that are part of the surface must map to valid GTT entries to
892 * avoid errors. To determine the necessary padding on the bottom and
893 * right side of the surface, refer to the table in Alignment Unit Size
894 * section for the i and j parameters for the surface format in use. The
895 * surface must then be extended to the next multiple of the alignment
896 * unit size in each dimension, and all texels contained in this
897 * extended surface must have valid GTT entries.
898 *
899 * For example, suppose the surface size is 15 texels by 10 texels and
900 * the alignment parameters are i=4 and j=2. In this case, the extended
901 * surface would be 16 by 10. Note that these calculations are done in
902 * texels, and must be converted to bytes based on the surface format
903 * being used to determine whether additional pages need to be defined.
904 */
905 assert(phys_slice0_sa->w % fmtl->bw == 0);
906 row_pitch = MAX(row_pitch, fmtl->bs * (phys_slice0_sa->w / fmtl->bw));
907
908 switch (tile_info->tiling) {
909 case ISL_TILING_LINEAR:
910 /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >>
911 * RENDER_SURFACE_STATE Surface Pitch (p349):
912 *
913 * - For linear render target surfaces and surfaces accessed with the
914 * typed data port messages, the pitch must be a multiple of the
915 * element size for non-YUV surface formats. Pitch must be
916 * a multiple of 2 * element size for YUV surface formats.
917 *
918 * - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we
919 * ignore because isl doesn't do buffers.]
920 *
921 * - For other linear surfaces, the pitch can be any multiple of
922 * bytes.
923 */
924 if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
925 if (isl_format_is_yuv(info->format)) {
926 row_pitch = isl_align_npot(row_pitch, 2 * fmtl->bs);
927 } else {
928 row_pitch = isl_align_npot(row_pitch, fmtl->bs);
929 }
930 }
931 break;
932 default:
933 /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >>
934 * RENDER_SURFACE_STATE Surface Pitch (p349):
935 *
936 * - For tiled surfaces, the pitch must be a multiple of the tile
937 * width.
938 */
939 row_pitch = isl_align(row_pitch, tile_info->width);
940 break;
941 }
942
943 return row_pitch;
944 }
945
946 /**
947 * Calculate the surface's total height, including padding, in units of
948 * surface elements.
949 */
950 static uint32_t
951 isl_calc_total_height_el(const struct isl_device *dev,
952 const struct isl_surf_init_info *restrict info,
953 const struct isl_tile_info *tile_info,
954 uint32_t phys_array_len,
955 uint32_t row_pitch,
956 uint32_t array_pitch_el_rows)
957 {
958 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
959
960 uint32_t total_h_el = phys_array_len * array_pitch_el_rows;
961 uint32_t pad_bytes = 0;
962
963 /* From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
964 * Formats >> Surface Padding Requirements >> Render Target and Media
965 * Surfaces:
966 *
967 * The data port accesses data (pixels) outside of the surface if they
968 * are contained in the same cache request as pixels that are within the
969 * surface. These pixels will not be returned by the requesting message,
970 * however if these pixels lie outside of defined pages in the GTT,
971 * a GTT error will result when the cache request is processed. In
972 * order to avoid these GTT errors, “padding” at the bottom of the
973 * surface is sometimes necessary.
974 *
975 * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
976 * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces:
977 *
978 * ... Lots of padding requirements, all listed separately below.
979 */
980
981 /* We can safely ignore the first padding requirement, quoted below,
982 * because isl doesn't do buffers.
983 *
984 * - [pre-BDW] For buffers, which have no inherent “height,” padding
985 * requirements are different. A buffer must be padded to the next
986 * multiple of 256 array elements, with an additional 16 bytes added
987 * beyond that to account for the L1 cache line.
988 */
989
990 /*
991 * - For compressed textures [...], padding at the bottom of the surface
992 * is to an even compressed row.
993 */
994 if (isl_format_is_compressed(info->format))
995 total_h_el = isl_align(total_h_el, 2);
996
997 /*
998 * - For cube surfaces, an additional two rows of padding are required
999 * at the bottom of the surface.
1000 */
1001 if (info->usage & ISL_SURF_USAGE_CUBE_BIT)
1002 total_h_el += 2;
1003
1004 /*
1005 * - For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats,
1006 * additional padding is required. These surfaces require an extra row
1007 * plus 16 bytes of padding at the bottom in addition to the general
1008 * padding requirements.
1009 */
1010 if (isl_format_is_yuv(info->format) &&
1011 (fmtl->bs == 96 || fmtl->bs == 48|| fmtl->bs == 24)) {
1012 total_h_el += 1;
1013 pad_bytes += 16;
1014 }
1015
1016 /*
1017 * - For linear surfaces, additional padding of 64 bytes is required at
1018 * the bottom of the surface. This is in addition to the padding
1019 * required above.
1020 */
1021 if (tile_info->tiling == ISL_TILING_LINEAR)
1022 pad_bytes += 64;
1023
1024 /* The below text weakens, not strengthens, the padding requirements for
1025 * linear surfaces. Therefore we can safely ignore it.
1026 *
1027 * - [BDW+] For SURFTYPE_BUFFER, SURFTYPE_1D, and SURFTYPE_2D non-array,
1028 * non-MSAA, non-mip-mapped surfaces in linear memory, the only
1029 * padding requirement is to the next aligned 64-byte boundary beyond
1030 * the end of the surface. The rest of the padding requirements
1031 * documented above do not apply to these surfaces.
1032 */
1033
1034 /*
1035 * - [SKL+] For SURFTYPE_2D and SURFTYPE_3D with linear mode and
1036 * height % 4 != 0, the surface must be padded with
1037 * 4-(height % 4)*Surface Pitch # of bytes.
1038 */
1039 if (ISL_DEV_GEN(dev) >= 9 &&
1040 tile_info->tiling == ISL_TILING_LINEAR &&
1041 (info->dim == ISL_SURF_DIM_2D || info->dim == ISL_SURF_DIM_3D)) {
1042 total_h_el = isl_align(total_h_el, 4);
1043 }
1044
1045 /*
1046 * - [SKL+] For SURFTYPE_1D with linear mode, the surface must be padded
1047 * to 4 times the Surface Pitch # of bytes
1048 */
1049 if (ISL_DEV_GEN(dev) >= 9 &&
1050 tile_info->tiling == ISL_TILING_LINEAR &&
1051 info->dim == ISL_SURF_DIM_1D) {
1052 total_h_el += 4;
1053 }
1054
1055 /* Be sloppy. Align any leftover padding to a row boundary. */
1056 total_h_el += isl_align_div_npot(pad_bytes, row_pitch);
1057
1058 return total_h_el;
1059 }
1060
1061 bool
1062 isl_surf_init_s(const struct isl_device *dev,
1063 struct isl_surf *surf,
1064 const struct isl_surf_init_info *restrict info)
1065 {
1066 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1067
1068 const struct isl_extent4d logical_level0_px = {
1069 .w = info->width,
1070 .h = info->height,
1071 .d = info->depth,
1072 .a = info->array_len,
1073 };
1074
1075 enum isl_dim_layout dim_layout =
1076 isl_surf_choose_dim_layout(dev, info->dim);
1077
1078 enum isl_tiling tiling;
1079 if (!isl_surf_choose_tiling(dev, info, &tiling))
1080 return false;
1081
1082 struct isl_tile_info tile_info;
1083 if (!isl_tiling_get_info(dev, tiling, fmtl->bs, &tile_info))
1084 return false;
1085
1086 enum isl_msaa_layout msaa_layout;
1087 if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout))
1088 return false;
1089
1090 struct isl_extent3d image_align_el;
1091 isl_choose_image_alignment_el(dev, info, tiling, msaa_layout,
1092 &image_align_el);
1093
1094 struct isl_extent3d image_align_sa =
1095 isl_extent3d_el_to_sa(info->format, image_align_el);
1096
1097 struct isl_extent4d phys_level0_sa;
1098 isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout,
1099 &phys_level0_sa);
1100 assert(phys_level0_sa.w % fmtl->bw == 0);
1101 assert(phys_level0_sa.h % fmtl->bh == 0);
1102
1103 enum isl_array_pitch_span array_pitch_span =
1104 isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa);
1105
1106 struct isl_extent2d phys_slice0_sa;
1107 isl_calc_phys_slice0_extent_sa(dev, info, dim_layout, msaa_layout,
1108 &image_align_sa, &phys_level0_sa,
1109 &phys_slice0_sa);
1110 assert(phys_slice0_sa.w % fmtl->bw == 0);
1111 assert(phys_slice0_sa.h % fmtl->bh == 0);
1112
1113 const uint32_t row_pitch = isl_calc_row_pitch(dev, info, &tile_info,
1114 &image_align_sa,
1115 &phys_slice0_sa);
1116
1117 const uint32_t array_pitch_el_rows =
1118 isl_calc_array_pitch_el_rows(dev, info, &tile_info, dim_layout,
1119 array_pitch_span, &image_align_sa,
1120 &phys_level0_sa, &phys_slice0_sa);
1121
1122 const uint32_t total_h_el =
1123 isl_calc_total_height_el(dev, info, &tile_info,
1124 phys_level0_sa.array_len, row_pitch,
1125 array_pitch_el_rows);
1126
1127 const uint32_t total_h_sa = total_h_el * fmtl->bh;
1128 const uint32_t size = row_pitch * isl_align(total_h_sa, tile_info.height);
1129
1130 /* Alignment of surface base address, in bytes */
1131 uint32_t base_alignment = MAX(1, info->min_alignment);
1132 assert(isl_is_pow2(base_alignment) && isl_is_pow2(tile_info.size));
1133 base_alignment = MAX(base_alignment, tile_info.size);
1134
1135 *surf = (struct isl_surf) {
1136 .dim = info->dim,
1137 .dim_layout = dim_layout,
1138 .msaa_layout = msaa_layout,
1139 .tiling = tiling,
1140 .format = info->format,
1141
1142 .levels = info->levels,
1143 .samples = info->samples,
1144
1145 .image_alignment_el = image_align_el,
1146 .logical_level0_px = logical_level0_px,
1147 .phys_level0_sa = phys_level0_sa,
1148
1149 .size = size,
1150 .alignment = base_alignment,
1151 .row_pitch = row_pitch,
1152 .array_pitch_el_rows = array_pitch_el_rows,
1153 .array_pitch_span = array_pitch_span,
1154
1155 .usage = info->usage,
1156 };
1157
1158 return true;
1159 }
1160
1161 void
1162 isl_surf_get_tile_info(const struct isl_device *dev,
1163 const struct isl_surf *surf,
1164 struct isl_tile_info *tile_info)
1165 {
1166 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
1167 isl_tiling_get_info(dev, surf->tiling, fmtl->bs, tile_info);
1168 }
1169
1170 void
1171 isl_surf_fill_state_s(const struct isl_device *dev, void *state,
1172 const struct isl_surf_fill_state_info *restrict info)
1173 {
1174 #ifndef NDEBUG
1175 isl_surf_usage_flags_t _base_usage =
1176 info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
1177 ISL_SURF_USAGE_TEXTURE_BIT |
1178 ISL_SURF_USAGE_STORAGE_BIT);
1179 /* They may only specify one of the above bits at a time */
1180 assert(__builtin_popcount(_base_usage) == 1);
1181 /* The only other allowed bit is ISL_SURF_USAGE_CUBE_BIT */
1182 assert((info->view->usage & ~ISL_SURF_USAGE_CUBE_BIT) == _base_usage);
1183 #endif
1184
1185 if (info->surf->dim == ISL_SURF_DIM_3D) {
1186 assert(info->view->base_array_layer + info->view->array_len <=
1187 info->surf->logical_level0_px.depth);
1188 } else {
1189 assert(info->view->base_array_layer + info->view->array_len <=
1190 info->surf->logical_level0_px.array_len);
1191 }
1192
1193 switch (ISL_DEV_GEN(dev)) {
1194 case 7:
1195 if (ISL_DEV_IS_HASWELL(dev)) {
1196 isl_gen75_surf_fill_state_s(dev, state, info);
1197 } else {
1198 isl_gen7_surf_fill_state_s(dev, state, info);
1199 }
1200 break;
1201 case 8:
1202 isl_gen8_surf_fill_state_s(dev, state, info);
1203 break;
1204 case 9:
1205 isl_gen9_surf_fill_state_s(dev, state, info);
1206 break;
1207 default:
1208 assert(!"Cannot fill surface state for this gen");
1209 }
1210 }
1211
/**
 * Write the buffer surface state described by *info into *state by
 * dispatching to the implementation for the device's hardware generation.
 *
 * Only gen7, Haswell, gen8 and gen9 are handled; other generations hit an
 * assertion.
 */
void
isl_buffer_fill_state_s(const struct isl_device *dev, void *state,
                        const struct isl_buffer_fill_state_info *restrict info)
{
   const int gen = ISL_DEV_GEN(dev);

   if (gen == 7) {
      /* Haswell has its own implementation, distinct from other gen7. */
      if (ISL_DEV_IS_HASWELL(dev))
         isl_gen75_buffer_fill_state_s(state, info);
      else
         isl_gen7_buffer_fill_state_s(state, info);
   } else if (gen == 8) {
      isl_gen8_buffer_fill_state_s(state, info);
   } else if (gen == 9) {
      isl_gen9_buffer_fill_state_s(state, info);
   } else {
      assert(!"Cannot fill surface state for this gen");
   }
}
1234
1235 /**
1236 * A variant of isl_surf_get_image_offset_sa() specific to
1237 * ISL_DIM_LAYOUT_GEN4_2D.
1238 */
1239 static void
1240 get_image_offset_sa_gen4_2d(const struct isl_surf *surf,
1241 uint32_t level, uint32_t layer,
1242 uint32_t *x_offset_sa,
1243 uint32_t *y_offset_sa)
1244 {
1245 assert(level < surf->levels);
1246 assert(layer < surf->phys_level0_sa.array_len);
1247 assert(surf->phys_level0_sa.depth == 1);
1248
1249 const struct isl_extent3d image_align_sa =
1250 isl_surf_get_image_alignment_sa(surf);
1251
1252 const uint32_t W0 = surf->phys_level0_sa.width;
1253 const uint32_t H0 = surf->phys_level0_sa.height;
1254
1255 uint32_t x = 0;
1256 uint32_t y = layer * isl_surf_get_array_pitch_sa_rows(surf);
1257
1258 for (uint32_t l = 0; l < level; ++l) {
1259 if (l == 1) {
1260 uint32_t W = isl_minify(W0, l);
1261
1262 if (surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED)
1263 isl_msaa_interleaved_scale_px_to_sa(surf->samples, &W, NULL);
1264
1265 x += isl_align_npot(W, image_align_sa.w);
1266 } else {
1267 uint32_t H = isl_minify(H0, l);
1268
1269 if (surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED)
1270 isl_msaa_interleaved_scale_px_to_sa(surf->samples, NULL, &H);
1271
1272 y += isl_align_npot(H, image_align_sa.h);
1273 }
1274 }
1275
1276 *x_offset_sa = x;
1277 *y_offset_sa = y;
1278 }
1279
1280 /**
1281 * A variant of isl_surf_get_image_offset_sa() specific to
1282 * ISL_DIM_LAYOUT_GEN4_3D.
1283 */
1284 static void
1285 get_image_offset_sa_gen4_3d(const struct isl_surf *surf,
1286 uint32_t level, uint32_t logical_z_offset_px,
1287 uint32_t *x_offset_sa,
1288 uint32_t *y_offset_sa)
1289 {
1290 assert(level < surf->levels);
1291 assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level));
1292 assert(surf->phys_level0_sa.array_len == 1);
1293
1294 const struct isl_extent3d image_align_sa =
1295 isl_surf_get_image_alignment_sa(surf);
1296
1297 const uint32_t W0 = surf->phys_level0_sa.width;
1298 const uint32_t H0 = surf->phys_level0_sa.height;
1299 const uint32_t D0 = surf->phys_level0_sa.depth;
1300
1301 uint32_t x = 0;
1302 uint32_t y = 0;
1303
1304 for (uint32_t l = 0; l < level; ++l) {
1305 const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h);
1306 const uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa.d);
1307 const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
1308
1309 y += level_h * max_layers_vert;
1310 }
1311
1312 const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w);
1313 const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h);
1314 const uint32_t level_d = isl_align_npot(isl_minify(D0, level), image_align_sa.d);
1315
1316 const uint32_t max_layers_horiz = MIN(level_d, 1u << level);
1317
1318 x += level_w * (logical_z_offset_px % max_layers_horiz);
1319 y += level_h * (logical_z_offset_px / max_layers_horiz);
1320
1321 *x_offset_sa = x;
1322 *y_offset_sa = y;
1323 }
1324
1325 /**
1326 * A variant of isl_surf_get_image_offset_sa() specific to
1327 * ISL_DIM_LAYOUT_GEN9_1D.
1328 */
1329 static void
1330 get_image_offset_sa_gen9_1d(const struct isl_surf *surf,
1331 uint32_t level, uint32_t layer,
1332 uint32_t *x_offset_sa,
1333 uint32_t *y_offset_sa)
1334 {
1335 assert(level < surf->levels);
1336 assert(layer < surf->phys_level0_sa.array_len);
1337 assert(surf->phys_level0_sa.height == 1);
1338 assert(surf->phys_level0_sa.depth == 1);
1339 assert(surf->samples == 1);
1340
1341 const uint32_t W0 = surf->phys_level0_sa.width;
1342 const struct isl_extent3d image_align_sa =
1343 isl_surf_get_image_alignment_sa(surf);
1344
1345 uint32_t x = 0;
1346
1347 for (uint32_t l = 0; l < level; ++l) {
1348 uint32_t W = isl_minify(W0, l);
1349 uint32_t w = isl_align_npot(W, image_align_sa.w);
1350
1351 x += w;
1352 }
1353
1354 *x_offset_sa = x;
1355 *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf);
1356 }
1357
1358 /**
1359 * Calculate the offset, in units of surface samples, to a subimage in the
1360 * surface.
1361 *
1362 * @invariant level < surface levels
1363 * @invariant logical_array_layer < logical array length of surface
1364 * @invariant logical_z_offset_px < logical depth of surface at level
1365 */
1366 static void
1367 get_image_offset_sa(const struct isl_surf *surf,
1368 uint32_t level,
1369 uint32_t logical_array_layer,
1370 uint32_t logical_z_offset_px,
1371 uint32_t *x_offset_sa,
1372 uint32_t *y_offset_sa)
1373 {
1374 assert(level < surf->levels);
1375 assert(logical_array_layer < surf->logical_level0_px.array_len);
1376 assert(logical_z_offset_px
1377 < isl_minify(surf->logical_level0_px.depth, level));
1378
1379 switch (surf->dim_layout) {
1380 case ISL_DIM_LAYOUT_GEN9_1D:
1381 get_image_offset_sa_gen9_1d(surf, level, logical_array_layer,
1382 x_offset_sa, y_offset_sa);
1383 break;
1384 case ISL_DIM_LAYOUT_GEN4_2D:
1385 get_image_offset_sa_gen4_2d(surf, level, logical_array_layer
1386 + logical_z_offset_px,
1387 x_offset_sa, y_offset_sa);
1388 break;
1389 case ISL_DIM_LAYOUT_GEN4_3D:
1390 get_image_offset_sa_gen4_3d(surf, level, logical_z_offset_px,
1391 x_offset_sa, y_offset_sa);
1392 break;
1393
1394 default:
1395 unreachable("not reached");
1396 }
1397 }
1398
1399 void
1400 isl_surf_get_image_offset_el(const struct isl_surf *surf,
1401 uint32_t level,
1402 uint32_t logical_array_layer,
1403 uint32_t logical_z_offset_px,
1404 uint32_t *x_offset_el,
1405 uint32_t *y_offset_el)
1406 {
1407 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
1408
1409 assert(level < surf->levels);
1410 assert(logical_array_layer < surf->logical_level0_px.array_len);
1411 assert(logical_z_offset_px
1412 < isl_minify(surf->logical_level0_px.depth, level));
1413
1414 uint32_t x_offset_sa, y_offset_sa;
1415 get_image_offset_sa(surf, level,
1416 logical_array_layer,
1417 logical_z_offset_px,
1418 &x_offset_sa,
1419 &y_offset_sa);
1420
1421 *x_offset_el = x_offset_sa / fmtl->bw;
1422 *y_offset_el = y_offset_sa / fmtl->bh;
1423 }
1424
1425 void
1426 isl_tiling_get_intratile_offset_el(const struct isl_device *dev,
1427 enum isl_tiling tiling,
1428 uint8_t bs,
1429 uint32_t row_pitch,
1430 uint32_t total_x_offset_el,
1431 uint32_t total_y_offset_el,
1432 uint32_t *base_address_offset,
1433 uint32_t *x_offset_el,
1434 uint32_t *y_offset_el)
1435 {
1436 struct isl_tile_info tile_info;
1437 isl_tiling_get_info(dev, tiling, bs, &tile_info);
1438
1439 /* This function only really works for power-of-two surfaces. In
1440 * theory, we could make it work for non-power-of-two surfaces by going
1441 * to the left until we find a block that is bs-aligned. The Vulkan
1442 * driver doesn't use non-power-of-two tiled surfaces so we'll leave
1443 * this unimplemented for now.
1444 */
1445 assert(tiling == ISL_TILING_LINEAR || isl_is_pow2(bs));
1446
1447 uint32_t small_y_offset_el = total_y_offset_el % tile_info.height;
1448 uint32_t big_y_offset_el = total_y_offset_el - small_y_offset_el;
1449 uint32_t big_y_offset_B = big_y_offset_el * row_pitch;
1450
1451 uint32_t total_x_offset_B = total_x_offset_el * bs;
1452 uint32_t small_x_offset_B = total_x_offset_B % tile_info.width;
1453 uint32_t small_x_offset_el = small_x_offset_B / bs;
1454 uint32_t big_x_offset_B = (total_x_offset_B / tile_info.width) * tile_info.size;
1455
1456 *base_address_offset = big_y_offset_B + big_x_offset_B;
1457 *x_offset_el = small_x_offset_el;
1458 *y_offset_el = small_y_offset_el;
1459 }
1460
1461 uint32_t
1462 isl_surf_get_depth_format(const struct isl_device *dev,
1463 const struct isl_surf *surf)
1464 {
1465 /* Support for separate stencil buffers began in gen5. Support for
1466 * interleaved depthstencil buffers ceased in gen7. The intermediate gens,
1467 * those that supported separate and interleaved stencil, were gen5 and
1468 * gen6.
1469 *
1470 * For a list of all available formats, see the Sandybridge PRM >> Volume
1471 * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface
1472 * Format (p321).
1473 */
1474
1475 bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT;
1476
1477 assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT);
1478
1479 if (has_stencil)
1480 assert(ISL_DEV_GEN(dev) < 7);
1481
1482 switch (surf->format) {
1483 default:
1484 unreachable("bad isl depth format");
1485 case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
1486 assert(ISL_DEV_GEN(dev) < 7);
1487 return 0; /* D32_FLOAT_S8X24_UINT */
1488 case ISL_FORMAT_R32_FLOAT:
1489 assert(!has_stencil);
1490 return 1; /* D32_FLOAT */
1491 case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
1492 if (has_stencil) {
1493 assert(ISL_DEV_GEN(dev) < 7);
1494 return 2; /* D24_UNORM_S8_UINT */
1495 } else {
1496 assert(ISL_DEV_GEN(dev) >= 5);
1497 return 3; /* D24_UNORM_X8_UINT */
1498 }
1499 case ISL_FORMAT_R16_UNORM:
1500 assert(!has_stencil);
1501 return 5; /* D16_UNORM */
1502 }
1503 }