isl: Refactor row pitch calculation (v2)
[mesa.git] / src / intel / isl / isl.c
1 /*
2 * Copyright 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27
28 #include "isl.h"
29 #include "isl_gen4.h"
30 #include "isl_gen6.h"
31 #include "isl_gen7.h"
32 #include "isl_gen8.h"
33 #include "isl_gen9.h"
34 #include "isl_priv.h"
35
36 void PRINTFLIKE(3, 4) UNUSED
37 __isl_finishme(const char *file, int line, const char *fmt, ...)
38 {
39 va_list ap;
40 char buf[512];
41
42 va_start(ap, fmt);
43 vsnprintf(buf, sizeof(buf), fmt, ap);
44 va_end(ap);
45
46 fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf);
47 }
48
/* Per-generation constants, indexed by the hardware generation number.
 * isl_device_init() copies the selected entry into dev->ss.  Fields that an
 * initializer does not name are zero.
 * NOTE(review): "ss" presumably stands for surface state -- confirm.
 */
static const struct {
   uint8_t size;
   uint8_t align;
   uint8_t addr_offset;
   uint8_t aux_addr_offset;
} ss_infos[] = {
   [4] = { .size = 24, .align = 32, .addr_offset = 4 },
   [5] = { .size = 24, .align = 32, .addr_offset = 4 },
   [6] = { .size = 24, .align = 32, .addr_offset = 4 },
   [7] = { .size = 32, .align = 32, .addr_offset = 4,  .aux_addr_offset = 24 },
   [8] = { .size = 64, .align = 64, .addr_offset = 32, .aux_addr_offset = 40 },
   [9] = { .size = 64, .align = 64, .addr_offset = 32, .aux_addr_offset = 40 },
};
62
63 void
64 isl_device_init(struct isl_device *dev,
65 const struct gen_device_info *info,
66 bool has_bit6_swizzling)
67 {
68 dev->info = info;
69 dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6;
70 dev->has_bit6_swizzling = has_bit6_swizzling;
71
72 /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some
73 * device properties at buildtime. Verify that the macros with the device
74 * properties chosen during runtime.
75 */
76 ISL_DEV_GEN_SANITIZE(dev);
77 ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev);
78
79 /* Did we break hiz or stencil? */
80 if (ISL_DEV_USE_SEPARATE_STENCIL(dev))
81 assert(info->has_hiz_and_separate_stencil);
82 if (info->must_use_separate_stencil)
83 assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));
84
85 dev->ss.size = ss_infos[ISL_DEV_GEN(dev)].size;
86 dev->ss.align = ss_infos[ISL_DEV_GEN(dev)].align;
87 dev->ss.addr_offset = ss_infos[ISL_DEV_GEN(dev)].addr_offset;
88 dev->ss.aux_addr_offset = ss_infos[ISL_DEV_GEN(dev)].aux_addr_offset;
89 }
90
91 /**
92 * @brief Query the set of multisamples supported by the device.
93 *
94 * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always
95 * supported.
96 */
97 isl_sample_count_mask_t ATTRIBUTE_CONST
98 isl_device_get_sample_counts(struct isl_device *dev)
99 {
100 if (ISL_DEV_GEN(dev) >= 9) {
101 return ISL_SAMPLE_COUNT_1_BIT |
102 ISL_SAMPLE_COUNT_2_BIT |
103 ISL_SAMPLE_COUNT_4_BIT |
104 ISL_SAMPLE_COUNT_8_BIT |
105 ISL_SAMPLE_COUNT_16_BIT;
106 } else if (ISL_DEV_GEN(dev) >= 8) {
107 return ISL_SAMPLE_COUNT_1_BIT |
108 ISL_SAMPLE_COUNT_2_BIT |
109 ISL_SAMPLE_COUNT_4_BIT |
110 ISL_SAMPLE_COUNT_8_BIT;
111 } else if (ISL_DEV_GEN(dev) >= 7) {
112 return ISL_SAMPLE_COUNT_1_BIT |
113 ISL_SAMPLE_COUNT_4_BIT |
114 ISL_SAMPLE_COUNT_8_BIT;
115 } else if (ISL_DEV_GEN(dev) >= 6) {
116 return ISL_SAMPLE_COUNT_1_BIT |
117 ISL_SAMPLE_COUNT_4_BIT;
118 } else {
119 return ISL_SAMPLE_COUNT_1_BIT;
120 }
121 }
122
123 /**
124 * @param[out] info is written only on success
125 */
126 static bool
127 isl_tiling_get_info(const struct isl_device *dev,
128 enum isl_tiling tiling,
129 uint32_t format_bpb,
130 struct isl_tile_info *tile_info)
131 {
132 const uint32_t bs = format_bpb / 8;
133 struct isl_extent2d logical_el, phys_B;
134
135 if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) {
136 /* It is possible to have non-power-of-two formats in a tiled buffer.
137 * The easiest way to handle this is to treat the tile as if it is three
138 * times as wide. This way no pixel will ever cross a tile boundary.
139 * This really only works on legacy X and Y tiling formats.
140 */
141 assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0);
142 assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3));
143 return isl_tiling_get_info(dev, tiling, format_bpb / 3, tile_info);
144 }
145
146 switch (tiling) {
147 case ISL_TILING_LINEAR:
148 assert(bs > 0);
149 logical_el = isl_extent2d(1, 1);
150 phys_B = isl_extent2d(bs, 1);
151 break;
152
153 case ISL_TILING_X:
154 assert(bs > 0);
155 logical_el = isl_extent2d(512 / bs, 8);
156 phys_B = isl_extent2d(512, 8);
157 break;
158
159 case ISL_TILING_Y0:
160 assert(bs > 0);
161 logical_el = isl_extent2d(128 / bs, 32);
162 phys_B = isl_extent2d(128, 32);
163 break;
164
165 case ISL_TILING_W:
166 assert(bs == 1);
167 logical_el = isl_extent2d(64, 64);
168 /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch:
169 *
170 * "If the surface is a stencil buffer (and thus has Tile Mode set
171 * to TILEMODE_WMAJOR), the pitch must be set to 2x the value
172 * computed based on width, as the stencil buffer is stored with two
173 * rows interleaved."
174 *
175 * This, together with the fact that stencil buffers are referred to as
176 * being Y-tiled in the PRMs for older hardware implies that the
177 * physical size of a W-tile is actually the same as for a Y-tile.
178 */
179 phys_B = isl_extent2d(128, 32);
180 break;
181
182 case ISL_TILING_Yf:
183 case ISL_TILING_Ys: {
184 if (ISL_DEV_GEN(dev) < 9)
185 return false;
186
187 if (!isl_is_pow2(bs))
188 return false;
189
190 bool is_Ys = tiling == ISL_TILING_Ys;
191
192 assert(bs > 0);
193 unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys));
194 unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys));
195
196 logical_el = isl_extent2d(width / bs, height);
197 phys_B = isl_extent2d(width, height);
198 break;
199 }
200
201 case ISL_TILING_HIZ:
202 /* HiZ buffers are required to have ISL_FORMAT_HIZ which is an 8x4
203 * 128bpb format. The tiling has the same physical dimensions as
204 * Y-tiling but actually has two HiZ columns per Y-tiled column.
205 */
206 assert(bs == 16);
207 logical_el = isl_extent2d(16, 16);
208 phys_B = isl_extent2d(128, 32);
209 break;
210
211 case ISL_TILING_CCS:
212 /* CCS surfaces are required to have one of the GENX_CCS_* formats which
213 * have a block size of 1 or 2 bits per block and each CCS element
214 * corresponds to one cache-line pair in the main surface. From the Sky
215 * Lake PRM Vol. 12 in the section on planes:
216 *
217 * "The Color Control Surface (CCS) contains the compression status
218 * of the cache-line pairs. The compression state of the cache-line
219 * pair is specified by 2 bits in the CCS. Each CCS cache-line
220 * represents an area on the main surface of 16x16 sets of 128 byte
221 * Y-tiled cache-line-pairs. CCS is always Y tiled."
222 *
223 * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines.
224 * Since each cache line corresponds to a 16x16 set of cache-line pairs,
225 * that yields total tile area of 128x128 cache-line pairs or CCS
226 * elements. On older hardware, each CCS element is 1 bit and the tile
227 * is 128x256 elements.
228 */
229 assert(format_bpb == 1 || format_bpb == 2);
230 logical_el = isl_extent2d(128, 256 / format_bpb);
231 phys_B = isl_extent2d(128, 32);
232 break;
233
234 default:
235 unreachable("not reached");
236 } /* end switch */
237
238 *tile_info = (struct isl_tile_info) {
239 .tiling = tiling,
240 .format_bpb = format_bpb,
241 .logical_extent_el = logical_el,
242 .phys_extent_B = phys_B,
243 };
244
245 return true;
246 }
247
248 /**
249 * @param[out] tiling is set only on success
250 */
251 static bool
252 isl_surf_choose_tiling(const struct isl_device *dev,
253 const struct isl_surf_init_info *restrict info,
254 enum isl_tiling *tiling)
255 {
256 isl_tiling_flags_t tiling_flags = info->tiling_flags;
257
258 /* HiZ surfaces always use the HiZ tiling */
259 if (info->usage & ISL_SURF_USAGE_HIZ_BIT) {
260 assert(info->format == ISL_FORMAT_HIZ);
261 assert(tiling_flags == ISL_TILING_HIZ_BIT);
262 *tiling = ISL_TILING_HIZ;
263 return true;
264 }
265
266 /* CCS surfaces always use the CCS tiling */
267 if (info->usage & ISL_SURF_USAGE_CCS_BIT) {
268 assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS);
269 assert(tiling_flags == ISL_TILING_CCS_BIT);
270 *tiling = ISL_TILING_CCS;
271 return true;
272 }
273
274 if (ISL_DEV_GEN(dev) >= 6) {
275 isl_gen6_filter_tiling(dev, info, &tiling_flags);
276 } else {
277 isl_finishme("%s: gen%u", __func__, ISL_DEV_GEN(dev));
278 isl_gen6_filter_tiling(dev, info, &tiling_flags);
279 }
280
281 #define CHOOSE(__tiling) \
282 do { \
283 if (tiling_flags & (1u << (__tiling))) { \
284 *tiling = (__tiling); \
285 return true; \
286 } \
287 } while (0)
288
289 /* Of the tiling modes remaining, choose the one that offers the best
290 * performance.
291 */
292
293 if (info->dim == ISL_SURF_DIM_1D) {
294 /* Prefer linear for 1D surfaces because they do not benefit from
295 * tiling. To the contrary, tiling leads to wasted memory and poor
296 * memory locality due to the swizzling and alignment restrictions
297 * required in tiled surfaces.
298 */
299 CHOOSE(ISL_TILING_LINEAR);
300 }
301
302 CHOOSE(ISL_TILING_Ys);
303 CHOOSE(ISL_TILING_Yf);
304 CHOOSE(ISL_TILING_Y0);
305 CHOOSE(ISL_TILING_X);
306 CHOOSE(ISL_TILING_W);
307 CHOOSE(ISL_TILING_LINEAR);
308
309 #undef CHOOSE
310
311 /* No tiling mode accomodates the inputs. */
312 return false;
313 }
314
315 static bool
316 isl_choose_msaa_layout(const struct isl_device *dev,
317 const struct isl_surf_init_info *info,
318 enum isl_tiling tiling,
319 enum isl_msaa_layout *msaa_layout)
320 {
321 if (ISL_DEV_GEN(dev) >= 8) {
322 return isl_gen8_choose_msaa_layout(dev, info, tiling, msaa_layout);
323 } else if (ISL_DEV_GEN(dev) >= 7) {
324 return isl_gen7_choose_msaa_layout(dev, info, tiling, msaa_layout);
325 } else if (ISL_DEV_GEN(dev) >= 6) {
326 return isl_gen6_choose_msaa_layout(dev, info, tiling, msaa_layout);
327 } else {
328 return isl_gen4_choose_msaa_layout(dev, info, tiling, msaa_layout);
329 }
330 }
331
332 struct isl_extent2d
333 isl_get_interleaved_msaa_px_size_sa(uint32_t samples)
334 {
335 assert(isl_is_pow2(samples));
336
337 /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level
338 * Sizes (p133):
339 *
340 * If the surface is multisampled and it is a depth or stencil surface
341 * or Multisampled Surface StorageFormat in SURFACE_STATE is
342 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
343 * proceeding: [...]
344 */
345 return (struct isl_extent2d) {
346 .width = 1 << ((ffs(samples) - 0) / 2),
347 .height = 1 << ((ffs(samples) - 1) / 2),
348 };
349 }
350
351 static void
352 isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,
353 uint32_t *width, uint32_t *height)
354 {
355 const struct isl_extent2d px_size_sa =
356 isl_get_interleaved_msaa_px_size_sa(samples);
357
358 if (width)
359 *width = isl_align(*width, 2) * px_size_sa.width;
360 if (height)
361 *height = isl_align(*height, 2) * px_size_sa.height;
362 }
363
364 static enum isl_array_pitch_span
365 isl_choose_array_pitch_span(const struct isl_device *dev,
366 const struct isl_surf_init_info *restrict info,
367 enum isl_dim_layout dim_layout,
368 const struct isl_extent4d *phys_level0_sa)
369 {
370 switch (dim_layout) {
371 case ISL_DIM_LAYOUT_GEN9_1D:
372 case ISL_DIM_LAYOUT_GEN4_2D:
373 if (ISL_DEV_GEN(dev) >= 8) {
374 /* QPitch becomes programmable in Broadwell. So choose the
375 * most compact QPitch possible in order to conserve memory.
376 *
377 * From the Broadwell PRM >> Volume 2d: Command Reference: Structures
378 * >> RENDER_SURFACE_STATE Surface QPitch (p325):
379 *
380 * - Software must ensure that this field is set to a value
381 * sufficiently large such that the array slices in the surface
382 * do not overlap. Refer to the Memory Data Formats section for
383 * information on how surfaces are stored in memory.
384 *
385 * - This field specifies the distance in rows between array
386 * slices. It is used only in the following cases:
387 *
388 * - Surface Array is enabled OR
389 * - Number of Mulitsamples is not NUMSAMPLES_1 and
390 * Multisampled Surface Storage Format set to MSFMT_MSS OR
391 * - Surface Type is SURFTYPE_CUBE
392 */
393 return ISL_ARRAY_PITCH_SPAN_COMPACT;
394 } else if (ISL_DEV_GEN(dev) >= 7) {
395 /* Note that Ivybridge introduces
396 * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the
397 * driver more control over the QPitch.
398 */
399
400 if (phys_level0_sa->array_len == 1) {
401 /* The hardware will never use the QPitch. So choose the most
402 * compact QPitch possible in order to conserve memory.
403 */
404 return ISL_ARRAY_PITCH_SPAN_COMPACT;
405 }
406
407 if (isl_surf_usage_is_depth_or_stencil(info->usage) ||
408 (info->usage & ISL_SURF_USAGE_HIZ_BIT)) {
409 /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >>
410 * Section 6.18.4.7: Surface Arrays (p112):
411 *
412 * If Surface Array Spacing is set to ARYSPC_FULL (note that
413 * the depth buffer and stencil buffer have an implied value of
414 * ARYSPC_FULL):
415 */
416 return ISL_ARRAY_PITCH_SPAN_FULL;
417 }
418
419 if (info->levels == 1) {
420 /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing
421 * to ARYSPC_LOD0.
422 */
423 return ISL_ARRAY_PITCH_SPAN_COMPACT;
424 }
425
426 return ISL_ARRAY_PITCH_SPAN_FULL;
427 } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
428 ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
429 isl_surf_usage_is_stencil(info->usage)) {
430 /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
431 * Graphics Core >> Section 7.18.3.7: Surface Arrays:
432 *
433 * The separate stencil buffer does not support mip mapping, thus
434 * the storage for LODs other than LOD 0 is not needed.
435 */
436 assert(info->levels == 1);
437 assert(phys_level0_sa->array_len == 1);
438 return ISL_ARRAY_PITCH_SPAN_COMPACT;
439 } else {
440 if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
441 ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
442 isl_surf_usage_is_stencil(info->usage)) {
443 /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
444 * Graphics Core >> Section 7.18.3.7: Surface Arrays:
445 *
446 * The separate stencil buffer does not support mip mapping,
447 * thus the storage for LODs other than LOD 0 is not needed.
448 */
449 assert(info->levels == 1);
450 assert(phys_level0_sa->array_len == 1);
451 return ISL_ARRAY_PITCH_SPAN_COMPACT;
452 }
453
454 if (phys_level0_sa->array_len == 1) {
455 /* The hardware will never use the QPitch. So choose the most
456 * compact QPitch possible in order to conserve memory.
457 */
458 return ISL_ARRAY_PITCH_SPAN_COMPACT;
459 }
460
461 return ISL_ARRAY_PITCH_SPAN_FULL;
462 }
463
464 case ISL_DIM_LAYOUT_GEN4_3D:
465 /* The hardware will never use the QPitch. So choose the most
466 * compact QPitch possible in order to conserve memory.
467 */
468 return ISL_ARRAY_PITCH_SPAN_COMPACT;
469 }
470
471 unreachable("bad isl_dim_layout");
472 return ISL_ARRAY_PITCH_SPAN_FULL;
473 }
474
475 static void
476 isl_choose_image_alignment_el(const struct isl_device *dev,
477 const struct isl_surf_init_info *restrict info,
478 enum isl_tiling tiling,
479 enum isl_dim_layout dim_layout,
480 enum isl_msaa_layout msaa_layout,
481 struct isl_extent3d *image_align_el)
482 {
483 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
484 if (fmtl->txc == ISL_TXC_MCS) {
485 assert(tiling == ISL_TILING_Y0);
486
487 /*
488 * IvyBrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
489 *
490 * Height, width, and layout of MCS buffer in this case must match with
491 * Render Target height, width, and layout. MCS buffer is tiledY.
492 *
493 * To avoid wasting memory, choose the smallest alignment possible:
494 * HALIGN_4 and VALIGN_4.
495 */
496 *image_align_el = isl_extent3d(4, 4, 1);
497 return;
498 } else if (info->format == ISL_FORMAT_HIZ) {
499 assert(ISL_DEV_GEN(dev) >= 6);
500 /* HiZ surfaces are always aligned to 16x8 pixels in the primary surface
501 * which works out to 2x2 HiZ elments.
502 */
503 *image_align_el = isl_extent3d(2, 2, 1);
504 return;
505 }
506
507 if (ISL_DEV_GEN(dev) >= 9) {
508 isl_gen9_choose_image_alignment_el(dev, info, tiling, dim_layout,
509 msaa_layout, image_align_el);
510 } else if (ISL_DEV_GEN(dev) >= 8) {
511 isl_gen8_choose_image_alignment_el(dev, info, tiling, dim_layout,
512 msaa_layout, image_align_el);
513 } else if (ISL_DEV_GEN(dev) >= 7) {
514 isl_gen7_choose_image_alignment_el(dev, info, tiling, dim_layout,
515 msaa_layout, image_align_el);
516 } else if (ISL_DEV_GEN(dev) >= 6) {
517 isl_gen6_choose_image_alignment_el(dev, info, tiling, dim_layout,
518 msaa_layout, image_align_el);
519 } else {
520 isl_gen4_choose_image_alignment_el(dev, info, tiling, dim_layout,
521 msaa_layout, image_align_el);
522 }
523 }
524
525 static enum isl_dim_layout
526 isl_surf_choose_dim_layout(const struct isl_device *dev,
527 enum isl_surf_dim logical_dim,
528 enum isl_tiling tiling)
529 {
530 if (ISL_DEV_GEN(dev) >= 9) {
531 switch (logical_dim) {
532 case ISL_SURF_DIM_1D:
533 /* From the Sky Lake PRM Vol. 5, "1D Surfaces":
534 *
535 * One-dimensional surfaces use a tiling mode of linear.
536 * Technically, they are not tiled resources, but the Tiled
537 * Resource Mode field in RENDER_SURFACE_STATE is still used to
538 * indicate the alignment requirements for this linear surface
539 * (See 1D Alignment requirements for how 4K and 64KB Tiled
540 * Resource Modes impact alignment). Alternatively, a 1D surface
541 * can be defined as a 2D tiled surface (e.g. TileY or TileX) with
542 * a height of 0.
543 *
544 * In other words, ISL_DIM_LAYOUT_GEN9_1D is only used for linear
545 * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GEN4_2D is used.
546 */
547 if (tiling == ISL_TILING_LINEAR)
548 return ISL_DIM_LAYOUT_GEN9_1D;
549 else
550 return ISL_DIM_LAYOUT_GEN4_2D;
551 case ISL_SURF_DIM_2D:
552 case ISL_SURF_DIM_3D:
553 return ISL_DIM_LAYOUT_GEN4_2D;
554 }
555 } else {
556 switch (logical_dim) {
557 case ISL_SURF_DIM_1D:
558 case ISL_SURF_DIM_2D:
559 return ISL_DIM_LAYOUT_GEN4_2D;
560 case ISL_SURF_DIM_3D:
561 return ISL_DIM_LAYOUT_GEN4_3D;
562 }
563 }
564
565 unreachable("bad isl_surf_dim");
566 return ISL_DIM_LAYOUT_GEN4_2D;
567 }
568
569 /**
570 * Calculate the physical extent of the surface's first level, in units of
571 * surface samples. The result is aligned to the format's compression block.
572 */
573 static void
574 isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
575 const struct isl_surf_init_info *restrict info,
576 enum isl_dim_layout dim_layout,
577 enum isl_tiling tiling,
578 enum isl_msaa_layout msaa_layout,
579 struct isl_extent4d *phys_level0_sa)
580 {
581 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
582
583 if (isl_format_is_yuv(info->format))
584 isl_finishme("%s:%s: YUV format", __FILE__, __func__);
585
586 switch (info->dim) {
587 case ISL_SURF_DIM_1D:
588 assert(info->height == 1);
589 assert(info->depth == 1);
590 assert(info->samples == 1);
591
592 switch (dim_layout) {
593 case ISL_DIM_LAYOUT_GEN4_3D:
594 unreachable("bad isl_dim_layout");
595
596 case ISL_DIM_LAYOUT_GEN9_1D:
597 case ISL_DIM_LAYOUT_GEN4_2D:
598 *phys_level0_sa = (struct isl_extent4d) {
599 .w = isl_align_npot(info->width, fmtl->bw),
600 .h = fmtl->bh,
601 .d = 1,
602 .a = info->array_len,
603 };
604 break;
605 }
606 break;
607
608 case ISL_SURF_DIM_2D:
609 assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D);
610
611 if (tiling == ISL_TILING_Ys && info->samples > 1)
612 isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__);
613
614 switch (msaa_layout) {
615 case ISL_MSAA_LAYOUT_NONE:
616 assert(info->depth == 1);
617 assert(info->samples == 1);
618
619 *phys_level0_sa = (struct isl_extent4d) {
620 .w = isl_align_npot(info->width, fmtl->bw),
621 .h = isl_align_npot(info->height, fmtl->bh),
622 .d = 1,
623 .a = info->array_len,
624 };
625 break;
626
627 case ISL_MSAA_LAYOUT_ARRAY:
628 assert(info->depth == 1);
629 assert(info->levels == 1);
630 assert(isl_format_supports_multisampling(dev->info, info->format));
631 assert(fmtl->bw == 1 && fmtl->bh == 1);
632
633 *phys_level0_sa = (struct isl_extent4d) {
634 .w = info->width,
635 .h = info->height,
636 .d = 1,
637 .a = info->array_len * info->samples,
638 };
639 break;
640
641 case ISL_MSAA_LAYOUT_INTERLEAVED:
642 assert(info->depth == 1);
643 assert(info->levels == 1);
644 assert(isl_format_supports_multisampling(dev->info, info->format));
645
646 *phys_level0_sa = (struct isl_extent4d) {
647 .w = info->width,
648 .h = info->height,
649 .d = 1,
650 .a = info->array_len,
651 };
652
653 isl_msaa_interleaved_scale_px_to_sa(info->samples,
654 &phys_level0_sa->w,
655 &phys_level0_sa->h);
656
657 phys_level0_sa->w = isl_align(phys_level0_sa->w, fmtl->bw);
658 phys_level0_sa->h = isl_align(phys_level0_sa->h, fmtl->bh);
659 break;
660 }
661 break;
662
663 case ISL_SURF_DIM_3D:
664 assert(info->array_len == 1);
665 assert(info->samples == 1);
666
667 if (fmtl->bd > 1) {
668 isl_finishme("%s:%s: compression block with depth > 1",
669 __FILE__, __func__);
670 }
671
672 switch (dim_layout) {
673 case ISL_DIM_LAYOUT_GEN9_1D:
674 unreachable("bad isl_dim_layout");
675
676 case ISL_DIM_LAYOUT_GEN4_2D:
677 assert(ISL_DEV_GEN(dev) >= 9);
678
679 *phys_level0_sa = (struct isl_extent4d) {
680 .w = isl_align_npot(info->width, fmtl->bw),
681 .h = isl_align_npot(info->height, fmtl->bh),
682 .d = 1,
683 .a = info->depth,
684 };
685 break;
686
687 case ISL_DIM_LAYOUT_GEN4_3D:
688 assert(ISL_DEV_GEN(dev) < 9);
689 *phys_level0_sa = (struct isl_extent4d) {
690 .w = isl_align(info->width, fmtl->bw),
691 .h = isl_align(info->height, fmtl->bh),
692 .d = info->depth,
693 .a = 1,
694 };
695 break;
696 }
697 break;
698 }
699 }
700
701 /**
702 * A variant of isl_calc_phys_slice0_extent_sa() specific to
703 * ISL_DIM_LAYOUT_GEN4_2D.
704 */
705 static void
706 isl_calc_phys_slice0_extent_sa_gen4_2d(
707 const struct isl_device *dev,
708 const struct isl_surf_init_info *restrict info,
709 enum isl_msaa_layout msaa_layout,
710 const struct isl_extent3d *image_align_sa,
711 const struct isl_extent4d *phys_level0_sa,
712 struct isl_extent2d *phys_slice0_sa)
713 {
714 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
715
716 assert(phys_level0_sa->depth == 1);
717
718 if (info->levels == 1) {
719 /* Do not pad the surface to the image alignment. Instead, pad it only
720 * to the pixel format's block alignment.
721 *
722 * For tiled surfaces, using a reduced alignment here avoids wasting CPU
723 * cycles on the below mipmap layout caluclations. Reducing the
724 * alignment here is safe because we later align the row pitch and array
725 * pitch to the tile boundary. It is safe even for
726 * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled
727 * to accomodate the interleaved samples.
728 *
729 * For linear surfaces, reducing the alignment here permits us to later
730 * choose an arbitrary, non-aligned row pitch. If the surface backs
731 * a VkBuffer, then an arbitrary pitch may be needed to accomodate
732 * VkBufferImageCopy::bufferRowLength.
733 */
734 *phys_slice0_sa = (struct isl_extent2d) {
735 .w = isl_align_npot(phys_level0_sa->w, fmtl->bw),
736 .h = isl_align_npot(phys_level0_sa->h, fmtl->bh),
737 };
738 return;
739 }
740
741 uint32_t slice_top_w = 0;
742 uint32_t slice_bottom_w = 0;
743 uint32_t slice_left_h = 0;
744 uint32_t slice_right_h = 0;
745
746 uint32_t W0 = phys_level0_sa->w;
747 uint32_t H0 = phys_level0_sa->h;
748
749 for (uint32_t l = 0; l < info->levels; ++l) {
750 uint32_t W = isl_minify(W0, l);
751 uint32_t H = isl_minify(H0, l);
752
753 uint32_t w = isl_align_npot(W, image_align_sa->w);
754 uint32_t h = isl_align_npot(H, image_align_sa->h);
755
756 if (l == 0) {
757 slice_top_w = w;
758 slice_left_h = h;
759 slice_right_h = h;
760 } else if (l == 1) {
761 slice_bottom_w = w;
762 slice_left_h += h;
763 } else if (l == 2) {
764 slice_bottom_w += w;
765 slice_right_h += h;
766 } else {
767 slice_right_h += h;
768 }
769 }
770
771 *phys_slice0_sa = (struct isl_extent2d) {
772 .w = MAX(slice_top_w, slice_bottom_w),
773 .h = MAX(slice_left_h, slice_right_h),
774 };
775 }
776
777 /**
778 * A variant of isl_calc_phys_slice0_extent_sa() specific to
779 * ISL_DIM_LAYOUT_GEN4_3D.
780 */
781 static void
782 isl_calc_phys_slice0_extent_sa_gen4_3d(
783 const struct isl_device *dev,
784 const struct isl_surf_init_info *restrict info,
785 const struct isl_extent3d *image_align_sa,
786 const struct isl_extent4d *phys_level0_sa,
787 struct isl_extent2d *phys_slice0_sa)
788 {
789 assert(info->samples == 1);
790 assert(phys_level0_sa->array_len == 1);
791
792 uint32_t slice_w = 0;
793 uint32_t slice_h = 0;
794
795 uint32_t W0 = phys_level0_sa->w;
796 uint32_t H0 = phys_level0_sa->h;
797 uint32_t D0 = phys_level0_sa->d;
798
799 for (uint32_t l = 0; l < info->levels; ++l) {
800 uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w);
801 uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h);
802 uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa->d);
803
804 uint32_t max_layers_horiz = MIN(level_d, 1u << l);
805 uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
806
807 slice_w = MAX(slice_w, level_w * max_layers_horiz);
808 slice_h += level_h * max_layers_vert;
809 }
810
811 *phys_slice0_sa = (struct isl_extent2d) {
812 .w = slice_w,
813 .h = slice_h,
814 };
815 }
816
817 /**
818 * A variant of isl_calc_phys_slice0_extent_sa() specific to
819 * ISL_DIM_LAYOUT_GEN9_1D.
820 */
821 static void
822 isl_calc_phys_slice0_extent_sa_gen9_1d(
823 const struct isl_device *dev,
824 const struct isl_surf_init_info *restrict info,
825 const struct isl_extent3d *image_align_sa,
826 const struct isl_extent4d *phys_level0_sa,
827 struct isl_extent2d *phys_slice0_sa)
828 {
829 MAYBE_UNUSED const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
830
831 assert(phys_level0_sa->height == 1);
832 assert(phys_level0_sa->depth == 1);
833 assert(info->samples == 1);
834 assert(image_align_sa->w >= fmtl->bw);
835
836 uint32_t slice_w = 0;
837 const uint32_t W0 = phys_level0_sa->w;
838
839 for (uint32_t l = 0; l < info->levels; ++l) {
840 uint32_t W = isl_minify(W0, l);
841 uint32_t w = isl_align_npot(W, image_align_sa->w);
842
843 slice_w += w;
844 }
845
846 *phys_slice0_sa = isl_extent2d(slice_w, 1);
847 }
848
849 /**
850 * Calculate the physical extent of the surface's first array slice, in units
851 * of surface samples. If the surface is multi-leveled, then the result will
852 * be aligned to \a image_align_sa.
853 */
854 static void
855 isl_calc_phys_slice0_extent_sa(const struct isl_device *dev,
856 const struct isl_surf_init_info *restrict info,
857 enum isl_dim_layout dim_layout,
858 enum isl_msaa_layout msaa_layout,
859 const struct isl_extent3d *image_align_sa,
860 const struct isl_extent4d *phys_level0_sa,
861 struct isl_extent2d *phys_slice0_sa)
862 {
863 switch (dim_layout) {
864 case ISL_DIM_LAYOUT_GEN9_1D:
865 isl_calc_phys_slice0_extent_sa_gen9_1d(dev, info,
866 image_align_sa, phys_level0_sa,
867 phys_slice0_sa);
868 return;
869 case ISL_DIM_LAYOUT_GEN4_2D:
870 isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout,
871 image_align_sa, phys_level0_sa,
872 phys_slice0_sa);
873 return;
874 case ISL_DIM_LAYOUT_GEN4_3D:
875 isl_calc_phys_slice0_extent_sa_gen4_3d(dev, info, image_align_sa,
876 phys_level0_sa, phys_slice0_sa);
877 return;
878 }
879 }
880
881 /**
882 * Calculate the pitch between physical array slices, in units of rows of
883 * surface elements.
884 */
885 static uint32_t
886 isl_calc_array_pitch_el_rows(const struct isl_device *dev,
887 const struct isl_surf_init_info *restrict info,
888 const struct isl_tile_info *tile_info,
889 enum isl_dim_layout dim_layout,
890 enum isl_array_pitch_span array_pitch_span,
891 const struct isl_extent3d *image_align_sa,
892 const struct isl_extent4d *phys_level0_sa,
893 const struct isl_extent2d *phys_slice0_sa)
894 {
895 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
896 uint32_t pitch_sa_rows = 0;
897
898 switch (dim_layout) {
899 case ISL_DIM_LAYOUT_GEN9_1D:
900 /* Each row is an array slice */
901 pitch_sa_rows = 1;
902 break;
903 case ISL_DIM_LAYOUT_GEN4_2D:
904 switch (array_pitch_span) {
905 case ISL_ARRAY_PITCH_SPAN_COMPACT:
906 pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
907 break;
908 case ISL_ARRAY_PITCH_SPAN_FULL: {
909 /* The QPitch equation is found in the Broadwell PRM >> Volume 5:
910 * Memory Views >> Common Surface Formats >> Surface Layout >> 2D
911 * Surfaces >> Surface Arrays.
912 */
913 uint32_t H0_sa = phys_level0_sa->h;
914 uint32_t H1_sa = isl_minify(H0_sa, 1);
915
916 uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h);
917 uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h);
918
919 uint32_t m;
920 if (ISL_DEV_GEN(dev) >= 7) {
921 /* The QPitch equation changed slightly in Ivybridge. */
922 m = 12;
923 } else {
924 m = 11;
925 }
926
927 pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h);
928
929 if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 &&
930 (info->height % 4 == 1)) {
931 /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
932 * Graphics Core >> Section 7.18.3.7: Surface Arrays:
933 *
934 * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than
935 * the value calculated in the equation above , for every
936 * other odd Surface Height starting from 1 i.e. 1,5,9,13.
937 *
938 * XXX(chadv): Is the errata natural corollary of the physical
939 * layout of interleaved samples?
940 */
941 pitch_sa_rows += 4;
942 }
943
944 pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh);
945 } /* end case */
946 break;
947 }
948 break;
949 case ISL_DIM_LAYOUT_GEN4_3D:
950 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
951 pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
952 break;
953 default:
954 unreachable("bad isl_dim_layout");
955 break;
956 }
957
958 assert(pitch_sa_rows % fmtl->bh == 0);
959 uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh;
960
961 if (ISL_DEV_GEN(dev) >= 9 && fmtl->txc == ISL_TXC_CCS) {
962 /*
963 * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632):
964 *
965 * "Mip-mapped and arrayed surfaces are supported with MCS buffer
966 * layout with these alignments in the RT space: Horizontal
967 * Alignment = 128 and Vertical Alignment = 64."
968 *
969 * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435):
970 *
971 * "For non-multisampled render target's CCS auxiliary surface,
972 * QPitch must be computed with Horizontal Alignment = 128 and
973 * Surface Vertical Alignment = 256. These alignments are only for
974 * CCS buffer and not for associated render target."
975 *
976 * The first restriction is already handled by isl_choose_image_alignment_el
977 * but the second restriction, which is an extension of the first, only
978 * applies to qpitch and must be applied here.
979 */
980 assert(fmtl->bh == 4);
981 pitch_el_rows = isl_align(pitch_el_rows, 256 / 4);
982 }
983
984 if (ISL_DEV_GEN(dev) >= 9 &&
985 info->dim == ISL_SURF_DIM_3D &&
986 tile_info->tiling != ISL_TILING_LINEAR) {
987 /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch:
988 *
989 * Tile Mode != Linear: This field must be set to an integer multiple
990 * of the tile height
991 */
992 pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height);
993 }
994
995 return pitch_el_rows;
996 }
997
998 static uint32_t
999 isl_calc_row_pitch_alignment(const struct isl_surf_init_info *surf_info,
1000 const struct isl_tile_info *tile_info)
1001 {
1002 if (tile_info->tiling != ISL_TILING_LINEAR)
1003 return tile_info->phys_extent_B.width;
1004
1005 /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >>
1006 * RENDER_SURFACE_STATE Surface Pitch (p349):
1007 *
1008 * - For linear render target surfaces and surfaces accessed with the
1009 * typed data port messages, the pitch must be a multiple of the
1010 * element size for non-YUV surface formats. Pitch must be
1011 * a multiple of 2 * element size for YUV surface formats.
1012 *
1013 * - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we
1014 * ignore because isl doesn't do buffers.]
1015 *
1016 * - For other linear surfaces, the pitch can be any multiple of
1017 * bytes.
1018 */
1019 const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1020 const uint32_t bs = fmtl->bpb / 8;
1021
1022 if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
1023 if (isl_format_is_yuv(surf_info->format)) {
1024 return 2 * bs;
1025 } else {
1026 return bs;
1027 }
1028 }
1029
1030 return 1;
1031 }
1032
1033 static uint32_t
1034 isl_calc_linear_min_row_pitch(const struct isl_device *dev,
1035 const struct isl_surf_init_info *info,
1036 const struct isl_extent2d *phys_slice0_sa,
1037 uint32_t alignment)
1038 {
1039 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1040 const uint32_t bs = fmtl->bpb / 8;
1041
1042 assert(phys_slice0_sa->w % fmtl->bw == 0);
1043
1044 uint32_t min_row_pitch = bs * (phys_slice0_sa->w / fmtl->bw);
1045 min_row_pitch = MAX2(min_row_pitch, info->min_pitch);
1046 min_row_pitch = isl_align_npot(min_row_pitch, alignment);
1047
1048 return min_row_pitch;
1049 }
1050
1051 static uint32_t
1052 isl_calc_tiled_min_row_pitch(const struct isl_device *dev,
1053 const struct isl_surf_init_info *surf_info,
1054 const struct isl_tile_info *tile_info,
1055 const struct isl_extent2d *phys_slice0_sa,
1056 uint32_t alignment)
1057 {
1058 const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1059
1060 assert(fmtl->bpb % tile_info->format_bpb == 0);
1061 assert(phys_slice0_sa->w % fmtl->bw == 0);
1062
1063 const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb;
1064 const uint32_t total_w_el = phys_slice0_sa->width / fmtl->bw;
1065 const uint32_t total_w_tl =
1066 isl_align_div(total_w_el * tile_el_scale,
1067 tile_info->logical_extent_el.width);
1068
1069 uint32_t min_row_pitch = total_w_tl * tile_info->phys_extent_B.width;
1070 min_row_pitch = MAX2(min_row_pitch, surf_info->min_pitch);
1071 min_row_pitch = isl_align_npot(min_row_pitch, alignment);
1072
1073 return min_row_pitch;
1074 }
1075
1076 static uint32_t
1077 isl_calc_min_row_pitch(const struct isl_device *dev,
1078 const struct isl_surf_init_info *surf_info,
1079 const struct isl_tile_info *tile_info,
1080 const struct isl_extent2d *phys_slice0_sa,
1081 uint32_t alignment)
1082 {
1083 if (tile_info->tiling == ISL_TILING_LINEAR) {
1084 return isl_calc_linear_min_row_pitch(dev, surf_info, phys_slice0_sa,
1085 alignment);
1086 } else {
1087 return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info,
1088 phys_slice0_sa, alignment);
1089 }
1090 }
1091
1092 static uint32_t
1093 isl_calc_row_pitch(const struct isl_device *dev,
1094 const struct isl_surf_init_info *surf_info,
1095 const struct isl_tile_info *tile_info,
1096 enum isl_dim_layout dim_layout,
1097 const struct isl_extent2d *phys_slice0_sa)
1098 {
1099 const uint32_t alignment =
1100 isl_calc_row_pitch_alignment(surf_info, tile_info);
1101
1102 return isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_slice0_sa,
1103 alignment);
1104 }
1105
1106 /**
1107 * Calculate and apply any padding required for the surface.
1108 *
1109 * @param[inout] total_h_el is updated with the new height
1110 * @param[out] pad_bytes is overwritten with additional padding requirements.
1111 */
1112 static void
1113 isl_apply_surface_padding(const struct isl_device *dev,
1114 const struct isl_surf_init_info *restrict info,
1115 const struct isl_tile_info *tile_info,
1116 uint32_t *total_h_el,
1117 uint32_t *pad_bytes)
1118 {
1119 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1120
1121 *pad_bytes = 0;
1122
1123 /* From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
1124 * Formats >> Surface Padding Requirements >> Render Target and Media
1125 * Surfaces:
1126 *
1127 * The data port accesses data (pixels) outside of the surface if they
1128 * are contained in the same cache request as pixels that are within the
1129 * surface. These pixels will not be returned by the requesting message,
1130 * however if these pixels lie outside of defined pages in the GTT,
1131 * a GTT error will result when the cache request is processed. In
1132 * order to avoid these GTT errors, “padding” at the bottom of the
1133 * surface is sometimes necessary.
1134 *
1135 * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
1136 * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces:
1137 *
1138 * ... Lots of padding requirements, all listed separately below.
1139 */
1140
1141 /* We can safely ignore the first padding requirement, quoted below,
1142 * because isl doesn't do buffers.
1143 *
1144 * - [pre-BDW] For buffers, which have no inherent “height,” padding
1145 * requirements are different. A buffer must be padded to the next
1146 * multiple of 256 array elements, with an additional 16 bytes added
1147 * beyond that to account for the L1 cache line.
1148 */
1149
1150 /*
1151 * - For compressed textures [...], padding at the bottom of the surface
1152 * is to an even compressed row.
1153 */
1154 if (isl_format_is_compressed(info->format))
1155 *total_h_el = isl_align(*total_h_el, 2);
1156
1157 /*
1158 * - For cube surfaces, an additional two rows of padding are required
1159 * at the bottom of the surface.
1160 */
1161 if (info->usage & ISL_SURF_USAGE_CUBE_BIT)
1162 *total_h_el += 2;
1163
1164 /*
1165 * - For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats,
1166 * additional padding is required. These surfaces require an extra row
1167 * plus 16 bytes of padding at the bottom in addition to the general
1168 * padding requirements.
1169 */
1170 if (isl_format_is_yuv(info->format) &&
1171 (fmtl->bpb == 96 || fmtl->bpb == 48|| fmtl->bpb == 24)) {
1172 *total_h_el += 1;
1173 *pad_bytes += 16;
1174 }
1175
1176 /*
1177 * - For linear surfaces, additional padding of 64 bytes is required at
1178 * the bottom of the surface. This is in addition to the padding
1179 * required above.
1180 */
1181 if (tile_info->tiling == ISL_TILING_LINEAR)
1182 *pad_bytes += 64;
1183
1184 /* The below text weakens, not strengthens, the padding requirements for
1185 * linear surfaces. Therefore we can safely ignore it.
1186 *
1187 * - [BDW+] For SURFTYPE_BUFFER, SURFTYPE_1D, and SURFTYPE_2D non-array,
1188 * non-MSAA, non-mip-mapped surfaces in linear memory, the only
1189 * padding requirement is to the next aligned 64-byte boundary beyond
1190 * the end of the surface. The rest of the padding requirements
1191 * documented above do not apply to these surfaces.
1192 */
1193
1194 /*
1195 * - [SKL+] For SURFTYPE_2D and SURFTYPE_3D with linear mode and
1196 * height % 4 != 0, the surface must be padded with
1197 * 4-(height % 4)*Surface Pitch # of bytes.
1198 */
1199 if (ISL_DEV_GEN(dev) >= 9 &&
1200 tile_info->tiling == ISL_TILING_LINEAR &&
1201 (info->dim == ISL_SURF_DIM_2D || info->dim == ISL_SURF_DIM_3D)) {
1202 *total_h_el = isl_align(*total_h_el, 4);
1203 }
1204
1205 /*
1206 * - [SKL+] For SURFTYPE_1D with linear mode, the surface must be padded
1207 * to 4 times the Surface Pitch # of bytes
1208 */
1209 if (ISL_DEV_GEN(dev) >= 9 &&
1210 tile_info->tiling == ISL_TILING_LINEAR &&
1211 info->dim == ISL_SURF_DIM_1D) {
1212 *total_h_el += 4;
1213 }
1214 }
1215
1216 bool
1217 isl_surf_init_s(const struct isl_device *dev,
1218 struct isl_surf *surf,
1219 const struct isl_surf_init_info *restrict info)
1220 {
1221 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1222
1223 const struct isl_extent4d logical_level0_px = {
1224 .w = info->width,
1225 .h = info->height,
1226 .d = info->depth,
1227 .a = info->array_len,
1228 };
1229
1230 enum isl_tiling tiling;
1231 if (!isl_surf_choose_tiling(dev, info, &tiling))
1232 return false;
1233
1234 struct isl_tile_info tile_info;
1235 if (!isl_tiling_get_info(dev, tiling, fmtl->bpb, &tile_info))
1236 return false;
1237
1238 const enum isl_dim_layout dim_layout =
1239 isl_surf_choose_dim_layout(dev, info->dim, tiling);
1240
1241 enum isl_msaa_layout msaa_layout;
1242 if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout))
1243 return false;
1244
1245 struct isl_extent3d image_align_el;
1246 isl_choose_image_alignment_el(dev, info, tiling, dim_layout, msaa_layout,
1247 &image_align_el);
1248
1249 struct isl_extent3d image_align_sa =
1250 isl_extent3d_el_to_sa(info->format, image_align_el);
1251
1252 struct isl_extent4d phys_level0_sa;
1253 isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout,
1254 &phys_level0_sa);
1255 assert(phys_level0_sa.w % fmtl->bw == 0);
1256 assert(phys_level0_sa.h % fmtl->bh == 0);
1257
1258 enum isl_array_pitch_span array_pitch_span =
1259 isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa);
1260
1261 struct isl_extent2d phys_slice0_sa;
1262 isl_calc_phys_slice0_extent_sa(dev, info, dim_layout, msaa_layout,
1263 &image_align_sa, &phys_level0_sa,
1264 &phys_slice0_sa);
1265 assert(phys_slice0_sa.w % fmtl->bw == 0);
1266 assert(phys_slice0_sa.h % fmtl->bh == 0);
1267
1268 const uint32_t array_pitch_el_rows =
1269 isl_calc_array_pitch_el_rows(dev, info, &tile_info, dim_layout,
1270 array_pitch_span, &image_align_sa,
1271 &phys_level0_sa, &phys_slice0_sa);
1272
1273 uint32_t total_h_el = phys_level0_sa.array_len * array_pitch_el_rows;
1274
1275 uint32_t pad_bytes;
1276 isl_apply_surface_padding(dev, info, &tile_info, &total_h_el, &pad_bytes);
1277
1278 const uint32_t row_pitch = isl_calc_row_pitch(dev, info, &tile_info,
1279 dim_layout, &phys_slice0_sa);
1280
1281 uint32_t size, base_alignment;
1282 if (tiling == ISL_TILING_LINEAR) {
1283 size = row_pitch * total_h_el + pad_bytes;
1284
1285 /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress:
1286 *
1287 * "The Base Address for linear render target surfaces and surfaces
1288 * accessed with the typed surface read/write data port messages must
1289 * be element-size aligned, for non-YUV surface formats, or a
1290 * multiple of 2 element-sizes for YUV surface formats. Other linear
1291 * surfaces have no alignment requirements (byte alignment is
1292 * sufficient.)"
1293 */
1294 base_alignment = MAX(1, info->min_alignment);
1295 if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
1296 if (isl_format_is_yuv(info->format)) {
1297 base_alignment = MAX(base_alignment, fmtl->bpb / 4);
1298 } else {
1299 base_alignment = MAX(base_alignment, fmtl->bpb / 8);
1300 }
1301 }
1302 base_alignment = isl_round_up_to_power_of_two(base_alignment);
1303 } else {
1304 total_h_el += isl_align_div_npot(pad_bytes, row_pitch);
1305 const uint32_t total_h_tl =
1306 isl_align_div(total_h_el, tile_info.logical_extent_el.height);
1307
1308 size = total_h_tl * tile_info.phys_extent_B.height * row_pitch;
1309
1310 const uint32_t tile_size = tile_info.phys_extent_B.width *
1311 tile_info.phys_extent_B.height;
1312 assert(isl_is_pow2(info->min_alignment) && isl_is_pow2(tile_size));
1313 base_alignment = MAX(info->min_alignment, tile_size);
1314 }
1315
1316 *surf = (struct isl_surf) {
1317 .dim = info->dim,
1318 .dim_layout = dim_layout,
1319 .msaa_layout = msaa_layout,
1320 .tiling = tiling,
1321 .format = info->format,
1322
1323 .levels = info->levels,
1324 .samples = info->samples,
1325
1326 .image_alignment_el = image_align_el,
1327 .logical_level0_px = logical_level0_px,
1328 .phys_level0_sa = phys_level0_sa,
1329
1330 .size = size,
1331 .alignment = base_alignment,
1332 .row_pitch = row_pitch,
1333 .array_pitch_el_rows = array_pitch_el_rows,
1334 .array_pitch_span = array_pitch_span,
1335
1336 .usage = info->usage,
1337 };
1338
1339 return true;
1340 }
1341
1342 void
1343 isl_surf_get_tile_info(const struct isl_device *dev,
1344 const struct isl_surf *surf,
1345 struct isl_tile_info *tile_info)
1346 {
1347 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
1348 isl_tiling_get_info(dev, surf->tiling, fmtl->bpb, tile_info);
1349 }
1350
1351 bool
1352 isl_surf_get_hiz_surf(const struct isl_device *dev,
1353 const struct isl_surf *surf,
1354 struct isl_surf *hiz_surf)
1355 {
1356 assert(ISL_DEV_GEN(dev) >= 5 && ISL_DEV_USE_SEPARATE_STENCIL(dev));
1357
1358 /* Multisampled depth is always interleaved */
1359 assert(surf->msaa_layout == ISL_MSAA_LAYOUT_NONE ||
1360 surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);
1361
1362 /* From the Broadwell PRM Vol. 7, "Hierarchical Depth Buffer":
1363 *
1364 * "The Surface Type, Height, Width, Depth, Minimum Array Element, Render
1365 * Target View Extent, and Depth Coordinate Offset X/Y of the
1366 * hierarchical depth buffer are inherited from the depth buffer. The
1367 * height and width of the hierarchical depth buffer that must be
1368 * allocated are computed by the following formulas, where HZ is the
1369 * hierarchical depth buffer and Z is the depth buffer. The Z_Height,
1370 * Z_Width, and Z_Depth values given in these formulas are those present
1371 * in 3DSTATE_DEPTH_BUFFER incremented by one.
1372 *
1373 * "The value of Z_Height and Z_Width must each be multiplied by 2 before
1374 * being applied to the table below if Number of Multisamples is set to
1375 * NUMSAMPLES_4. The value of Z_Height must be multiplied by 2 and
1376 * Z_Width must be multiplied by 4 before being applied to the table
1377 * below if Number of Multisamples is set to NUMSAMPLES_8."
1378 *
1379 * In the Sky Lake PRM, the second paragraph is replaced with this:
1380 *
1381 * "The Z_Height and Z_Width values must equal those present in
1382 * 3DSTATE_DEPTH_BUFFER incremented by one."
1383 *
1384 * In other words, on Sandy Bridge through Broadwell, each 128-bit HiZ
1385 * block corresponds to a region of 8x4 samples in the primary depth
1386 * surface. On Sky Lake, on the other hand, each HiZ block corresponds to
1387 * a region of 8x4 pixels in the primary depth surface regardless of the
1388 * number of samples. The dimensions of a HiZ block in both pixels and
1389 * samples are given in the table below:
1390 *
1391 * | SNB - BDW | SKL+
1392 * ------+-----------+-------------
1393 * 1x | 8 x 4 sa | 8 x 4 sa
1394 * MSAA | 8 x 4 px | 8 x 4 px
1395 * ------+-----------+-------------
1396 * 2x | 8 x 4 sa | 16 x 4 sa
1397 * MSAA | 4 x 4 px | 8 x 4 px
1398 * ------+-----------+-------------
1399 * 4x | 8 x 4 sa | 16 x 8 sa
1400 * MSAA | 4 x 2 px | 8 x 4 px
1401 * ------+-----------+-------------
1402 * 8x | 8 x 4 sa | 32 x 8 sa
1403 * MSAA | 2 x 2 px | 8 x 4 px
1404 * ------+-----------+-------------
1405 * 16x | N/A | 32 x 16 sa
1406 * MSAA | N/A | 8 x 4 px
1407 * ------+-----------+-------------
1408 *
1409 * There are a number of different ways that this discrepency could be
1410 * handled. The way we have chosen is to simply make MSAA HiZ have the
1411 * same number of samples as the parent surface pre-Sky Lake and always be
1412 * single-sampled on Sky Lake and above. Since the block sizes of
1413 * compressed formats are given in samples, this neatly handles everything
1414 * without the need for additional HiZ formats with different block sizes
1415 * on SKL+.
1416 */
1417 const unsigned samples = ISL_DEV_GEN(dev) >= 9 ? 1 : surf->samples;
1418
1419 return isl_surf_init(dev, hiz_surf,
1420 .dim = surf->dim,
1421 .format = ISL_FORMAT_HIZ,
1422 .width = surf->logical_level0_px.width,
1423 .height = surf->logical_level0_px.height,
1424 .depth = surf->logical_level0_px.depth,
1425 .levels = surf->levels,
1426 .array_len = surf->logical_level0_px.array_len,
1427 .samples = samples,
1428 .usage = ISL_SURF_USAGE_HIZ_BIT,
1429 .tiling_flags = ISL_TILING_HIZ_BIT);
1430 }
1431
1432 bool
1433 isl_surf_get_mcs_surf(const struct isl_device *dev,
1434 const struct isl_surf *surf,
1435 struct isl_surf *mcs_surf)
1436 {
1437 /* It must be multisampled with an array layout */
1438 assert(surf->samples > 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
1439
1440 /* The following are true of all multisampled surfaces */
1441 assert(surf->dim == ISL_SURF_DIM_2D);
1442 assert(surf->levels == 1);
1443 assert(surf->logical_level0_px.depth == 1);
1444
1445 /* The "Auxiliary Surface Pitch" field in RENDER_SURFACE_STATE is only 9
1446 * bits which means the maximum pitch of a compression surface is 512
1447 * tiles or 64KB (since MCS is always Y-tiled). Since a 16x MCS buffer is
1448 * 64bpp, this gives us a maximum width of 8192 pixels. We can create
1449 * larger multisampled surfaces, we just can't compress them. For 2x, 4x,
1450 * and 8x, we have enough room for the full 16k supported by the hardware.
1451 */
1452 if (surf->samples == 16 && surf->logical_level0_px.width > 8192)
1453 return false;
1454
1455 enum isl_format mcs_format;
1456 switch (surf->samples) {
1457 case 2: mcs_format = ISL_FORMAT_MCS_2X; break;
1458 case 4: mcs_format = ISL_FORMAT_MCS_4X; break;
1459 case 8: mcs_format = ISL_FORMAT_MCS_8X; break;
1460 case 16: mcs_format = ISL_FORMAT_MCS_16X; break;
1461 default:
1462 unreachable("Invalid sample count");
1463 }
1464
1465 return isl_surf_init(dev, mcs_surf,
1466 .dim = ISL_SURF_DIM_2D,
1467 .format = mcs_format,
1468 .width = surf->logical_level0_px.width,
1469 .height = surf->logical_level0_px.height,
1470 .depth = 1,
1471 .levels = 1,
1472 .array_len = surf->logical_level0_px.array_len,
1473 .samples = 1, /* MCS surfaces are really single-sampled */
1474 .usage = ISL_SURF_USAGE_MCS_BIT,
1475 .tiling_flags = ISL_TILING_Y0_BIT);
1476 }
1477
1478 bool
1479 isl_surf_get_ccs_surf(const struct isl_device *dev,
1480 const struct isl_surf *surf,
1481 struct isl_surf *ccs_surf)
1482 {
1483 assert(surf->samples == 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_NONE);
1484 assert(ISL_DEV_GEN(dev) >= 7);
1485
1486 if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)
1487 return false;
1488
1489 if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
1490 return false;
1491
1492 if (isl_format_is_compressed(surf->format))
1493 return false;
1494
1495 /* TODO: More conditions where it can fail. */
1496
1497 enum isl_format ccs_format;
1498 if (ISL_DEV_GEN(dev) >= 9) {
1499 if (!isl_tiling_is_any_y(surf->tiling))
1500 return false;
1501
1502 switch (isl_format_get_layout(surf->format)->bpb) {
1503 case 32: ccs_format = ISL_FORMAT_GEN9_CCS_32BPP; break;
1504 case 64: ccs_format = ISL_FORMAT_GEN9_CCS_64BPP; break;
1505 case 128: ccs_format = ISL_FORMAT_GEN9_CCS_128BPP; break;
1506 default:
1507 return false;
1508 }
1509 } else if (surf->tiling == ISL_TILING_Y0) {
1510 switch (isl_format_get_layout(surf->format)->bpb) {
1511 case 32: ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_Y; break;
1512 case 64: ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_Y; break;
1513 case 128: ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_Y; break;
1514 default:
1515 return false;
1516 }
1517 } else if (surf->tiling == ISL_TILING_X) {
1518 switch (isl_format_get_layout(surf->format)->bpb) {
1519 case 32: ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_X; break;
1520 case 64: ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_X; break;
1521 case 128: ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_X; break;
1522 default:
1523 return false;
1524 }
1525 } else {
1526 return false;
1527 }
1528
1529 return isl_surf_init(dev, ccs_surf,
1530 .dim = surf->dim,
1531 .format = ccs_format,
1532 .width = surf->logical_level0_px.width,
1533 .height = surf->logical_level0_px.height,
1534 .depth = surf->logical_level0_px.depth,
1535 .levels = surf->levels,
1536 .array_len = surf->logical_level0_px.array_len,
1537 .samples = 1,
1538 .usage = ISL_SURF_USAGE_CCS_BIT,
1539 .tiling_flags = ISL_TILING_CCS_BIT);
1540 }
1541
1542 void
1543 isl_surf_fill_state_s(const struct isl_device *dev, void *state,
1544 const struct isl_surf_fill_state_info *restrict info)
1545 {
1546 #ifndef NDEBUG
1547 isl_surf_usage_flags_t _base_usage =
1548 info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
1549 ISL_SURF_USAGE_TEXTURE_BIT |
1550 ISL_SURF_USAGE_STORAGE_BIT);
1551 /* They may only specify one of the above bits at a time */
1552 assert(__builtin_popcount(_base_usage) == 1);
1553 /* The only other allowed bit is ISL_SURF_USAGE_CUBE_BIT */
1554 assert((info->view->usage & ~ISL_SURF_USAGE_CUBE_BIT) == _base_usage);
1555 #endif
1556
1557 if (info->surf->dim == ISL_SURF_DIM_3D) {
1558 assert(info->view->base_array_layer + info->view->array_len <=
1559 info->surf->logical_level0_px.depth);
1560 } else {
1561 assert(info->view->base_array_layer + info->view->array_len <=
1562 info->surf->logical_level0_px.array_len);
1563 }
1564
1565 switch (ISL_DEV_GEN(dev)) {
1566 case 4:
1567 if (ISL_DEV_IS_G4X(dev)) {
1568 /* G45 surface state is the same as gen5 */
1569 isl_gen5_surf_fill_state_s(dev, state, info);
1570 } else {
1571 isl_gen4_surf_fill_state_s(dev, state, info);
1572 }
1573 break;
1574 case 5:
1575 isl_gen5_surf_fill_state_s(dev, state, info);
1576 break;
1577 case 6:
1578 isl_gen6_surf_fill_state_s(dev, state, info);
1579 break;
1580 case 7:
1581 if (ISL_DEV_IS_HASWELL(dev)) {
1582 isl_gen75_surf_fill_state_s(dev, state, info);
1583 } else {
1584 isl_gen7_surf_fill_state_s(dev, state, info);
1585 }
1586 break;
1587 case 8:
1588 isl_gen8_surf_fill_state_s(dev, state, info);
1589 break;
1590 case 9:
1591 isl_gen9_surf_fill_state_s(dev, state, info);
1592 break;
1593 default:
1594 assert(!"Cannot fill surface state for this gen");
1595 }
1596 }
1597
/**
 * Hand off to the buffer surface-state packer for the device's hardware
 * generation.
 */
void
isl_buffer_fill_state_s(const struct isl_device *dev, void *state,
                        const struct isl_buffer_fill_state_info *restrict info)
{
   switch (ISL_DEV_GEN(dev)) {
   case 4:
   case 5:
      /* Gen 4-5 are all the same when it comes to buffer surfaces */
      isl_gen5_buffer_fill_state_s(state, info);
      break;
   case 6:
      isl_gen6_buffer_fill_state_s(state, info);
      break;
   case 7:
      /* Haswell has its own packer; the rest of gen7 shares one. */
      if (!ISL_DEV_IS_HASWELL(dev)) {
         isl_gen7_buffer_fill_state_s(state, info);
      } else {
         isl_gen75_buffer_fill_state_s(state, info);
      }
      break;
   case 8:
      isl_gen8_buffer_fill_state_s(state, info);
      break;
   case 9:
      isl_gen9_buffer_fill_state_s(state, info);
      break;
   default:
      assert(!"Cannot fill surface state for this gen");
   }
}
1628
1629 /**
1630 * A variant of isl_surf_get_image_offset_sa() specific to
1631 * ISL_DIM_LAYOUT_GEN4_2D.
1632 */
1633 static void
1634 get_image_offset_sa_gen4_2d(const struct isl_surf *surf,
1635 uint32_t level, uint32_t logical_array_layer,
1636 uint32_t *x_offset_sa,
1637 uint32_t *y_offset_sa)
1638 {
1639 assert(level < surf->levels);
1640 if (surf->dim == ISL_SURF_DIM_3D)
1641 assert(logical_array_layer < surf->logical_level0_px.depth);
1642 else
1643 assert(logical_array_layer < surf->logical_level0_px.array_len);
1644
1645 const struct isl_extent3d image_align_sa =
1646 isl_surf_get_image_alignment_sa(surf);
1647
1648 const uint32_t W0 = surf->phys_level0_sa.width;
1649 const uint32_t H0 = surf->phys_level0_sa.height;
1650
1651 const uint32_t phys_layer = logical_array_layer *
1652 (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1);
1653
1654 uint32_t x = 0;
1655 uint32_t y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf);
1656
1657 for (uint32_t l = 0; l < level; ++l) {
1658 if (l == 1) {
1659 uint32_t W = isl_minify(W0, l);
1660 x += isl_align_npot(W, image_align_sa.w);
1661 } else {
1662 uint32_t H = isl_minify(H0, l);
1663 y += isl_align_npot(H, image_align_sa.h);
1664 }
1665 }
1666
1667 *x_offset_sa = x;
1668 *y_offset_sa = y;
1669 }
1670
1671 /**
1672 * A variant of isl_surf_get_image_offset_sa() specific to
1673 * ISL_DIM_LAYOUT_GEN4_3D.
1674 */
1675 static void
1676 get_image_offset_sa_gen4_3d(const struct isl_surf *surf,
1677 uint32_t level, uint32_t logical_z_offset_px,
1678 uint32_t *x_offset_sa,
1679 uint32_t *y_offset_sa)
1680 {
1681 assert(level < surf->levels);
1682 assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level));
1683 assert(surf->phys_level0_sa.array_len == 1);
1684
1685 const struct isl_extent3d image_align_sa =
1686 isl_surf_get_image_alignment_sa(surf);
1687
1688 const uint32_t W0 = surf->phys_level0_sa.width;
1689 const uint32_t H0 = surf->phys_level0_sa.height;
1690 const uint32_t D0 = surf->phys_level0_sa.depth;
1691
1692 uint32_t x = 0;
1693 uint32_t y = 0;
1694
1695 for (uint32_t l = 0; l < level; ++l) {
1696 const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h);
1697 const uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa.d);
1698 const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
1699
1700 y += level_h * max_layers_vert;
1701 }
1702
1703 const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w);
1704 const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h);
1705 const uint32_t level_d = isl_align_npot(isl_minify(D0, level), image_align_sa.d);
1706
1707 const uint32_t max_layers_horiz = MIN(level_d, 1u << level);
1708
1709 x += level_w * (logical_z_offset_px % max_layers_horiz);
1710 y += level_h * (logical_z_offset_px / max_layers_horiz);
1711
1712 *x_offset_sa = x;
1713 *y_offset_sa = y;
1714 }
1715
1716 /**
1717 * A variant of isl_surf_get_image_offset_sa() specific to
1718 * ISL_DIM_LAYOUT_GEN9_1D.
1719 */
1720 static void
1721 get_image_offset_sa_gen9_1d(const struct isl_surf *surf,
1722 uint32_t level, uint32_t layer,
1723 uint32_t *x_offset_sa,
1724 uint32_t *y_offset_sa)
1725 {
1726 assert(level < surf->levels);
1727 assert(layer < surf->phys_level0_sa.array_len);
1728 assert(surf->phys_level0_sa.height == 1);
1729 assert(surf->phys_level0_sa.depth == 1);
1730 assert(surf->samples == 1);
1731
1732 const uint32_t W0 = surf->phys_level0_sa.width;
1733 const struct isl_extent3d image_align_sa =
1734 isl_surf_get_image_alignment_sa(surf);
1735
1736 uint32_t x = 0;
1737
1738 for (uint32_t l = 0; l < level; ++l) {
1739 uint32_t W = isl_minify(W0, l);
1740 uint32_t w = isl_align_npot(W, image_align_sa.w);
1741
1742 x += w;
1743 }
1744
1745 *x_offset_sa = x;
1746 *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf);
1747 }
1748
1749 /**
1750 * Calculate the offset, in units of surface samples, to a subimage in the
1751 * surface.
1752 *
1753 * @invariant level < surface levels
1754 * @invariant logical_array_layer < logical array length of surface
1755 * @invariant logical_z_offset_px < logical depth of surface at level
1756 */
1757 void
1758 isl_surf_get_image_offset_sa(const struct isl_surf *surf,
1759 uint32_t level,
1760 uint32_t logical_array_layer,
1761 uint32_t logical_z_offset_px,
1762 uint32_t *x_offset_sa,
1763 uint32_t *y_offset_sa)
1764 {
1765 assert(level < surf->levels);
1766 assert(logical_array_layer < surf->logical_level0_px.array_len);
1767 assert(logical_z_offset_px
1768 < isl_minify(surf->logical_level0_px.depth, level));
1769
1770 switch (surf->dim_layout) {
1771 case ISL_DIM_LAYOUT_GEN9_1D:
1772 get_image_offset_sa_gen9_1d(surf, level, logical_array_layer,
1773 x_offset_sa, y_offset_sa);
1774 break;
1775 case ISL_DIM_LAYOUT_GEN4_2D:
1776 get_image_offset_sa_gen4_2d(surf, level, logical_array_layer
1777 + logical_z_offset_px,
1778 x_offset_sa, y_offset_sa);
1779 break;
1780 case ISL_DIM_LAYOUT_GEN4_3D:
1781 get_image_offset_sa_gen4_3d(surf, level, logical_z_offset_px,
1782 x_offset_sa, y_offset_sa);
1783 break;
1784
1785 default:
1786 unreachable("not reached");
1787 }
1788 }
1789
1790 void
1791 isl_surf_get_image_offset_el(const struct isl_surf *surf,
1792 uint32_t level,
1793 uint32_t logical_array_layer,
1794 uint32_t logical_z_offset_px,
1795 uint32_t *x_offset_el,
1796 uint32_t *y_offset_el)
1797 {
1798 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
1799
1800 assert(level < surf->levels);
1801 assert(logical_array_layer < surf->logical_level0_px.array_len);
1802 assert(logical_z_offset_px
1803 < isl_minify(surf->logical_level0_px.depth, level));
1804
1805 uint32_t x_offset_sa, y_offset_sa;
1806 isl_surf_get_image_offset_sa(surf, level,
1807 logical_array_layer,
1808 logical_z_offset_px,
1809 &x_offset_sa,
1810 &y_offset_sa);
1811
1812 *x_offset_el = x_offset_sa / fmtl->bw;
1813 *y_offset_el = y_offset_sa / fmtl->bh;
1814 }
1815
1816 void
1817 isl_tiling_get_intratile_offset_el(const struct isl_device *dev,
1818 enum isl_tiling tiling,
1819 uint8_t bs,
1820 uint32_t row_pitch,
1821 uint32_t total_x_offset_el,
1822 uint32_t total_y_offset_el,
1823 uint32_t *base_address_offset,
1824 uint32_t *x_offset_el,
1825 uint32_t *y_offset_el)
1826 {
1827 if (tiling == ISL_TILING_LINEAR) {
1828 *base_address_offset = total_y_offset_el * row_pitch +
1829 total_x_offset_el * bs;
1830 *x_offset_el = 0;
1831 *y_offset_el = 0;
1832 return;
1833 }
1834
1835 const uint32_t bpb = bs * 8;
1836
1837 struct isl_tile_info tile_info;
1838 isl_tiling_get_info(dev, tiling, bpb, &tile_info);
1839
1840 assert(row_pitch % tile_info.phys_extent_B.width == 0);
1841
1842 /* For non-power-of-two formats, we need the address to be both tile and
1843 * element-aligned. The easiest way to achieve this is to work with a tile
1844 * that is three times as wide as the regular tile.
1845 *
1846 * The tile info returned by get_tile_info has a logical size that is an
1847 * integer number of tile_info.format_bpb size elements. To scale the
1848 * tile, we scale up the physical width and then treat the logical tile
1849 * size as if it has bpb size elements.
1850 */
1851 const uint32_t tile_el_scale = bpb / tile_info.format_bpb;
1852 tile_info.phys_extent_B.width *= tile_el_scale;
1853
1854 /* Compute the offset into the tile */
1855 *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w;
1856 *y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h;
1857
1858 /* Compute the offset of the tile in units of whole tiles */
1859 uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w;
1860 uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h;
1861
1862 *base_address_offset =
1863 y_offset_tl * tile_info.phys_extent_B.h * row_pitch +
1864 x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w;
1865 }
1866
1867 uint32_t
1868 isl_surf_get_depth_format(const struct isl_device *dev,
1869 const struct isl_surf *surf)
1870 {
1871 /* Support for separate stencil buffers began in gen5. Support for
1872 * interleaved depthstencil buffers ceased in gen7. The intermediate gens,
1873 * those that supported separate and interleaved stencil, were gen5 and
1874 * gen6.
1875 *
1876 * For a list of all available formats, see the Sandybridge PRM >> Volume
1877 * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface
1878 * Format (p321).
1879 */
1880
1881 bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT;
1882
1883 assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT);
1884
1885 if (has_stencil)
1886 assert(ISL_DEV_GEN(dev) < 7);
1887
1888 switch (surf->format) {
1889 default:
1890 unreachable("bad isl depth format");
1891 case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
1892 assert(ISL_DEV_GEN(dev) < 7);
1893 return 0; /* D32_FLOAT_S8X24_UINT */
1894 case ISL_FORMAT_R32_FLOAT:
1895 assert(!has_stencil);
1896 return 1; /* D32_FLOAT */
1897 case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
1898 if (has_stencil) {
1899 assert(ISL_DEV_GEN(dev) < 7);
1900 return 2; /* D24_UNORM_S8_UINT */
1901 } else {
1902 assert(ISL_DEV_GEN(dev) >= 5);
1903 return 3; /* D24_UNORM_X8_UINT */
1904 }
1905 case ISL_FORMAT_R16_UNORM:
1906 assert(!has_stencil);
1907 return 5; /* D16_UNORM */
1908 }
1909 }