Added a few more stubs so that control reaches DestroyDevice().
[mesa.git] / src / intel / isl / isl.c
1 /*
2 * Copyright 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27
28 #include "genxml/genX_bits.h"
29
30 #include "isl.h"
31 #include "isl_gen4.h"
32 #include "isl_gen6.h"
33 #include "isl_gen7.h"
34 #include "isl_gen8.h"
35 #include "isl_gen9.h"
36 #include "isl_gen12.h"
37 #include "isl_priv.h"
38
39 void
40 isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2,
41 uint32_t yt1, uint32_t yt2,
42 char *dst, const char *src,
43 uint32_t dst_pitch, int32_t src_pitch,
44 bool has_swizzling,
45 enum isl_tiling tiling,
46 isl_memcpy_type copy_type)
47 {
48 #ifdef USE_SSE41
49 if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
50 _isl_memcpy_linear_to_tiled_sse41(
51 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
52 tiling, copy_type);
53 return;
54 }
55 #endif
56
57 _isl_memcpy_linear_to_tiled(
58 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
59 tiling, copy_type);
60 }
61
62 void
63 isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2,
64 uint32_t yt1, uint32_t yt2,
65 char *dst, const char *src,
66 int32_t dst_pitch, uint32_t src_pitch,
67 bool has_swizzling,
68 enum isl_tiling tiling,
69 isl_memcpy_type copy_type)
70 {
71 #ifdef USE_SSE41
72 if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
73 _isl_memcpy_tiled_to_linear_sse41(
74 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
75 tiling, copy_type);
76 return;
77 }
78 #endif
79
80 _isl_memcpy_tiled_to_linear(
81 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
82 tiling, copy_type);
83 }
84
85 void PRINTFLIKE(3, 4) UNUSED
86 __isl_finishme(const char *file, int line, const char *fmt, ...)
87 {
88 va_list ap;
89 char buf[512];
90
91 va_start(ap, fmt);
92 vsnprintf(buf, sizeof(buf), fmt, ap);
93 va_end(ap);
94
95 fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf);
96 }
97
98 static void
99 isl_device_setup_mocs(struct isl_device *dev)
100 {
101 if (dev->info->gen >= 12) {
102 if (dev->info->is_dg1) {
103 /* L3CC=WB */
104 dev->mocs.internal = 5 << 1;
105 /* Displayables on DG1 are free to cache in L3 since L3 is transient
106 * and flushed at bottom of each submission.
107 */
108 dev->mocs.external = 5 << 1;
109 } else {
110 /* TODO: Set PTE to MOCS 61 when the kernel is ready */
111 /* TC=1/LLC Only, LeCC=1/Uncacheable, LRUM=0, L3CC=1/Uncacheable */
112 dev->mocs.external = 3 << 1;
113 /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
114 dev->mocs.internal = 2 << 1;
115 }
116 } else if (dev->info->gen >= 9) {
117 /* TC=LLC/eLLC, LeCC=PTE, LRUM=3, L3CC=WB */
118 dev->mocs.external = 1 << 1;
119 /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
120 dev->mocs.internal = 2 << 1;
121 } else if (dev->info->gen >= 8) {
122 /* MEMORY_OBJECT_CONTROL_STATE:
123 * .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle,
124 * .TargetCache = L3DefertoPATforLLCeLLCselection,
125 * .AgeforQUADLRU = 0
126 */
127 dev->mocs.external = 0x18;
128 /* MEMORY_OBJECT_CONTROL_STATE:
129 * .MemoryTypeLLCeLLCCacheabilityControl = WB,
130 * .TargetCache = L3DefertoPATforLLCeLLCselection,
131 * .AgeforQUADLRU = 0
132 */
133 dev->mocs.internal = 0x78;
134 } else if (dev->info->gen >= 7) {
135 if (dev->info->is_haswell) {
136 /* MEMORY_OBJECT_CONTROL_STATE:
137 * .LLCeLLCCacheabilityControlLLCCC = 0,
138 * .L3CacheabilityControlL3CC = 1,
139 */
140 dev->mocs.internal = 1;
141 dev->mocs.external = 1;
142 } else {
143 /* MEMORY_OBJECT_CONTROL_STATE:
144 * .GraphicsDataTypeGFDT = 0,
145 * .LLCCacheabilityControlLLCCC = 0,
146 * .L3CacheabilityControlL3CC = 1,
147 */
148 dev->mocs.internal = 1;
149 dev->mocs.external = 1;
150 }
151 } else {
152 dev->mocs.internal = 0;
153 dev->mocs.external = 0;
154 }
155 }
156
157 void
158 isl_device_init(struct isl_device *dev,
159 const struct gen_device_info *info,
160 bool has_bit6_swizzling)
161 {
162 /* Gen8+ don't have bit6 swizzling, ensure callsite is not confused. */
163 assert(!(has_bit6_swizzling && info->gen >= 8));
164
165 dev->info = info;
166 dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6;
167 dev->has_bit6_swizzling = has_bit6_swizzling;
168
169 /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some
170 * device properties at buildtime. Verify that the macros with the device
171 * properties chosen during runtime.
172 */
173 ISL_DEV_GEN_SANITIZE(dev);
174 ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev);
175
176 /* Did we break hiz or stencil? */
177 if (ISL_DEV_USE_SEPARATE_STENCIL(dev))
178 assert(info->has_hiz_and_separate_stencil);
179 if (info->must_use_separate_stencil)
180 assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));
181
182 dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4;
183 dev->ss.align = isl_align(dev->ss.size, 32);
184
185 dev->ss.clear_color_state_size =
186 isl_align(CLEAR_COLOR_length(info) * 4, 64);
187 dev->ss.clear_color_state_offset =
188 RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4;
189
190 dev->ss.clear_value_size =
191 isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) +
192 RENDER_SURFACE_STATE_GreenClearColor_bits(info) +
193 RENDER_SURFACE_STATE_BlueClearColor_bits(info) +
194 RENDER_SURFACE_STATE_AlphaClearColor_bits(info), 32) / 8;
195
196 dev->ss.clear_value_offset =
197 RENDER_SURFACE_STATE_RedClearColor_start(info) / 32 * 4;
198
199 assert(RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) % 8 == 0);
200 dev->ss.addr_offset =
201 RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) / 8;
202
203 /* The "Auxiliary Surface Base Address" field starts a bit higher up
204 * because the bottom 12 bits are used for other things. Round down to
205 * the nearest dword before.
206 */
207 dev->ss.aux_addr_offset =
208 (RENDER_SURFACE_STATE_AuxiliarySurfaceBaseAddress_start(info) & ~31) / 8;
209
210 dev->ds.size = _3DSTATE_DEPTH_BUFFER_length(info) * 4;
211 assert(_3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
212 dev->ds.depth_offset =
213 _3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
214
215 if (dev->use_separate_stencil) {
216 dev->ds.size += _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
217 _3DSTATE_HIER_DEPTH_BUFFER_length(info) * 4 +
218 _3DSTATE_CLEAR_PARAMS_length(info) * 4;
219
220 assert(_3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
221 dev->ds.stencil_offset =
222 _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
223 _3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) / 8;
224
225 assert(_3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
226 dev->ds.hiz_offset =
227 _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
228 _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
229 _3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
230 } else {
231 dev->ds.stencil_offset = 0;
232 dev->ds.hiz_offset = 0;
233 }
234
235 if (ISL_DEV_GEN(dev) >= 12) {
236 dev->ds.size += GEN12_MI_LOAD_REGISTER_IMM_length * 4 * 2;
237 }
238
239 isl_device_setup_mocs(dev);
240 }
241
242 /**
243 * @brief Query the set of multisamples supported by the device.
244 *
245 * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always
246 * supported.
247 */
248 isl_sample_count_mask_t ATTRIBUTE_CONST
249 isl_device_get_sample_counts(struct isl_device *dev)
250 {
251 if (ISL_DEV_GEN(dev) >= 9) {
252 return ISL_SAMPLE_COUNT_1_BIT |
253 ISL_SAMPLE_COUNT_2_BIT |
254 ISL_SAMPLE_COUNT_4_BIT |
255 ISL_SAMPLE_COUNT_8_BIT |
256 ISL_SAMPLE_COUNT_16_BIT;
257 } else if (ISL_DEV_GEN(dev) >= 8) {
258 return ISL_SAMPLE_COUNT_1_BIT |
259 ISL_SAMPLE_COUNT_2_BIT |
260 ISL_SAMPLE_COUNT_4_BIT |
261 ISL_SAMPLE_COUNT_8_BIT;
262 } else if (ISL_DEV_GEN(dev) >= 7) {
263 return ISL_SAMPLE_COUNT_1_BIT |
264 ISL_SAMPLE_COUNT_4_BIT |
265 ISL_SAMPLE_COUNT_8_BIT;
266 } else if (ISL_DEV_GEN(dev) >= 6) {
267 return ISL_SAMPLE_COUNT_1_BIT |
268 ISL_SAMPLE_COUNT_4_BIT;
269 } else {
270 return ISL_SAMPLE_COUNT_1_BIT;
271 }
272 }
273
274 /**
275 * @param[out] info is written only on success
276 */
277 static void
278 isl_tiling_get_info(enum isl_tiling tiling,
279 uint32_t format_bpb,
280 struct isl_tile_info *tile_info)
281 {
282 const uint32_t bs = format_bpb / 8;
283 struct isl_extent2d logical_el, phys_B;
284
285 if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) {
286 /* It is possible to have non-power-of-two formats in a tiled buffer.
287 * The easiest way to handle this is to treat the tile as if it is three
288 * times as wide. This way no pixel will ever cross a tile boundary.
289 * This really only works on legacy X and Y tiling formats.
290 */
291 assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0);
292 assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3));
293 isl_tiling_get_info(tiling, format_bpb / 3, tile_info);
294 return;
295 }
296
297 switch (tiling) {
298 case ISL_TILING_LINEAR:
299 assert(bs > 0);
300 logical_el = isl_extent2d(1, 1);
301 phys_B = isl_extent2d(bs, 1);
302 break;
303
304 case ISL_TILING_X:
305 assert(bs > 0);
306 logical_el = isl_extent2d(512 / bs, 8);
307 phys_B = isl_extent2d(512, 8);
308 break;
309
310 case ISL_TILING_Y0:
311 assert(bs > 0);
312 logical_el = isl_extent2d(128 / bs, 32);
313 phys_B = isl_extent2d(128, 32);
314 break;
315
316 case ISL_TILING_W:
317 assert(bs == 1);
318 logical_el = isl_extent2d(64, 64);
319 /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch:
320 *
321 * "If the surface is a stencil buffer (and thus has Tile Mode set
322 * to TILEMODE_WMAJOR), the pitch must be set to 2x the value
323 * computed based on width, as the stencil buffer is stored with two
324 * rows interleaved."
325 *
326 * This, together with the fact that stencil buffers are referred to as
327 * being Y-tiled in the PRMs for older hardware implies that the
328 * physical size of a W-tile is actually the same as for a Y-tile.
329 */
330 phys_B = isl_extent2d(128, 32);
331 break;
332
333 case ISL_TILING_Yf:
334 case ISL_TILING_Ys: {
335 bool is_Ys = tiling == ISL_TILING_Ys;
336
337 assert(bs > 0);
338 unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys));
339 unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys));
340
341 logical_el = isl_extent2d(width / bs, height);
342 phys_B = isl_extent2d(width, height);
343 break;
344 }
345
346 case ISL_TILING_HIZ:
347 /* HiZ buffers are required to have ISL_FORMAT_HIZ which is an 8x4
348 * 128bpb format. The tiling has the same physical dimensions as
349 * Y-tiling but actually has two HiZ columns per Y-tiled column.
350 */
351 assert(bs == 16);
352 logical_el = isl_extent2d(16, 16);
353 phys_B = isl_extent2d(128, 32);
354 break;
355
356 case ISL_TILING_CCS:
357 /* CCS surfaces are required to have one of the GENX_CCS_* formats which
358 * have a block size of 1 or 2 bits per block and each CCS element
359 * corresponds to one cache-line pair in the main surface. From the Sky
360 * Lake PRM Vol. 12 in the section on planes:
361 *
362 * "The Color Control Surface (CCS) contains the compression status
363 * of the cache-line pairs. The compression state of the cache-line
364 * pair is specified by 2 bits in the CCS. Each CCS cache-line
365 * represents an area on the main surface of 16x16 sets of 128 byte
366 * Y-tiled cache-line-pairs. CCS is always Y tiled."
367 *
368 * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines.
369 * Since each cache line corresponds to a 16x16 set of cache-line pairs,
370 * that yields total tile area of 128x128 cache-line pairs or CCS
371 * elements. On older hardware, each CCS element is 1 bit and the tile
372 * is 128x256 elements.
373 */
374 assert(format_bpb == 1 || format_bpb == 2);
375 logical_el = isl_extent2d(128, 256 / format_bpb);
376 phys_B = isl_extent2d(128, 32);
377 break;
378
379 case ISL_TILING_GEN12_CCS:
380 /* From the Bspec, Gen Graphics > Gen12 > Memory Data Formats > Memory
381 * Compression > Memory Compression - Gen12:
382 *
383 * 4 bits of auxiliary plane data are required for 2 cachelines of
384 * main surface data. This results in a single cacheline of auxiliary
385 * plane data mapping to 4 4K pages of main surface data for the 4K
386 * pages (tile Y ) and 1 64K Tile Ys page.
387 *
388 * The Y-tiled pairing bit of 9 shown in the table below that Bspec
389 * section expresses that the 2 cachelines of main surface data are
390 * horizontally adjacent.
391 *
392 * TODO: Handle Ys, Yf and their pairing bits.
393 *
394 * Therefore, each CCS cacheline represents a 512Bx32 row area and each
395 * element represents a 32Bx4 row area.
396 */
397 assert(format_bpb == 4);
398 logical_el = isl_extent2d(16, 8);
399 phys_B = isl_extent2d(64, 1);
400 break;
401
402 default:
403 unreachable("not reached");
404 } /* end switch */
405
406 *tile_info = (struct isl_tile_info) {
407 .tiling = tiling,
408 .format_bpb = format_bpb,
409 .logical_extent_el = logical_el,
410 .phys_extent_B = phys_B,
411 };
412 }
413
414 bool
415 isl_color_value_is_zero(union isl_color_value value,
416 enum isl_format format)
417 {
418 const struct isl_format_layout *fmtl = isl_format_get_layout(format);
419
420 #define RETURN_FALSE_IF_NOT_0(c, i) \
421 if (fmtl->channels.c.bits && value.u32[i] != 0) \
422 return false
423
424 RETURN_FALSE_IF_NOT_0(r, 0);
425 RETURN_FALSE_IF_NOT_0(g, 1);
426 RETURN_FALSE_IF_NOT_0(b, 2);
427 RETURN_FALSE_IF_NOT_0(a, 3);
428
429 #undef RETURN_FALSE_IF_NOT_0
430
431 return true;
432 }
433
434 bool
435 isl_color_value_is_zero_one(union isl_color_value value,
436 enum isl_format format)
437 {
438 const struct isl_format_layout *fmtl = isl_format_get_layout(format);
439
440 #define RETURN_FALSE_IF_NOT_0_1(c, i, field) \
441 if (fmtl->channels.c.bits && value.field[i] != 0 && value.field[i] != 1) \
442 return false
443
444 if (isl_format_has_int_channel(format)) {
445 RETURN_FALSE_IF_NOT_0_1(r, 0, u32);
446 RETURN_FALSE_IF_NOT_0_1(g, 1, u32);
447 RETURN_FALSE_IF_NOT_0_1(b, 2, u32);
448 RETURN_FALSE_IF_NOT_0_1(a, 3, u32);
449 } else {
450 RETURN_FALSE_IF_NOT_0_1(r, 0, f32);
451 RETURN_FALSE_IF_NOT_0_1(g, 1, f32);
452 RETURN_FALSE_IF_NOT_0_1(b, 2, f32);
453 RETURN_FALSE_IF_NOT_0_1(a, 3, f32);
454 }
455
456 #undef RETURN_FALSE_IF_NOT_0_1
457
458 return true;
459 }
460
461 /**
462 * @param[out] tiling is set only on success
463 */
464 static bool
465 isl_surf_choose_tiling(const struct isl_device *dev,
466 const struct isl_surf_init_info *restrict info,
467 enum isl_tiling *tiling)
468 {
469 isl_tiling_flags_t tiling_flags = info->tiling_flags;
470
471 /* HiZ surfaces always use the HiZ tiling */
472 if (info->usage & ISL_SURF_USAGE_HIZ_BIT) {
473 assert(info->format == ISL_FORMAT_HIZ);
474 assert(tiling_flags == ISL_TILING_HIZ_BIT);
475 *tiling = isl_tiling_flag_to_enum(tiling_flags);
476 return true;
477 }
478
479 /* CCS surfaces always use the CCS tiling */
480 if (info->usage & ISL_SURF_USAGE_CCS_BIT) {
481 assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS);
482 UNUSED bool ivb_ccs = ISL_DEV_GEN(dev) < 12 &&
483 tiling_flags == ISL_TILING_CCS_BIT;
484 UNUSED bool tgl_ccs = ISL_DEV_GEN(dev) >= 12 &&
485 tiling_flags == ISL_TILING_GEN12_CCS_BIT;
486 assert(ivb_ccs != tgl_ccs);
487 *tiling = isl_tiling_flag_to_enum(tiling_flags);
488 return true;
489 }
490
491 if (ISL_DEV_GEN(dev) >= 6) {
492 isl_gen6_filter_tiling(dev, info, &tiling_flags);
493 } else {
494 isl_gen4_filter_tiling(dev, info, &tiling_flags);
495 }
496
497 #define CHOOSE(__tiling) \
498 do { \
499 if (tiling_flags & (1u << (__tiling))) { \
500 *tiling = (__tiling); \
501 return true; \
502 } \
503 } while (0)
504
505 /* Of the tiling modes remaining, choose the one that offers the best
506 * performance.
507 */
508
509 if (info->dim == ISL_SURF_DIM_1D) {
510 /* Prefer linear for 1D surfaces because they do not benefit from
511 * tiling. To the contrary, tiling leads to wasted memory and poor
512 * memory locality due to the swizzling and alignment restrictions
513 * required in tiled surfaces.
514 */
515 CHOOSE(ISL_TILING_LINEAR);
516 }
517
518 CHOOSE(ISL_TILING_Ys);
519 CHOOSE(ISL_TILING_Yf);
520 CHOOSE(ISL_TILING_Y0);
521 CHOOSE(ISL_TILING_X);
522 CHOOSE(ISL_TILING_W);
523 CHOOSE(ISL_TILING_LINEAR);
524
525 #undef CHOOSE
526
527 /* No tiling mode accomodates the inputs. */
528 return false;
529 }
530
531 static bool
532 isl_choose_msaa_layout(const struct isl_device *dev,
533 const struct isl_surf_init_info *info,
534 enum isl_tiling tiling,
535 enum isl_msaa_layout *msaa_layout)
536 {
537 if (ISL_DEV_GEN(dev) >= 8) {
538 return isl_gen8_choose_msaa_layout(dev, info, tiling, msaa_layout);
539 } else if (ISL_DEV_GEN(dev) >= 7) {
540 return isl_gen7_choose_msaa_layout(dev, info, tiling, msaa_layout);
541 } else if (ISL_DEV_GEN(dev) >= 6) {
542 return isl_gen6_choose_msaa_layout(dev, info, tiling, msaa_layout);
543 } else {
544 return isl_gen4_choose_msaa_layout(dev, info, tiling, msaa_layout);
545 }
546 }
547
548 struct isl_extent2d
549 isl_get_interleaved_msaa_px_size_sa(uint32_t samples)
550 {
551 assert(isl_is_pow2(samples));
552
553 /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level
554 * Sizes (p133):
555 *
556 * If the surface is multisampled and it is a depth or stencil surface
557 * or Multisampled Surface StorageFormat in SURFACE_STATE is
558 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
559 * proceeding: [...]
560 */
561 return (struct isl_extent2d) {
562 .width = 1 << ((ffs(samples) - 0) / 2),
563 .height = 1 << ((ffs(samples) - 1) / 2),
564 };
565 }
566
567 static void
568 isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,
569 uint32_t *width, uint32_t *height)
570 {
571 const struct isl_extent2d px_size_sa =
572 isl_get_interleaved_msaa_px_size_sa(samples);
573
574 if (width)
575 *width = isl_align(*width, 2) * px_size_sa.width;
576 if (height)
577 *height = isl_align(*height, 2) * px_size_sa.height;
578 }
579
580 static enum isl_array_pitch_span
581 isl_choose_array_pitch_span(const struct isl_device *dev,
582 const struct isl_surf_init_info *restrict info,
583 enum isl_dim_layout dim_layout,
584 const struct isl_extent4d *phys_level0_sa)
585 {
586 switch (dim_layout) {
587 case ISL_DIM_LAYOUT_GEN9_1D:
588 case ISL_DIM_LAYOUT_GEN4_2D:
589 if (ISL_DEV_GEN(dev) >= 8) {
590 /* QPitch becomes programmable in Broadwell. So choose the
591 * most compact QPitch possible in order to conserve memory.
592 *
593 * From the Broadwell PRM >> Volume 2d: Command Reference: Structures
594 * >> RENDER_SURFACE_STATE Surface QPitch (p325):
595 *
596 * - Software must ensure that this field is set to a value
597 * sufficiently large such that the array slices in the surface
598 * do not overlap. Refer to the Memory Data Formats section for
599 * information on how surfaces are stored in memory.
600 *
601 * - This field specifies the distance in rows between array
602 * slices. It is used only in the following cases:
603 *
604 * - Surface Array is enabled OR
605 * - Number of Mulitsamples is not NUMSAMPLES_1 and
606 * Multisampled Surface Storage Format set to MSFMT_MSS OR
607 * - Surface Type is SURFTYPE_CUBE
608 */
609 return ISL_ARRAY_PITCH_SPAN_COMPACT;
610 } else if (ISL_DEV_GEN(dev) >= 7) {
611 /* Note that Ivybridge introduces
612 * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the
613 * driver more control over the QPitch.
614 */
615
616 if (phys_level0_sa->array_len == 1) {
617 /* The hardware will never use the QPitch. So choose the most
618 * compact QPitch possible in order to conserve memory.
619 */
620 return ISL_ARRAY_PITCH_SPAN_COMPACT;
621 }
622
623 if (isl_surf_usage_is_depth_or_stencil(info->usage) ||
624 (info->usage & ISL_SURF_USAGE_HIZ_BIT)) {
625 /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >>
626 * Section 6.18.4.7: Surface Arrays (p112):
627 *
628 * If Surface Array Spacing is set to ARYSPC_FULL (note that
629 * the depth buffer and stencil buffer have an implied value of
630 * ARYSPC_FULL):
631 */
632 return ISL_ARRAY_PITCH_SPAN_FULL;
633 }
634
635 if (info->levels == 1) {
636 /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing
637 * to ARYSPC_LOD0.
638 */
639 return ISL_ARRAY_PITCH_SPAN_COMPACT;
640 }
641
642 return ISL_ARRAY_PITCH_SPAN_FULL;
643 } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
644 ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
645 isl_surf_usage_is_stencil(info->usage)) {
646 /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
647 * Graphics Core >> Section 7.18.3.7: Surface Arrays:
648 *
649 * The separate stencil buffer does not support mip mapping, thus
650 * the storage for LODs other than LOD 0 is not needed.
651 */
652 assert(info->levels == 1);
653 return ISL_ARRAY_PITCH_SPAN_COMPACT;
654 } else {
655 if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
656 ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
657 isl_surf_usage_is_stencil(info->usage)) {
658 /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
659 * Graphics Core >> Section 7.18.3.7: Surface Arrays:
660 *
661 * The separate stencil buffer does not support mip mapping,
662 * thus the storage for LODs other than LOD 0 is not needed.
663 */
664 assert(info->levels == 1);
665 assert(phys_level0_sa->array_len == 1);
666 return ISL_ARRAY_PITCH_SPAN_COMPACT;
667 }
668
669 if (phys_level0_sa->array_len == 1) {
670 /* The hardware will never use the QPitch. So choose the most
671 * compact QPitch possible in order to conserve memory.
672 */
673 return ISL_ARRAY_PITCH_SPAN_COMPACT;
674 }
675
676 return ISL_ARRAY_PITCH_SPAN_FULL;
677 }
678
679 case ISL_DIM_LAYOUT_GEN4_3D:
680 /* The hardware will never use the QPitch. So choose the most
681 * compact QPitch possible in order to conserve memory.
682 */
683 return ISL_ARRAY_PITCH_SPAN_COMPACT;
684
685 case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
686 /* Each array image in the gen6 stencil of HiZ surface is compact in the
687 * sense that every LOD is a compact array of the same size as LOD0.
688 */
689 return ISL_ARRAY_PITCH_SPAN_COMPACT;
690 }
691
692 unreachable("bad isl_dim_layout");
693 return ISL_ARRAY_PITCH_SPAN_FULL;
694 }
695
696 static void
697 isl_choose_image_alignment_el(const struct isl_device *dev,
698 const struct isl_surf_init_info *restrict info,
699 enum isl_tiling tiling,
700 enum isl_dim_layout dim_layout,
701 enum isl_msaa_layout msaa_layout,
702 struct isl_extent3d *image_align_el)
703 {
704 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
705 if (fmtl->txc == ISL_TXC_MCS) {
706 assert(tiling == ISL_TILING_Y0);
707
708 /*
709 * IvyBrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
710 *
711 * Height, width, and layout of MCS buffer in this case must match with
712 * Render Target height, width, and layout. MCS buffer is tiledY.
713 *
714 * To avoid wasting memory, choose the smallest alignment possible:
715 * HALIGN_4 and VALIGN_4.
716 */
717 *image_align_el = isl_extent3d(4, 4, 1);
718 return;
719 } else if (info->format == ISL_FORMAT_HIZ) {
720 assert(ISL_DEV_GEN(dev) >= 6);
721 if (ISL_DEV_GEN(dev) == 6) {
722 /* HiZ surfaces on Sandy Bridge are packed tightly. */
723 *image_align_el = isl_extent3d(1, 1, 1);
724 } else if (ISL_DEV_GEN(dev) < 12) {
725 /* On gen7+, HiZ surfaces are always aligned to 16x8 pixels in the
726 * primary surface which works out to 2x2 HiZ elments.
727 */
728 *image_align_el = isl_extent3d(2, 2, 1);
729 } else {
730 /* On gen12+, HiZ surfaces are always aligned to 16x16 pixels in the
731 * primary surface which works out to 2x4 HiZ elments.
732 * TODO: Verify
733 */
734 *image_align_el = isl_extent3d(2, 4, 1);
735 }
736 return;
737 }
738
739 if (ISL_DEV_GEN(dev) >= 12) {
740 isl_gen12_choose_image_alignment_el(dev, info, tiling, dim_layout,
741 msaa_layout, image_align_el);
742 } else if (ISL_DEV_GEN(dev) >= 9) {
743 isl_gen9_choose_image_alignment_el(dev, info, tiling, dim_layout,
744 msaa_layout, image_align_el);
745 } else if (ISL_DEV_GEN(dev) >= 8) {
746 isl_gen8_choose_image_alignment_el(dev, info, tiling, dim_layout,
747 msaa_layout, image_align_el);
748 } else if (ISL_DEV_GEN(dev) >= 7) {
749 isl_gen7_choose_image_alignment_el(dev, info, tiling, dim_layout,
750 msaa_layout, image_align_el);
751 } else if (ISL_DEV_GEN(dev) >= 6) {
752 isl_gen6_choose_image_alignment_el(dev, info, tiling, dim_layout,
753 msaa_layout, image_align_el);
754 } else {
755 isl_gen4_choose_image_alignment_el(dev, info, tiling, dim_layout,
756 msaa_layout, image_align_el);
757 }
758 }
759
760 static enum isl_dim_layout
761 isl_surf_choose_dim_layout(const struct isl_device *dev,
762 enum isl_surf_dim logical_dim,
763 enum isl_tiling tiling,
764 isl_surf_usage_flags_t usage)
765 {
766 /* Sandy bridge needs a special layout for HiZ and stencil. */
767 if (ISL_DEV_GEN(dev) == 6 &&
768 (tiling == ISL_TILING_W || tiling == ISL_TILING_HIZ))
769 return ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ;
770
771 if (ISL_DEV_GEN(dev) >= 9) {
772 switch (logical_dim) {
773 case ISL_SURF_DIM_1D:
774 /* From the Sky Lake PRM Vol. 5, "1D Surfaces":
775 *
776 * One-dimensional surfaces use a tiling mode of linear.
777 * Technically, they are not tiled resources, but the Tiled
778 * Resource Mode field in RENDER_SURFACE_STATE is still used to
779 * indicate the alignment requirements for this linear surface
780 * (See 1D Alignment requirements for how 4K and 64KB Tiled
781 * Resource Modes impact alignment). Alternatively, a 1D surface
782 * can be defined as a 2D tiled surface (e.g. TileY or TileX) with
783 * a height of 0.
784 *
785 * In other words, ISL_DIM_LAYOUT_GEN9_1D is only used for linear
786 * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GEN4_2D is used.
787 */
788 if (tiling == ISL_TILING_LINEAR)
789 return ISL_DIM_LAYOUT_GEN9_1D;
790 else
791 return ISL_DIM_LAYOUT_GEN4_2D;
792 case ISL_SURF_DIM_2D:
793 case ISL_SURF_DIM_3D:
794 return ISL_DIM_LAYOUT_GEN4_2D;
795 }
796 } else {
797 switch (logical_dim) {
798 case ISL_SURF_DIM_1D:
799 case ISL_SURF_DIM_2D:
800 /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
801 *
802 * The cube face textures are stored in the same way as 3D surfaces
803 * are stored (see section 6.17.5 for details). For cube surfaces,
804 * however, the depth is equal to the number of faces (always 6) and
805 * is not reduced for each MIP.
806 */
807 if (ISL_DEV_GEN(dev) == 4 && (usage & ISL_SURF_USAGE_CUBE_BIT))
808 return ISL_DIM_LAYOUT_GEN4_3D;
809
810 return ISL_DIM_LAYOUT_GEN4_2D;
811 case ISL_SURF_DIM_3D:
812 return ISL_DIM_LAYOUT_GEN4_3D;
813 }
814 }
815
816 unreachable("bad isl_surf_dim");
817 return ISL_DIM_LAYOUT_GEN4_2D;
818 }
819
820 /**
821 * Calculate the physical extent of the surface's first level, in units of
822 * surface samples.
823 */
824 static void
825 isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
826 const struct isl_surf_init_info *restrict info,
827 enum isl_dim_layout dim_layout,
828 enum isl_tiling tiling,
829 enum isl_msaa_layout msaa_layout,
830 struct isl_extent4d *phys_level0_sa)
831 {
832 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
833
834 if (isl_format_is_planar(info->format))
835 unreachable("Planar formats unsupported");
836
837 switch (info->dim) {
838 case ISL_SURF_DIM_1D:
839 assert(info->height == 1);
840 assert(info->depth == 1);
841 assert(info->samples == 1);
842
843 switch (dim_layout) {
844 case ISL_DIM_LAYOUT_GEN4_3D:
845 unreachable("bad isl_dim_layout");
846
847 case ISL_DIM_LAYOUT_GEN9_1D:
848 case ISL_DIM_LAYOUT_GEN4_2D:
849 case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
850 *phys_level0_sa = (struct isl_extent4d) {
851 .w = info->width,
852 .h = 1,
853 .d = 1,
854 .a = info->array_len,
855 };
856 break;
857 }
858 break;
859
860 case ISL_SURF_DIM_2D:
861 if (ISL_DEV_GEN(dev) == 4 && (info->usage & ISL_SURF_USAGE_CUBE_BIT))
862 assert(dim_layout == ISL_DIM_LAYOUT_GEN4_3D);
863 else
864 assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D ||
865 dim_layout == ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ);
866
867 if (tiling == ISL_TILING_Ys && info->samples > 1)
868 isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__);
869
870 switch (msaa_layout) {
871 case ISL_MSAA_LAYOUT_NONE:
872 assert(info->depth == 1);
873 assert(info->samples == 1);
874
875 *phys_level0_sa = (struct isl_extent4d) {
876 .w = info->width,
877 .h = info->height,
878 .d = 1,
879 .a = info->array_len,
880 };
881 break;
882
883 case ISL_MSAA_LAYOUT_ARRAY:
884 assert(info->depth == 1);
885 assert(info->levels == 1);
886 assert(isl_format_supports_multisampling(dev->info, info->format));
887 assert(fmtl->bw == 1 && fmtl->bh == 1);
888
889 *phys_level0_sa = (struct isl_extent4d) {
890 .w = info->width,
891 .h = info->height,
892 .d = 1,
893 .a = info->array_len * info->samples,
894 };
895 break;
896
897 case ISL_MSAA_LAYOUT_INTERLEAVED:
898 assert(info->depth == 1);
899 assert(info->levels == 1);
900 assert(isl_format_supports_multisampling(dev->info, info->format));
901
902 *phys_level0_sa = (struct isl_extent4d) {
903 .w = info->width,
904 .h = info->height,
905 .d = 1,
906 .a = info->array_len,
907 };
908
909 isl_msaa_interleaved_scale_px_to_sa(info->samples,
910 &phys_level0_sa->w,
911 &phys_level0_sa->h);
912 break;
913 }
914 break;
915
916 case ISL_SURF_DIM_3D:
917 assert(info->array_len == 1);
918 assert(info->samples == 1);
919
920 if (fmtl->bd > 1) {
921 isl_finishme("%s:%s: compression block with depth > 1",
922 __FILE__, __func__);
923 }
924
925 switch (dim_layout) {
926 case ISL_DIM_LAYOUT_GEN9_1D:
927 case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
928 unreachable("bad isl_dim_layout");
929
930 case ISL_DIM_LAYOUT_GEN4_2D:
931 assert(ISL_DEV_GEN(dev) >= 9);
932
933 *phys_level0_sa = (struct isl_extent4d) {
934 .w = info->width,
935 .h = info->height,
936 .d = 1,
937 .a = info->depth,
938 };
939 break;
940
941 case ISL_DIM_LAYOUT_GEN4_3D:
942 assert(ISL_DEV_GEN(dev) < 9);
943 *phys_level0_sa = (struct isl_extent4d) {
944 .w = info->width,
945 .h = info->height,
946 .d = info->depth,
947 .a = 1,
948 };
949 break;
950 }
951 break;
952 }
953 }
954
955 /**
956 * Calculate the pitch between physical array slices, in units of rows of
957 * surface elements.
958 */
959 static uint32_t
960 isl_calc_array_pitch_el_rows_gen4_2d(
961 const struct isl_device *dev,
962 const struct isl_surf_init_info *restrict info,
963 const struct isl_tile_info *tile_info,
964 const struct isl_extent3d *image_align_sa,
965 const struct isl_extent4d *phys_level0_sa,
966 enum isl_array_pitch_span array_pitch_span,
967 const struct isl_extent2d *phys_slice0_sa)
968 {
969 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
970 uint32_t pitch_sa_rows = 0;
971
972 switch (array_pitch_span) {
973 case ISL_ARRAY_PITCH_SPAN_COMPACT:
974 pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
975 break;
976 case ISL_ARRAY_PITCH_SPAN_FULL: {
977 /* The QPitch equation is found in the Broadwell PRM >> Volume 5:
978 * Memory Views >> Common Surface Formats >> Surface Layout >> 2D
979 * Surfaces >> Surface Arrays.
980 */
981 uint32_t H0_sa = phys_level0_sa->h;
982 uint32_t H1_sa = isl_minify(H0_sa, 1);
983
984 uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h);
985 uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h);
986
987 uint32_t m;
988 if (ISL_DEV_GEN(dev) >= 7) {
989 /* The QPitch equation changed slightly in Ivybridge. */
990 m = 12;
991 } else {
992 m = 11;
993 }
994
995 pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h);
996
997 if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 &&
998 (info->height % 4 == 1)) {
999 /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
1000 * Graphics Core >> Section 7.18.3.7: Surface Arrays:
1001 *
1002 * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than
1003 * the value calculated in the equation above , for every
1004 * other odd Surface Height starting from 1 i.e. 1,5,9,13.
1005 *
1006 * XXX(chadv): Is the errata natural corollary of the physical
1007 * layout of interleaved samples?
1008 */
1009 pitch_sa_rows += 4;
1010 }
1011
1012 pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh);
1013 } /* end case */
1014 break;
1015 }
1016
1017 assert(pitch_sa_rows % fmtl->bh == 0);
1018 uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh;
1019
1020 if (ISL_DEV_GEN(dev) >= 9 && ISL_DEV_GEN(dev) <= 11 &&
1021 fmtl->txc == ISL_TXC_CCS) {
1022 /*
1023 * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632):
1024 *
1025 * "Mip-mapped and arrayed surfaces are supported with MCS buffer
1026 * layout with these alignments in the RT space: Horizontal
1027 * Alignment = 128 and Vertical Alignment = 64."
1028 *
1029 * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435):
1030 *
1031 * "For non-multisampled render target's CCS auxiliary surface,
1032 * QPitch must be computed with Horizontal Alignment = 128 and
1033 * Surface Vertical Alignment = 256. These alignments are only for
1034 * CCS buffer and not for associated render target."
1035 *
1036 * The first restriction is already handled by isl_choose_image_alignment_el
1037 * but the second restriction, which is an extension of the first, only
1038 * applies to qpitch and must be applied here.
1039 *
1040 * The second restriction disappears on Gen12.
1041 */
1042 assert(fmtl->bh == 4);
1043 pitch_el_rows = isl_align(pitch_el_rows, 256 / 4);
1044 }
1045
1046 if (ISL_DEV_GEN(dev) >= 9 &&
1047 info->dim == ISL_SURF_DIM_3D &&
1048 tile_info->tiling != ISL_TILING_LINEAR) {
1049 /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch:
1050 *
1051 * Tile Mode != Linear: This field must be set to an integer multiple
1052 * of the tile height
1053 */
1054 pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height);
1055 }
1056
1057 return pitch_el_rows;
1058 }
1059
1060 /**
1061 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1062 * ISL_DIM_LAYOUT_GEN4_2D.
1063 */
1064 static void
1065 isl_calc_phys_slice0_extent_sa_gen4_2d(
1066 const struct isl_device *dev,
1067 const struct isl_surf_init_info *restrict info,
1068 enum isl_msaa_layout msaa_layout,
1069 const struct isl_extent3d *image_align_sa,
1070 const struct isl_extent4d *phys_level0_sa,
1071 struct isl_extent2d *phys_slice0_sa)
1072 {
1073 assert(phys_level0_sa->depth == 1);
1074
1075 if (info->levels == 1) {
1076 /* Do not pad the surface to the image alignment.
1077 *
1078 * For tiled surfaces, using a reduced alignment here avoids wasting CPU
1079 * cycles on the below mipmap layout caluclations. Reducing the
1080 * alignment here is safe because we later align the row pitch and array
1081 * pitch to the tile boundary. It is safe even for
1082 * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled
1083 * to accomodate the interleaved samples.
1084 *
1085 * For linear surfaces, reducing the alignment here permits us to later
1086 * choose an arbitrary, non-aligned row pitch. If the surface backs
1087 * a VkBuffer, then an arbitrary pitch may be needed to accomodate
1088 * VkBufferImageCopy::bufferRowLength.
1089 */
1090 *phys_slice0_sa = (struct isl_extent2d) {
1091 .w = phys_level0_sa->w,
1092 .h = phys_level0_sa->h,
1093 };
1094 return;
1095 }
1096
1097 uint32_t slice_top_w = 0;
1098 uint32_t slice_bottom_w = 0;
1099 uint32_t slice_left_h = 0;
1100 uint32_t slice_right_h = 0;
1101
1102 uint32_t W0 = phys_level0_sa->w;
1103 uint32_t H0 = phys_level0_sa->h;
1104
1105 for (uint32_t l = 0; l < info->levels; ++l) {
1106 uint32_t W = isl_minify(W0, l);
1107 uint32_t H = isl_minify(H0, l);
1108
1109 uint32_t w = isl_align_npot(W, image_align_sa->w);
1110 uint32_t h = isl_align_npot(H, image_align_sa->h);
1111
1112 if (l == 0) {
1113 slice_top_w = w;
1114 slice_left_h = h;
1115 slice_right_h = h;
1116 } else if (l == 1) {
1117 slice_bottom_w = w;
1118 slice_left_h += h;
1119 } else if (l == 2) {
1120 slice_bottom_w += w;
1121 slice_right_h += h;
1122 } else {
1123 slice_right_h += h;
1124 }
1125 }
1126
1127 *phys_slice0_sa = (struct isl_extent2d) {
1128 .w = MAX(slice_top_w, slice_bottom_w),
1129 .h = MAX(slice_left_h, slice_right_h),
1130 };
1131 }
1132
1133 static void
1134 isl_calc_phys_total_extent_el_gen4_2d(
1135 const struct isl_device *dev,
1136 const struct isl_surf_init_info *restrict info,
1137 const struct isl_tile_info *tile_info,
1138 enum isl_msaa_layout msaa_layout,
1139 const struct isl_extent3d *image_align_sa,
1140 const struct isl_extent4d *phys_level0_sa,
1141 enum isl_array_pitch_span array_pitch_span,
1142 uint32_t *array_pitch_el_rows,
1143 struct isl_extent2d *total_extent_el)
1144 {
1145 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1146
1147 struct isl_extent2d phys_slice0_sa;
1148 isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout,
1149 image_align_sa, phys_level0_sa,
1150 &phys_slice0_sa);
1151 *array_pitch_el_rows =
1152 isl_calc_array_pitch_el_rows_gen4_2d(dev, info, tile_info,
1153 image_align_sa, phys_level0_sa,
1154 array_pitch_span,
1155 &phys_slice0_sa);
1156 *total_extent_el = (struct isl_extent2d) {
1157 .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw),
1158 .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) +
1159 isl_align_div_npot(phys_slice0_sa.h, fmtl->bh),
1160 };
1161 }
1162
1163 /**
1164 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1165 * ISL_DIM_LAYOUT_GEN4_3D.
1166 */
1167 static void
1168 isl_calc_phys_total_extent_el_gen4_3d(
1169 const struct isl_device *dev,
1170 const struct isl_surf_init_info *restrict info,
1171 const struct isl_extent3d *image_align_sa,
1172 const struct isl_extent4d *phys_level0_sa,
1173 uint32_t *array_pitch_el_rows,
1174 struct isl_extent2d *phys_total_el)
1175 {
1176 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1177
1178 assert(info->samples == 1);
1179
1180 if (info->dim != ISL_SURF_DIM_3D) {
1181 /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
1182 *
1183 * The cube face textures are stored in the same way as 3D surfaces
1184 * are stored (see section 6.17.5 for details). For cube surfaces,
1185 * however, the depth is equal to the number of faces (always 6) and
1186 * is not reduced for each MIP.
1187 */
1188 assert(ISL_DEV_GEN(dev) == 4);
1189 assert(info->usage & ISL_SURF_USAGE_CUBE_BIT);
1190 assert(phys_level0_sa->array_len == 6);
1191 } else {
1192 assert(phys_level0_sa->array_len == 1);
1193 }
1194
1195 uint32_t total_w = 0;
1196 uint32_t total_h = 0;
1197
1198 uint32_t W0 = phys_level0_sa->w;
1199 uint32_t H0 = phys_level0_sa->h;
1200 uint32_t D0 = phys_level0_sa->d;
1201 uint32_t A0 = phys_level0_sa->a;
1202
1203 for (uint32_t l = 0; l < info->levels; ++l) {
1204 uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w);
1205 uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h);
1206 uint32_t level_d = info->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : A0;
1207
1208 uint32_t max_layers_horiz = MIN(level_d, 1u << l);
1209 uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
1210
1211 total_w = MAX(total_w, level_w * max_layers_horiz);
1212 total_h += level_h * max_layers_vert;
1213 }
1214
1215 /* GEN4_3D layouts don't really have an array pitch since each LOD has a
1216 * different number of horizontal and vertical layers. We have to set it
1217 * to something, so at least make it true for LOD0.
1218 */
1219 *array_pitch_el_rows =
1220 isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bw;
1221 *phys_total_el = (struct isl_extent2d) {
1222 .w = isl_assert_div(total_w, fmtl->bw),
1223 .h = isl_assert_div(total_h, fmtl->bh),
1224 };
1225 }
1226
1227 /**
1228 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1229 * ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ.
1230 */
1231 static void
1232 isl_calc_phys_total_extent_el_gen6_stencil_hiz(
1233 const struct isl_device *dev,
1234 const struct isl_surf_init_info *restrict info,
1235 const struct isl_tile_info *tile_info,
1236 const struct isl_extent3d *image_align_sa,
1237 const struct isl_extent4d *phys_level0_sa,
1238 uint32_t *array_pitch_el_rows,
1239 struct isl_extent2d *phys_total_el)
1240 {
1241 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1242
1243 const struct isl_extent2d tile_extent_sa = {
1244 .w = tile_info->logical_extent_el.w * fmtl->bw,
1245 .h = tile_info->logical_extent_el.h * fmtl->bh,
1246 };
1247 /* Tile size is a multiple of image alignment */
1248 assert(tile_extent_sa.w % image_align_sa->w == 0);
1249 assert(tile_extent_sa.h % image_align_sa->h == 0);
1250
1251 const uint32_t W0 = phys_level0_sa->w;
1252 const uint32_t H0 = phys_level0_sa->h;
1253
1254 /* Each image has the same height as LOD0 because the hardware thinks
1255 * everything is LOD0
1256 */
1257 const uint32_t H = isl_align(H0, image_align_sa->h) * phys_level0_sa->a;
1258
1259 uint32_t total_top_w = 0;
1260 uint32_t total_bottom_w = 0;
1261 uint32_t total_h = 0;
1262
1263 for (uint32_t l = 0; l < info->levels; ++l) {
1264 const uint32_t W = isl_minify(W0, l);
1265
1266 const uint32_t w = isl_align(W, tile_extent_sa.w);
1267 const uint32_t h = isl_align(H, tile_extent_sa.h);
1268
1269 if (l == 0) {
1270 total_top_w = w;
1271 total_h = h;
1272 } else if (l == 1) {
1273 total_bottom_w = w;
1274 total_h += h;
1275 } else {
1276 total_bottom_w += w;
1277 }
1278 }
1279
1280 *array_pitch_el_rows =
1281 isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh);
1282 *phys_total_el = (struct isl_extent2d) {
1283 .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw),
1284 .h = isl_assert_div(total_h, fmtl->bh),
1285 };
1286 }
1287
1288 /**
1289 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1290 * ISL_DIM_LAYOUT_GEN9_1D.
1291 */
1292 static void
1293 isl_calc_phys_total_extent_el_gen9_1d(
1294 const struct isl_device *dev,
1295 const struct isl_surf_init_info *restrict info,
1296 const struct isl_extent3d *image_align_sa,
1297 const struct isl_extent4d *phys_level0_sa,
1298 uint32_t *array_pitch_el_rows,
1299 struct isl_extent2d *phys_total_el)
1300 {
1301 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1302
1303 assert(phys_level0_sa->height == 1);
1304 assert(phys_level0_sa->depth == 1);
1305 assert(info->samples == 1);
1306 assert(image_align_sa->w >= fmtl->bw);
1307
1308 uint32_t slice_w = 0;
1309 const uint32_t W0 = phys_level0_sa->w;
1310
1311 for (uint32_t l = 0; l < info->levels; ++l) {
1312 uint32_t W = isl_minify(W0, l);
1313 uint32_t w = isl_align_npot(W, image_align_sa->w);
1314
1315 slice_w += w;
1316 }
1317
1318 *array_pitch_el_rows = 1;
1319 *phys_total_el = (struct isl_extent2d) {
1320 .w = isl_assert_div(slice_w, fmtl->bw),
1321 .h = phys_level0_sa->array_len,
1322 };
1323 }
1324
1325 /**
1326 * Calculate the two-dimensional total physical extent of the surface, in
1327 * units of surface elements.
1328 */
1329 static void
1330 isl_calc_phys_total_extent_el(const struct isl_device *dev,
1331 const struct isl_surf_init_info *restrict info,
1332 const struct isl_tile_info *tile_info,
1333 enum isl_dim_layout dim_layout,
1334 enum isl_msaa_layout msaa_layout,
1335 const struct isl_extent3d *image_align_sa,
1336 const struct isl_extent4d *phys_level0_sa,
1337 enum isl_array_pitch_span array_pitch_span,
1338 uint32_t *array_pitch_el_rows,
1339 struct isl_extent2d *total_extent_el)
1340 {
1341 switch (dim_layout) {
1342 case ISL_DIM_LAYOUT_GEN9_1D:
1343 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1344 isl_calc_phys_total_extent_el_gen9_1d(dev, info,
1345 image_align_sa, phys_level0_sa,
1346 array_pitch_el_rows,
1347 total_extent_el);
1348 return;
1349 case ISL_DIM_LAYOUT_GEN4_2D:
1350 isl_calc_phys_total_extent_el_gen4_2d(dev, info, tile_info, msaa_layout,
1351 image_align_sa, phys_level0_sa,
1352 array_pitch_span,
1353 array_pitch_el_rows,
1354 total_extent_el);
1355 return;
1356 case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
1357 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1358 isl_calc_phys_total_extent_el_gen6_stencil_hiz(dev, info, tile_info,
1359 image_align_sa,
1360 phys_level0_sa,
1361 array_pitch_el_rows,
1362 total_extent_el);
1363 return;
1364 case ISL_DIM_LAYOUT_GEN4_3D:
1365 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1366 isl_calc_phys_total_extent_el_gen4_3d(dev, info,
1367 image_align_sa, phys_level0_sa,
1368 array_pitch_el_rows,
1369 total_extent_el);
1370 return;
1371 }
1372
1373 unreachable("invalid value for dim_layout");
1374 }
1375
1376 static uint32_t
1377 isl_calc_row_pitch_alignment(const struct isl_device *dev,
1378 const struct isl_surf_init_info *surf_info,
1379 const struct isl_tile_info *tile_info)
1380 {
1381 if (tile_info->tiling != ISL_TILING_LINEAR) {
1382 /* According to BSpec: 44930, Gen12's CCS-compressed surface pitches must
1383 * be 512B-aligned. CCS is only support on Y tilings.
1384 *
1385 * Only consider 512B alignment when :
1386 * - AUX is not explicitly disabled
1387 * - the caller has specified no pitch
1388 *
1389 * isl_surf_get_ccs_surf() will check that the main surface alignment
1390 * matches CCS expectations.
1391 */
1392 if (ISL_DEV_GEN(dev) >= 12 &&
1393 isl_format_supports_ccs_e(dev->info, surf_info->format) &&
1394 tile_info->tiling != ISL_TILING_X &&
1395 !(surf_info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT) &&
1396 surf_info->row_pitch_B == 0) {
1397 return isl_align(tile_info->phys_extent_B.width, 512);
1398 }
1399
1400 return tile_info->phys_extent_B.width;
1401 }
1402
1403 /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >>
1404 * RENDER_SURFACE_STATE Surface Pitch (p349):
1405 *
1406 * - For linear render target surfaces and surfaces accessed with the
1407 * typed data port messages, the pitch must be a multiple of the
1408 * element size for non-YUV surface formats. Pitch must be
1409 * a multiple of 2 * element size for YUV surface formats.
1410 *
1411 * - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we
1412 * ignore because isl doesn't do buffers.]
1413 *
1414 * - For other linear surfaces, the pitch can be any multiple of
1415 * bytes.
1416 */
1417 const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1418 const uint32_t bs = fmtl->bpb / 8;
1419 uint32_t alignment;
1420
1421 if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
1422 if (isl_format_is_yuv(surf_info->format)) {
1423 alignment = 2 * bs;
1424 } else {
1425 alignment = bs;
1426 }
1427 } else {
1428 alignment = 1;
1429 }
1430
1431 /* From the Broadwell PRM >> Volume 2c: Command Reference: Registers >>
1432 * PRI_STRIDE Stride (p1254):
1433 *
1434 * "When using linear memory, this must be at least 64 byte aligned."
1435 */
1436 if (surf_info->usage & ISL_SURF_USAGE_DISPLAY_BIT)
1437 alignment = isl_align(alignment, 64);
1438
1439 return alignment;
1440 }
1441
1442 static uint32_t
1443 isl_calc_linear_min_row_pitch(const struct isl_device *dev,
1444 const struct isl_surf_init_info *info,
1445 const struct isl_extent2d *phys_total_el,
1446 uint32_t alignment_B)
1447 {
1448 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1449 const uint32_t bs = fmtl->bpb / 8;
1450
1451 return isl_align_npot(bs * phys_total_el->w, alignment_B);
1452 }
1453
1454 static uint32_t
1455 isl_calc_tiled_min_row_pitch(const struct isl_device *dev,
1456 const struct isl_surf_init_info *surf_info,
1457 const struct isl_tile_info *tile_info,
1458 const struct isl_extent2d *phys_total_el,
1459 uint32_t alignment_B)
1460 {
1461 const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1462
1463 assert(fmtl->bpb % tile_info->format_bpb == 0);
1464
1465 const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb;
1466 const uint32_t total_w_tl =
1467 isl_align_div(phys_total_el->w * tile_el_scale,
1468 tile_info->logical_extent_el.width);
1469
1470 /* In some cases the alignment of the pitch might be > to the tile size
1471 * (for example Gen12 CCS requires 512B alignment while the tile's width
1472 * can be 128B), so align the row pitch to the alignment.
1473 */
1474 assert(alignment_B >= tile_info->phys_extent_B.width);
1475 return isl_align(total_w_tl * tile_info->phys_extent_B.width, alignment_B);
1476 }
1477
1478 static uint32_t
1479 isl_calc_min_row_pitch(const struct isl_device *dev,
1480 const struct isl_surf_init_info *surf_info,
1481 const struct isl_tile_info *tile_info,
1482 const struct isl_extent2d *phys_total_el,
1483 uint32_t alignment_B)
1484 {
1485 if (tile_info->tiling == ISL_TILING_LINEAR) {
1486 return isl_calc_linear_min_row_pitch(dev, surf_info, phys_total_el,
1487 alignment_B);
1488 } else {
1489 return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info,
1490 phys_total_el, alignment_B);
1491 }
1492 }
1493
/**
 * Is pitch `n` in the valid range for a hardware bitfield, if the bitfield's
 * size is `bits` bits?
 *
 * Hardware pitch fields are offset by 1.  For example, if the size of
 * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid
 * pitches is [1, 2^B] inclusive.  If the surface pitch is N, then
 * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1.
 *
 * A field size of 0 bits rejects every pitch (presumably this means the
 * field does not exist on the hardware -- confirm against the genxml
 * helpers).
 *
 * \param n     pitch to test; must be non-zero
 * \param bits  width of the hardware field in bits
 * \return true if 1 <= n <= 2^bits and bits != 0
 */
static bool
pitch_in_range(uint32_t n, uint32_t bits)
{
   assert(n != 0);

   /* Do the shift in 64 bits: `1 << bits` on a plain int is undefined once
    * bits reaches 31.  Every 32-bit pitch fits in a field of 32 bits or
    * more, so the shift count is clamped there to keep it well defined.
    */
   const uint64_t max_pitch = (uint64_t) 1 << (bits < 32 ? bits : 32);

   return likely(bits != 0 && 1 <= n && n <= max_pitch);
}
1509
1510 static bool
1511 isl_calc_row_pitch(const struct isl_device *dev,
1512 const struct isl_surf_init_info *surf_info,
1513 const struct isl_tile_info *tile_info,
1514 enum isl_dim_layout dim_layout,
1515 const struct isl_extent2d *phys_total_el,
1516 uint32_t *out_row_pitch_B)
1517 {
1518 uint32_t alignment_B =
1519 isl_calc_row_pitch_alignment(dev, surf_info, tile_info);
1520
1521 const uint32_t min_row_pitch_B =
1522 isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el,
1523 alignment_B);
1524
1525 if (surf_info->row_pitch_B != 0) {
1526 if (surf_info->row_pitch_B < min_row_pitch_B)
1527 return false;
1528
1529 if (surf_info->row_pitch_B % alignment_B != 0)
1530 return false;
1531 }
1532
1533 const uint32_t row_pitch_B =
1534 surf_info->row_pitch_B != 0 ? surf_info->row_pitch_B : min_row_pitch_B;
1535
1536 const uint32_t row_pitch_tl = row_pitch_B / tile_info->phys_extent_B.width;
1537
1538 if (row_pitch_B == 0)
1539 return false;
1540
1541 if (dim_layout == ISL_DIM_LAYOUT_GEN9_1D) {
1542 /* SurfacePitch is ignored for this layout. */
1543 goto done;
1544 }
1545
1546 if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
1547 ISL_SURF_USAGE_TEXTURE_BIT |
1548 ISL_SURF_USAGE_STORAGE_BIT)) &&
1549 !pitch_in_range(row_pitch_B, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info)))
1550 return false;
1551
1552 if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT |
1553 ISL_SURF_USAGE_MCS_BIT)) &&
1554 !pitch_in_range(row_pitch_tl, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info)))
1555 return false;
1556
1557 if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) &&
1558 !pitch_in_range(row_pitch_B, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1559 return false;
1560
1561 if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) &&
1562 !pitch_in_range(row_pitch_B, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1563 return false;
1564
1565 const uint32_t stencil_pitch_bits = dev->use_separate_stencil ?
1566 _3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) :
1567 _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info);
1568
1569 if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) &&
1570 !pitch_in_range(row_pitch_B, stencil_pitch_bits))
1571 return false;
1572
1573 done:
1574 *out_row_pitch_B = row_pitch_B;
1575 return true;
1576 }
1577
1578 bool
1579 isl_surf_init_s(const struct isl_device *dev,
1580 struct isl_surf *surf,
1581 const struct isl_surf_init_info *restrict info)
1582 {
1583 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1584
1585 const struct isl_extent4d logical_level0_px = {
1586 .w = info->width,
1587 .h = info->height,
1588 .d = info->depth,
1589 .a = info->array_len,
1590 };
1591
1592 enum isl_tiling tiling;
1593 if (!isl_surf_choose_tiling(dev, info, &tiling))
1594 return false;
1595
1596 struct isl_tile_info tile_info;
1597 isl_tiling_get_info(tiling, fmtl->bpb, &tile_info);
1598
1599 const enum isl_dim_layout dim_layout =
1600 isl_surf_choose_dim_layout(dev, info->dim, tiling, info->usage);
1601
1602 enum isl_msaa_layout msaa_layout;
1603 if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout))
1604 return false;
1605
1606 struct isl_extent3d image_align_el;
1607 isl_choose_image_alignment_el(dev, info, tiling, dim_layout, msaa_layout,
1608 &image_align_el);
1609
1610 struct isl_extent3d image_align_sa =
1611 isl_extent3d_el_to_sa(info->format, image_align_el);
1612
1613 struct isl_extent4d phys_level0_sa;
1614 isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout,
1615 &phys_level0_sa);
1616
1617 enum isl_array_pitch_span array_pitch_span =
1618 isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa);
1619
1620 uint32_t array_pitch_el_rows;
1621 struct isl_extent2d phys_total_el;
1622 isl_calc_phys_total_extent_el(dev, info, &tile_info,
1623 dim_layout, msaa_layout,
1624 &image_align_sa, &phys_level0_sa,
1625 array_pitch_span, &array_pitch_el_rows,
1626 &phys_total_el);
1627
1628 uint32_t row_pitch_B;
1629 if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout,
1630 &phys_total_el, &row_pitch_B))
1631 return false;
1632
1633 uint32_t base_alignment_B;
1634 uint64_t size_B;
1635 if (tiling == ISL_TILING_LINEAR) {
1636 size_B = (uint64_t) row_pitch_B * phys_total_el.h;
1637
1638 /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress:
1639 *
1640 * "The Base Address for linear render target surfaces and surfaces
1641 * accessed with the typed surface read/write data port messages must
1642 * be element-size aligned, for non-YUV surface formats, or a
1643 * multiple of 2 element-sizes for YUV surface formats. Other linear
1644 * surfaces have no alignment requirements (byte alignment is
1645 * sufficient.)"
1646 */
1647 base_alignment_B = MAX(1, info->min_alignment_B);
1648 if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
1649 if (isl_format_is_yuv(info->format)) {
1650 base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 4);
1651 } else {
1652 base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 8);
1653 }
1654 }
1655 base_alignment_B = isl_round_up_to_power_of_two(base_alignment_B);
1656
1657 /* From the Skylake PRM Vol 2c, PLANE_STRIDE::Stride:
1658 *
1659 * "For Linear memory, this field specifies the stride in chunks of
1660 * 64 bytes (1 cache line)."
1661 */
1662 if (isl_surf_usage_is_display(info->usage))
1663 base_alignment_B = MAX(base_alignment_B, 64);
1664 } else {
1665 const uint32_t total_h_tl =
1666 isl_align_div(phys_total_el.h, tile_info.logical_extent_el.height);
1667
1668 size_B = (uint64_t) total_h_tl * tile_info.phys_extent_B.height * row_pitch_B;
1669
1670 const uint32_t tile_size_B = tile_info.phys_extent_B.width *
1671 tile_info.phys_extent_B.height;
1672 assert(isl_is_pow2(info->min_alignment_B) && isl_is_pow2(tile_size_B));
1673 base_alignment_B = MAX(info->min_alignment_B, tile_size_B);
1674
1675 /* The diagram in the Bspec section Memory Compression - Gen12, shows
1676 * that the CCS is indexed in 256B chunks. However, the
1677 * PLANE_AUX_DIST::Auxiliary Surface Distance field is in units of 4K
1678 * pages. We currently don't assign the usage field like we do for main
1679 * surfaces, so just use 4K for now.
1680 */
1681 if (tiling == ISL_TILING_GEN12_CCS)
1682 base_alignment_B = MAX(base_alignment_B, 4096);
1683
1684 /* Gen12+ requires that images be 64K-aligned if they're going to used
1685 * with CCS. This is because the Aux translation table maps main
1686 * surface addresses to aux addresses at a 64K (in the main surface)
1687 * granularity. Because we don't know for sure in ISL if a surface will
1688 * use CCS, we have to guess based on the DISABLE_AUX usage bit. The
1689 * one thing we do know is that we haven't enable CCS on linear images
1690 * yet so we can avoid the extra alignment there.
1691 */
1692 if (ISL_DEV_GEN(dev) >= 12 &&
1693 !(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) {
1694 base_alignment_B = MAX(base_alignment_B, 64 * 1024);
1695 }
1696 }
1697
1698 if (ISL_DEV_GEN(dev) < 9) {
1699 /* From the Broadwell PRM Vol 5, Surface Layout:
1700 *
1701 * "In addition to restrictions on maximum height, width, and depth,
1702 * surfaces are also restricted to a maximum size in bytes. This
1703 * maximum is 2 GB for all products and all surface types."
1704 *
1705 * This comment is applicable to all Pre-gen9 platforms.
1706 */
1707 if (size_B > (uint64_t) 1 << 31)
1708 return false;
1709 } else if (ISL_DEV_GEN(dev) < 11) {
1710 /* From the Skylake PRM Vol 5, Maximum Surface Size in Bytes:
1711 * "In addition to restrictions on maximum height, width, and depth,
1712 * surfaces are also restricted to a maximum size of 2^38 bytes.
1713 * All pixels within the surface must be contained within 2^38 bytes
1714 * of the base address."
1715 */
1716 if (size_B > (uint64_t) 1 << 38)
1717 return false;
1718 } else {
1719 /* gen11+ platforms raised this limit to 2^44 bytes. */
1720 if (size_B > (uint64_t) 1 << 44)
1721 return false;
1722 }
1723
1724 *surf = (struct isl_surf) {
1725 .dim = info->dim,
1726 .dim_layout = dim_layout,
1727 .msaa_layout = msaa_layout,
1728 .tiling = tiling,
1729 .format = info->format,
1730
1731 .levels = info->levels,
1732 .samples = info->samples,
1733
1734 .image_alignment_el = image_align_el,
1735 .logical_level0_px = logical_level0_px,
1736 .phys_level0_sa = phys_level0_sa,
1737
1738 .size_B = size_B,
1739 .alignment_B = base_alignment_B,
1740 .row_pitch_B = row_pitch_B,
1741 .array_pitch_el_rows = array_pitch_el_rows,
1742 .array_pitch_span = array_pitch_span,
1743
1744 .usage = info->usage,
1745 };
1746
1747 return true;
1748 }
1749
1750 void
1751 isl_surf_get_tile_info(const struct isl_surf *surf,
1752 struct isl_tile_info *tile_info)
1753 {
1754 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
1755 isl_tiling_get_info(surf->tiling, fmtl->bpb, tile_info);
1756 }
1757
1758 bool
1759 isl_surf_get_hiz_surf(const struct isl_device *dev,
1760 const struct isl_surf *surf,
1761 struct isl_surf *hiz_surf)
1762 {
1763 assert(ISL_DEV_GEN(dev) >= 5 && ISL_DEV_USE_SEPARATE_STENCIL(dev));
1764
1765 if (!isl_surf_usage_is_depth(surf->usage))
1766 return false;
1767
1768 /* HiZ only works with Y-tiled depth buffers */
1769 if (!isl_tiling_is_any_y(surf->tiling))
1770 return false;
1771
1772 /* On SNB+, compressed depth buffers cannot be interleaved with stencil. */
1773 switch (surf->format) {
1774 case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
1775 if (isl_surf_usage_is_depth_and_stencil(surf->usage)) {
1776 assert(ISL_DEV_GEN(dev) == 5);
1777 unreachable("This should work, but is untested");
1778 }
1779 /* Fall through */
1780 case ISL_FORMAT_R16_UNORM:
1781 case ISL_FORMAT_R32_FLOAT:
1782 break;
1783 case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
1784 if (ISL_DEV_GEN(dev) == 5) {
1785 assert(isl_surf_usage_is_depth_and_stencil(surf->usage));
1786 unreachable("This should work, but is untested");
1787 }
1788 /* Fall through */
1789 default:
1790 return false;
1791 }
1792
1793 /* Multisampled depth is always interleaved */
1794 assert(surf->msaa_layout == ISL_MSAA_LAYOUT_NONE ||
1795 surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);
1796
1797 /* From the Broadwell PRM Vol. 7, "Hierarchical Depth Buffer":
1798 *
1799 * "The Surface Type, Height, Width, Depth, Minimum Array Element, Render
1800 * Target View Extent, and Depth Coordinate Offset X/Y of the
1801 * hierarchical depth buffer are inherited from the depth buffer. The
1802 * height and width of the hierarchical depth buffer that must be
1803 * allocated are computed by the following formulas, where HZ is the
1804 * hierarchical depth buffer and Z is the depth buffer. The Z_Height,
1805 * Z_Width, and Z_Depth values given in these formulas are those present
1806 * in 3DSTATE_DEPTH_BUFFER incremented by one.
1807 *
1808 * "The value of Z_Height and Z_Width must each be multiplied by 2 before
1809 * being applied to the table below if Number of Multisamples is set to
1810 * NUMSAMPLES_4. The value of Z_Height must be multiplied by 2 and
1811 * Z_Width must be multiplied by 4 before being applied to the table
1812 * below if Number of Multisamples is set to NUMSAMPLES_8."
1813 *
1814 * In the Sky Lake PRM, the second paragraph is replaced with this:
1815 *
1816 * "The Z_Height and Z_Width values must equal those present in
1817 * 3DSTATE_DEPTH_BUFFER incremented by one."
1818 *
1819 * In other words, on Sandy Bridge through Broadwell, each 128-bit HiZ
1820 * block corresponds to a region of 8x4 samples in the primary depth
1821 * surface. On Sky Lake, on the other hand, each HiZ block corresponds to
1822 * a region of 8x4 pixels in the primary depth surface regardless of the
1823 * number of samples. The dimensions of a HiZ block in both pixels and
1824 * samples are given in the table below:
1825 *
1826 * | SNB - BDW | SKL+
1827 * ------+-----------+-------------
1828 * 1x | 8 x 4 sa | 8 x 4 sa
1829 * MSAA | 8 x 4 px | 8 x 4 px
1830 * ------+-----------+-------------
1831 * 2x | 8 x 4 sa | 16 x 4 sa
1832 * MSAA | 4 x 4 px | 8 x 4 px
1833 * ------+-----------+-------------
1834 * 4x | 8 x 4 sa | 16 x 8 sa
1835 * MSAA | 4 x 2 px | 8 x 4 px
1836 * ------+-----------+-------------
1837 * 8x | 8 x 4 sa | 32 x 8 sa
1838 * MSAA | 2 x 2 px | 8 x 4 px
1839 * ------+-----------+-------------
1840 * 16x | N/A | 32 x 16 sa
1841 * MSAA | N/A | 8 x 4 px
1842 * ------+-----------+-------------
1843 *
1844 * There are a number of different ways that this discrepency could be
1845 * handled. The way we have chosen is to simply make MSAA HiZ have the
1846 * same number of samples as the parent surface pre-Sky Lake and always be
1847 * single-sampled on Sky Lake and above. Since the block sizes of
1848 * compressed formats are given in samples, this neatly handles everything
1849 * without the need for additional HiZ formats with different block sizes
1850 * on SKL+.
1851 */
1852 const unsigned samples = ISL_DEV_GEN(dev) >= 9 ? 1 : surf->samples;
1853
1854 return isl_surf_init(dev, hiz_surf,
1855 .dim = surf->dim,
1856 .format = ISL_FORMAT_HIZ,
1857 .width = surf->logical_level0_px.width,
1858 .height = surf->logical_level0_px.height,
1859 .depth = surf->logical_level0_px.depth,
1860 .levels = surf->levels,
1861 .array_len = surf->logical_level0_px.array_len,
1862 .samples = samples,
1863 .usage = ISL_SURF_USAGE_HIZ_BIT,
1864 .tiling_flags = ISL_TILING_HIZ_BIT);
1865 }
1866
1867 bool
1868 isl_surf_get_mcs_surf(const struct isl_device *dev,
1869 const struct isl_surf *surf,
1870 struct isl_surf *mcs_surf)
1871 {
1872 /* It must be multisampled with an array layout */
1873 if (surf->msaa_layout != ISL_MSAA_LAYOUT_ARRAY)
1874 return false;
1875
1876 if (mcs_surf->size_B > 0)
1877 return false;
1878
1879 /* The following are true of all multisampled surfaces */
1880 assert(surf->samples > 1);
1881 assert(surf->dim == ISL_SURF_DIM_2D);
1882 assert(surf->levels == 1);
1883 assert(surf->logical_level0_px.depth == 1);
1884
1885 /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
1886 *
1887 * This field must be set to 0 for all SINT MSRTs when all RT channels
1888 * are not written
1889 *
1890 * In practice this means that we have to disable MCS for all signed
1891 * integer MSAA buffers. The alternative, to disable MCS only when one
1892 * of the render target channels is disabled, is impractical because it
1893 * would require converting between CMS and UMS MSAA layouts on the fly,
1894 * which is expensive.
1895 */
1896 if (ISL_DEV_GEN(dev) == 7 && isl_format_has_sint_channel(surf->format))
1897 return false;
1898
1899 /* The "Auxiliary Surface Pitch" field in RENDER_SURFACE_STATE is only 9
1900 * bits which means the maximum pitch of a compression surface is 512
1901 * tiles or 64KB (since MCS is always Y-tiled). Since a 16x MCS buffer is
1902 * 64bpp, this gives us a maximum width of 8192 pixels. We can create
1903 * larger multisampled surfaces, we just can't compress them. For 2x, 4x,
1904 * and 8x, we have enough room for the full 16k supported by the hardware.
1905 */
1906 if (surf->samples == 16 && surf->logical_level0_px.width > 8192)
1907 return false;
1908
1909 enum isl_format mcs_format;
1910 switch (surf->samples) {
1911 case 2: mcs_format = ISL_FORMAT_MCS_2X; break;
1912 case 4: mcs_format = ISL_FORMAT_MCS_4X; break;
1913 case 8: mcs_format = ISL_FORMAT_MCS_8X; break;
1914 case 16: mcs_format = ISL_FORMAT_MCS_16X; break;
1915 default:
1916 unreachable("Invalid sample count");
1917 }
1918
1919 return isl_surf_init(dev, mcs_surf,
1920 .dim = ISL_SURF_DIM_2D,
1921 .format = mcs_format,
1922 .width = surf->logical_level0_px.width,
1923 .height = surf->logical_level0_px.height,
1924 .depth = 1,
1925 .levels = 1,
1926 .array_len = surf->logical_level0_px.array_len,
1927 .samples = 1, /* MCS surfaces are really single-sampled */
1928 .usage = ISL_SURF_USAGE_MCS_BIT,
1929 .tiling_flags = ISL_TILING_Y0_BIT);
1930 }
1931
1932 bool
1933 isl_surf_supports_ccs(const struct isl_device *dev,
1934 const struct isl_surf *surf)
1935 {
1936 /* CCS support does not exist prior to Gen7 */
1937 if (ISL_DEV_GEN(dev) <= 6)
1938 return false;
1939
1940 if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)
1941 return false;
1942
1943 if (isl_format_is_compressed(surf->format))
1944 return false;
1945
1946 if (!isl_is_pow2(isl_format_get_layout(surf->format)->bpb))
1947 return false;
1948
1949 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
1950 * Target(s)", beneath the "Fast Color Clear" bullet (p326):
1951 *
1952 * - Support is limited to tiled render targets.
1953 *
1954 * From the Skylake documentation, it is made clear that X-tiling is no
1955 * longer supported:
1956 *
1957 * - MCS and Lossless compression is supported for
1958 * TiledY/TileYs/TileYf non-MSRTs only.
1959 *
1960 * From the BSpec (44930) for Gen12:
1961 *
1962 * Linear CCS is only allowed for Untyped Buffers but only via HDC
1963 * Data-Port messages.
1964 *
1965 * We never use untyped messages on surfaces created by ISL on Gen9+ so
1966 * this means linear is out on Gen12+ as well.
1967 */
1968 if (surf->tiling == ISL_TILING_LINEAR)
1969 return false;
1970
1971 if (ISL_DEV_GEN(dev) >= 12) {
1972 if (isl_surf_usage_is_stencil(surf->usage) && surf->samples > 1)
1973 return false;
1974
1975 /* On Gen12, 8BPP surfaces cannot be compressed if any level is not
1976 * 32Bx4row-aligned. For now, just reject the cases where alignment
1977 * matters.
1978 */
1979 if (isl_format_get_layout(surf->format)->bpb == 8 && surf->levels >= 3) {
1980 isl_finishme("%s:%s: CCS for 8BPP textures with 3+ miplevels is "
1981 "disabled, but support for more levels is possible.",
1982 __FILE__, __func__);
1983 return false;
1984 }
1985
1986 /* On Gen12, all CCS-compressed surface pitches must be multiples of
1987 * 512B.
1988 */
1989 if (surf->row_pitch_B % 512 != 0)
1990 return false;
1991
1992 /* According to GEN:BUG:1406738321, 3D textures need a blit to a new
1993 * surface in order to perform a resolve. For now, just disable CCS.
1994 */
1995 if (surf->dim == ISL_SURF_DIM_3D) {
1996 isl_finishme("%s:%s: CCS for 3D textures is disabled, but a workaround"
1997 " is available.", __FILE__, __func__);
1998 return false;
1999 }
2000
2001 /* GEN:BUG:1207137018
2002 *
2003 * TODO: implement following workaround currently covered by the
2004 * restriction above. If following conditions are met:
2005 *
2006 * - RENDER_SURFACE_STATE.Surface Type == 3D
2007 * - RENDER_SURFACE_STATE.Auxiliary Surface Mode != AUX_NONE
2008 * - RENDER_SURFACE_STATE.Tiled ResourceMode is TYF or TYS
2009 *
2010 * Set the value of RENDER_SURFACE_STATE.Mip Tail Start LOD to a mip
2011 * that larger than those present in the surface (i.e. 15)
2012 */
2013
2014 /* TODO: Handle the other tiling formats */
2015 if (surf->tiling != ISL_TILING_Y0)
2016 return false;
2017 } else {
2018 /* ISL_DEV_GEN(dev) < 12 */
2019 if (surf->samples > 1)
2020 return false;
2021
2022 /* CCS is only for color images on Gen7-11 */
2023 if (isl_surf_usage_is_depth_or_stencil(surf->usage))
2024 return false;
2025
2026 /* The PRM doesn't say this explicitly, but fast-clears don't appear to
2027 * work for 3D textures until gen9 where the layout of 3D textures
2028 * changes to match 2D array textures.
2029 */
2030 if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
2031 return false;
2032
2033 /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear of
2034 * Non-MultiSampler Render Target Restrictions):
2035 *
2036 * "Support is for non-mip-mapped and non-array surface types only."
2037 *
2038 * This restriction is lifted on gen8+. Technically, it may be possible
2039 * to create a CCS for an arrayed or mipmapped image and only enable
2040 * CCS_D when rendering to the base slice. However, there is no
2041 * documentation tell us what the hardware would do in that case or what
2042 * it does if you walk off the bases slice. (Does it ignore CCS or does
2043 * it start scribbling over random memory?) We play it safe and just
2044 * follow the docs and don't allow CCS_D for arrayed or mip-mapped
2045 * surfaces.
2046 */
2047 if (ISL_DEV_GEN(dev) <= 7 &&
2048 (surf->levels > 1 || surf->logical_level0_px.array_len > 1))
2049 return false;
2050
2051 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
2052 * Target(s)", beneath the "Fast Color Clear" bullet (p326):
2053 *
2054 * - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
2055 * 64bpp, and 128bpp.
2056 */
2057 if (isl_format_get_layout(surf->format)->bpb < 32)
2058 return false;
2059
2060 /* From the Skylake documentation, it is made clear that X-tiling is no
2061 * longer supported:
2062 *
2063 * - MCS and Lossless compression is supported for
2064 * TiledY/TileYs/TileYf non-MSRTs only.
2065 */
2066 if (ISL_DEV_GEN(dev) >= 9 && !isl_tiling_is_any_y(surf->tiling))
2067 return false;
2068 }
2069
2070 return true;
2071 }
2072
2073 bool
2074 isl_surf_get_ccs_surf(const struct isl_device *dev,
2075 const struct isl_surf *surf,
2076 struct isl_surf *aux_surf,
2077 struct isl_surf *extra_aux_surf,
2078 uint32_t row_pitch_B)
2079 {
2080 assert(aux_surf);
2081
2082 /* An uninitialized surface is needed to get a CCS surface. */
2083 if (aux_surf->size_B > 0 &&
2084 (extra_aux_surf == NULL || extra_aux_surf->size_B > 0)) {
2085 return false;
2086 }
2087
2088 /* A surface can't have two CCS surfaces. */
2089 if (aux_surf->usage & ISL_SURF_USAGE_CCS_BIT)
2090 return false;
2091
2092 if (!isl_surf_supports_ccs(dev, surf))
2093 return false;
2094
2095 if (ISL_DEV_GEN(dev) >= 12) {
2096 enum isl_format ccs_format;
2097 switch (isl_format_get_layout(surf->format)->bpb) {
2098 case 8: ccs_format = ISL_FORMAT_GEN12_CCS_8BPP_Y0; break;
2099 case 16: ccs_format = ISL_FORMAT_GEN12_CCS_16BPP_Y0; break;
2100 case 32: ccs_format = ISL_FORMAT_GEN12_CCS_32BPP_Y0; break;
2101 case 64: ccs_format = ISL_FORMAT_GEN12_CCS_64BPP_Y0; break;
2102 case 128: ccs_format = ISL_FORMAT_GEN12_CCS_128BPP_Y0; break;
2103 default:
2104 return false;
2105 }
2106
2107 /* On Gen12, the CCS is a scaled-down version of the main surface. We
2108 * model this as the CCS compressing a 2D-view of the entire surface.
2109 */
2110 struct isl_surf *ccs_surf =
2111 aux_surf->size_B > 0 ? extra_aux_surf : aux_surf;
2112 const bool ok =
2113 isl_surf_init(dev, ccs_surf,
2114 .dim = ISL_SURF_DIM_2D,
2115 .format = ccs_format,
2116 .width = isl_surf_get_row_pitch_el(surf),
2117 .height = surf->size_B / surf->row_pitch_B,
2118 .depth = 1,
2119 .levels = 1,
2120 .array_len = 1,
2121 .samples = 1,
2122 .row_pitch_B = row_pitch_B,
2123 .usage = ISL_SURF_USAGE_CCS_BIT,
2124 .tiling_flags = ISL_TILING_GEN12_CCS_BIT);
2125 assert(!ok || ccs_surf->size_B == surf->size_B / 256);
2126 return ok;
2127 } else {
2128 enum isl_format ccs_format;
2129 if (ISL_DEV_GEN(dev) >= 9) {
2130 switch (isl_format_get_layout(surf->format)->bpb) {
2131 case 32: ccs_format = ISL_FORMAT_GEN9_CCS_32BPP; break;
2132 case 64: ccs_format = ISL_FORMAT_GEN9_CCS_64BPP; break;
2133 case 128: ccs_format = ISL_FORMAT_GEN9_CCS_128BPP; break;
2134 default: unreachable("Unsupported CCS format");
2135 return false;
2136 }
2137 } else if (surf->tiling == ISL_TILING_Y0) {
2138 switch (isl_format_get_layout(surf->format)->bpb) {
2139 case 32: ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_Y; break;
2140 case 64: ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_Y; break;
2141 case 128: ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_Y; break;
2142 default: unreachable("Unsupported CCS format");
2143 }
2144 } else if (surf->tiling == ISL_TILING_X) {
2145 switch (isl_format_get_layout(surf->format)->bpb) {
2146 case 32: ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_X; break;
2147 case 64: ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_X; break;
2148 case 128: ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_X; break;
2149 default: unreachable("Unsupported CCS format");
2150 }
2151 } else {
2152 unreachable("Invalid tiling format");
2153 }
2154
2155 return isl_surf_init(dev, aux_surf,
2156 .dim = surf->dim,
2157 .format = ccs_format,
2158 .width = surf->logical_level0_px.width,
2159 .height = surf->logical_level0_px.height,
2160 .depth = surf->logical_level0_px.depth,
2161 .levels = surf->levels,
2162 .array_len = surf->logical_level0_px.array_len,
2163 .samples = 1,
2164 .row_pitch_B = row_pitch_B,
2165 .usage = ISL_SURF_USAGE_CCS_BIT,
2166 .tiling_flags = ISL_TILING_CCS_BIT);
2167 }
2168 }
2169
/**
 * Call the generation-specific function isl_gen<N>_<func>() selected by
 * ISL_DEV_GEN(dev), passing the remaining arguments through unchanged.
 *
 * Gen4 devices for which ISL_DEV_IS_G4X() is true use the gen5 entry point
 * and Gen7 devices for which ISL_DEV_IS_HASWELL() is true use the gen75 one.
 * An unknown generation trips an assertion.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves as
 * a single statement: the caller's trailing semicolon completes it and it is
 * safe inside an unbraced if/else.  Note that @a dev may be evaluated more
 * than once.
 */
#define isl_genX_call(dev, func, ...)                 \
   do {                                               \
      switch (ISL_DEV_GEN(dev)) {                     \
      case 4:                                         \
         /* G45 surface state is the same as gen5 */  \
         if (ISL_DEV_IS_G4X(dev)) {                   \
            isl_gen5_##func(__VA_ARGS__);             \
         } else {                                     \
            isl_gen4_##func(__VA_ARGS__);             \
         }                                            \
         break;                                       \
      case 5:                                         \
         isl_gen5_##func(__VA_ARGS__);                \
         break;                                       \
      case 6:                                         \
         isl_gen6_##func(__VA_ARGS__);                \
         break;                                       \
      case 7:                                         \
         if (ISL_DEV_IS_HASWELL(dev)) {               \
            isl_gen75_##func(__VA_ARGS__);            \
         } else {                                     \
            isl_gen7_##func(__VA_ARGS__);             \
         }                                            \
         break;                                       \
      case 8:                                         \
         isl_gen8_##func(__VA_ARGS__);                \
         break;                                       \
      case 9:                                         \
         isl_gen9_##func(__VA_ARGS__);                \
         break;                                       \
      case 10:                                        \
         isl_gen10_##func(__VA_ARGS__);               \
         break;                                       \
      case 11:                                        \
         isl_gen11_##func(__VA_ARGS__);               \
         break;                                       \
      case 12:                                        \
         isl_gen12_##func(__VA_ARGS__);               \
         break;                                       \
      default:                                        \
         assert(!"Unknown hardware generation");      \
      }                                               \
   } while (0)
2211
2212 void
2213 isl_surf_fill_state_s(const struct isl_device *dev, void *state,
2214 const struct isl_surf_fill_state_info *restrict info)
2215 {
2216 #ifndef NDEBUG
2217 isl_surf_usage_flags_t _base_usage =
2218 info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
2219 ISL_SURF_USAGE_TEXTURE_BIT |
2220 ISL_SURF_USAGE_STORAGE_BIT);
2221 /* They may only specify one of the above bits at a time */
2222 assert(__builtin_popcount(_base_usage) == 1);
2223 /* The only other allowed bit is ISL_SURF_USAGE_CUBE_BIT */
2224 assert((info->view->usage & ~ISL_SURF_USAGE_CUBE_BIT) == _base_usage);
2225 #endif
2226
2227 if (info->surf->dim == ISL_SURF_DIM_3D) {
2228 assert(info->view->base_array_layer + info->view->array_len <=
2229 info->surf->logical_level0_px.depth);
2230 } else {
2231 assert(info->view->base_array_layer + info->view->array_len <=
2232 info->surf->logical_level0_px.array_len);
2233 }
2234
2235 isl_genX_call(dev, surf_fill_state_s, dev, state, info);
2236 }
2237
2238 void
2239 isl_buffer_fill_state_s(const struct isl_device *dev, void *state,
2240 const struct isl_buffer_fill_state_info *restrict info)
2241 {
2242 isl_genX_call(dev, buffer_fill_state_s, dev, state, info);
2243 }
2244
2245 void
2246 isl_null_fill_state(const struct isl_device *dev, void *state,
2247 struct isl_extent3d size)
2248 {
2249 isl_genX_call(dev, null_fill_state, state, size);
2250 }
2251
2252 void
2253 isl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
2254 const struct isl_depth_stencil_hiz_emit_info *restrict info)
2255 {
2256 if (info->depth_surf && info->stencil_surf) {
2257 if (!dev->info->has_hiz_and_separate_stencil) {
2258 assert(info->depth_surf == info->stencil_surf);
2259 assert(info->depth_address == info->stencil_address);
2260 }
2261 assert(info->depth_surf->dim == info->stencil_surf->dim);
2262 }
2263
2264 if (info->depth_surf) {
2265 assert((info->depth_surf->usage & ISL_SURF_USAGE_DEPTH_BIT));
2266 if (info->depth_surf->dim == ISL_SURF_DIM_3D) {
2267 assert(info->view->base_array_layer + info->view->array_len <=
2268 info->depth_surf->logical_level0_px.depth);
2269 } else {
2270 assert(info->view->base_array_layer + info->view->array_len <=
2271 info->depth_surf->logical_level0_px.array_len);
2272 }
2273 }
2274
2275 if (info->stencil_surf) {
2276 assert((info->stencil_surf->usage & ISL_SURF_USAGE_STENCIL_BIT));
2277 if (info->stencil_surf->dim == ISL_SURF_DIM_3D) {
2278 assert(info->view->base_array_layer + info->view->array_len <=
2279 info->stencil_surf->logical_level0_px.depth);
2280 } else {
2281 assert(info->view->base_array_layer + info->view->array_len <=
2282 info->stencil_surf->logical_level0_px.array_len);
2283 }
2284 }
2285
2286 isl_genX_call(dev, emit_depth_stencil_hiz_s, dev, batch, info);
2287 }
2288
2289 /**
2290 * A variant of isl_surf_get_image_offset_sa() specific to
2291 * ISL_DIM_LAYOUT_GEN4_2D.
2292 */
2293 static void
2294 get_image_offset_sa_gen4_2d(const struct isl_surf *surf,
2295 uint32_t level, uint32_t logical_array_layer,
2296 uint32_t *x_offset_sa,
2297 uint32_t *y_offset_sa)
2298 {
2299 assert(level < surf->levels);
2300 if (surf->dim == ISL_SURF_DIM_3D)
2301 assert(logical_array_layer < surf->logical_level0_px.depth);
2302 else
2303 assert(logical_array_layer < surf->logical_level0_px.array_len);
2304
2305 const struct isl_extent3d image_align_sa =
2306 isl_surf_get_image_alignment_sa(surf);
2307
2308 const uint32_t W0 = surf->phys_level0_sa.width;
2309 const uint32_t H0 = surf->phys_level0_sa.height;
2310
2311 const uint32_t phys_layer = logical_array_layer *
2312 (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1);
2313
2314 uint32_t x = 0;
2315 uint32_t y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf);
2316
2317 for (uint32_t l = 0; l < level; ++l) {
2318 if (l == 1) {
2319 uint32_t W = isl_minify(W0, l);
2320 x += isl_align_npot(W, image_align_sa.w);
2321 } else {
2322 uint32_t H = isl_minify(H0, l);
2323 y += isl_align_npot(H, image_align_sa.h);
2324 }
2325 }
2326
2327 *x_offset_sa = x;
2328 *y_offset_sa = y;
2329 }
2330
2331 /**
2332 * A variant of isl_surf_get_image_offset_sa() specific to
2333 * ISL_DIM_LAYOUT_GEN4_3D.
2334 */
2335 static void
2336 get_image_offset_sa_gen4_3d(const struct isl_surf *surf,
2337 uint32_t level, uint32_t logical_z_offset_px,
2338 uint32_t *x_offset_sa,
2339 uint32_t *y_offset_sa)
2340 {
2341 assert(level < surf->levels);
2342 if (surf->dim == ISL_SURF_DIM_3D) {
2343 assert(surf->phys_level0_sa.array_len == 1);
2344 assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level));
2345 } else {
2346 assert(surf->dim == ISL_SURF_DIM_2D);
2347 assert(surf->usage & ISL_SURF_USAGE_CUBE_BIT);
2348 assert(surf->phys_level0_sa.array_len == 6);
2349 assert(logical_z_offset_px < surf->phys_level0_sa.array_len);
2350 }
2351
2352 const struct isl_extent3d image_align_sa =
2353 isl_surf_get_image_alignment_sa(surf);
2354
2355 const uint32_t W0 = surf->phys_level0_sa.width;
2356 const uint32_t H0 = surf->phys_level0_sa.height;
2357 const uint32_t D0 = surf->phys_level0_sa.depth;
2358 const uint32_t AL = surf->phys_level0_sa.array_len;
2359
2360 uint32_t x = 0;
2361 uint32_t y = 0;
2362
2363 for (uint32_t l = 0; l < level; ++l) {
2364 const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h);
2365 const uint32_t level_d =
2366 isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : AL,
2367 image_align_sa.d);
2368 const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
2369
2370 y += level_h * max_layers_vert;
2371 }
2372
2373 const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w);
2374 const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h);
2375 const uint32_t level_d =
2376 isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, level) : AL,
2377 image_align_sa.d);
2378
2379 const uint32_t max_layers_horiz = MIN(level_d, 1u << level);
2380
2381 x += level_w * (logical_z_offset_px % max_layers_horiz);
2382 y += level_h * (logical_z_offset_px / max_layers_horiz);
2383
2384 *x_offset_sa = x;
2385 *y_offset_sa = y;
2386 }
2387
2388 static void
2389 get_image_offset_sa_gen6_stencil_hiz(const struct isl_surf *surf,
2390 uint32_t level,
2391 uint32_t logical_array_layer,
2392 uint32_t *x_offset_sa,
2393 uint32_t *y_offset_sa)
2394 {
2395 assert(level < surf->levels);
2396 assert(surf->logical_level0_px.depth == 1);
2397 assert(logical_array_layer < surf->logical_level0_px.array_len);
2398
2399 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2400
2401 const struct isl_extent3d image_align_sa =
2402 isl_surf_get_image_alignment_sa(surf);
2403
2404 struct isl_tile_info tile_info;
2405 isl_tiling_get_info(surf->tiling, fmtl->bpb, &tile_info);
2406 const struct isl_extent2d tile_extent_sa = {
2407 .w = tile_info.logical_extent_el.w * fmtl->bw,
2408 .h = tile_info.logical_extent_el.h * fmtl->bh,
2409 };
2410 /* Tile size is a multiple of image alignment */
2411 assert(tile_extent_sa.w % image_align_sa.w == 0);
2412 assert(tile_extent_sa.h % image_align_sa.h == 0);
2413
2414 const uint32_t W0 = surf->phys_level0_sa.w;
2415 const uint32_t H0 = surf->phys_level0_sa.h;
2416
2417 /* Each image has the same height as LOD0 because the hardware thinks
2418 * everything is LOD0
2419 */
2420 const uint32_t H = isl_align(H0, image_align_sa.h);
2421
2422 /* Quick sanity check for consistency */
2423 if (surf->phys_level0_sa.array_len > 1)
2424 assert(surf->array_pitch_el_rows == isl_assert_div(H, fmtl->bh));
2425
2426 uint32_t x = 0, y = 0;
2427 for (uint32_t l = 0; l < level; ++l) {
2428 const uint32_t W = isl_minify(W0, l);
2429
2430 const uint32_t w = isl_align(W, tile_extent_sa.w);
2431 const uint32_t h = isl_align(H * surf->phys_level0_sa.a,
2432 tile_extent_sa.h);
2433
2434 if (l == 0) {
2435 y += h;
2436 } else {
2437 x += w;
2438 }
2439 }
2440
2441 y += H * logical_array_layer;
2442
2443 *x_offset_sa = x;
2444 *y_offset_sa = y;
2445 }
2446
2447 /**
2448 * A variant of isl_surf_get_image_offset_sa() specific to
2449 * ISL_DIM_LAYOUT_GEN9_1D.
2450 */
2451 static void
2452 get_image_offset_sa_gen9_1d(const struct isl_surf *surf,
2453 uint32_t level, uint32_t layer,
2454 uint32_t *x_offset_sa,
2455 uint32_t *y_offset_sa)
2456 {
2457 assert(level < surf->levels);
2458 assert(layer < surf->phys_level0_sa.array_len);
2459 assert(surf->phys_level0_sa.height == 1);
2460 assert(surf->phys_level0_sa.depth == 1);
2461 assert(surf->samples == 1);
2462
2463 const uint32_t W0 = surf->phys_level0_sa.width;
2464 const struct isl_extent3d image_align_sa =
2465 isl_surf_get_image_alignment_sa(surf);
2466
2467 uint32_t x = 0;
2468
2469 for (uint32_t l = 0; l < level; ++l) {
2470 uint32_t W = isl_minify(W0, l);
2471 uint32_t w = isl_align_npot(W, image_align_sa.w);
2472
2473 x += w;
2474 }
2475
2476 *x_offset_sa = x;
2477 *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf);
2478 }
2479
2480 /**
2481 * Calculate the offset, in units of surface samples, to a subimage in the
2482 * surface.
2483 *
2484 * @invariant level < surface levels
2485 * @invariant logical_array_layer < logical array length of surface
2486 * @invariant logical_z_offset_px < logical depth of surface at level
2487 */
2488 void
2489 isl_surf_get_image_offset_sa(const struct isl_surf *surf,
2490 uint32_t level,
2491 uint32_t logical_array_layer,
2492 uint32_t logical_z_offset_px,
2493 uint32_t *x_offset_sa,
2494 uint32_t *y_offset_sa)
2495 {
2496 assert(level < surf->levels);
2497 assert(logical_array_layer < surf->logical_level0_px.array_len);
2498 assert(logical_z_offset_px
2499 < isl_minify(surf->logical_level0_px.depth, level));
2500
2501 switch (surf->dim_layout) {
2502 case ISL_DIM_LAYOUT_GEN9_1D:
2503 get_image_offset_sa_gen9_1d(surf, level, logical_array_layer,
2504 x_offset_sa, y_offset_sa);
2505 break;
2506 case ISL_DIM_LAYOUT_GEN4_2D:
2507 get_image_offset_sa_gen4_2d(surf, level, logical_array_layer
2508 + logical_z_offset_px,
2509 x_offset_sa, y_offset_sa);
2510 break;
2511 case ISL_DIM_LAYOUT_GEN4_3D:
2512 get_image_offset_sa_gen4_3d(surf, level, logical_array_layer +
2513 logical_z_offset_px,
2514 x_offset_sa, y_offset_sa);
2515 break;
2516 case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
2517 get_image_offset_sa_gen6_stencil_hiz(surf, level, logical_array_layer +
2518 logical_z_offset_px,
2519 x_offset_sa, y_offset_sa);
2520 break;
2521
2522 default:
2523 unreachable("not reached");
2524 }
2525 }
2526
2527 void
2528 isl_surf_get_image_offset_el(const struct isl_surf *surf,
2529 uint32_t level,
2530 uint32_t logical_array_layer,
2531 uint32_t logical_z_offset_px,
2532 uint32_t *x_offset_el,
2533 uint32_t *y_offset_el)
2534 {
2535 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2536
2537 assert(level < surf->levels);
2538 assert(logical_array_layer < surf->logical_level0_px.array_len);
2539 assert(logical_z_offset_px
2540 < isl_minify(surf->logical_level0_px.depth, level));
2541
2542 uint32_t x_offset_sa, y_offset_sa;
2543 isl_surf_get_image_offset_sa(surf, level,
2544 logical_array_layer,
2545 logical_z_offset_px,
2546 &x_offset_sa,
2547 &y_offset_sa);
2548
2549 *x_offset_el = x_offset_sa / fmtl->bw;
2550 *y_offset_el = y_offset_sa / fmtl->bh;
2551 }
2552
2553 void
2554 isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf,
2555 uint32_t level,
2556 uint32_t logical_array_layer,
2557 uint32_t logical_z_offset_px,
2558 uint32_t *offset_B,
2559 uint32_t *x_offset_sa,
2560 uint32_t *y_offset_sa)
2561 {
2562 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2563
2564 uint32_t total_x_offset_el, total_y_offset_el;
2565 isl_surf_get_image_offset_el(surf, level, logical_array_layer,
2566 logical_z_offset_px,
2567 &total_x_offset_el,
2568 &total_y_offset_el);
2569
2570 uint32_t x_offset_el, y_offset_el;
2571 isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb,
2572 surf->row_pitch_B,
2573 total_x_offset_el,
2574 total_y_offset_el,
2575 offset_B,
2576 &x_offset_el,
2577 &y_offset_el);
2578
2579 if (x_offset_sa) {
2580 *x_offset_sa = x_offset_el * fmtl->bw;
2581 } else {
2582 assert(x_offset_el == 0);
2583 }
2584
2585 if (y_offset_sa) {
2586 *y_offset_sa = y_offset_el * fmtl->bh;
2587 } else {
2588 assert(y_offset_el == 0);
2589 }
2590 }
2591
2592 void
2593 isl_surf_get_image_range_B_tile(const struct isl_surf *surf,
2594 uint32_t level,
2595 uint32_t logical_array_layer,
2596 uint32_t logical_z_offset_px,
2597 uint32_t *start_tile_B,
2598 uint32_t *end_tile_B)
2599 {
2600 uint32_t start_x_offset_el, start_y_offset_el;
2601 isl_surf_get_image_offset_el(surf, level, logical_array_layer,
2602 logical_z_offset_px,
2603 &start_x_offset_el,
2604 &start_y_offset_el);
2605
2606 /* Compute the size of the subimage in surface elements */
2607 const uint32_t subimage_w_sa = isl_minify(surf->phys_level0_sa.w, level);
2608 const uint32_t subimage_h_sa = isl_minify(surf->phys_level0_sa.h, level);
2609 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2610 const uint32_t subimage_w_el = isl_align_div_npot(subimage_w_sa, fmtl->bw);
2611 const uint32_t subimage_h_el = isl_align_div_npot(subimage_h_sa, fmtl->bh);
2612
2613 /* Find the last pixel */
2614 uint32_t end_x_offset_el = start_x_offset_el + subimage_w_el - 1;
2615 uint32_t end_y_offset_el = start_y_offset_el + subimage_h_el - 1;
2616
2617 UNUSED uint32_t x_offset_el, y_offset_el;
2618 isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb,
2619 surf->row_pitch_B,
2620 start_x_offset_el,
2621 start_y_offset_el,
2622 start_tile_B,
2623 &x_offset_el,
2624 &y_offset_el);
2625
2626 isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb,
2627 surf->row_pitch_B,
2628 end_x_offset_el,
2629 end_y_offset_el,
2630 end_tile_B,
2631 &x_offset_el,
2632 &y_offset_el);
2633
2634 /* We want the range we return to be exclusive but the tile containing the
2635 * last pixel (what we just calculated) is inclusive. Add one.
2636 */
2637 (*end_tile_B)++;
2638
2639 assert(*end_tile_B <= surf->size_B);
2640 }
2641
2642 void
2643 isl_surf_get_image_surf(const struct isl_device *dev,
2644 const struct isl_surf *surf,
2645 uint32_t level,
2646 uint32_t logical_array_layer,
2647 uint32_t logical_z_offset_px,
2648 struct isl_surf *image_surf,
2649 uint32_t *offset_B,
2650 uint32_t *x_offset_sa,
2651 uint32_t *y_offset_sa)
2652 {
2653 isl_surf_get_image_offset_B_tile_sa(surf,
2654 level,
2655 logical_array_layer,
2656 logical_z_offset_px,
2657 offset_B,
2658 x_offset_sa,
2659 y_offset_sa);
2660
2661 /* Even for cube maps there will be only single face, therefore drop the
2662 * corresponding flag if present.
2663 */
2664 const isl_surf_usage_flags_t usage =
2665 surf->usage & (~ISL_SURF_USAGE_CUBE_BIT);
2666
2667 bool ok UNUSED;
2668 ok = isl_surf_init(dev, image_surf,
2669 .dim = ISL_SURF_DIM_2D,
2670 .format = surf->format,
2671 .width = isl_minify(surf->logical_level0_px.w, level),
2672 .height = isl_minify(surf->logical_level0_px.h, level),
2673 .depth = 1,
2674 .levels = 1,
2675 .array_len = 1,
2676 .samples = surf->samples,
2677 .row_pitch_B = surf->row_pitch_B,
2678 .usage = usage,
2679 .tiling_flags = (1 << surf->tiling));
2680 assert(ok);
2681 }
2682
2683 void
2684 isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,
2685 uint32_t bpb,
2686 uint32_t row_pitch_B,
2687 uint32_t total_x_offset_el,
2688 uint32_t total_y_offset_el,
2689 uint32_t *base_address_offset,
2690 uint32_t *x_offset_el,
2691 uint32_t *y_offset_el)
2692 {
2693 if (tiling == ISL_TILING_LINEAR) {
2694 assert(bpb % 8 == 0);
2695 *base_address_offset = total_y_offset_el * row_pitch_B +
2696 total_x_offset_el * (bpb / 8);
2697 *x_offset_el = 0;
2698 *y_offset_el = 0;
2699 return;
2700 }
2701
2702 struct isl_tile_info tile_info;
2703 isl_tiling_get_info(tiling, bpb, &tile_info);
2704
2705 assert(row_pitch_B % tile_info.phys_extent_B.width == 0);
2706
2707 /* For non-power-of-two formats, we need the address to be both tile and
2708 * element-aligned. The easiest way to achieve this is to work with a tile
2709 * that is three times as wide as the regular tile.
2710 *
2711 * The tile info returned by get_tile_info has a logical size that is an
2712 * integer number of tile_info.format_bpb size elements. To scale the
2713 * tile, we scale up the physical width and then treat the logical tile
2714 * size as if it has bpb size elements.
2715 */
2716 const uint32_t tile_el_scale = bpb / tile_info.format_bpb;
2717 tile_info.phys_extent_B.width *= tile_el_scale;
2718
2719 /* Compute the offset into the tile */
2720 *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w;
2721 *y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h;
2722
2723 /* Compute the offset of the tile in units of whole tiles */
2724 uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w;
2725 uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h;
2726
2727 *base_address_offset =
2728 y_offset_tl * tile_info.phys_extent_B.h * row_pitch_B +
2729 x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w;
2730 }
2731
2732 uint32_t
2733 isl_surf_get_depth_format(const struct isl_device *dev,
2734 const struct isl_surf *surf)
2735 {
2736 /* Support for separate stencil buffers began in gen5. Support for
2737 * interleaved depthstencil buffers ceased in gen7. The intermediate gens,
2738 * those that supported separate and interleaved stencil, were gen5 and
2739 * gen6.
2740 *
2741 * For a list of all available formats, see the Sandybridge PRM >> Volume
2742 * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface
2743 * Format (p321).
2744 */
2745
2746 bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT;
2747
2748 assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT);
2749
2750 if (has_stencil)
2751 assert(ISL_DEV_GEN(dev) < 7);
2752
2753 switch (surf->format) {
2754 default:
2755 unreachable("bad isl depth format");
2756 case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
2757 assert(ISL_DEV_GEN(dev) < 7);
2758 return 0; /* D32_FLOAT_S8X24_UINT */
2759 case ISL_FORMAT_R32_FLOAT:
2760 assert(!has_stencil);
2761 return 1; /* D32_FLOAT */
2762 case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
2763 if (has_stencil) {
2764 assert(ISL_DEV_GEN(dev) < 7);
2765 return 2; /* D24_UNORM_S8_UINT */
2766 } else {
2767 assert(ISL_DEV_GEN(dev) >= 5);
2768 return 3; /* D24_UNORM_X8_UINT */
2769 }
2770 case ISL_FORMAT_R16_UNORM:
2771 assert(!has_stencil);
2772 return 5; /* D16_UNORM */
2773 }
2774 }
2775
2776 bool
2777 isl_swizzle_supports_rendering(const struct gen_device_info *devinfo,
2778 struct isl_swizzle swizzle)
2779 {
2780 if (devinfo->is_haswell) {
2781 /* From the Haswell PRM,
2782 * RENDER_SURFACE_STATE::Shader Channel Select Red
2783 *
2784 * "The Shader channel selects also define which shader channels are
2785 * written to which surface channel. If the Shader channel select is
2786 * SCS_ZERO or SCS_ONE then it is not written to the surface. If the
2787 * shader channel select is SCS_RED it is written to the surface red
2788 * channel and so on. If more than one shader channel select is set
2789 * to the same surface channel only the first shader channel in RGBA
2790 * order will be written."
2791 */
2792 return true;
2793 } else if (devinfo->gen <= 7) {
2794 /* Ivy Bridge and early doesn't have any swizzling */
2795 return isl_swizzle_is_identity(swizzle);
2796 } else {
2797 /* From the Sky Lake PRM Vol. 2d,
2798 * RENDER_SURFACE_STATE::Shader Channel Select Red
2799 *
2800 * "For Render Target, Red, Green and Blue Shader Channel Selects
2801 * MUST be such that only valid components can be swapped i.e. only
2802 * change the order of components in the pixel. Any other values for
2803 * these Shader Channel Select fields are not valid for Render
2804 * Targets. This also means that there MUST not be multiple shader
2805 * channels mapped to the same RT channel."
2806 *
2807 * From the Sky Lake PRM Vol. 2d,
2808 * RENDER_SURFACE_STATE::Shader Channel Select Alpha
2809 *
2810 * "For Render Target, this field MUST be programmed to
2811 * value = SCS_ALPHA."
2812 */
2813 return (swizzle.r == ISL_CHANNEL_SELECT_RED ||
2814 swizzle.r == ISL_CHANNEL_SELECT_GREEN ||
2815 swizzle.r == ISL_CHANNEL_SELECT_BLUE) &&
2816 (swizzle.g == ISL_CHANNEL_SELECT_RED ||
2817 swizzle.g == ISL_CHANNEL_SELECT_GREEN ||
2818 swizzle.g == ISL_CHANNEL_SELECT_BLUE) &&
2819 (swizzle.b == ISL_CHANNEL_SELECT_RED ||
2820 swizzle.b == ISL_CHANNEL_SELECT_GREEN ||
2821 swizzle.b == ISL_CHANNEL_SELECT_BLUE) &&
2822 swizzle.r != swizzle.g &&
2823 swizzle.r != swizzle.b &&
2824 swizzle.g != swizzle.b &&
2825 swizzle.a == ISL_CHANNEL_SELECT_ALPHA;
2826 }
2827 }
2828
2829 static enum isl_channel_select
2830 swizzle_select(enum isl_channel_select chan, struct isl_swizzle swizzle)
2831 {
2832 switch (chan) {
2833 case ISL_CHANNEL_SELECT_ZERO:
2834 case ISL_CHANNEL_SELECT_ONE:
2835 return chan;
2836 case ISL_CHANNEL_SELECT_RED:
2837 return swizzle.r;
2838 case ISL_CHANNEL_SELECT_GREEN:
2839 return swizzle.g;
2840 case ISL_CHANNEL_SELECT_BLUE:
2841 return swizzle.b;
2842 case ISL_CHANNEL_SELECT_ALPHA:
2843 return swizzle.a;
2844 default:
2845 unreachable("Invalid swizzle component");
2846 }
2847 }
2848
2849 /**
2850 * Returns the single swizzle that is equivalent to applying the two given
2851 * swizzles in sequence.
2852 */
2853 struct isl_swizzle
2854 isl_swizzle_compose(struct isl_swizzle first, struct isl_swizzle second)
2855 {
2856 return (struct isl_swizzle) {
2857 .r = swizzle_select(first.r, second),
2858 .g = swizzle_select(first.g, second),
2859 .b = swizzle_select(first.b, second),
2860 .a = swizzle_select(first.a, second),
2861 };
2862 }
2863
2864 /**
2865 * Returns a swizzle that is the pseudo-inverse of this swizzle.
2866 */
2867 struct isl_swizzle
2868 isl_swizzle_invert(struct isl_swizzle swizzle)
2869 {
2870 /* Default to zero for channels which do not show up in the swizzle */
2871 enum isl_channel_select chans[4] = {
2872 ISL_CHANNEL_SELECT_ZERO,
2873 ISL_CHANNEL_SELECT_ZERO,
2874 ISL_CHANNEL_SELECT_ZERO,
2875 ISL_CHANNEL_SELECT_ZERO,
2876 };
2877
2878 /* We go in ABGR order so that, if there are any duplicates, the first one
2879 * is taken if you look at it in RGBA order. This is what Haswell hardware
2880 * does for render target swizzles.
2881 */
2882 if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4)
2883 chans[swizzle.a - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_ALPHA;
2884 if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4)
2885 chans[swizzle.b - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_BLUE;
2886 if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4)
2887 chans[swizzle.g - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_GREEN;
2888 if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4)
2889 chans[swizzle.r - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_RED;
2890
2891 return (struct isl_swizzle) { chans[0], chans[1], chans[2], chans[3] };
2892 }
2893
2894 /** Applies an inverse swizzle to a color value */
2895 union isl_color_value
2896 isl_color_value_swizzle_inv(union isl_color_value src,
2897 struct isl_swizzle swizzle)
2898 {
2899 union isl_color_value dst = { .u32 = { 0, } };
2900
2901 /* We assign colors in ABGR order so that the first one will be taken in
2902 * RGBA precedence order. According to the PRM docs for shader channel
2903 * select, this matches Haswell hardware behavior.
2904 */
2905 if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4)
2906 dst.u32[swizzle.a - ISL_CHANNEL_SELECT_RED] = src.u32[3];
2907 if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4)
2908 dst.u32[swizzle.b - ISL_CHANNEL_SELECT_RED] = src.u32[2];
2909 if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4)
2910 dst.u32[swizzle.g - ISL_CHANNEL_SELECT_RED] = src.u32[1];
2911 if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4)
2912 dst.u32[swizzle.r - ISL_CHANNEL_SELECT_RED] = src.u32[0];
2913
2914 return dst;
2915 }
2916
2917 uint8_t
2918 isl_format_get_aux_map_encoding(enum isl_format format)
2919 {
2920 switch(format) {
2921 case ISL_FORMAT_R32G32B32A32_FLOAT: return 0x11;
2922 case ISL_FORMAT_R32G32B32X32_FLOAT: return 0x11;
2923 case ISL_FORMAT_R32G32B32A32_SINT: return 0x12;
2924 case ISL_FORMAT_R32G32B32A32_UINT: return 0x13;
2925 case ISL_FORMAT_R16G16B16A16_UNORM: return 0x14;
2926 case ISL_FORMAT_R16G16B16A16_SNORM: return 0x15;
2927 case ISL_FORMAT_R16G16B16A16_SINT: return 0x16;
2928 case ISL_FORMAT_R16G16B16A16_UINT: return 0x17;
2929 case ISL_FORMAT_R16G16B16A16_FLOAT: return 0x10;
2930 case ISL_FORMAT_R16G16B16X16_FLOAT: return 0x10;
2931 case ISL_FORMAT_R32G32_FLOAT: return 0x11;
2932 case ISL_FORMAT_R32G32_SINT: return 0x12;
2933 case ISL_FORMAT_R32G32_UINT: return 0x13;
2934 case ISL_FORMAT_B8G8R8A8_UNORM: return 0xA;
2935 case ISL_FORMAT_B8G8R8X8_UNORM: return 0xA;
2936 case ISL_FORMAT_B8G8R8A8_UNORM_SRGB: return 0xA;
2937 case ISL_FORMAT_B8G8R8X8_UNORM_SRGB: return 0xA;
2938 case ISL_FORMAT_R10G10B10A2_UNORM: return 0x18;
2939 case ISL_FORMAT_R10G10B10A2_UNORM_SRGB: return 0x18;
2940 case ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM: return 0x19;
2941 case ISL_FORMAT_R10G10B10A2_UINT: return 0x1A;
2942 case ISL_FORMAT_R8G8B8A8_UNORM: return 0xA;
2943 case ISL_FORMAT_R8G8B8A8_UNORM_SRGB: return 0xA;
2944 case ISL_FORMAT_R8G8B8A8_SNORM: return 0x1B;
2945 case ISL_FORMAT_R8G8B8A8_SINT: return 0x1C;
2946 case ISL_FORMAT_R8G8B8A8_UINT: return 0x1D;
2947 case ISL_FORMAT_R16G16_UNORM: return 0x14;
2948 case ISL_FORMAT_R16G16_SNORM: return 0x15;
2949 case ISL_FORMAT_R16G16_SINT: return 0x16;
2950 case ISL_FORMAT_R16G16_UINT: return 0x17;
2951 case ISL_FORMAT_R16G16_FLOAT: return 0x10;
2952 case ISL_FORMAT_B10G10R10A2_UNORM: return 0x18;
2953 case ISL_FORMAT_B10G10R10A2_UNORM_SRGB: return 0x18;
2954 case ISL_FORMAT_R11G11B10_FLOAT: return 0x1E;
2955 case ISL_FORMAT_R32_SINT: return 0x12;
2956 case ISL_FORMAT_R32_UINT: return 0x13;
2957 case ISL_FORMAT_R32_FLOAT: return 0x11;
2958 case ISL_FORMAT_R24_UNORM_X8_TYPELESS: return 0x11;
2959 case ISL_FORMAT_B5G6R5_UNORM: return 0xA;
2960 case ISL_FORMAT_B5G6R5_UNORM_SRGB: return 0xA;
2961 case ISL_FORMAT_B5G5R5A1_UNORM: return 0xA;
2962 case ISL_FORMAT_B5G5R5A1_UNORM_SRGB: return 0xA;
2963 case ISL_FORMAT_B4G4R4A4_UNORM: return 0xA;
2964 case ISL_FORMAT_B4G4R4A4_UNORM_SRGB: return 0xA;
2965 case ISL_FORMAT_R8G8_UNORM: return 0xA;
2966 case ISL_FORMAT_R8G8_SNORM: return 0x1B;
2967 case ISL_FORMAT_R8G8_SINT: return 0x1C;
2968 case ISL_FORMAT_R8G8_UINT: return 0x1D;
2969 case ISL_FORMAT_R16_UNORM: return 0x14;
2970 case ISL_FORMAT_R16_SNORM: return 0x15;
2971 case ISL_FORMAT_R16_SINT: return 0x16;
2972 case ISL_FORMAT_R16_UINT: return 0x17;
2973 case ISL_FORMAT_R16_FLOAT: return 0x10;
2974 case ISL_FORMAT_B5G5R5X1_UNORM: return 0xA;
2975 case ISL_FORMAT_B5G5R5X1_UNORM_SRGB: return 0xA;
2976 case ISL_FORMAT_A1B5G5R5_UNORM: return 0xA;
2977 case ISL_FORMAT_A4B4G4R4_UNORM: return 0xA;
2978 case ISL_FORMAT_R8_UNORM: return 0xA;
2979 case ISL_FORMAT_R8_SNORM: return 0x1B;
2980 case ISL_FORMAT_R8_SINT: return 0x1C;
2981 case ISL_FORMAT_R8_UINT: return 0x1D;
2982 case ISL_FORMAT_A8_UNORM: return 0xA;
2983 default:
2984 unreachable("Unsupported aux-map format!");
2985 return 0;
2986 }
2987 }