/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_resolve.c
 *
 * This file handles resolve tracking for main and auxiliary surfaces.
 *
 * It also handles our cache tracking. We have sets for the render cache,
 * depth cache, and so on. If a BO is in a cache's set, then it may have
 * data in that cache. The helpers take care of emitting flushes for
 * render-to-texture, format reinterpretation issues, and other situations.
 */

#include "util/hash_table.h"
#include "util/set.h"
#include "iris_context.h"
#include "compiler/nir/nir.h"

/**
 * Disable auxiliary buffers if a renderbuffer is also bound as a texture
 * or shader image. This causes a self-dependency, where both rendering
 * and sampling may concurrently read or write the CCS buffer, causing
 * incorrect pixels.
 */
static bool
disable_rb_aux_buffer(struct iris_context *ice,
                      bool *draw_aux_buffer_disabled,
                      struct iris_resource *tex_res,
                      unsigned min_level, unsigned num_levels,
                      const char *usage)
{
   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
   bool found = false;

   /* We only need to worry about color compression and fast clears. */
   if (tex_res->aux.usage != ISL_AUX_USAGE_CCS_D &&
       tex_res->aux.usage != ISL_AUX_USAGE_CCS_E)
      return false;

   for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
      struct iris_surface *surf = (void *) cso_fb->cbufs[i];
      if (!surf)
         continue;

      struct iris_resource *rb_res = (void *) surf->base.texture;

      if (rb_res->bo == tex_res->bo &&
          surf->base.u.tex.level >= min_level &&
          surf->base.u.tex.level < min_level + num_levels) {
         found = draw_aux_buffer_disabled[i] = true;
      }
   }

   if (found) {
      perf_debug(&ice->dbg,
                 "Disabling CCS because a renderbuffer is also bound %s.\n",
                 usage);
   }

   return found;
}

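/**
 * Resolve any needed auxiliary surfaces for the textures bound to a shader
 * stage: disable render target aux buffers that would create a
 * self-dependency, prepare each texture for sampling, and flush caches for
 * the upcoming reads.
 */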
static void
resolve_sampler_views(struct iris_context *ice,
                      struct iris_batch *batch,
                      struct iris_shader_state *shs,
                      const struct shader_info *info,
                      bool *draw_aux_buffer_disabled,
                      bool consider_framebuffer)
{
   uint32_t views = info ? (shs->bound_sampler_views & info->textures_used) : 0;

   unsigned astc5x5_wa_bits = 0; // XXX: actual tracking

   while (views) {
      const int i = u_bit_scan(&views);
      struct iris_sampler_view *isv = shs->textures[i];
      struct iris_resource *res = (void *) isv->base.texture;

      if (res->base.target != PIPE_BUFFER) {
         if (consider_framebuffer) {
            disable_rb_aux_buffer(ice, draw_aux_buffer_disabled,
                                  res, isv->view.base_level, isv->view.levels,
                                  "for sampling");
         }

         iris_resource_prepare_texture(ice, batch, res, isv->view.format,
                                       isv->view.base_level, isv->view.levels,
                                       isv->view.base_array_layer,
                                       isv->view.array_len,
                                       astc5x5_wa_bits);
      }

      iris_cache_flush_for_read(batch, res->bo);
   }
}

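/**
 * Like resolve_sampler_views(), but for shader images. The data port
 * doesn't understand compression, so each image is resolved to the
 * uncompressed (ISL_AUX_USAGE_NONE) state before use.
 */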
static void
resolve_image_views(struct iris_context *ice,
                    struct iris_batch *batch,
                    struct iris_shader_state *shs,
                    bool *draw_aux_buffer_disabled,
                    bool consider_framebuffer)
{
   /* TODO: Consider images used by program */
   uint32_t views = shs->bound_image_views;

   while (views) {
      const int i = u_bit_scan(&views);
      struct pipe_image_view *pview = &shs->image[i].base;
      struct iris_resource *res = (void *) pview->resource;

      if (res->base.target != PIPE_BUFFER) {
         if (consider_framebuffer) {
            disable_rb_aux_buffer(ice, draw_aux_buffer_disabled,
                                  res, pview->u.tex.level, 1,
                                  "as a shader image");
         }

         unsigned num_layers =
            pview->u.tex.last_layer - pview->u.tex.first_layer + 1;

         /* The data port doesn't understand any compression */
         iris_resource_prepare_access(ice, batch, res,
                                      pview->u.tex.level, 1,
                                      pview->u.tex.first_layer, num_layers,
                                      ISL_AUX_USAGE_NONE, false);
      }

      iris_cache_flush_for_read(batch, res->bo);
   }
}

/**
 * \brief Resolve buffers before drawing.
 *
 * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
 * enabled depth texture, and flush the render cache for any dirty textures.
 */
void
iris_predraw_resolve_inputs(struct iris_context *ice,
                            struct iris_batch *batch,
                            bool *draw_aux_buffer_disabled,
                            gl_shader_stage stage,
                            bool consider_framebuffer)
{
   struct iris_shader_state *shs = &ice->state.shaders[stage];
   const struct shader_info *info = iris_get_shader_info(ice, stage);

   uint64_t dirty = (IRIS_DIRTY_BINDINGS_VS << stage) |
                    (consider_framebuffer ? IRIS_DIRTY_BINDINGS_FS : 0);

   if (ice->state.dirty & dirty) {
      resolve_sampler_views(ice, batch, shs, info, draw_aux_buffer_disabled,
                            consider_framebuffer);
      resolve_image_views(ice, batch, shs, draw_aux_buffer_disabled,
                          consider_framebuffer);
   }

   // XXX: ASTC hacks
}

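/**
 * \brief Resolve the framebuffer attachments before drawing.
 *
 * Prepare the depth/stencil buffers for rendering, resolve color buffers
 * that the fragment shader reads on Gen8 (likely framebuffer fetch), and
 * pick an aux usage for each render target, resolving as necessary.
 */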
void
iris_predraw_resolve_framebuffer(struct iris_context *ice,
                                 struct iris_batch *batch,
                                 bool *draw_aux_buffer_disabled)
{
   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
   struct iris_screen *screen = (void *) ice->ctx.screen;
   struct gen_device_info *devinfo = &screen->devinfo;
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   const nir_shader *nir = ish->nir;

   if (ice->state.dirty & IRIS_DIRTY_DEPTH_BUFFER) {
      struct pipe_surface *zs_surf = cso_fb->zsbuf;

      if (zs_surf) {
         struct iris_resource *z_res, *s_res;
         iris_get_depth_stencil_resources(zs_surf->texture, &z_res, &s_res);
         unsigned num_layers =
            zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1;

         if (z_res) {
            iris_resource_prepare_depth(ice, batch, z_res,
                                        zs_surf->u.tex.level,
                                        zs_surf->u.tex.first_layer,
                                        num_layers);
            iris_cache_flush_for_depth(batch, z_res->bo);
         }

         if (s_res) {
            iris_cache_flush_for_depth(batch, s_res->bo);
         }
      }
   }

   if (devinfo->gen == 8 && nir->info.outputs_read != 0) {
      for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
         if (cso_fb->cbufs[i]) {
            struct iris_surface *surf = (void *) cso_fb->cbufs[i];
            struct iris_resource *res = (void *) cso_fb->cbufs[i]->texture;

            iris_resource_prepare_texture(ice, batch, res, surf->view.format,
                                          surf->view.base_level, 1,
                                          surf->view.base_array_layer,
                                          surf->view.array_len,
                                          0);
         }
      }
   }

   if (ice->state.dirty & (IRIS_DIRTY_BINDINGS_FS | IRIS_DIRTY_BLEND_STATE)) {
      for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
         struct iris_surface *surf = (void *) cso_fb->cbufs[i];
         if (!surf)
            continue;

         struct iris_resource *res = (void *) surf->base.texture;

         enum isl_aux_usage aux_usage =
            iris_resource_render_aux_usage(ice, res, surf->view.format,
                                           ice->state.blend_enables & (1u << i),
                                           draw_aux_buffer_disabled[i]);

         if (ice->state.draw_aux_usage[i] != aux_usage) {
            ice->state.draw_aux_usage[i] = aux_usage;
            /* XXX: Need to track which bindings to make dirty */
            ice->state.dirty |= IRIS_ALL_DIRTY_BINDINGS;
         }

         iris_resource_prepare_render(ice, batch, res, surf->view.base_level,
                                      surf->view.base_array_layer,
                                      surf->view.array_len,
                                      aux_usage);

         iris_cache_flush_for_render(batch, res->bo, surf->view.format,
                                     aux_usage);
      }
   }
}

/**
 * \brief Call this after drawing to mark which buffers need resolving.
 *
 * If the depth buffer was written to and if it has an accompanying HiZ
 * buffer, then mark that it needs a depth resolve.
 *
 * If the color buffer is a multisample window system buffer, then
 * mark that it needs a downsample.
 *
 * Also mark any render targets which will be textured as needing a render
 * cache flush.
 */
void
iris_postdraw_update_resolve_tracking(struct iris_context *ice,
                                      struct iris_batch *batch)
{
   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;

   // XXX: front buffer drawing?

   bool may_have_resolved_depth =
      ice->state.dirty & (IRIS_DIRTY_DEPTH_BUFFER |
                          IRIS_DIRTY_WM_DEPTH_STENCIL);

   struct pipe_surface *zs_surf = cso_fb->zsbuf;
   if (zs_surf) {
      struct iris_resource *z_res, *s_res;
      iris_get_depth_stencil_resources(zs_surf->texture, &z_res, &s_res);
      unsigned num_layers =
         zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1;

      if (z_res) {
         if (may_have_resolved_depth) {
            iris_resource_finish_depth(ice, z_res, zs_surf->u.tex.level,
                                       zs_surf->u.tex.first_layer, num_layers,
                                       ice->state.depth_writes_enabled);
         }

         if (ice->state.depth_writes_enabled)
            iris_depth_cache_add_bo(batch, z_res->bo);
      }

      if (s_res) {
         if (may_have_resolved_depth) {
            iris_resource_finish_write(ice, s_res, zs_surf->u.tex.level,
                                       zs_surf->u.tex.first_layer, num_layers,
                                       ISL_AUX_USAGE_NONE);
         }

         if (ice->state.stencil_writes_enabled)
            iris_depth_cache_add_bo(batch, s_res->bo);
      }
   }

   bool may_have_resolved_color =
      ice->state.dirty & (IRIS_DIRTY_BINDINGS_FS | IRIS_DIRTY_BLEND_STATE);

   for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
      struct iris_surface *surf = (void *) cso_fb->cbufs[i];
      if (!surf)
         continue;

      struct iris_resource *res = (void *) surf->base.texture;
      enum isl_aux_usage aux_usage = ice->state.draw_aux_usage[i];

      iris_render_cache_add_bo(batch, res->bo, surf->view.format,
                               aux_usage);

      if (may_have_resolved_color) {
         union pipe_surface_desc *desc = &surf->base.u;
         unsigned num_layers =
            desc->tex.last_layer - desc->tex.first_layer + 1;
         iris_resource_finish_render(ice, res, desc->tex.level,
                                     desc->tex.first_layer, num_layers,
                                     aux_usage);
      }
   }
}

/**
 * Clear the cache-tracking sets.
 */
void
iris_cache_sets_clear(struct iris_batch *batch)
{
   hash_table_foreach(batch->cache.render, render_entry)
      _mesa_hash_table_remove(batch->cache.render, render_entry);

   set_foreach(batch->cache.depth, depth_entry)
      _mesa_set_remove(batch->cache.depth, depth_entry);
}

/**
 * Emits an appropriate flush for a BO if it has been rendered to within the
 * same batchbuffer as a read that's about to be emitted.
 *
 * The GPU has separate, incoherent caches for the render cache and the
 * sampler cache, along with other caches. Usually data in the different
 * caches don't interact (e.g. we don't render to our driver-generated
 * immediate constant data), but for render-to-texture in FBOs we definitely
 * do. When a batchbuffer is flushed, the kernel will ensure that everything
 * necessary is flushed before another use of that BO, but for reuse from
 * different caches within a batchbuffer, it's all our responsibility.
 */
void
iris_flush_depth_and_render_caches(struct iris_batch *batch)
{
   iris_emit_pipe_control_flush(batch,
                                "cache tracker: render-to-texture",
                                PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                PIPE_CONTROL_CS_STALL);

   iris_emit_pipe_control_flush(batch,
                                "cache tracker: render-to-texture",
                                PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                PIPE_CONTROL_CONST_CACHE_INVALIDATE);

   iris_cache_sets_clear(batch);
}

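/**
 * Flush the depth and render caches if the given BO may be dirty in either
 * one, so that a read about to be emitted sees the rendered data.
 */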
void
iris_cache_flush_for_read(struct iris_batch *batch,
                          struct iris_bo *bo)
{
   if (_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo) ||
       _mesa_set_search_pre_hashed(batch->cache.depth, bo->hash, bo))
      iris_flush_depth_and_render_caches(batch);
}

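/**
 * Pack a (format, aux usage) pair into a pointer-sized value so it can be
 * stored as the data of a render cache hash table entry. Two accesses
 * match only if both the format and the aux usage are identical.
 */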
static void *
format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage)
{
   return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage);
}

void
iris_cache_flush_for_render(struct iris_batch *batch,
                            struct iris_bo *bo,
                            enum isl_format format,
                            enum isl_aux_usage aux_usage)
{
   if (_mesa_set_search_pre_hashed(batch->cache.depth, bo->hash, bo))
      iris_flush_depth_and_render_caches(batch);

   /* Check to see if this bo has been used by a previous rendering operation
    * but with a different format or aux usage. If it has, flush the render
    * cache so we ensure that it's only in there with one format or aux usage
    * at a time.
    *
    * Even though it's not obvious, this can easily happen in practice.
    * Suppose a client is blending on a surface with sRGB encode enabled on
    * gen9. This implies that you get AUX_USAGE_CCS_D at best. If the client
    * then disables sRGB decode and continues blending we will flip on
    * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is
    * perfectly valid since CCS_D is a subset of CCS_E). However, this means
    * that we have fragments in-flight which are rendering with UNORM+CCS_E
    * and other fragments in-flight with SRGB+CCS_D on the same surface at the
    * same time and the pixel scoreboard and color blender are trying to sort
    * it all out. This ends badly (i.e. GPU hangs).
    *
    * To date, we have never observed GPU hangs or even corruption to be
    * associated with switching the format, only the aux usage. However,
    * there are comments in various docs which indicate that the render cache
    * isn't 100% resilient to format changes. We may as well be conservative
    * and flush on format changes too. We can always relax this later if we
    * find it to be a performance problem.
    */
   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo);
   if (entry && entry->data != format_aux_tuple(format, aux_usage))
      iris_flush_depth_and_render_caches(batch);
}

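/**
 * Record that a BO is about to be written through the render cache with
 * the given format and aux usage. The caller must already have called
 * iris_cache_flush_for_render(); the assert below catches violations.
 */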
void
iris_render_cache_add_bo(struct iris_batch *batch,
                         struct iris_bo *bo,
                         enum isl_format format,
                         enum isl_aux_usage aux_usage)
{
#ifndef NDEBUG
   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo);
   if (entry) {
      /* Otherwise, someone didn't do a flush_for_render and that would be
       * very bad indeed.
       */
      assert(entry->data == format_aux_tuple(format, aux_usage));
   }
#endif

   _mesa_hash_table_insert_pre_hashed(batch->cache.render, bo->hash, bo,
                                      format_aux_tuple(format, aux_usage));
}

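/**
 * Flush if the BO may be dirty in the render cache, so that an upcoming
 * depth access sees the rendered data.
 */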
void
iris_cache_flush_for_depth(struct iris_batch *batch,
                           struct iris_bo *bo)
{
   if (_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo))
      iris_flush_depth_and_render_caches(batch);
}

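/** Record that a BO will be written through the depth cache. */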
void
iris_depth_cache_add_bo(struct iris_batch *batch, struct iris_bo *bo)
{
   _mesa_set_add_pre_hashed(batch->cache.depth, bo->hash, bo);
}

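/**
 * Perform a blorp CCS resolve (full or partial) on a single level/layer of
 * a color surface, with the end-of-pipe synchronization that fast clear
 * operations require (see the PRM quote below).
 */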
static void
iris_resolve_color(struct iris_context *ice,
                   struct iris_batch *batch,
                   struct iris_resource *res,
                   unsigned level, unsigned layer,
                   enum isl_aux_op resolve_op)
{
   //DBG("%s to mt %p level %u layer %u\n", __FUNCTION__, mt, level, layer);

   struct blorp_surf surf;
   iris_blorp_surf_for_resource(&ice->vtbl, &surf, &res->base, res->aux.usage,
                                level, true);

   iris_batch_maybe_flush(batch, 1500);

   /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
    *
    *    "Any transition from any value in {Clear, Render, Resolve} to a
    *     different value in {Clear, Render, Resolve} requires end of pipe
    *     synchronization."
    *
    * In other words, fast clear ops are not properly synchronized with
    * other drawing. We need to use a PIPE_CONTROL to ensure that the
    * contents of the previous draw hit the render target before we resolve
    * and again afterwards to ensure that the resolve is complete before we
    * do any more regular drawing.
    */
   iris_emit_end_of_pipe_sync(batch, "color resolve: pre-flush",
                              PIPE_CONTROL_RENDER_TARGET_FLUSH);

   struct blorp_batch blorp_batch;
   blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
   blorp_ccs_resolve(&blorp_batch, &surf, level, layer, 1,
                     isl_format_srgb_to_linear(res->surf.format),
                     resolve_op);
   blorp_batch_finish(&blorp_batch);

   /* See comment above */
   iris_emit_end_of_pipe_sync(batch, "color resolve: post-flush",
                              PIPE_CONTROL_RENDER_TARGET_FLUSH);
}

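/**
 * Perform a blorp MCS partial resolve on a range of layers of an MSAA
 * color surface, resolving away fast-clear values while leaving MSAA
 * compression intact.
 */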
static void
iris_mcs_partial_resolve(struct iris_context *ice,
                         struct iris_batch *batch,
                         struct iris_resource *res,
                         uint32_t start_layer,
                         uint32_t num_layers)
{
   //DBG("%s to mt %p layers %u-%u\n", __FUNCTION__, mt,
   //    start_layer, start_layer + num_layers - 1);

   assert(res->aux.usage == ISL_AUX_USAGE_MCS);

   struct blorp_surf surf;
   iris_blorp_surf_for_resource(&ice->vtbl, &surf, &res->base, res->aux.usage,
                                0, true);

   struct blorp_batch blorp_batch;
   blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
   blorp_mcs_partial_resolve(&blorp_batch, &surf,
                             isl_format_srgb_to_linear(res->surf.format),
                             start_layer, num_layers);
   blorp_batch_finish(&blorp_batch);
}

/**
 * Return true if the format that will be used to access the resource is
 * CCS_E-compatible with the resource's linear/non-sRGB format.
 *
 * Why use the linear format? Well, although the resource may be specified
 * with an sRGB format, the usage of that color space/format can be toggled.
 * Since our HW tends to support more linear formats than sRGB ones, we use
 * this format variant to check for CCS_E compatibility.
 */
static bool
format_ccs_e_compat_with_resource(const struct gen_device_info *devinfo,
                                  const struct iris_resource *res,
                                  enum isl_format access_format)
{
   assert(res->aux.usage == ISL_AUX_USAGE_CCS_E);

   enum isl_format isl_format = isl_format_srgb_to_linear(res->surf.format);
   return isl_formats_are_ccs_e_compatible(devinfo, isl_format, access_format);
}

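/**
 * Can the sampler read this depth resource directly through HiZ, without
 * requiring a full depth resolve first?
 */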
static bool
sample_with_hiz(const struct gen_device_info *devinfo,
                const struct iris_resource *res)
{
   if (!devinfo->has_sample_with_hiz)
      return false;

   if (res->aux.usage != ISL_AUX_USAGE_HIZ)
      return false;

   /* It seems the hardware won't fall back to the depth buffer if some of
    * the mipmap levels aren't available in the HiZ buffer. So we need all
    * levels of the texture to be HiZ enabled.
    */
   for (unsigned level = 0; level < res->surf.levels; ++level) {
      if (!iris_resource_level_has_hiz(res, level))
         return false;
   }

   /* If compressed multisampling is enabled, then we use it for the
    * auxiliary buffer instead.
    *
    * From the BDW PRM (Volume 2d: Command Reference: Structures
    * RENDER_SURFACE_STATE.AuxiliarySurfaceMode):
    *
    *    "If this field is set to AUX_HIZ, Number of Multisamples must be
    *     MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
    *
    * There is no such blurb for 1D textures, but there is sufficient
    * evidence that this is broken on SKL+.
    */
   // XXX: i965 disables this for arrays too, is that reasonable?
   return res->surf.samples == 1 && res->surf.dim == ISL_SURF_DIM_2D;
}

/**
 * Perform a HiZ or depth resolve operation.
 *
 * For an overview of HiZ ops, see the following sections of the Sandy Bridge
 * PRM, Volume 1, Part 2:
 *   - 7.5.3.1 Depth Buffer Clear
 *   - 7.5.3.2 Depth Buffer Resolve
 *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
 */
void
iris_hiz_exec(struct iris_context *ice,
              struct iris_batch *batch,
              struct iris_resource *res,
              unsigned int level, unsigned int start_layer,
              unsigned int num_layers, enum isl_aux_op op,
              bool update_clear_depth)
{
   assert(iris_resource_level_has_hiz(res, level));
   assert(op != ISL_AUX_OP_NONE);
   UNUSED const char *name = NULL;

   switch (op) {
   case ISL_AUX_OP_FULL_RESOLVE:
      name = "depth resolve";
      break;
   case ISL_AUX_OP_AMBIGUATE:
      name = "hiz ambiguate";
      break;
   case ISL_AUX_OP_FAST_CLEAR:
      name = "depth clear";
      break;
   case ISL_AUX_OP_PARTIAL_RESOLVE:
   case ISL_AUX_OP_NONE:
      unreachable("Invalid HiZ op");
   }

   //DBG("%s %s to mt %p level %d layers %d-%d\n",
   //    __func__, name, mt, level, start_layer, start_layer + num_layers - 1);

   /* The following stalls and flushes are only documented to be required
    * for HiZ clear operations. However, they also seem to be required for
    * resolve operations.
    *
    * From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
    *
    *    "If other rendering operations have preceded this clear, a
    *     PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
    *     enabled must be issued before the rectangle primitive used for
    *     the depth buffer clear operation."
    *
    * Same applies for Gen8 and Gen9.
    *
    * In addition, from the Ivybridge PRM, volume 2, 1.10.4.1
    * PIPE_CONTROL, Depth Cache Flush Enable:
    *
    *    "This bit must not be set when Depth Stall Enable bit is set in
    *     this packet."
    *
    * This is confirmed to hold for real, Haswell gets immediate gpu hangs.
    *
    * Therefore issue two pipe control flushes, one for cache flush and
    * another for depth stall.
    */
   iris_emit_pipe_control_flush(batch,
                                "hiz op: pre-flushes (1/2)",
                                PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                PIPE_CONTROL_CS_STALL);

   iris_emit_pipe_control_flush(batch, "hiz op: pre-flushes (2/2)",
                                PIPE_CONTROL_DEPTH_STALL);

   assert(isl_aux_usage_has_hiz(res->aux.usage) && res->aux.bo);

   iris_batch_maybe_flush(batch, 1500);

   struct blorp_surf surf;
   iris_blorp_surf_for_resource(&ice->vtbl, &surf, &res->base,
                                res->aux.usage, level, true);

   struct blorp_batch blorp_batch;
   enum blorp_batch_flags flags = 0;
   flags |= update_clear_depth ? 0 : BLORP_BATCH_NO_UPDATE_CLEAR_COLOR;
   blorp_batch_init(&ice->blorp, &blorp_batch, batch, flags);
   blorp_hiz_op(&blorp_batch, &surf, level, start_layer, num_layers, op);
   blorp_batch_finish(&blorp_batch);

   /* The following stalls and flushes are only documented to be required
    * for HiZ clear operations. However, they also seem to be required for
    * resolve operations.
    *
    * From the Broadwell PRM, volume 7, "Depth Buffer Clear":
    *
    *    "Depth buffer clear pass using any of the methods (WM_STATE,
    *     3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
    *     PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
    *     "set" before starting to render. DepthStall and DepthFlush are
    *     not needed between consecutive depth clear passes nor is it
    *     required if the depth clear pass was done with
    *     'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP."
    *
    * TODO: As the spec says, this could be conditional.
    */
   iris_emit_pipe_control_flush(batch,
                                "hiz op: post flush",
                                PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                PIPE_CONTROL_DEPTH_STALL);
}

/**
 * Does the resource's level have HiZ enabled?
 */
bool
iris_resource_level_has_hiz(const struct iris_resource *res, uint32_t level)
{
   iris_resource_check_level_layer(res, level, 0);
   return res->aux.has_hiz & (1 << level);
}

/** \brief Assert that the level and layer are valid for the resource. */
void
iris_resource_check_level_layer(UNUSED const struct iris_resource *res,
                                UNUSED uint32_t level, UNUSED uint32_t layer)
{
   assert(level < res->surf.levels);
   assert(layer < util_num_layers(&res->base, level));
}

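/**
 * Normalize a level range: expand INTEL_REMAINING_LEVELS into a concrete
 * count and assert that the range fits within the resource.
 */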
static inline uint32_t
miptree_level_range_length(const struct iris_resource *res,
                           uint32_t start_level, uint32_t num_levels)
{
   assert(start_level < res->surf.levels);

   if (num_levels == INTEL_REMAINING_LEVELS)
      num_levels = res->surf.levels - start_level;

   /* Check for overflow */
   assert(start_level + num_levels >= start_level);
   assert(start_level + num_levels <= res->surf.levels);

   return num_levels;
}

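/**
 * Normalize a layer range at the given level: expand
 * INTEL_REMAINING_LAYERS into a concrete count and assert that the range
 * fits within the level.
 */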
static inline uint32_t
miptree_layer_range_length(const struct iris_resource *res, uint32_t level,
                           uint32_t start_layer, uint32_t num_layers)
{
   assert(level <= res->base.last_level);

   const uint32_t total_num_layers = iris_get_num_logical_layers(res, level);
   assert(start_layer < total_num_layers);
   if (num_layers == INTEL_REMAINING_LAYERS)
      num_layers = total_num_layers - start_layer;
   /* Check for overflow */
   assert(start_layer + num_layers >= start_layer);
   assert(start_layer + num_layers <= total_num_layers);

   return num_layers;
}

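/**
 * Return true if any layer in the given range is in a state other than
 * pass-through, i.e. its contents cannot be read correctly without
 * consulting (or resolving) the auxiliary buffer.
 */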
bool
iris_has_color_unresolved(const struct iris_resource *res,
                          unsigned start_level, unsigned num_levels,
                          unsigned start_layer, unsigned num_layers)
{
   if (!res->aux.bo)
      return false;

   /* Normalize the level range to fit the resource */
   num_levels = miptree_level_range_length(res, start_level, num_levels);

   for (uint32_t l = 0; l < num_levels; l++) {
      const uint32_t level = start_level + l;
      const uint32_t level_layers =
         miptree_layer_range_length(res, level, start_layer, num_layers);
      for (unsigned a = 0; a < level_layers; a++) {
         enum isl_aux_state aux_state =
            iris_resource_get_aux_state(res, level, start_layer + a);
         assert(aux_state != ISL_AUX_STATE_AUX_INVALID);
         if (aux_state != ISL_AUX_STATE_PASS_THROUGH)
            return true;
      }
   }

   return false;
}

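/**
 * Given the current aux state of a CCS_D surface and the aux usage and
 * fast-clear support of an upcoming access, return the resolve operation
 * (if any) required before that access.
 */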
static enum isl_aux_op
get_ccs_d_resolve_op(enum isl_aux_state aux_state,
                     enum isl_aux_usage aux_usage,
                     bool fast_clear_supported)
{
   assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_CCS_D);

   const bool ccs_supported =
      (aux_usage == ISL_AUX_USAGE_CCS_D) && fast_clear_supported;

   switch (aux_state) {
   case ISL_AUX_STATE_CLEAR:
   case ISL_AUX_STATE_PARTIAL_CLEAR:
      if (!ccs_supported)
         return ISL_AUX_OP_FULL_RESOLVE;
      else
         return ISL_AUX_OP_NONE;

   case ISL_AUX_STATE_PASS_THROUGH:
      return ISL_AUX_OP_NONE;

   case ISL_AUX_STATE_RESOLVED:
   case ISL_AUX_STATE_AUX_INVALID:
   case ISL_AUX_STATE_COMPRESSED_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      break;
   }

   unreachable("Invalid aux state for CCS_D");
}

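/**
 * Like get_ccs_d_resolve_op(), but for CCS_E surfaces. A partial resolve
 * eliminates fast-clear blocks but keeps the surface compressed; a full
 * resolve returns it to the pass-through state.
 */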
static enum isl_aux_op
get_ccs_e_resolve_op(enum isl_aux_state aux_state,
                     enum isl_aux_usage aux_usage,
                     bool fast_clear_supported)
{
   /* CCS_E surfaces can be accessed as CCS_D if we're careful. */
   assert(aux_usage == ISL_AUX_USAGE_NONE ||
          aux_usage == ISL_AUX_USAGE_CCS_D ||
          aux_usage == ISL_AUX_USAGE_CCS_E);

   switch (aux_state) {
   case ISL_AUX_STATE_CLEAR:
   case ISL_AUX_STATE_PARTIAL_CLEAR:
      if (fast_clear_supported)
         return ISL_AUX_OP_NONE;
      else if (aux_usage == ISL_AUX_USAGE_CCS_E)
         return ISL_AUX_OP_PARTIAL_RESOLVE;
      else
         return ISL_AUX_OP_FULL_RESOLVE;

   case ISL_AUX_STATE_COMPRESSED_CLEAR:
      if (aux_usage != ISL_AUX_USAGE_CCS_E)
         return ISL_AUX_OP_FULL_RESOLVE;
      else if (!fast_clear_supported)
         return ISL_AUX_OP_PARTIAL_RESOLVE;
      else
         return ISL_AUX_OP_NONE;

   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      if (aux_usage != ISL_AUX_USAGE_CCS_E)
         return ISL_AUX_OP_FULL_RESOLVE;
      else
         return ISL_AUX_OP_NONE;

   case ISL_AUX_STATE_PASS_THROUGH:
      return ISL_AUX_OP_NONE;

   case ISL_AUX_STATE_RESOLVED:
   case ISL_AUX_STATE_AUX_INVALID:
      break;
   }

   unreachable("Invalid aux state for CCS_E");
}

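/**
 * Perform any CCS resolve needed before accessing one level/layer of a
 * CCS_D or CCS_E surface with the given aux usage, then update the tracked
 * aux state to match.
 */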
static void
iris_resource_prepare_ccs_access(struct iris_context *ice,
                                 struct iris_batch *batch,
                                 struct iris_resource *res,
                                 uint32_t level, uint32_t layer,
                                 enum isl_aux_usage aux_usage,
                                 bool fast_clear_supported)
{
   enum isl_aux_state aux_state = iris_resource_get_aux_state(res, level, layer);

   enum isl_aux_op resolve_op;
   if (res->aux.usage == ISL_AUX_USAGE_CCS_E) {
      resolve_op = get_ccs_e_resolve_op(aux_state, aux_usage,
                                        fast_clear_supported);
   } else {
      assert(res->aux.usage == ISL_AUX_USAGE_CCS_D);
      resolve_op = get_ccs_d_resolve_op(aux_state, aux_usage,
                                        fast_clear_supported);
   }

   if (resolve_op != ISL_AUX_OP_NONE) {
      iris_resolve_color(ice, batch, res, level, layer, resolve_op);

      switch (resolve_op) {
      case ISL_AUX_OP_FULL_RESOLVE:
         /* The CCS full resolve operation destroys the CCS and sets it to the
          * pass-through state. (You can also think of this as being both a
          * resolve and an ambiguate in one operation.)
          */
         iris_resource_set_aux_state(ice, res, level, layer, 1,
                                     ISL_AUX_STATE_PASS_THROUGH);
         break;

      case ISL_AUX_OP_PARTIAL_RESOLVE:
         iris_resource_set_aux_state(ice, res, level, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
         break;

      default:
         unreachable("Invalid resolve op");
      }
   }
}

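/**
 * Update the tracked aux state of one level/layer of a CCS_D or CCS_E
 * surface after it has been written with the given aux usage.
 */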
static void
iris_resource_finish_ccs_write(struct iris_context *ice,
                               struct iris_resource *res,
                               uint32_t level, uint32_t layer,
                               enum isl_aux_usage aux_usage)
{
   assert(aux_usage == ISL_AUX_USAGE_NONE ||
          aux_usage == ISL_AUX_USAGE_CCS_D ||
          aux_usage == ISL_AUX_USAGE_CCS_E);

   enum isl_aux_state aux_state =
      iris_resource_get_aux_state(res, level, layer);

   if (res->aux.usage == ISL_AUX_USAGE_CCS_E) {
      switch (aux_state) {
      case ISL_AUX_STATE_CLEAR:
      case ISL_AUX_STATE_PARTIAL_CLEAR:
         assert(aux_usage == ISL_AUX_USAGE_CCS_E ||
                aux_usage == ISL_AUX_USAGE_CCS_D);

         if (aux_usage == ISL_AUX_USAGE_CCS_E) {
            iris_resource_set_aux_state(ice, res, level, layer, 1,
                                        ISL_AUX_STATE_COMPRESSED_CLEAR);
         } else if (aux_state != ISL_AUX_STATE_PARTIAL_CLEAR) {
            iris_resource_set_aux_state(ice, res, level, layer, 1,
                                        ISL_AUX_STATE_PARTIAL_CLEAR);
         }
         break;

      case ISL_AUX_STATE_COMPRESSED_CLEAR:
      case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
         assert(aux_usage == ISL_AUX_USAGE_CCS_E);
         break; /* Nothing to do */

      case ISL_AUX_STATE_PASS_THROUGH:
         if (aux_usage == ISL_AUX_USAGE_CCS_E) {
            iris_resource_set_aux_state(ice, res, level, layer, 1,
                                        ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
         } else {
            /* Nothing to do */
         }
         break;

      case ISL_AUX_STATE_RESOLVED:
      case ISL_AUX_STATE_AUX_INVALID:
         unreachable("Invalid aux state for CCS_E");
      }
   } else {
      assert(res->aux.usage == ISL_AUX_USAGE_CCS_D);
      /* CCS_D is a bit simpler */
      switch (aux_state) {
      case ISL_AUX_STATE_CLEAR:
         assert(aux_usage == ISL_AUX_USAGE_CCS_D);
         iris_resource_set_aux_state(ice, res, level, layer, 1,
                                     ISL_AUX_STATE_PARTIAL_CLEAR);
         break;

      case ISL_AUX_STATE_PARTIAL_CLEAR:
         assert(aux_usage == ISL_AUX_USAGE_CCS_D);
         break; /* Nothing to do */

      case ISL_AUX_STATE_PASS_THROUGH:
         /* Nothing to do */
         break;

      case ISL_AUX_STATE_COMPRESSED_CLEAR:
      case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      case ISL_AUX_STATE_RESOLVED:
      case ISL_AUX_STATE_AUX_INVALID:
         unreachable("Invalid aux state for CCS_D");
      }
   }
}

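/**
 * Prepare one layer of an MCS surface for an access that may not support
 * fast-clear values, partially resolving it if necessary.
 */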
static void
iris_resource_prepare_mcs_access(struct iris_context *ice,
                                 struct iris_batch *batch,
                                 struct iris_resource *res,
                                 uint32_t layer,
                                 enum isl_aux_usage aux_usage,
                                 bool fast_clear_supported)
{
   assert(aux_usage == ISL_AUX_USAGE_MCS);

   switch (iris_resource_get_aux_state(res, 0, layer)) {
   case ISL_AUX_STATE_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_CLEAR:
      if (!fast_clear_supported) {
         iris_mcs_partial_resolve(ice, batch, res, layer, 1);
         iris_resource_set_aux_state(ice, res, 0, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
      }
      break;

   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      break; /* Nothing to do */

   case ISL_AUX_STATE_RESOLVED:
   case ISL_AUX_STATE_PASS_THROUGH:
   case ISL_AUX_STATE_AUX_INVALID:
   case ISL_AUX_STATE_PARTIAL_CLEAR:
      unreachable("Invalid aux state for MCS");
   }
}

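/**
 * Update the tracked aux state of one layer of an MCS surface after a
 * write; a write to a cleared layer moves it to the compressed-clear state.
 */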
static void
iris_resource_finish_mcs_write(struct iris_context *ice,
                               struct iris_resource *res,
                               uint32_t layer,
                               enum isl_aux_usage aux_usage)
{
   assert(aux_usage == ISL_AUX_USAGE_MCS);

   switch (iris_resource_get_aux_state(res, 0, layer)) {
   case ISL_AUX_STATE_CLEAR:
      iris_resource_set_aux_state(ice, res, 0, layer, 1,
                                  ISL_AUX_STATE_COMPRESSED_CLEAR);
      break;

   case ISL_AUX_STATE_COMPRESSED_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      break; /* Nothing to do */

   case ISL_AUX_STATE_RESOLVED:
   case ISL_AUX_STATE_PASS_THROUGH:
   case ISL_AUX_STATE_AUX_INVALID:
   case ISL_AUX_STATE_PARTIAL_CLEAR:
      unreachable("Invalid aux state for MCS");
   }
}

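/**
 * Perform any HiZ resolve or ambiguate needed before accessing one
 * level/layer of a depth surface with the given aux usage, then update the
 * tracked aux state to match.
 */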
static void
iris_resource_prepare_hiz_access(struct iris_context *ice,
                                 struct iris_batch *batch,
                                 struct iris_resource *res,
                                 uint32_t level, uint32_t layer,
                                 enum isl_aux_usage aux_usage,
                                 bool fast_clear_supported)
{
   assert(aux_usage == ISL_AUX_USAGE_NONE ||
          aux_usage == ISL_AUX_USAGE_HIZ ||
          aux_usage == ISL_AUX_USAGE_HIZ_CCS ||
          aux_usage == ISL_AUX_USAGE_CCS_E);

   enum isl_aux_op hiz_op = ISL_AUX_OP_NONE;
   switch (iris_resource_get_aux_state(res, level, layer)) {
   case ISL_AUX_STATE_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_CLEAR:
      if (aux_usage == ISL_AUX_USAGE_NONE || !fast_clear_supported)
         hiz_op = ISL_AUX_OP_FULL_RESOLVE;
      break;

   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      if (aux_usage == ISL_AUX_USAGE_NONE)
         hiz_op = ISL_AUX_OP_FULL_RESOLVE;
      break;

   case ISL_AUX_STATE_PASS_THROUGH:
   case ISL_AUX_STATE_RESOLVED:
      break;

   case ISL_AUX_STATE_AUX_INVALID:
      if (aux_usage != ISL_AUX_USAGE_NONE)
         hiz_op = ISL_AUX_OP_AMBIGUATE;
      break;

   case ISL_AUX_STATE_PARTIAL_CLEAR:
      unreachable("Invalid HiZ state");
   }

   if (hiz_op != ISL_AUX_OP_NONE) {
      iris_hiz_exec(ice, batch, res, level, layer, 1, hiz_op, false);

      switch (hiz_op) {
      case ISL_AUX_OP_FULL_RESOLVE:
         iris_resource_set_aux_state(ice, res, level, layer, 1,
                                     ISL_AUX_STATE_RESOLVED);
         break;

      case ISL_AUX_OP_AMBIGUATE:
         /* The HiZ resolve operation is actually an ambiguate */
         iris_resource_set_aux_state(ice, res, level, layer, 1,
                                     ISL_AUX_STATE_PASS_THROUGH);
         break;

      default:
         unreachable("Invalid HiZ op");
      }
   }
}

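/**
 * Update the tracked aux state of one level/layer of a depth surface after
 * it has been written with (or without) HiZ.
 */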
static void
iris_resource_finish_hiz_write(struct iris_context *ice,
                               struct iris_resource *res,
                               uint32_t level, uint32_t layer,
                               enum isl_aux_usage aux_usage)
{
   assert(aux_usage == ISL_AUX_USAGE_NONE ||
          isl_aux_usage_has_hiz(aux_usage));

   switch (iris_resource_get_aux_state(res, level, layer)) {
   case ISL_AUX_STATE_CLEAR:
      assert(isl_aux_usage_has_hiz(aux_usage));
      iris_resource_set_aux_state(ice, res, level, layer, 1,
                                  ISL_AUX_STATE_COMPRESSED_CLEAR);
      break;

   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_CLEAR:
      assert(isl_aux_usage_has_hiz(aux_usage));
      break; /* Nothing to do */

   case ISL_AUX_STATE_RESOLVED:
      if (isl_aux_usage_has_hiz(aux_usage)) {
         iris_resource_set_aux_state(ice, res, level, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
      } else {
         iris_resource_set_aux_state(ice, res, level, layer, 1,
                                     ISL_AUX_STATE_AUX_INVALID);
      }
      break;

   case ISL_AUX_STATE_PASS_THROUGH:
      if (isl_aux_usage_has_hiz(aux_usage)) {
         iris_resource_set_aux_state(ice, res, level, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
      }
      break;

   case ISL_AUX_STATE_AUX_INVALID:
      assert(!isl_aux_usage_has_hiz(aux_usage));
      break;

   case ISL_AUX_STATE_PARTIAL_CLEAR:
      unreachable("Invalid HiZ state");
   }
}

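/**
 * Prepare a miptree range for an access with the given aux usage: perform
 * whatever resolves the aux type requires on every affected level/layer.
 * This is the main entry point used before reading or rendering.
 */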
void
iris_resource_prepare_access(struct iris_context *ice,
                             struct iris_batch *batch,
                             struct iris_resource *res,
                             uint32_t start_level, uint32_t num_levels,
                             uint32_t start_layer, uint32_t num_layers,
                             enum isl_aux_usage aux_usage,
                             bool fast_clear_supported)
{
   num_levels = miptree_level_range_length(res, start_level, num_levels);

   switch (res->aux.usage) {
   case ISL_AUX_USAGE_NONE:
      /* Nothing to do */
      break;

   case ISL_AUX_USAGE_MCS:
      assert(start_level == 0 && num_levels == 1);
      const uint32_t level_layers =
         miptree_layer_range_length(res, 0, start_layer, num_layers);
      for (uint32_t a = 0; a < level_layers; a++) {
         iris_resource_prepare_mcs_access(ice, batch, res, start_layer + a,
                                          aux_usage, fast_clear_supported);
      }
      break;

   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      for (uint32_t l = 0; l < num_levels; l++) {
         const uint32_t level = start_level + l;
         const uint32_t level_layers =
            miptree_layer_range_length(res, level, start_layer, num_layers);
         for (uint32_t a = 0; a < level_layers; a++) {
            iris_resource_prepare_ccs_access(ice, batch, res, level,
                                             start_layer + a,
                                             aux_usage, fast_clear_supported);
         }
      }
      break;

   case ISL_AUX_USAGE_HIZ:
   case ISL_AUX_USAGE_HIZ_CCS:
      for (uint32_t l = 0; l < num_levels; l++) {
         const uint32_t level = start_level + l;
         if (!iris_resource_level_has_hiz(res, level))
            continue;

         const uint32_t level_layers =
            miptree_layer_range_length(res, level, start_layer, num_layers);
         for (uint32_t a = 0; a < level_layers; a++) {
            iris_resource_prepare_hiz_access(ice, batch, res, level,
                                             start_layer + a, aux_usage,
                                             fast_clear_supported);
         }
      }
      break;

   default:
      unreachable("Invalid aux usage");
   }
}

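/**
 * Mark a miptree range as written with the given aux usage, updating the
 * tracked aux state of every affected layer.
 */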
void
iris_resource_finish_write(struct iris_context *ice,
                           struct iris_resource *res, uint32_t level,
                           uint32_t start_layer, uint32_t num_layers,
                           enum isl_aux_usage aux_usage)
{
   num_layers = miptree_layer_range_length(res, level, start_layer, num_layers);

   switch (res->aux.usage) {
   case ISL_AUX_USAGE_NONE:
      break;

   case ISL_AUX_USAGE_MCS:
      for (uint32_t a = 0; a < num_layers; a++) {
         iris_resource_finish_mcs_write(ice, res, start_layer + a,
                                        aux_usage);
      }
      break;

   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      for (uint32_t a = 0; a < num_layers; a++) {
         iris_resource_finish_ccs_write(ice, res, level, start_layer + a,
                                        aux_usage);
      }
      break;

   case ISL_AUX_USAGE_HIZ:
   case ISL_AUX_USAGE_HIZ_CCS:
      if (!iris_resource_level_has_hiz(res, level))
         return;

      for (uint32_t a = 0; a < num_layers; a++) {
         iris_resource_finish_hiz_write(ice, res, level, start_layer + a,
                                        aux_usage);
      }
      break;

   default:
      unreachable("Invalid aux usage");
   }
}

enum isl_aux_state
iris_resource_get_aux_state(const struct iris_resource *res,
                            uint32_t level, uint32_t layer)
{
   iris_resource_check_level_layer(res, level, layer);

   if (res->surf.usage & ISL_SURF_USAGE_DEPTH_BIT) {
      assert(iris_resource_level_has_hiz(res, level));
   } else if (res->surf.usage & ISL_SURF_USAGE_STENCIL_BIT) {
      unreachable("Cannot get aux state for stencil");
   } else {
      assert(res->surf.samples == 1 ||
             res->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
   }

   return res->aux.state[level][layer];
}

void
iris_resource_set_aux_state(struct iris_context *ice,
                            struct iris_resource *res, uint32_t level,
                            uint32_t start_layer, uint32_t num_layers,
                            enum isl_aux_state aux_state)
{
   num_layers = miptree_layer_range_length(res, level, start_layer, num_layers);

   if (res->surf.usage & ISL_SURF_USAGE_DEPTH_BIT) {
      assert(iris_resource_level_has_hiz(res, level));
   } else if (res->surf.usage & ISL_SURF_USAGE_STENCIL_BIT) {
      unreachable("Cannot set aux state for stencil");
   } else {
      assert(res->surf.samples == 1 ||
             res->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
   }

   for (unsigned a = 0; a < num_layers; a++) {
      if (res->aux.state[level][start_layer + a] != aux_state) {
         res->aux.state[level][start_layer + a] = aux_state;
         /* XXX: Need to track which bindings to make dirty */
         ice->state.dirty |= IRIS_ALL_DIRTY_BINDINGS;
      }
   }
}

/* On Gen9 color buffers may be compressed by the hardware (lossless
 * compression). There are, however, format restrictions and care needs to
 * be taken that the sampler engine is capable of re-interpreting a buffer
 * with a format different from the one the buffer was originally written
 * with.
 *
 * For example, SRGB formats are not compressible and the sampler engine
 * isn't capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the
 * underlying color buffer needs to be resolved so that the sampling surface
 * can be sampled as non-compressed (i.e., without the auxiliary CCS buffer
 * being set).
 */
static bool
can_texture_with_ccs(const struct gen_device_info *devinfo,
                     struct pipe_debug_callback *dbg,
                     const struct iris_resource *res,
                     enum isl_format view_format)
{
   if (res->aux.usage != ISL_AUX_USAGE_CCS_E)
      return false;

   if (!format_ccs_e_compat_with_resource(devinfo, res, view_format)) {
      const struct isl_format_layout *res_fmtl =
         isl_format_get_layout(res->surf.format);
      const struct isl_format_layout *view_fmtl =
         isl_format_get_layout(view_format);

      perf_debug(dbg, "Incompatible sampling format (%s) for CCS (%s)\n",
                 view_fmtl->name, res_fmtl->name);

      return false;
   }

   return true;
}

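/**
 * Return the aux usage to use when sampling the given resource through a
 * view with the given format, taking the ASTC 5x5 workaround and any
 * unresolved color into account.
 */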
enum isl_aux_usage
iris_resource_texture_aux_usage(struct iris_context *ice,
                                const struct iris_resource *res,
                                enum isl_format view_format,
                                enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   struct gen_device_info *devinfo = &screen->devinfo;

   assert(devinfo->gen == 9 || astc5x5_wa_bits == 0);

   /* On gen9, ASTC 5x5 textures cannot live in the sampler cache alongside
    * CCS or HiZ compressed textures. See gen9_apply_astc5x5_wa_flush() for
    * details.
    */
   if ((astc5x5_wa_bits & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
       res->aux.usage != ISL_AUX_USAGE_MCS)
      return ISL_AUX_USAGE_NONE;

   switch (res->aux.usage) {
   case ISL_AUX_USAGE_HIZ:
      if (sample_with_hiz(devinfo, res))
         return ISL_AUX_USAGE_HIZ;
      break;

   case ISL_AUX_USAGE_MCS:
      return ISL_AUX_USAGE_MCS;

   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      /* If we don't have any unresolved color, report an aux usage of
       * ISL_AUX_USAGE_NONE. This way, texturing won't even look at the
       * aux surface and we can save some bandwidth.
       */
      if (!iris_has_color_unresolved(res, 0, INTEL_REMAINING_LEVELS,
                                     0, INTEL_REMAINING_LAYERS))
         return ISL_AUX_USAGE_NONE;

      if (can_texture_with_ccs(devinfo, &ice->dbg, res, view_format))
         return ISL_AUX_USAGE_CCS_E;
      break;

   default:
      break;
   }

   return ISL_AUX_USAGE_NONE;
}

static bool
isl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b)
{
   /* On gen8 and earlier, the hardware was only capable of handling 0/1 clear
    * values so sRGB curve application was a no-op for all fast-clearable
    * formats.
    *
    * On gen9+, the hardware supports arbitrary clear values. For sRGB clear
    * values, the hardware interprets the floats, not as what would be
    * returned from the sampler (or written by the shader), but as being
    * between format conversion and sRGB curve application. This means that
    * we can switch between sRGB and UNORM without having to whack the clear
    * color.
    */
   return isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b);
}

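/**
 * Resolve a resource as needed so it can be sampled with the given view
 * format. Clear support is disabled when the view format would require a
 * manual clear color conversion (see the comment in the function body).
 */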
void
iris_resource_prepare_texture(struct iris_context *ice,
                              struct iris_batch *batch,
                              struct iris_resource *res,
                              enum isl_format view_format,
                              uint32_t start_level, uint32_t num_levels,
                              uint32_t start_layer, uint32_t num_layers,
                              enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits)
{
   enum isl_aux_usage aux_usage =
      iris_resource_texture_aux_usage(ice, res, view_format, astc5x5_wa_bits);

   bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE;

   /* Clear color is specified as ints or floats and the conversion is done by
    * the sampler. If we have a texture view, we would have to perform the
    * clear color conversion manually. Just disable clear color.
    */
   if (!isl_formats_are_fast_clear_compatible(res->surf.format, view_format))
      clear_supported = false;

   iris_resource_prepare_access(ice, batch, res, start_level, num_levels,
                                start_layer, num_layers,
                                aux_usage, clear_supported);
}

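/**
 * Return the aux usage to use when rendering to the given resource with
 * the given format, falling back from CCS_E to CCS_D or no aux where the
 * format or blending rules out compression.
 */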
enum isl_aux_usage
iris_resource_render_aux_usage(struct iris_context *ice,
                               struct iris_resource *res,
                               enum isl_format render_format,
                               bool blend_enabled,
                               bool draw_aux_disabled)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   struct gen_device_info *devinfo = &screen->devinfo;

   if (draw_aux_disabled)
      return ISL_AUX_USAGE_NONE;

   switch (res->aux.usage) {
   case ISL_AUX_USAGE_MCS:
      return ISL_AUX_USAGE_MCS;

   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      /* Gen9+ hardware technically supports non-0/1 clear colors with sRGB
       * formats. However, there are issues with blending where it doesn't
       * properly apply the sRGB curve to the clear color when blending.
       */
      if (devinfo->gen >= 9 && blend_enabled &&
          isl_format_is_srgb(render_format) &&
          !isl_color_value_is_zero_one(res->aux.clear_color, render_format))
         return ISL_AUX_USAGE_NONE;

      if (res->aux.usage == ISL_AUX_USAGE_CCS_E &&
          format_ccs_e_compat_with_resource(devinfo, res, render_format))
         return ISL_AUX_USAGE_CCS_E;

      /* Otherwise, we try to fall back to CCS_D */
      if (isl_format_supports_ccs_d(devinfo, render_format))
         return ISL_AUX_USAGE_CCS_D;

      /* fallthrough */
   default:
      return ISL_AUX_USAGE_NONE;
   }
}

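/**
 * Resolve a single level's layer range as needed so it can be used as a
 * render target with the given aux usage.
 */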
void
iris_resource_prepare_render(struct iris_context *ice,
                             struct iris_batch *batch,
                             struct iris_resource *res, uint32_t level,
                             uint32_t start_layer, uint32_t layer_count,
                             enum isl_aux_usage aux_usage)
{
   iris_resource_prepare_access(ice, batch, res, level, 1, start_layer,
                                layer_count, aux_usage,
                                aux_usage != ISL_AUX_USAGE_NONE);
}

void
iris_resource_finish_render(struct iris_context *ice,
                            struct iris_resource *res, uint32_t level,
                            uint32_t start_layer, uint32_t layer_count,
                            enum isl_aux_usage aux_usage)
{
   iris_resource_finish_write(ice, res, level, start_layer, layer_count,
                              aux_usage);
}

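/**
 * Resolve a single level's layer range as needed so it can be used as a
 * depth target.
 */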
void
iris_resource_prepare_depth(struct iris_context *ice,
                            struct iris_batch *batch,
                            struct iris_resource *res, uint32_t level,
                            uint32_t start_layer, uint32_t layer_count)
{
   iris_resource_prepare_access(ice, batch, res, level, 1, start_layer,
                                layer_count, res->aux.usage, !!res->aux.bo);
}

void
iris_resource_finish_depth(struct iris_context *ice,
                           struct iris_resource *res, uint32_t level,
                           uint32_t start_layer, uint32_t layer_count,
                           bool depth_written)
{
   if (depth_written) {
      iris_resource_finish_write(ice, res, level, start_layer, layer_count,
                                 res->aux.usage);
   }
}