2 * Copyright © 2017 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
24 * @file iris_resolve.c
26 * This file handles resolve tracking for main and auxiliary surfaces.
28 * It also handles our cache tracking. We have sets for the render cache,
29 * depth cache, and so on. If a BO is in a cache's set, then it may have
30 * data in that cache. The helpers take care of emitting flushes for
31 * render-to-texture, format reinterpretation issues, and other situations.
34 #include "util/hash_table.h"
36 #include "iris_context.h"
37 #include "compiler/nir/nir.h"
40 * Disable auxiliary buffers if a renderbuffer is also bound as a texture
41 * or shader image. This causes a self-dependency, where both rendering
42 * and sampling may concurrently read or write the CCS buffer, causing
// disable_rb_aux_buffer: walk the bound color buffers and, for every one that
// shares its BO with tex_res and whose mip level lies inside
// [min_level, min_level + num_levels), set draw_aux_buffer_disabled[i] to true.
// The callers in this file pass a sampler view's or image view's level range.
// NOTE(review): the return type, the trailing parameter (callers pass a string
// such as "as a shader image" that presumably feeds the "%s" below), the
// declaration of `found`, and the statements guarded by the aux-usage test are
// not visible in this listing - confirm against the full file.
46 disable_rb_aux_buffer(struct iris_context
*ice
,
47 bool *draw_aux_buffer_disabled
,
48 struct iris_resource
*tex_res
,
49 unsigned min_level
, unsigned num_levels
,
52 struct pipe_framebuffer_state
*cso_fb
= &ice
->state
.framebuffer
;
55 /* We only need to worry about color compression and fast clears. */
// True only when tex_res uses none of CCS_D, CCS_E or GEN12_CCS_E; presumably
// the function bails out in that case.
56 if (tex_res
->aux
.usage
!= ISL_AUX_USAGE_CCS_D
&&
57 tex_res
->aux
.usage
!= ISL_AUX_USAGE_CCS_E
&&
58 tex_res
->aux
.usage
!= ISL_AUX_USAGE_GEN12_CCS_E
)
61 for (unsigned i
= 0; i
< cso_fb
->nr_cbufs
; i
++) {
62 struct iris_surface
*surf
= (void *) cso_fb
->cbufs
[i
];
66 struct iris_resource
*rb_res
= (void *) surf
->base
.texture
;
// Match on the underlying BO (not the resource pointer) plus the level range.
68 if (rb_res
->bo
== tex_res
->bo
&&
69 surf
->base
.u
.tex
.level
>= min_level
&&
70 surf
->base
.u
.tex
.level
< min_level
+ num_levels
) {
71 found
= draw_aux_buffer_disabled
[i
] = true;
77 "Disabling CCS because a renderbuffer is also bound %s.\n",
// resolve_sampler_views: for sampler views that are both bound in shs and
// listed in info->textures_used (no views at all when info is NULL), prepare
// the backing resource for texturing over the view's level/layer range and
// emit an IRIS_DOMAIN_OTHER_READ barrier on its BO.
// For non-buffer resources with consider_framebuffer set, the renderbuffer
// self-dependency check (disable_rb_aux_buffer) runs first.
// NOTE(review): the loop header around u_bit_scan, the last argument of the
// disable_rb_aux_buffer call and the closing braces are not visible here, so
// the exact scope of the barrier call cannot be confirmed from this listing.
85 resolve_sampler_views(struct iris_context
*ice
,
86 struct iris_batch
*batch
,
87 struct iris_shader_state
*shs
,
88 const struct shader_info
*info
,
89 bool *draw_aux_buffer_disabled
,
90 bool consider_framebuffer
)
92 uint32_t views
= info
? (shs
->bound_sampler_views
& info
->textures_used
) : 0;
// presumably u_bit_scan returns the index of one set bit and clears it - confirm
95 const int i
= u_bit_scan(&views
);
96 struct iris_sampler_view
*isv
= shs
->textures
[i
];
97 struct iris_resource
*res
= (void *) isv
->base
.texture
;
99 if (res
->base
.target
!= PIPE_BUFFER
) {
100 if (consider_framebuffer
) {
101 disable_rb_aux_buffer(ice
, draw_aux_buffer_disabled
,
102 res
, isv
->view
.base_level
, isv
->view
.levels
,
106 iris_resource_prepare_texture(ice
, res
, isv
->view
.format
,
107 isv
->view
.base_level
, isv
->view
.levels
,
108 isv
->view
.base_array_layer
,
109 isv
->view
.array_len
);
112 iris_emit_buffer_barrier_for(batch
, res
->bo
, IRIS_DOMAIN_OTHER_READ
);
// resolve_image_views: image-view counterpart of resolve_sampler_views.
// For image views that are both bound in shs and listed in info->images_used
// (none when info is NULL), prepare the single mip level and the
// first_layer..last_layer range of the view for access with the aux usage
// chosen by iris_image_view_aux_usage, and emit an IRIS_DOMAIN_OTHER_READ
// barrier on the BO.
// NOTE(review): the loop header and the trailing arguments of the
// iris_resource_prepare_access call are not visible in this listing.
117 resolve_image_views(struct iris_context
*ice
,
118 struct iris_batch
*batch
,
119 struct iris_shader_state
*shs
,
120 const struct shader_info
*info
,
121 bool *draw_aux_buffer_disabled
,
122 bool consider_framebuffer
)
124 uint32_t views
= info
? (shs
->bound_image_views
& info
->images_used
) : 0;
127 const int i
= u_bit_scan(&views
);
128 struct pipe_image_view
*pview
= &shs
->image
[i
].base
;
129 struct iris_resource
*res
= (void *) pview
->resource
;
131 if (res
->base
.target
!= PIPE_BUFFER
) {
132 if (consider_framebuffer
) {
// An image view covers exactly one mip level, hence num_levels == 1.
133 disable_rb_aux_buffer(ice
, draw_aux_buffer_disabled
,
134 res
, pview
->u
.tex
.level
, 1,
135 "as a shader image");
// last_layer is inclusive, so the count is last - first + 1.
138 unsigned num_layers
=
139 pview
->u
.tex
.last_layer
- pview
->u
.tex
.first_layer
+ 1;
141 enum isl_aux_usage aux_usage
=
142 iris_image_view_aux_usage(ice
, pview
, info
);
144 iris_resource_prepare_access(ice
, res
,
145 pview
->u
.tex
.level
, 1,
146 pview
->u
.tex
.first_layer
, num_layers
,
150 iris_emit_buffer_barrier_for(batch
, res
->bo
, IRIS_DOMAIN_OTHER_READ
);
156 * \brief Resolve buffers before drawing.
158 * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
159 * enabled depth texture, and flush the render cache for any dirty textures.
// iris_predraw_resolve_inputs: run the sampler-view and image-view resolves
// for one shader stage.  The work is skipped unless ice->state.stage_dirty has
// this stage's bindings bit set or, when consider_framebuffer is true, the
// fragment-shader bindings bit set.
// draw_aux_buffer_disabled is forwarded to both helpers, which may set entries
// in it through disable_rb_aux_buffer.
// NOTE(review): the return type line is not visible in this listing.
162 iris_predraw_resolve_inputs(struct iris_context
*ice
,
163 struct iris_batch
*batch
,
164 bool *draw_aux_buffer_disabled
,
165 gl_shader_stage stage
,
166 bool consider_framebuffer
)
168 struct iris_shader_state
*shs
= &ice
->state
.shaders
[stage
];
169 const struct shader_info
*info
= iris_get_shader_info(ice
, stage
);
// presumably the per-stage BINDINGS bits are consecutive, so shifting the VS
// bit by `stage` selects this stage's bit - confirm against the flag enum
171 uint64_t stage_dirty
= (IRIS_STAGE_DIRTY_BINDINGS_VS
<< stage
) |
172 (consider_framebuffer
? IRIS_STAGE_DIRTY_BINDINGS_FS
: 0);
174 if (ice
->state
.stage_dirty
& stage_dirty
) {
175 resolve_sampler_views(ice
, batch
, shs
, info
, draw_aux_buffer_disabled
,
176 consider_framebuffer
);
177 resolve_image_views(ice
, batch
, shs
, info
, draw_aux_buffer_disabled
,
178 consider_framebuffer
);
// iris_predraw_resolve_framebuffer: prepare the framebuffer attachments
// before a draw.  Three visible phases:
//   1. When IRIS_DIRTY_DEPTH_BUFFER is set: prepare the depth resource for
//      depth access and emit DEPTH_WRITE barriers for the depth and stencil BOs.
//   2. On gen == 8 when the fragment shader's nir info has outputs_read != 0:
//      prepare every bound color buffer as a texture.
//   3. When IRIS_STAGE_DIRTY_BINDINGS_FS is set: recompute each color buffer's
//      render aux usage, flag state dirty if it changed, prepare the resource
//      for rendering and run the render-cache format tracking.
// NOTE(review): null checks on zsbuf / z_res / s_res / surf and the trailing
// arguments of several calls are not visible in this listing.
183 iris_predraw_resolve_framebuffer(struct iris_context
*ice
,
184 struct iris_batch
*batch
,
185 bool *draw_aux_buffer_disabled
)
187 struct pipe_framebuffer_state
*cso_fb
= &ice
->state
.framebuffer
;
188 struct iris_screen
*screen
= (void *) ice
->ctx
.screen
;
189 struct gen_device_info
*devinfo
= &screen
->devinfo
;
190 struct iris_uncompiled_shader
*ish
=
191 ice
->shaders
.uncompiled
[MESA_SHADER_FRAGMENT
];
192 const nir_shader
*nir
= ish
->nir
;
// Phase 1: depth / stencil attachment.
194 if (ice
->state
.dirty
& IRIS_DIRTY_DEPTH_BUFFER
) {
195 struct pipe_surface
*zs_surf
= cso_fb
->zsbuf
;
198 struct iris_resource
*z_res
, *s_res
;
199 iris_get_depth_stencil_resources(zs_surf
->texture
, &z_res
, &s_res
);
200 unsigned num_layers
=
201 zs_surf
->u
.tex
.last_layer
- zs_surf
->u
.tex
.first_layer
+ 1;
204 iris_resource_prepare_depth(ice
, batch
, z_res
,
205 zs_surf
->u
.tex
.level
,
206 zs_surf
->u
.tex
.first_layer
,
208 iris_emit_buffer_barrier_for(batch
, z_res
->bo
,
209 IRIS_DOMAIN_DEPTH_WRITE
);
213 iris_emit_buffer_barrier_for(batch
, s_res
->bo
,
214 IRIS_DOMAIN_DEPTH_WRITE
);
// Phase 2: gen8 only, fragment shader reads its outputs - the color buffers
// are prepared through the texture path.
219 if (devinfo
->gen
== 8 && nir
->info
.outputs_read
!= 0) {
220 for (unsigned i
= 0; i
< cso_fb
->nr_cbufs
; i
++) {
221 if (cso_fb
->cbufs
[i
]) {
222 struct iris_surface
*surf
= (void *) cso_fb
->cbufs
[i
];
223 struct iris_resource
*res
= (void *) cso_fb
->cbufs
[i
]->texture
;
225 iris_resource_prepare_texture(ice
, res
, surf
->view
.format
,
226 surf
->view
.base_level
, 1,
227 surf
->view
.base_array_layer
,
228 surf
->view
.array_len
);
// Phase 3: color attachments as render targets.
233 if (ice
->state
.stage_dirty
& IRIS_STAGE_DIRTY_BINDINGS_FS
) {
234 for (unsigned i
= 0; i
< cso_fb
->nr_cbufs
; i
++) {
235 struct iris_surface
*surf
= (void *) cso_fb
->cbufs
[i
];
239 struct iris_resource
*res
= (void *) surf
->base
.texture
;
241 enum isl_aux_usage aux_usage
=
242 iris_resource_render_aux_usage(ice
, res
, surf
->view
.format
,
243 draw_aux_buffer_disabled
[i
]);
// A changed aux usage invalidates the render buffer state and all bindings.
245 if (ice
->state
.draw_aux_usage
[i
] != aux_usage
) {
246 ice
->state
.draw_aux_usage
[i
] = aux_usage
;
247 /* XXX: Need to track which bindings to make dirty */
248 ice
->state
.dirty
|= IRIS_DIRTY_RENDER_BUFFER
;
249 ice
->state
.stage_dirty
|= IRIS_ALL_STAGE_DIRTY_BINDINGS
;
252 iris_resource_prepare_render(ice
, batch
, res
, surf
->view
.base_level
,
253 surf
->view
.base_array_layer
,
254 surf
->view
.array_len
,
257 iris_cache_flush_for_render(batch
, res
->bo
, surf
->view
.format
,
264 * \brief Call this after drawing to mark which buffers need resolving
266 * If the depth buffer was written to and if it has an accompanying HiZ
267 * buffer, then mark that it needs a depth resolve.
269 * If the color buffer is a multisample window system buffer, then
270 * mark that it needs a downsample.
272 * Also mark any render targets which will be textured as needing a render
// iris_postdraw_update_resolve_tracking: after a draw, record the writes in
// the aux-state tracking of the framebuffer attachments.
//   - Depth: iris_resource_finish_depth on the depth resource, passing
//     ice->state.depth_writes_enabled, when the depth-related dirty bits are set.
//   - Stencil: iris_resource_finish_write on the stencil resource when the
//     same dirty bits are set and stencil writes are enabled.
//   - Color: iris_resource_finish_render on each color buffer when
//     IRIS_STAGE_DIRTY_BINDINGS_FS is set, using the draw_aux_usage recorded
//     for that slot.
// NOTE(review): the guards around zsbuf / z_res / s_res / surf and the last
// arguments of the finish_write / finish_render calls are not visible here.
276 iris_postdraw_update_resolve_tracking(struct iris_context
*ice
,
277 struct iris_batch
*batch
)
279 struct pipe_framebuffer_state
*cso_fb
= &ice
->state
.framebuffer
;
281 // XXX: front buffer drawing?
// The names suggest these are the same dirty bits under which the predraw
// path prepared the depth buffer - TODO confirm the pairing with the caller.
283 bool may_have_resolved_depth
=
284 ice
->state
.dirty
& (IRIS_DIRTY_DEPTH_BUFFER
|
285 IRIS_DIRTY_WM_DEPTH_STENCIL
);
287 struct pipe_surface
*zs_surf
= cso_fb
->zsbuf
;
289 struct iris_resource
*z_res
, *s_res
;
290 iris_get_depth_stencil_resources(zs_surf
->texture
, &z_res
, &s_res
);
291 unsigned num_layers
=
292 zs_surf
->u
.tex
.last_layer
- zs_surf
->u
.tex
.first_layer
+ 1;
295 if (may_have_resolved_depth
) {
296 iris_resource_finish_depth(ice
, z_res
, zs_surf
->u
.tex
.level
,
297 zs_surf
->u
.tex
.first_layer
, num_layers
,
298 ice
->state
.depth_writes_enabled
);
303 if (may_have_resolved_depth
&& ice
->state
.stencil_writes_enabled
) {
304 iris_resource_finish_write(ice
, s_res
, zs_surf
->u
.tex
.level
,
305 zs_surf
->u
.tex
.first_layer
, num_layers
,
311 bool may_have_resolved_color
=
312 ice
->state
.stage_dirty
& IRIS_STAGE_DIRTY_BINDINGS_FS
;
314 for (unsigned i
= 0; i
< cso_fb
->nr_cbufs
; i
++) {
315 struct iris_surface
*surf
= (void *) cso_fb
->cbufs
[i
];
319 struct iris_resource
*res
= (void *) surf
->base
.texture
;
320 enum isl_aux_usage aux_usage
= ice
->state
.draw_aux_usage
[i
];
322 if (may_have_resolved_color
) {
323 union pipe_surface_desc
*desc
= &surf
->base
.u
;
324 unsigned num_layers
=
325 desc
->tex
.last_layer
- desc
->tex
.first_layer
+ 1;
326 iris_resource_finish_render(ice
, res
, desc
->tex
.level
,
327 desc
->tex
.first_layer
, num_layers
,
// format_aux_tuple: pack a (format, aux usage) pair into one pointer-sized
// value: the format shifted left by 8 bits, OR-ed with the aux usage.
// iris_cache_flush_for_render stores and compares this value as hash-table
// entry data.
// assumes aux_usage fits in the low 8 bits - TODO confirm against the enum
334 format_aux_tuple(enum isl_format format
, enum isl_aux_usage aux_usage
)
336 return (void *)(uintptr_t)((uint32_t)format
<< 8 | aux_usage
);
// iris_cache_flush_for_render: emit an IRIS_DOMAIN_RENDER_WRITE barrier for
// `bo`, then track which (format, aux usage) tuple the BO was last rendered
// with in batch->cache.render.  In one branch the BO is inserted with the
// current tuple; in the other, when the stored tuple differs, a
// RENDER_TARGET_FLUSH + CS_STALL pipe control is emitted and the entry updated.
// NOTE(review): the declaration of the `bo` parameter and the condition of the
// first branch (presumably "no entry found") are not visible in this listing.
340 iris_cache_flush_for_render(struct iris_batch
*batch
,
342 enum isl_format format
,
343 enum isl_aux_usage aux_usage
)
345 iris_emit_buffer_barrier_for(batch
, bo
, IRIS_DOMAIN_RENDER_WRITE
);
347 /* Check to see if this bo has been used by a previous rendering operation
348 * but with a different format or aux usage. If it has, flush the render
349 * cache so we ensure that it's only in there with one format or aux usage
352 * Even though it's not obvious, this can easily happen in practice.
353 * Suppose a client is blending on a surface with sRGB encode enabled on
354 * gen9. This implies that you get AUX_USAGE_CCS_D at best. If the client
355 * then disables sRGB decode and continues blending we will flip on
356 * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is
357 * perfectly valid since CCS_E is a subset of CCS_D). However, this means
358 * that we have fragments in-flight which are rendering with UNORM+CCS_E
359 * and other fragments in-flight with SRGB+CCS_D on the same surface at the
360 * same time and the pixel scoreboard and color blender are trying to sort
361 * it all out. This ends badly (i.e. GPU hangs).
363 * To date, we have never observed GPU hangs or even corruption to be
364 * associated with switching the format, only the aux usage. However,
365 * there are comments in various docs which indicate that the render cache
366 * isn't 100% resilient to format changes. We may as well be conservative
367 * and flush on format changes too. We can always relax this later if we
368 * find it to be a performance problem.
370 struct hash_entry
*entry
=
371 _mesa_hash_table_search_pre_hashed(batch
->cache
.render
, bo
->hash
, bo
);
373 _mesa_hash_table_insert_pre_hashed(batch
->cache
.render
, bo
->hash
, bo
,
374 format_aux_tuple(format
, aux_usage
));
375 } else if (entry
->data
!= format_aux_tuple(format
, aux_usage
)) {
376 iris_emit_pipe_control_flush(batch
,
377 "cache tracker: render format mismatch",
378 PIPE_CONTROL_RENDER_TARGET_FLUSH
|
379 PIPE_CONTROL_CS_STALL
);
380 entry
->data
= format_aux_tuple(format
, aux_usage
);
// iris_resolve_color: run the aux operation `resolve_op` on one layer of one
// level of `res` through blorp.  The blorp work is bracketed by end-of-pipe
// syncs with RENDER_TARGET_FLUSH on both sides, and the blorp batch together
// with the post-flush runs inside a batch sync region.  Resources using
// ISL_AUX_USAGE_STC_CCS go through blorp_hiz_stencil_op; the other visible
// path uses blorp_ccs_resolve with the resource's own surface format.
// NOTE(review): the trailing arguments of blorp_hiz_stencil_op and the
// else/brace structure between the two blorp calls are not visible here.
385 iris_resolve_color(struct iris_context
*ice
,
386 struct iris_batch
*batch
,
387 struct iris_resource
*res
,
388 unsigned level
, unsigned layer
,
389 enum isl_aux_op resolve_op
)
391 //DBG("%s to mt %p level %u layer %u\n", __FUNCTION__, mt, level, layer);
393 struct blorp_surf surf
;
394 iris_blorp_surf_for_resource(&batch
->screen
->isl_dev
, &surf
,
395 &res
->base
, res
->aux
.usage
, level
, true);
// NOTE(review): 1500 is passed to iris_batch_maybe_flush before the resolve
// is emitted; its meaning (likely an estimated size) is not visible here.
397 iris_batch_maybe_flush(batch
, 1500);
399 /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
401 * "Any transition from any value in {Clear, Render, Resolve} to a
402 * different value in {Clear, Render, Resolve} requires end of pipe
405 * In other words, fast clear ops are not properly synchronized with
406 * other drawing. We need to use a PIPE_CONTROL to ensure that the
407 * contents of the previous draw hit the render target before we resolve
408 * and again afterwards to ensure that the resolve is complete before we
409 * do any more regular drawing.
411 iris_emit_end_of_pipe_sync(batch
, "color resolve: pre-flush",
412 PIPE_CONTROL_RENDER_TARGET_FLUSH
);
414 iris_batch_sync_region_start(batch
);
415 struct blorp_batch blorp_batch
;
416 blorp_batch_init(&ice
->blorp
, &blorp_batch
, batch
, 0);
417 /* On Gen >= 12, Stencil buffer with lossless compression needs to be
418 * resolve with WM_HZ_OP packet.
420 if (res
->aux
.usage
== ISL_AUX_USAGE_STC_CCS
) {
421 blorp_hiz_stencil_op(&blorp_batch
, &surf
, level
, layer
,
424 blorp_ccs_resolve(&blorp_batch
, &surf
, level
, layer
, 1,
425 res
->surf
.format
, resolve_op
);
427 blorp_batch_finish(&blorp_batch
);
429 /* See comment above */
430 iris_emit_end_of_pipe_sync(batch
, "color resolve: post-flush",
431 PIPE_CONTROL_RENDER_TARGET_FLUSH
);
432 iris_batch_sync_region_end(batch
);
// iris_mcs_partial_resolve: partially resolve `num_layers` layers of an
// MCS-compressed resource starting at `start_layer`, via
// blorp_mcs_partial_resolve.  Asserts that the resource's aux usage has MCS,
// emits an IRIS_DOMAIN_RENDER_WRITE barrier for the BO, and wraps the blorp
// batch in a batch sync region.  The blorp surface is built for level 0.
// NOTE(review): the declaration of the `num_layers` parameter and the return
// type are not visible in this listing.
436 iris_mcs_partial_resolve(struct iris_context
*ice
,
437 struct iris_batch
*batch
,
438 struct iris_resource
*res
,
439 uint32_t start_layer
,
442 //DBG("%s to mt %p layers %u-%u\n", __FUNCTION__, mt,
443 //start_layer, start_layer + num_layers - 1);
445 assert(isl_aux_usage_has_mcs(res
->aux
.usage
));
447 struct blorp_surf surf
;
448 iris_blorp_surf_for_resource(&batch
->screen
->isl_dev
, &surf
,
449 &res
->base
, res
->aux
.usage
, 0, true);
450 iris_emit_buffer_barrier_for(batch
, res
->bo
, IRIS_DOMAIN_RENDER_WRITE
);
452 struct blorp_batch blorp_batch
;
453 iris_batch_sync_region_start(batch
);
454 blorp_batch_init(&ice
->blorp
, &blorp_batch
, batch
, 0);
455 blorp_mcs_partial_resolve(&blorp_batch
, &surf
, res
->surf
.format
,
456 start_layer
, num_layers
);
457 blorp_batch_finish(&blorp_batch
);
458 iris_batch_sync_region_end(batch
);
// iris_sample_with_depth_aux: decide whether a depth resource may be sampled
// through its HiZ-based auxiliary surface.  Visible checks, in order:
//   - a switch on res->aux.usage (HIZ consults devinfo->has_sample_with_hiz);
//   - every mip level of the resource must have HiZ;
//   - the surface must be single-sampled and 2D.
// NOTE(review): the statements inside the switch cases (which usages accept
// or reject) and the early returns are not visible in this listing.
462 iris_sample_with_depth_aux(const struct gen_device_info
*devinfo
,
463 const struct iris_resource
*res
)
465 switch (res
->aux
.usage
) {
466 case ISL_AUX_USAGE_HIZ
:
467 if (devinfo
->has_sample_with_hiz
)
470 case ISL_AUX_USAGE_HIZ_CCS
:
472 case ISL_AUX_USAGE_HIZ_CCS_WT
:
478 /* It seems the hardware won't fallback to the depth buffer if some of the
479 * mipmap levels aren't available in the HiZ buffer. So we need all levels
480 * of the texture to be HiZ enabled.
482 for (unsigned level
= 0; level
< res
->surf
.levels
; ++level
) {
483 if (!iris_resource_level_has_hiz(res
, level
))
487 /* If compressed multisampling is enabled, then we use it for the auxiliary
490 * From the BDW PRM (Volume 2d: Command Reference: Structures
491 * RENDER_SURFACE_STATE.AuxiliarySurfaceMode):
493 * "If this field is set to AUX_HIZ, Number of Multisamples must be
494 * MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D.
496 * There is no such blurb for 1D textures, but there is sufficient evidence
497 * that this is broken on SKL+.
499 // XXX: i965 disables this for arrays too, is that reasonable?
500 return res
->surf
.samples
== 1 && res
->surf
.dim
== ISL_SURF_DIM_2D
;
504 * Perform a HiZ or depth resolve operation.
506 * For an overview of HiZ ops, see the following sections of the Sandy Bridge
507 * PRM, Volume 1, Part 2:
508 * - 7.5.3.1 Depth Buffer Clear
509 * - 7.5.3.2 Depth Buffer Resolve
510 * - 7.5.3.3 Hierarchical Depth Buffer Resolve
// iris_hiz_exec: execute one HiZ operation `op` on `num_layers` layers of
// `level`, starting at `start_layer`, through blorp_hiz_op.
// Sequence visible below (order matters):
//   1. pipe control: DEPTH_CACHE_FLUSH | CS_STALL
//   2. pipe control: DEPTH_STALL (kept separate from the flush, see comment)
//   3. possible batch flush, then start of a batch sync region
//   4. blorp batch running the HiZ op
//   5. pipe control: DEPTH_CACHE_FLUSH | DEPTH_STALL, end of sync region
// update_clear_depth == false adds BLORP_BATCH_NO_UPDATE_CLEAR_COLOR to the
// blorp batch flags.  `name` is only consumed by the commented-out DBG call.
// Preconditions (asserted): the level has HiZ, op is not ISL_AUX_OP_NONE, and
// the resource has a HiZ aux usage with an aux BO.
// NOTE(review): the switch header and the break statements between the cases
// are not visible in this listing.
513 iris_hiz_exec(struct iris_context
*ice
,
514 struct iris_batch
*batch
,
515 struct iris_resource
*res
,
516 unsigned int level
, unsigned int start_layer
,
517 unsigned int num_layers
, enum isl_aux_op op
,
518 bool update_clear_depth
)
520 assert(iris_resource_level_has_hiz(res
, level
));
521 assert(op
!= ISL_AUX_OP_NONE
);
522 UNUSED
const char *name
= NULL
;
525 case ISL_AUX_OP_FULL_RESOLVE
:
526 name
= "depth resolve";
528 case ISL_AUX_OP_AMBIGUATE
:
529 name
= "hiz ambiguate";
531 case ISL_AUX_OP_FAST_CLEAR
:
532 name
= "depth clear";
534 case ISL_AUX_OP_PARTIAL_RESOLVE
:
535 case ISL_AUX_OP_NONE
:
536 unreachable("Invalid HiZ op");
539 //DBG("%s %s to mt %p level %d layers %d-%d\n",
540 //__func__, name, mt, level, start_layer, start_layer + num_layers - 1);
542 /* The following stalls and flushes are only documented to be required
543 * for HiZ clear operations. However, they also seem to be required for
544 * resolve operations.
546 * From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
548 * "If other rendering operations have preceded this clear, a
549 * PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
550 * enabled must be issued before the rectangle primitive used for
551 * the depth buffer clear operation."
553 * Same applies for Gen8 and Gen9.
555 * In addition, from the Ivybridge PRM, volume 2, 1.10.4.1
556 * PIPE_CONTROL, Depth Cache Flush Enable:
558 * "This bit must not be set when Depth Stall Enable bit is set in
561 * This is confirmed to hold for real, Haswell gets immediate gpu hangs.
563 * Therefore issue two pipe control flushes, one for cache flush and
564 * another for depth stall.
566 iris_emit_pipe_control_flush(batch
,
567 "hiz op: pre-flushes (1/2)",
568 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
569 PIPE_CONTROL_CS_STALL
);
571 iris_emit_pipe_control_flush(batch
, "hiz op: pre-flushes (2/2)",
572 PIPE_CONTROL_DEPTH_STALL
);
574 assert(isl_aux_usage_has_hiz(res
->aux
.usage
) && res
->aux
.bo
);
576 iris_batch_maybe_flush(batch
, 1500);
578 iris_batch_sync_region_start(batch
);
580 struct blorp_surf surf
;
581 iris_blorp_surf_for_resource(&batch
->screen
->isl_dev
, &surf
,
582 &res
->base
, res
->aux
.usage
, level
, true);
584 struct blorp_batch blorp_batch
;
585 enum blorp_batch_flags flags
= 0;
586 flags
|= update_clear_depth
? 0 : BLORP_BATCH_NO_UPDATE_CLEAR_COLOR
;
587 blorp_batch_init(&ice
->blorp
, &blorp_batch
, batch
, flags
);
588 blorp_hiz_op(&blorp_batch
, &surf
, level
, start_layer
, num_layers
, op
);
589 blorp_batch_finish(&blorp_batch
);
591 /* The following stalls and flushes are only documented to be required
592 * for HiZ clear operations. However, they also seem to be required for
593 * resolve operations.
595 * From the Broadwell PRM, volume 7, "Depth Buffer Clear":
597 * "Depth buffer clear pass using any of the methods (WM_STATE,
598 * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
599 * PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
600 * "set" before starting to render. DepthStall and DepthFlush are
601 * not needed between consecutive depth clear passes nor is it
602 * required if the depth clear pass was done with
603 * 'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP."
605 * TODO: Such as the spec says, this could be conditional.
607 iris_emit_pipe_control_flush(batch
,
608 "hiz op: post flush",
609 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
610 PIPE_CONTROL_DEPTH_STALL
);
612 iris_batch_sync_region_end(batch
);
// level_has_aux: report whether `level` of `res` has auxiliary data.
// For HiZ-style aux usages the answer is per level (iris_resource_level_has_hiz);
// for everything else it is simply whether the resource has any aux usage.
616 level_has_aux(const struct iris_resource
*res
, uint32_t level
)
618 return isl_aux_usage_has_hiz(res
->aux
.usage
) ?
619 iris_resource_level_has_hiz(res
, level
) :
620 res
->aux
.usage
!= ISL_AUX_USAGE_NONE
;
624 * Does the resource's slice have hiz enabled?
// iris_resource_level_has_hiz: test bit `level` of the res->aux.has_hiz
// bitmask after validating the level (layer 0 is used for the validity check).
// Operator precedence: `<<` binds tighter than `&`, so this is
// has_hiz & (1 << level).
// assumes level is smaller than the bit width of int - TODO confirm
627 iris_resource_level_has_hiz(const struct iris_resource
*res
, uint32_t level
)
629 iris_resource_check_level_layer(res
, level
, 0);
630 return res
->aux
.has_hiz
& 1 << level
;
633 /** \brief Assert that the level and layer are valid for the resource. */
// iris_resource_check_level_layer: assert-only validation that `level` is
// below res->surf.levels and `layer` is below util_num_layers() for that level.
// The parameters are marked UNUSED because they are only referenced inside
// assert(), which presumably compiles away in release builds.
635 iris_resource_check_level_layer(UNUSED
const struct iris_resource
*res
,
636 UNUSED
uint32_t level
, UNUSED
uint32_t layer
)
638 assert(level
< res
->surf
.levels
);
639 assert(layer
< util_num_layers(&res
->base
, level
));
642 static inline uint32_t
643 miptree_level_range_length(const struct iris_resource
*res
,
644 uint32_t start_level
, uint32_t num_levels
)
646 assert(start_level
< res
->surf
.levels
);
648 if (num_levels
== INTEL_REMAINING_LAYERS
)
649 num_levels
= res
->surf
.levels
;
651 /* Check for overflow */
652 assert(start_level
+ num_levels
>= start_level
);
653 assert(start_level
+ num_levels
<= res
->surf
.levels
);
658 static inline uint32_t
659 miptree_layer_range_length(const struct iris_resource
*res
, uint32_t level
,
660 uint32_t start_layer
, uint32_t num_layers
)
662 assert(level
<= res
->base
.last_level
);
664 const uint32_t total_num_layers
= iris_get_num_logical_layers(res
, level
);
665 assert(start_layer
< total_num_layers
);
666 if (num_layers
== INTEL_REMAINING_LAYERS
)
667 num_layers
= total_num_layers
- start_layer
;
668 /* Check for overflow */
669 assert(start_layer
+ num_layers
>= start_layer
);
670 assert(start_layer
+ num_layers
<= total_num_layers
);
// iris_has_color_unresolved: scan the given level/layer range of `res` and
// test each slice's tracked aux state against ISL_AUX_STATE_PASS_THROUGH.
// Judging by the name and by the caller in iris_resource_texture_aux_usage,
// a slice in any other state makes the function report "unresolved".
// num_levels / num_layers may be the INTEL_REMAINING_* sentinels; both ranges
// are clamped with the miptree_*_range_length helpers.
// NOTE(review): the return statements and any early-out before the clamp are
// not visible in this listing.
676 iris_has_color_unresolved(const struct iris_resource
*res
,
677 unsigned start_level
, unsigned num_levels
,
678 unsigned start_layer
, unsigned num_layers
)
683 /* Clamp the level range to fit the resource */
684 num_levels
= miptree_level_range_length(res
, start_level
, num_levels
);
686 for (uint32_t l
= 0; l
< num_levels
; l
++) {
687 const uint32_t level
= start_level
+ l
;
688 const uint32_t level_layers
=
689 miptree_layer_range_length(res
, level
, start_layer
, num_layers
);
690 for (unsigned a
= 0; a
< level_layers
; a
++) {
691 enum isl_aux_state aux_state
=
692 iris_resource_get_aux_state(res
, level
, start_layer
+ a
);
693 assert(aux_state
!= ISL_AUX_STATE_AUX_INVALID
);
694 if (aux_state
!= ISL_AUX_STATE_PASS_THROUGH
)
// iris_resource_prepare_access: make the given level/layer range of `res`
// safe to access with `aux_usage`.
// For each layer of each level that has aux data, isl_aux_prepare_access
// decides which aux operation (if any) is required from the current state;
// the operation is dispatched by the resource's own aux usage:
//   MCS -> iris_mcs_partial_resolve (asserted to be a partial resolve)
//   HiZ -> iris_hiz_exec (without updating the clear depth)
//   CCS -> iris_resolve_color
// and the state returned by isl_aux_state_transition_aux_op is stored back.
// All work is recorded on the render batch, as the first comment explains.
// NOTE(review): the statement taken when a level has no aux (presumably
// `continue`) and the final else/brace structure are not visible here.
703 iris_resource_prepare_access(struct iris_context
*ice
,
704 struct iris_resource
*res
,
705 uint32_t start_level
, uint32_t num_levels
,
706 uint32_t start_layer
, uint32_t num_layers
,
707 enum isl_aux_usage aux_usage
,
708 bool fast_clear_supported
)
710 /* We can't do resolves on the compute engine, so awkwardly, we have to
711 * do them on the render batch...
713 struct iris_batch
*batch
= &ice
->batches
[IRIS_BATCH_RENDER
];
715 const uint32_t clamped_levels
=
716 miptree_level_range_length(res
, start_level
, num_levels
);
717 for (uint32_t l
= 0; l
< clamped_levels
; l
++) {
718 const uint32_t level
= start_level
+ l
;
719 if (!level_has_aux(res
, level
))
722 const uint32_t level_layers
=
723 miptree_layer_range_length(res
, level
, start_layer
, num_layers
);
724 for (uint32_t a
= 0; a
< level_layers
; a
++) {
725 const uint32_t layer
= start_layer
+ a
;
726 const enum isl_aux_state aux_state
=
727 iris_resource_get_aux_state(res
, level
, layer
);
728 const enum isl_aux_op aux_op
=
729 isl_aux_prepare_access(aux_state
, aux_usage
, fast_clear_supported
);
731 if (aux_op
== ISL_AUX_OP_NONE
) {
732 /* Nothing to do here. */
733 } else if (isl_aux_usage_has_mcs(res
->aux
.usage
)) {
734 assert(aux_op
== ISL_AUX_OP_PARTIAL_RESOLVE
);
735 iris_mcs_partial_resolve(ice
, batch
, res
, layer
, 1);
736 } else if (isl_aux_usage_has_hiz(res
->aux
.usage
)) {
737 iris_hiz_exec(ice
, batch
, res
, level
, layer
, 1, aux_op
, false);
739 assert(isl_aux_usage_has_ccs(res
->aux
.usage
));
740 iris_resolve_color(ice
, batch
, res
, level
, layer
, aux_op
);
// The transition is computed with the resource's aux usage, not the
// access's aux_usage.
743 const enum isl_aux_state new_state
=
744 isl_aux_state_transition_aux_op(aux_state
, res
->aux
.usage
, aux_op
);
745 iris_resource_set_aux_state(ice
, res
, level
, layer
, 1, new_state
);
// iris_resource_finish_write: after a write performed with `aux_usage`, move
// each layer in the range to the aux state returned by
// isl_aux_state_transition_write (third argument false).
// num_layers may be INTEL_REMAINING_LAYERS; the range is clamped first.
// NOTE(review): the statement executed when the level has no aux
// (presumably an early return) is not visible in this listing.
751 iris_resource_finish_write(struct iris_context
*ice
,
752 struct iris_resource
*res
, uint32_t level
,
753 uint32_t start_layer
, uint32_t num_layers
,
754 enum isl_aux_usage aux_usage
)
756 if (!level_has_aux(res
, level
))
759 const uint32_t level_layers
=
760 miptree_layer_range_length(res
, level
, start_layer
, num_layers
);
762 for (uint32_t a
= 0; a
< level_layers
; a
++) {
763 const uint32_t layer
= start_layer
+ a
;
764 const enum isl_aux_state aux_state
=
765 iris_resource_get_aux_state(res
, level
, layer
);
766 const enum isl_aux_state new_aux_state
=
767 isl_aux_state_transition_write(aux_state
, aux_usage
, false);
768 iris_resource_set_aux_state(ice
, res
, level
, layer
, 1, new_aux_state
);
// iris_resource_get_aux_state: return the tracked aux state stored at
// res->aux.state[level][layer], after asserting that level/layer are valid.
// Additional sanity asserts: depth surfaces must have HiZ on this level; the
// second assert requires a single-sampled surface or an array MSAA layout.
// NOTE(review): the branch structure between the two asserts (presumably an
// else) is not visible in this listing.
773 iris_resource_get_aux_state(const struct iris_resource
*res
,
774 uint32_t level
, uint32_t layer
)
776 iris_resource_check_level_layer(res
, level
, layer
);
778 if (res
->surf
.usage
& ISL_SURF_USAGE_DEPTH_BIT
) {
779 assert(iris_resource_level_has_hiz(res
, level
));
781 assert(res
->surf
.samples
== 1 ||
782 res
->surf
.msaa_layout
== ISL_MSAA_LAYOUT_ARRAY
);
785 return res
->aux
.state
[level
][layer
];
// iris_resource_set_aux_state: store `aux_state` for `num_layers` layers of
// `level`, starting at `start_layer` (num_layers may be
// INTEL_REMAINING_LAYERS; it is clamped first).
// Whenever a stored state actually changes, the render buffer and every
// stage's bindings are flagged dirty.
// The sanity asserts mirror the ones in iris_resource_get_aux_state.
// NOTE(review): the branch structure between the two asserts is not visible
// in this listing.
789 iris_resource_set_aux_state(struct iris_context
*ice
,
790 struct iris_resource
*res
, uint32_t level
,
791 uint32_t start_layer
, uint32_t num_layers
,
792 enum isl_aux_state aux_state
)
794 num_layers
= miptree_layer_range_length(res
, level
, start_layer
, num_layers
);
796 if (res
->surf
.usage
& ISL_SURF_USAGE_DEPTH_BIT
) {
797 assert(iris_resource_level_has_hiz(res
, level
));
799 assert(res
->surf
.samples
== 1 ||
800 res
->surf
.msaa_layout
== ISL_MSAA_LAYOUT_ARRAY
);
803 for (unsigned a
= 0; a
< num_layers
; a
++) {
// Only touch the dirty flags on a real change, to avoid needless re-emission.
804 if (res
->aux
.state
[level
][start_layer
+ a
] != aux_state
) {
805 res
->aux
.state
[level
][start_layer
+ a
] = aux_state
;
806 /* XXX: Need to track which bindings to make dirty */
807 ice
->state
.dirty
|= IRIS_DIRTY_RENDER_BUFFER
;
808 ice
->state
.stage_dirty
|= IRIS_ALL_STAGE_DIRTY_BINDINGS
;
// iris_resource_texture_aux_usage: choose the aux usage to use when sampling
// from `res` through a view of format `view_format`.  Visible decisions:
//   HIZ / HIZ_CCS_WT      -> kept when iris_sample_with_depth_aux() allows it
//   HIZ_CCS               -> ISL_AUX_USAGE_NONE (asserted not sampleable)
//   MCS / MCS_CCS / STC_CCS -> the resource's own aux usage
//   CCS_E / GEN12_CCS_E   -> NONE when nothing in the resource is unresolved;
//                            otherwise the resource's aux usage when the
//                            formats are CCS_E compatible
// and ISL_AUX_USAGE_NONE as the final fallback.
// NOTE(review): the break/fallthrough statements between cases, the second
// format argument of isl_formats_are_ccs_e_compatible and the default label
// are not visible in this listing.
814 iris_resource_texture_aux_usage(struct iris_context
*ice
,
815 const struct iris_resource
*res
,
816 enum isl_format view_format
)
818 struct iris_screen
*screen
= (void *) ice
->ctx
.screen
;
819 struct gen_device_info
*devinfo
= &screen
->devinfo
;
821 switch (res
->aux
.usage
) {
822 case ISL_AUX_USAGE_HIZ
:
823 if (iris_sample_with_depth_aux(devinfo
, res
))
824 return ISL_AUX_USAGE_HIZ
;
827 case ISL_AUX_USAGE_HIZ_CCS
:
828 assert(!iris_sample_with_depth_aux(devinfo
, res
));
829 return ISL_AUX_USAGE_NONE
;
831 case ISL_AUX_USAGE_HIZ_CCS_WT
:
832 if (iris_sample_with_depth_aux(devinfo
, res
))
833 return ISL_AUX_USAGE_HIZ_CCS_WT
;
836 case ISL_AUX_USAGE_MCS
:
837 case ISL_AUX_USAGE_MCS_CCS
:
838 case ISL_AUX_USAGE_STC_CCS
:
839 return res
->aux
.usage
;
841 case ISL_AUX_USAGE_CCS_E
:
842 case ISL_AUX_USAGE_GEN12_CCS_E
:
843 /* If we don't have any unresolved color, report an aux usage of
844 * ISL_AUX_USAGE_NONE. This way, texturing won't even look at the
845 * aux surface and we can save some bandwidth.
847 if (!iris_has_color_unresolved(res
, 0, INTEL_REMAINING_LEVELS
,
848 0, INTEL_REMAINING_LAYERS
))
849 return ISL_AUX_USAGE_NONE
;
851 /* On Gen9 color buffers may be compressed by the hardware (lossless
852 * compression). There are, however, format restrictions and care needs
853 * to be taken that the sampler engine is capable for re-interpreting a
854 * buffer with format different the buffer was originally written with.
856 * For example, SRGB formats are not compressible and the sampler engine
857 * isn't capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case
858 * the underlying color buffer needs to be resolved so that the sampling
859 * surface can be sampled as non-compressed (i.e., without the auxiliary
860 * MCS buffer being set).
862 if (isl_formats_are_ccs_e_compatible(devinfo
, res
->surf
.format
,
864 return res
->aux
.usage
;
871 return ISL_AUX_USAGE_NONE
;
// iris_image_view_aux_usage: choose the aux usage for accessing an image view
// from a shader.  The only non-NONE result is ISL_AUX_USAGE_GEN12_CCS_E, and
// only when the texture aux usage for the view's format is GEN12_CCS_E and
// the stage's uncompiled shader does not use atomic load/store.
// NOTE(review): the condition guarding the first early return is not visible
// in this listing (presumably a NULL check on `info`, which is dereferenced
// further down) - confirm.
875 iris_image_view_aux_usage(struct iris_context
*ice
,
876 const struct pipe_image_view
*pview
,
877 const struct shader_info
*info
)
880 return ISL_AUX_USAGE_NONE
;
882 struct iris_resource
*res
= (void *) pview
->resource
;
884 enum isl_format view_format
= iris_image_view_get_format(ice
, pview
);
885 enum isl_aux_usage aux_usage
=
886 iris_resource_texture_aux_usage(ice
, res
, view_format
);
888 bool uses_atomic_load_store
=
889 ice
->shaders
.uncompiled
[info
->stage
]->uses_atomic_load_store
;
891 if (aux_usage
== ISL_AUX_USAGE_GEN12_CCS_E
&& !uses_atomic_load_store
)
892 return ISL_AUX_USAGE_GEN12_CCS_E
;
894 return ISL_AUX_USAGE_NONE
;
// isl_formats_are_fast_clear_compatible: two formats are treated as
// compatible for fast-clear purposes when they are identical after mapping
// each through isl_format_srgb_to_linear(), i.e. they differ at most in
// sRGB-ness.  The comment below gives the hardware rationale.
898 isl_formats_are_fast_clear_compatible(enum isl_format a
, enum isl_format b
)
900 /* On gen8 and earlier, the hardware was only capable of handling 0/1 clear
901 * values so sRGB curve application was a no-op for all fast-clearable
904 * On gen9+, the hardware supports arbitrary clear values. For sRGB clear
905 * values, the hardware interprets the floats, not as what would be
906 * returned from the sampler (or written by the shader), but as being
907 * between format conversion and sRGB curve application. This means that
908 * we can switch between sRGB and UNORM without having to whack the clear
911 return isl_format_srgb_to_linear(a
) == isl_format_srgb_to_linear(b
);
// iris_resource_prepare_texture: prepare a level/layer range of `res` for
// sampling through a view of format `view_format`.
// The aux usage comes from iris_resource_texture_aux_usage(); fast clears are
// allowed only if that aux usage supports them and the view format is
// fast-clear compatible with the resource's own format.  The range is then
// handed to iris_resource_prepare_access().
915 iris_resource_prepare_texture(struct iris_context
*ice
,
916 struct iris_resource
*res
,
917 enum isl_format view_format
,
918 uint32_t start_level
, uint32_t num_levels
,
919 uint32_t start_layer
, uint32_t num_layers
)
921 enum isl_aux_usage aux_usage
=
922 iris_resource_texture_aux_usage(ice
, res
, view_format
);
924 bool clear_supported
= isl_aux_usage_has_fast_clears(aux_usage
);
926 /* Clear color is specified as ints or floats and the conversion is done by
927 * the sampler. If we have a texture view, we would have to perform the
928 * clear color conversion manually. Just disable clear color.
930 if (!isl_formats_are_fast_clear_compatible(res
->surf
.format
, view_format
))
931 clear_supported
= false;
933 iris_resource_prepare_access(ice
, res
, start_level
, num_levels
,
934 start_layer
, num_layers
,
935 aux_usage
, clear_supported
);
938 /* Whether or not rendering a color value with either format results in the
939 * same pixel. This can return false negatives.
// iris_render_formats_color_compatible: decide whether rendering `color` with
// format `a` or format `b` yields the same pixel.  The visible test accepts
// formats that differ only in sRGB-ness when the color is made of 0/1 values
// for format `a`.
// NOTE(review): the return statements and any check preceding the visible
// condition are not visible in this listing.
942 iris_render_formats_color_compatible(enum isl_format a
, enum isl_format b
,
943 union isl_color_value color
)
948 /* A difference in color space doesn't matter for 0/1 values. */
949 if (isl_format_srgb_to_linear(a
) == isl_format_srgb_to_linear(b
) &&
950 isl_color_value_is_zero_one(color
, a
)) {
// iris_resource_render_aux_usage: choose the aux usage for rendering to `res`
// with `render_format`.  Visible decisions:
//   draw_aux_disabled        -> ISL_AUX_USAGE_NONE
//   MCS / MCS_CCS            -> the resource's own aux usage
//   CCS_D / CCS_E / GEN12_CCS_E:
//     clear color not interpretable identically in both formats -> NONE
//     CCS_D                                                     -> CCS_D
//     formats CCS_E compatible                          -> resource aux usage
// and ISL_AUX_USAGE_NONE as the final fallback.
// NOTE(review): the middle argument of iris_render_formats_color_compatible,
// the second format argument of isl_formats_are_ccs_e_compatible and the
// default label are not visible in this listing.
958 iris_resource_render_aux_usage(struct iris_context
*ice
,
959 struct iris_resource
*res
,
960 enum isl_format render_format
,
961 bool draw_aux_disabled
)
963 struct iris_screen
*screen
= (void *) ice
->ctx
.screen
;
964 struct gen_device_info
*devinfo
= &screen
->devinfo
;
966 if (draw_aux_disabled
)
967 return ISL_AUX_USAGE_NONE
;
969 switch (res
->aux
.usage
) {
970 case ISL_AUX_USAGE_MCS
:
971 case ISL_AUX_USAGE_MCS_CCS
:
972 return res
->aux
.usage
;
974 case ISL_AUX_USAGE_CCS_D
:
975 case ISL_AUX_USAGE_CCS_E
:
976 case ISL_AUX_USAGE_GEN12_CCS_E
:
977 /* Disable CCS for some cases of texture-view rendering. On gen12, HW
978 * may convert some subregions of shader output to fast-cleared blocks
979 * if CCS is enabled and the shader output matches the clear color.
980 * Existing fast-cleared blocks are correctly interpreted by the clear
981 * color and the resource format (see can_fast_clear_color). To avoid
982 * gaining new fast-cleared blocks that can't be interpreted by the
983 * resource format (and to avoid misinterpreting existing ones), shut
984 * off CCS when the interpretation of the clear color differs between
985 * the render_format and the resource format.
987 if (!iris_render_formats_color_compatible(render_format
,
989 res
->aux
.clear_color
)) {
990 return ISL_AUX_USAGE_NONE
;
993 if (res
->aux
.usage
== ISL_AUX_USAGE_CCS_D
)
994 return ISL_AUX_USAGE_CCS_D
;
996 if (isl_formats_are_ccs_e_compatible(devinfo
, res
->surf
.format
,
998 return res
->aux
.usage
;
1003 return ISL_AUX_USAGE_NONE
;
// iris_resource_prepare_render: prepare one mip level and a layer range of
// `res` for rendering with `aux_usage`.  Thin wrapper over
// iris_resource_prepare_access() that allows fast clears exactly when the
// aux usage supports them.
// `batch` is not referenced by the visible body.
1008 iris_resource_prepare_render(struct iris_context
*ice
,
1009 struct iris_batch
*batch
,
1010 struct iris_resource
*res
, uint32_t level
,
1011 uint32_t start_layer
, uint32_t layer_count
,
1012 enum isl_aux_usage aux_usage
)
1014 iris_resource_prepare_access(ice
, res
, level
, 1, start_layer
,
1015 layer_count
, aux_usage
,
1016 isl_aux_usage_has_fast_clears(aux_usage
));
// iris_resource_finish_render: record that a render to the given level and
// layer range has completed by forwarding to iris_resource_finish_write().
// NOTE(review): the final argument of the forwarded call (presumably
// aux_usage) is not visible in this listing.
1020 iris_resource_finish_render(struct iris_context
*ice
,
1021 struct iris_resource
*res
, uint32_t level
,
1022 uint32_t start_layer
, uint32_t layer_count
,
1023 enum isl_aux_usage aux_usage
)
1025 iris_resource_finish_write(ice
, res
, level
, start_layer
, layer_count
,
// iris_resource_prepare_depth: prepare one mip level and a layer range of a
// depth resource for depth access.  Forwards to
// iris_resource_prepare_access() with the resource's own aux usage; fast
// clears are reported as supported exactly when the resource has an aux BO.
// `batch` is not referenced by the visible body.
1030 iris_resource_prepare_depth(struct iris_context
*ice
,
1031 struct iris_batch
*batch
,
1032 struct iris_resource
*res
, uint32_t level
,
1033 uint32_t start_layer
, uint32_t layer_count
)
1035 iris_resource_prepare_access(ice
, res
, level
, 1, start_layer
,
1036 layer_count
, res
->aux
.usage
, !!res
->aux
.bo
);
1040 iris_resource_finish_depth(struct iris_context
*ice
,
1041 struct iris_resource
*res
, uint32_t level
,
1042 uint32_t start_layer
, uint32_t layer_count
,
1045 if (depth_written
) {
1046 iris_resource_finish_write(ice
, res
, level
, start_layer
, layer_count
,