iris: Initial import of resolve code
[mesa.git] / src / gallium / drivers / iris / iris_resolve.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_resolve.c
25 *
26 * This file handles resolve tracking for main and auxiliary surfaces.
27 *
28 * It also handles our cache tracking. We have sets for the render cache,
29 * depth cache, and so on. If a BO is in a cache's set, then it may have
30 * data in that cache. The helpers take care of emitting flushes for
31 * render-to-texture, format reinterpretation issues, and other situations.
32 */
33
34 #include "util/hash_table.h"
35 #include "util/set.h"
36 #include "iris_context.h"
37
38 static void
39 resolve_sampler_views(struct iris_batch *batch,
40 struct iris_shader_state *shs)
41 {
42 uint32_t views = shs->bound_sampler_views;
43
44 while (views) {
45 const int i = u_bit_scan(&views);
46 struct iris_sampler_view *isv = shs->textures[i];
47 struct iris_resource *res = (void *) isv->base.texture;
48
49 // XXX: aux tracking
50 iris_cache_flush_for_read(batch, res->bo);
51 }
52 }
53
54 static void
55 resolve_image_views(struct iris_batch *batch,
56 struct iris_shader_state *shs)
57 {
58 uint32_t views = shs->bound_image_views;
59
60 while (views) {
61 const int i = u_bit_scan(&views);
62 struct pipe_resource *res = shs->image[i].res;
63
64 // XXX: aux tracking
65 iris_cache_flush_for_read(batch, iris_resource_bo(res));
66 }
67 }
68
69
/**
 * \brief Prepare shader inputs before drawing.
 *
 * Runs the sampler-view pass followed by the image-view pass for the given
 * shader state.  Both passes currently only emit cache flushes for BOs that
 * are tracked in the batch's cache sets; aux resolves are still marked XXX
 * in the helpers.
 */
void
iris_predraw_resolve_inputs(struct iris_batch *batch,
                            struct iris_shader_state *shs)
{
   /* Sampler views first, then image views. */
   resolve_sampler_views(batch, shs);
   resolve_image_views(batch, shs);
}
83
84 void
85 iris_predraw_resolve_framebuffer(struct iris_context *ice,
86 struct iris_batch *batch)
87 {
88 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
89 struct pipe_surface *zs_surf = cso_fb->zsbuf;
90
91 if (zs_surf) {
92 // XXX: HiZ resolves
93 }
94
95 for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
96 struct iris_surface *surf = (void *) cso_fb->cbufs[i];
97 if (!surf)
98 continue;
99
100 struct iris_resource *res = (void *) surf->base.texture;
101
102 // XXX: aux tracking
103
104 iris_cache_flush_for_render(batch, res->bo, surf->view.format,
105 ISL_AUX_USAGE_NONE);
106 }
107 }
108
109 /**
110 * \brief Call this after drawing to mark which buffers need resolving
111 *
112 * If the depth buffer was written to and if it has an accompanying HiZ
113 * buffer, then mark that it needs a depth resolve.
114 *
115 * If the color buffer is a multisample window system buffer, then
116 * mark that it needs a downsample.
117 *
118 * Also mark any render targets which will be textured as needing a render
119 * cache flush.
120 */
121 void
122 iris_postdraw_update_resolve_tracking(struct iris_context *ice,
123 struct iris_batch *batch)
124 {
125 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
126 struct pipe_surface *zs_surf = cso_fb->zsbuf;
127
128 // XXX: front buffer drawing?
129
130 if (zs_surf) {
131 struct iris_resource *z_res, *s_res;
132 iris_get_depth_stencil_resources(zs_surf->texture, &z_res, &s_res);
133
134 if (z_res) {
135 // XXX: aux tracking
136
137 if (ice->state.depth_writes_enabled)
138 iris_depth_cache_add_bo(batch, z_res->bo);
139 }
140
141 if (s_res) {
142 // XXX: aux tracking
143
144 if (ice->state.stencil_writes_enabled)
145 iris_depth_cache_add_bo(batch, s_res->bo);
146 }
147 }
148
149 for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
150 struct iris_surface *surf = (void *) cso_fb->cbufs[i];
151 if (!surf)
152 continue;
153
154 struct iris_resource *res = (void *) surf->base.texture;
155
156 // XXX: aux tracking
157 iris_render_cache_add_bo(batch, res->bo, surf->view.format,
158 ISL_AUX_USAGE_NONE);
159 }
160 }
161
162 /**
163 * Clear the cache-tracking sets.
164 */
165 void
166 iris_cache_sets_clear(struct iris_batch *batch)
167 {
168 hash_table_foreach(batch->cache.render, render_entry)
169 _mesa_hash_table_remove(batch->cache.render, render_entry);
170
171 set_foreach(batch->cache.depth, depth_entry)
172 _mesa_set_remove(batch->cache.depth, depth_entry);
173 }
174
175 /**
176 * Emits an appropriate flush for a BO if it has been rendered to within the
177 * same batchbuffer as a read that's about to be emitted.
178 *
179 * The GPU has separate, incoherent caches for the render cache and the
180 * sampler cache, along with other caches. Usually data in the different
181 * caches don't interact (e.g. we don't render to our driver-generated
182 * immediate constant data), but for render-to-texture in FBOs we definitely
183 * do. When a batchbuffer is flushed, the kernel will ensure that everything
184 * necessary is flushed before another use of that BO, but for reuse from
185 * different caches within a batchbuffer, it's all our responsibility.
186 */
187 void
188 iris_flush_depth_and_render_caches(struct iris_batch *batch)
189 {
190 iris_emit_pipe_control_flush(batch,
191 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
192 PIPE_CONTROL_RENDER_TARGET_FLUSH |
193 PIPE_CONTROL_CS_STALL);
194
195 iris_emit_pipe_control_flush(batch,
196 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
197 PIPE_CONTROL_CONST_CACHE_INVALIDATE);
198
199 iris_cache_sets_clear(batch);
200 }
201
202 void
203 iris_cache_flush_for_read(struct iris_batch *batch,
204 struct iris_bo *bo)
205 {
206 if (_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo) ||
207 _mesa_set_search_pre_hashed(batch->cache.depth, bo->hash, bo))
208 iris_flush_depth_and_render_caches(batch);
209 }
210
211 static void *
212 format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage)
213 {
214 return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage);
215 }
216
217 void
218 iris_cache_flush_for_render(struct iris_batch *batch,
219 struct iris_bo *bo,
220 enum isl_format format,
221 enum isl_aux_usage aux_usage)
222 {
223 if (_mesa_set_search_pre_hashed(batch->cache.depth, bo->hash, bo))
224 iris_flush_depth_and_render_caches(batch);
225
226 /* Check to see if this bo has been used by a previous rendering operation
227 * but with a different format or aux usage. If it has, flush the render
228 * cache so we ensure that it's only in there with one format or aux usage
229 * at a time.
230 *
231 * Even though it's not obvious, this can easily happen in practice.
232 * Suppose a client is blending on a surface with sRGB encode enabled on
233 * gen9. This implies that you get AUX_USAGE_CCS_D at best. If the client
234 * then disables sRGB decode and continues blending we will flip on
235 * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is
236 * perfectly valid since CCS_E is a subset of CCS_D). However, this means
237 * that we have fragments in-flight which are rendering with UNORM+CCS_E
238 * and other fragments in-flight with SRGB+CCS_D on the same surface at the
239 * same time and the pixel scoreboard and color blender are trying to sort
240 * it all out. This ends badly (i.e. GPU hangs).
241 *
242 * To date, we have never observed GPU hangs or even corruption to be
243 * associated with switching the format, only the aux usage. However,
244 * there are comments in various docs which indicate that the render cache
245 * isn't 100% resilient to format changes. We may as well be conservative
246 * and flush on format changes too. We can always relax this later if we
247 * find it to be a performance problem.
248 */
249 struct hash_entry *entry =
250 _mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo);
251 if (entry && entry->data != format_aux_tuple(format, aux_usage))
252 iris_flush_depth_and_render_caches(batch);
253 }
254
255 void
256 iris_render_cache_add_bo(struct iris_batch *batch,
257 struct iris_bo *bo,
258 enum isl_format format,
259 enum isl_aux_usage aux_usage)
260 {
261 #ifndef NDEBUG
262 struct hash_entry *entry =
263 _mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo);
264 if (entry) {
265 /* Otherwise, someone didn't do a flush_for_render and that would be
266 * very bad indeed.
267 */
268 assert(entry->data == format_aux_tuple(format, aux_usage));
269 }
270 #endif
271
272 _mesa_hash_table_insert_pre_hashed(batch->cache.render, bo->hash, bo,
273 format_aux_tuple(format, aux_usage));
274 }
275
276 void
277 iris_cache_flush_for_depth(struct iris_batch *batch,
278 struct iris_bo *bo)
279 {
280 if (_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo))
281 iris_flush_depth_and_render_caches(batch);
282 }
283
284 void
285 iris_depth_cache_add_bo(struct iris_batch *batch, struct iris_bo *bo)
286 {
287 _mesa_set_add_pre_hashed(batch->cache.depth, bo->hash, bo);
288 }
289
290 /**
291 * Return true if the format that will be used to access the resource is
292 * CCS_E-compatible with the resource's linear/non-sRGB format.
293 *
294 * Why use the linear format? Well, although the resourcemay be specified
295 * with an sRGB format, the usage of that color space/format can be toggled.
296 * Since our HW tends to support more linear formats than sRGB ones, we use
297 * this format variant for check for CCS_E compatibility.
298 */
299 static bool
300 format_ccs_e_compat_with_resource(const struct gen_device_info *devinfo,
301 const struct iris_resource *res,
302 enum isl_format access_format)
303 {
304 assert(res->aux.usage == ISL_AUX_USAGE_CCS_E);
305
306 enum isl_format isl_format = isl_format_srgb_to_linear(res->surf.format);
307 return isl_formats_are_ccs_e_compatible(devinfo, isl_format, access_format);
308 }
309
310 static bool
311 sample_with_hiz(const struct gen_device_info *devinfo,
312 const struct iris_resource *res)
313 {
314 if (!devinfo->has_sample_with_hiz)
315 return false;
316
317 if (res->aux.usage != ISL_AUX_USAGE_HIZ)
318 return false;
319
320 /* It seems the hardware won't fallback to the depth buffer if some of the
321 * mipmap levels aren't available in the HiZ buffer. So we need all levels
322 * of the texture to be HiZ enabled.
323 */
324 for (unsigned level = 0; level < res->surf.levels; ++level) {
325 if (!iris_resource_level_has_hiz(res, level))
326 return false;
327 }
328
329 /* If compressed multisampling is enabled, then we use it for the auxiliary
330 * buffer instead.
331 *
332 * From the BDW PRM (Volume 2d: Command Reference: Structures
333 * RENDER_SURFACE_STATE.AuxiliarySurfaceMode):
334 *
335 * "If this field is set to AUX_HIZ, Number of Multisamples must be
336 * MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D.
337 *
338 * There is no such blurb for 1D textures, but there is sufficient evidence
339 * that this is broken on SKL+.
340 */
341 // XXX: i965 disables this for arrays too, is that reasonable?
342 return res->surf.samples == 1 && res->surf.dim == ISL_SURF_DIM_2D;
343 }
344
/**
 * Does the given miplevel of the resource have HiZ enabled?
 *
 * The level is validated against the resource first.  Per-level HiZ
 * tracking is not wired up yet, so the answer is currently always false.
 */
bool
iris_resource_level_has_hiz(const struct iris_resource *res, uint32_t level)
{
   iris_resource_check_level_layer(res, level, 0);

   // XXX: intended to become res->level[level].has_hiz
   return false;
}
355
356 /** \brief Assert that the level and layer are valid for the resource. */
357 void
358 iris_resource_check_level_layer(UNUSED const struct iris_resource *res,
359 UNUSED uint32_t level, UNUSED uint32_t layer)
360 {
361 assert(level < res->surf.levels);
362 assert(layer < util_num_layers(&res->base, level));
363 }
364
365 static inline uint32_t
366 miptree_level_range_length(const struct iris_resource *res,
367 uint32_t start_level, uint32_t num_levels)
368 {
369 assert(start_level < res->surf.levels);
370
371 if (num_levels == INTEL_REMAINING_LAYERS)
372 num_levels = res->surf.levels;
373
374 /* Check for overflow */
375 assert(start_level + num_levels >= start_level);
376 assert(start_level + num_levels <= res->surf.levels);
377
378 return num_levels;
379 }
380
381 static inline uint32_t
382 miptree_layer_range_length(const struct iris_resource *res, uint32_t level,
383 uint32_t start_layer, uint32_t num_layers)
384 {
385 assert(level <= res->base.last_level);
386
387 const uint32_t total_num_layers = iris_get_num_logical_layers(res, level);
388 assert(start_layer < total_num_layers);
389 if (num_layers == INTEL_REMAINING_LAYERS)
390 num_layers = total_num_layers - start_layer;
391 /* Check for overflow */
392 assert(start_layer + num_layers >= start_layer);
393 assert(start_layer + num_layers <= total_num_layers);
394
395 return num_layers;
396 }
397
398 static bool
399 has_color_unresolved(const struct iris_resource *res,
400 unsigned start_level, unsigned num_levels,
401 unsigned start_layer, unsigned num_layers)
402 {
403 if (!res->aux.bo)
404 return false;
405
406 /* Clamp the level range to fit the resource */
407 num_levels = miptree_level_range_length(res, start_level, num_levels);
408
409 for (uint32_t l = 0; l < num_levels; l++) {
410 const uint32_t level = start_level + l;
411 const uint32_t level_layers =
412 miptree_layer_range_length(res, level, start_layer, num_layers);
413 for (unsigned a = 0; a < level_layers; a++) {
414 enum isl_aux_state aux_state =
415 iris_resource_get_aux_state(res, level, start_layer + a);
416 assert(aux_state != ISL_AUX_STATE_AUX_INVALID);
417 if (aux_state != ISL_AUX_STATE_PASS_THROUGH)
418 return true;
419 }
420 }
421
422 return false;
423 }
424
425 static enum isl_aux_op
426 get_ccs_d_resolve_op(enum isl_aux_state aux_state,
427 enum isl_aux_usage aux_usage,
428 bool fast_clear_supported)
429 {
430 assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_CCS_D);
431
432 const bool ccs_supported = aux_usage == ISL_AUX_USAGE_CCS_D;
433
434 assert(ccs_supported == fast_clear_supported);
435
436 switch (aux_state) {
437 case ISL_AUX_STATE_CLEAR:
438 case ISL_AUX_STATE_PARTIAL_CLEAR:
439 if (!ccs_supported)
440 return ISL_AUX_OP_FULL_RESOLVE;
441 else
442 return ISL_AUX_OP_NONE;
443
444 case ISL_AUX_STATE_PASS_THROUGH:
445 return ISL_AUX_OP_NONE;
446
447 case ISL_AUX_STATE_RESOLVED:
448 case ISL_AUX_STATE_AUX_INVALID:
449 case ISL_AUX_STATE_COMPRESSED_CLEAR:
450 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
451 break;
452 }
453
454 unreachable("Invalid aux state for CCS_D");
455 }
456
457 static enum isl_aux_op
458 get_ccs_e_resolve_op(enum isl_aux_state aux_state,
459 enum isl_aux_usage aux_usage,
460 bool fast_clear_supported)
461 {
462 /* CCS_E surfaces can be accessed as CCS_D if we're careful. */
463 assert(aux_usage == ISL_AUX_USAGE_NONE ||
464 aux_usage == ISL_AUX_USAGE_CCS_D ||
465 aux_usage == ISL_AUX_USAGE_CCS_E);
466
467 if (aux_usage == ISL_AUX_USAGE_CCS_D)
468 assert(fast_clear_supported);
469
470 switch (aux_state) {
471 case ISL_AUX_STATE_CLEAR:
472 case ISL_AUX_STATE_PARTIAL_CLEAR:
473 if (fast_clear_supported)
474 return ISL_AUX_OP_NONE;
475 else if (aux_usage == ISL_AUX_USAGE_CCS_E)
476 return ISL_AUX_OP_PARTIAL_RESOLVE;
477 else
478 return ISL_AUX_OP_FULL_RESOLVE;
479
480 case ISL_AUX_STATE_COMPRESSED_CLEAR:
481 if (aux_usage != ISL_AUX_USAGE_CCS_E)
482 return ISL_AUX_OP_FULL_RESOLVE;
483 else if (!fast_clear_supported)
484 return ISL_AUX_OP_PARTIAL_RESOLVE;
485 else
486 return ISL_AUX_OP_NONE;
487
488 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
489 if (aux_usage != ISL_AUX_USAGE_CCS_E)
490 return ISL_AUX_OP_FULL_RESOLVE;
491 else
492 return ISL_AUX_OP_NONE;
493
494 case ISL_AUX_STATE_PASS_THROUGH:
495 return ISL_AUX_OP_NONE;
496
497 case ISL_AUX_STATE_RESOLVED:
498 case ISL_AUX_STATE_AUX_INVALID:
499 break;
500 }
501
502 unreachable("Invalid aux state for CCS_E");
503 }
504
505 static void
506 iris_resource_prepare_ccs_access(struct iris_context *ice,
507 struct iris_resource *res,
508 uint32_t level, uint32_t layer,
509 enum isl_aux_usage aux_usage,
510 bool fast_clear_supported)
511 {
512 enum isl_aux_state aux_state = iris_resource_get_aux_state(res, level, layer);
513
514 enum isl_aux_op resolve_op;
515 if (res->aux.usage == ISL_AUX_USAGE_CCS_E) {
516 resolve_op = get_ccs_e_resolve_op(aux_state, aux_usage,
517 fast_clear_supported);
518 } else {
519 assert(res->aux.usage == ISL_AUX_USAGE_CCS_D);
520 resolve_op = get_ccs_d_resolve_op(aux_state, aux_usage,
521 fast_clear_supported);
522 }
523
524 if (resolve_op != ISL_AUX_OP_NONE) {
525 // XXX: iris_blorp_resolve_color(ice, res, level, layer, resolve_op);
526
527 switch (resolve_op) {
528 case ISL_AUX_OP_FULL_RESOLVE:
529 /* The CCS full resolve operation destroys the CCS and sets it to the
530 * pass-through state. (You can also think of this as being both a
531 * resolve and an ambiguate in one operation.)
532 */
533 iris_resource_set_aux_state(res, level, layer, 1,
534 ISL_AUX_STATE_PASS_THROUGH);
535 break;
536
537 case ISL_AUX_OP_PARTIAL_RESOLVE:
538 iris_resource_set_aux_state(res, level, layer, 1,
539 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
540 break;
541
542 default:
543 unreachable("Invalid resolve op");
544 }
545 }
546 }
547
548 static void
549 iris_resource_finish_ccs_write(struct iris_context *ice,
550 struct iris_resource *res,
551 uint32_t level, uint32_t layer,
552 enum isl_aux_usage aux_usage)
553 {
554 assert(aux_usage == ISL_AUX_USAGE_NONE ||
555 aux_usage == ISL_AUX_USAGE_CCS_D ||
556 aux_usage == ISL_AUX_USAGE_CCS_E);
557
558 enum isl_aux_state aux_state =
559 iris_resource_get_aux_state(res, level, layer);
560
561 if (res->aux.usage == ISL_AUX_USAGE_CCS_E) {
562 switch (aux_state) {
563 case ISL_AUX_STATE_CLEAR:
564 case ISL_AUX_STATE_PARTIAL_CLEAR:
565 assert(aux_usage == ISL_AUX_USAGE_CCS_E ||
566 aux_usage == ISL_AUX_USAGE_CCS_D);
567
568 if (aux_usage == ISL_AUX_USAGE_CCS_E) {
569 iris_resource_set_aux_state(res, level, layer, 1,
570 ISL_AUX_STATE_COMPRESSED_CLEAR);
571 } else if (aux_state != ISL_AUX_STATE_PARTIAL_CLEAR) {
572 iris_resource_set_aux_state(res, level, layer, 1,
573 ISL_AUX_STATE_PARTIAL_CLEAR);
574 }
575 break;
576
577 case ISL_AUX_STATE_COMPRESSED_CLEAR:
578 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
579 assert(aux_usage == ISL_AUX_USAGE_CCS_E);
580 break; /* Nothing to do */
581
582 case ISL_AUX_STATE_PASS_THROUGH:
583 if (aux_usage == ISL_AUX_USAGE_CCS_E) {
584 iris_resource_set_aux_state(res, level, layer, 1,
585 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
586 } else {
587 /* Nothing to do */
588 }
589 break;
590
591 case ISL_AUX_STATE_RESOLVED:
592 case ISL_AUX_STATE_AUX_INVALID:
593 unreachable("Invalid aux state for CCS_E");
594 }
595 } else {
596 assert(res->aux.usage == ISL_AUX_USAGE_CCS_D);
597 /* CCS_D is a bit simpler */
598 switch (aux_state) {
599 case ISL_AUX_STATE_CLEAR:
600 assert(aux_usage == ISL_AUX_USAGE_CCS_D);
601 iris_resource_set_aux_state(res, level, layer, 1,
602 ISL_AUX_STATE_PARTIAL_CLEAR);
603 break;
604
605 case ISL_AUX_STATE_PARTIAL_CLEAR:
606 assert(aux_usage == ISL_AUX_USAGE_CCS_D);
607 break; /* Nothing to do */
608
609 case ISL_AUX_STATE_PASS_THROUGH:
610 /* Nothing to do */
611 break;
612
613 case ISL_AUX_STATE_COMPRESSED_CLEAR:
614 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
615 case ISL_AUX_STATE_RESOLVED:
616 case ISL_AUX_STATE_AUX_INVALID:
617 unreachable("Invalid aux state for CCS_D");
618 }
619 }
620 }
621
622 static void
623 iris_resource_prepare_mcs_access(struct iris_context *ice,
624 struct iris_resource *res,
625 uint32_t layer,
626 enum isl_aux_usage aux_usage,
627 bool fast_clear_supported)
628 {
629 assert(aux_usage == ISL_AUX_USAGE_MCS);
630
631 switch (iris_resource_get_aux_state(res, 0, layer)) {
632 case ISL_AUX_STATE_CLEAR:
633 case ISL_AUX_STATE_COMPRESSED_CLEAR:
634 if (!fast_clear_supported) {
635 // XXX: iris_blorp_mcs_partial_resolve(ice, res, layer, 1);
636 iris_resource_set_aux_state(res, 0, layer, 1,
637 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
638 }
639 break;
640
641 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
642 break; /* Nothing to do */
643
644 case ISL_AUX_STATE_RESOLVED:
645 case ISL_AUX_STATE_PASS_THROUGH:
646 case ISL_AUX_STATE_AUX_INVALID:
647 case ISL_AUX_STATE_PARTIAL_CLEAR:
648 unreachable("Invalid aux state for MCS");
649 }
650 }
651
652 static void
653 iris_resource_finish_mcs_write(struct iris_context *ice,
654 struct iris_resource *res,
655 uint32_t layer,
656 enum isl_aux_usage aux_usage)
657 {
658 assert(aux_usage == ISL_AUX_USAGE_MCS);
659
660 switch (iris_resource_get_aux_state(res, 0, layer)) {
661 case ISL_AUX_STATE_CLEAR:
662 iris_resource_set_aux_state(res, 0, layer, 1,
663 ISL_AUX_STATE_COMPRESSED_CLEAR);
664 break;
665
666 case ISL_AUX_STATE_COMPRESSED_CLEAR:
667 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
668 break; /* Nothing to do */
669
670 case ISL_AUX_STATE_RESOLVED:
671 case ISL_AUX_STATE_PASS_THROUGH:
672 case ISL_AUX_STATE_AUX_INVALID:
673 case ISL_AUX_STATE_PARTIAL_CLEAR:
674 unreachable("Invalid aux state for MCS");
675 }
676 }
677
678 static void
679 iris_resource_prepare_hiz_access(struct iris_context *ice,
680 struct iris_resource *res,
681 uint32_t level, uint32_t layer,
682 enum isl_aux_usage aux_usage,
683 bool fast_clear_supported)
684 {
685 assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_HIZ);
686
687 enum isl_aux_op hiz_op = ISL_AUX_OP_NONE;
688 switch (iris_resource_get_aux_state(res, level, layer)) {
689 case ISL_AUX_STATE_CLEAR:
690 case ISL_AUX_STATE_COMPRESSED_CLEAR:
691 if (aux_usage != ISL_AUX_USAGE_HIZ || !fast_clear_supported)
692 hiz_op = ISL_AUX_OP_FULL_RESOLVE;
693 break;
694
695 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
696 if (aux_usage != ISL_AUX_USAGE_HIZ)
697 hiz_op = ISL_AUX_OP_FULL_RESOLVE;
698 break;
699
700 case ISL_AUX_STATE_PASS_THROUGH:
701 case ISL_AUX_STATE_RESOLVED:
702 break;
703
704 case ISL_AUX_STATE_AUX_INVALID:
705 if (aux_usage == ISL_AUX_USAGE_HIZ)
706 hiz_op = ISL_AUX_OP_AMBIGUATE;
707 break;
708
709 case ISL_AUX_STATE_PARTIAL_CLEAR:
710 unreachable("Invalid HiZ state");
711 }
712
713 if (hiz_op != ISL_AUX_OP_NONE) {
714 // XXX: HiZ
715 //intel_hiz_exec(ice, res, level, layer, 1, hiz_op);
716
717 switch (hiz_op) {
718 case ISL_AUX_OP_FULL_RESOLVE:
719 iris_resource_set_aux_state(res, level, layer, 1,
720 ISL_AUX_STATE_RESOLVED);
721 break;
722
723 case ISL_AUX_OP_AMBIGUATE:
724 /* The HiZ resolve operation is actually an ambiguate */
725 iris_resource_set_aux_state(res, level, layer, 1,
726 ISL_AUX_STATE_PASS_THROUGH);
727 break;
728
729 default:
730 unreachable("Invalid HiZ op");
731 }
732 }
733 }
734
735 static void
736 iris_resource_finish_hiz_write(struct iris_context *ice,
737 struct iris_resource *res,
738 uint32_t level, uint32_t layer,
739 enum isl_aux_usage aux_usage)
740 {
741 assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_HIZ);
742
743 switch (iris_resource_get_aux_state(res, level, layer)) {
744 case ISL_AUX_STATE_CLEAR:
745 assert(aux_usage == ISL_AUX_USAGE_HIZ);
746 iris_resource_set_aux_state(res, level, layer, 1,
747 ISL_AUX_STATE_COMPRESSED_CLEAR);
748 break;
749
750 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
751 case ISL_AUX_STATE_COMPRESSED_CLEAR:
752 assert(aux_usage == ISL_AUX_USAGE_HIZ);
753 break; /* Nothing to do */
754
755 case ISL_AUX_STATE_RESOLVED:
756 if (aux_usage == ISL_AUX_USAGE_HIZ) {
757 iris_resource_set_aux_state(res, level, layer, 1,
758 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
759 } else {
760 iris_resource_set_aux_state(res, level, layer, 1,
761 ISL_AUX_STATE_AUX_INVALID);
762 }
763 break;
764
765 case ISL_AUX_STATE_PASS_THROUGH:
766 if (aux_usage == ISL_AUX_USAGE_HIZ) {
767 iris_resource_set_aux_state(res, level, layer, 1,
768 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
769 }
770 break;
771
772 case ISL_AUX_STATE_AUX_INVALID:
773 assert(aux_usage != ISL_AUX_USAGE_HIZ);
774 break;
775
776 case ISL_AUX_STATE_PARTIAL_CLEAR:
777 unreachable("Invalid HiZ state");
778 }
779 }
780
781 void
782 iris_resource_prepare_access(struct iris_context *ice,
783 struct iris_resource *res,
784 uint32_t start_level, uint32_t num_levels,
785 uint32_t start_layer, uint32_t num_layers,
786 enum isl_aux_usage aux_usage,
787 bool fast_clear_supported)
788 {
789 num_levels = miptree_level_range_length(res, start_level, num_levels);
790
791 switch (res->aux.usage) {
792 case ISL_AUX_USAGE_NONE:
793 /* Nothing to do */
794 break;
795
796 case ISL_AUX_USAGE_MCS:
797 assert(start_level == 0 && num_levels == 1);
798 const uint32_t level_layers =
799 miptree_layer_range_length(res, 0, start_layer, num_layers);
800 for (uint32_t a = 0; a < level_layers; a++) {
801 iris_resource_prepare_mcs_access(ice, res, start_layer + a,
802 aux_usage, fast_clear_supported);
803 }
804 break;
805
806 case ISL_AUX_USAGE_CCS_D:
807 case ISL_AUX_USAGE_CCS_E:
808 for (uint32_t l = 0; l < num_levels; l++) {
809 const uint32_t level = start_level + l;
810 const uint32_t level_layers =
811 miptree_layer_range_length(res, level, start_layer, num_layers);
812 for (uint32_t a = 0; a < level_layers; a++) {
813 iris_resource_prepare_ccs_access(ice, res, level,
814 start_layer + a,
815 aux_usage, fast_clear_supported);
816 }
817 }
818 break;
819
820 case ISL_AUX_USAGE_HIZ:
821 for (uint32_t l = 0; l < num_levels; l++) {
822 const uint32_t level = start_level + l;
823 if (!iris_resource_level_has_hiz(res, level))
824 continue;
825
826 const uint32_t level_layers =
827 miptree_layer_range_length(res, level, start_layer, num_layers);
828 for (uint32_t a = 0; a < level_layers; a++) {
829 iris_resource_prepare_hiz_access(ice, res, level, start_layer + a,
830 aux_usage, fast_clear_supported);
831 }
832 }
833 break;
834
835 default:
836 unreachable("Invalid aux usage");
837 }
838 }
839
840 void
841 iris_resource_finish_write(struct iris_context *ice,
842 struct iris_resource *res, uint32_t level,
843 uint32_t start_layer, uint32_t num_layers,
844 enum isl_aux_usage aux_usage)
845 {
846 num_layers = miptree_layer_range_length(res, level, start_layer, num_layers);
847
848 switch (res->aux.usage) {
849 case ISL_AUX_USAGE_NONE:
850 break;
851
852 case ISL_AUX_USAGE_MCS:
853 for (uint32_t a = 0; a < num_layers; a++) {
854 iris_resource_finish_mcs_write(ice, res, start_layer + a,
855 aux_usage);
856 }
857 break;
858
859 case ISL_AUX_USAGE_CCS_D:
860 case ISL_AUX_USAGE_CCS_E:
861 for (uint32_t a = 0; a < num_layers; a++) {
862 iris_resource_finish_ccs_write(ice, res, level, start_layer + a,
863 aux_usage);
864 }
865 break;
866
867 case ISL_AUX_USAGE_HIZ:
868 if (!iris_resource_level_has_hiz(res, level))
869 return;
870
871 for (uint32_t a = 0; a < num_layers; a++) {
872 iris_resource_finish_hiz_write(ice, res, level, start_layer + a,
873 aux_usage);
874 }
875 break;
876
877 default:
878 unreachable("Invavlid aux usage");
879 }
880 }
881
882 enum isl_aux_state
883 iris_resource_get_aux_state(const struct iris_resource *res,
884 uint32_t level, uint32_t layer)
885 {
886 iris_resource_check_level_layer(res, level, layer);
887
888 if (res->surf.usage & ISL_SURF_USAGE_DEPTH_BIT) {
889 assert(iris_resource_level_has_hiz(res, level));
890 } else if (res->surf.usage & ISL_SURF_USAGE_STENCIL_BIT) {
891 unreachable("Cannot get aux state for stencil");
892 } else {
893 assert(res->surf.samples == 1 ||
894 res->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
895 }
896
897 return res->aux.state[level][layer];
898 }
899
900 void
901 iris_resource_set_aux_state(struct iris_resource *res, uint32_t level,
902 uint32_t start_layer, uint32_t num_layers,
903 enum isl_aux_state aux_state)
904 {
905 num_layers = miptree_layer_range_length(res, level, start_layer, num_layers);
906
907 if (res->surf.usage & ISL_SURF_USAGE_DEPTH_BIT) {
908 assert(iris_resource_level_has_hiz(res, level));
909 } else if (res->surf.usage & ISL_SURF_USAGE_STENCIL_BIT) {
910 unreachable("Cannot set aux state for stencil");
911 } else {
912 assert(res->surf.samples == 1 ||
913 res->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
914 }
915
916 for (unsigned a = 0; a < num_layers; a++) {
917 if (res->aux.state[level][start_layer + a] != aux_state) {
918 res->aux.state[level][start_layer + a] = aux_state;
919 // XXX: dirty works differently
920 // brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE;
921 }
922 }
923 }
924
925 /* On Gen9 color buffers may be compressed by the hardware (lossless
926 * compression). There are, however, format restrictions and care needs to be
927 * taken that the sampler engine is capable for re-interpreting a buffer with
928 * format different the buffer was originally written with.
929 *
930 * For example, SRGB formats are not compressible and the sampler engine isn't
931 * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying
932 * color buffer needs to be resolved so that the sampling surface can be
933 * sampled as non-compressed (i.e., without the auxiliary MCS buffer being
934 * set).
935 */
936 static bool
937 can_texture_with_ccs(const struct gen_device_info *devinfo,
938 struct pipe_debug_callback *dbg,
939 const struct iris_resource *res,
940 enum isl_format view_format)
941 {
942 if (res->aux.usage != ISL_AUX_USAGE_CCS_E)
943 return false;
944
945 if (!format_ccs_e_compat_with_resource(devinfo, res, view_format)) {
946 const struct isl_format_layout *res_fmtl =
947 isl_format_get_layout(res->surf.format);
948 const struct isl_format_layout *view_fmtl =
949 isl_format_get_layout(view_format);
950
951 perf_debug(dbg, "Incompatible sampling format (%s) for CCS (%s)\n",
952 view_fmtl->name, res_fmtl->name);
953
954 return false;
955 }
956
957 return true;
958 }
959
960 enum isl_aux_usage
961 iris_resource_texture_aux_usage(struct iris_context *ice,
962 const struct iris_resource *res,
963 enum isl_format view_format,
964 enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits)
965 {
966 struct iris_screen *screen = (void *) ice->ctx.screen;
967 struct gen_device_info *devinfo = &screen->devinfo;
968
969 assert(devinfo->gen == 9 || astc5x5_wa_bits == 0);
970
971 /* On gen9, ASTC 5x5 textures cannot live in the sampler cache along side
972 * CCS or HiZ compressed textures. See gen9_apply_astc5x5_wa_flush() for
973 * details.
974 */
975 if ((astc5x5_wa_bits & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
976 res->aux.usage != ISL_AUX_USAGE_MCS)
977 return ISL_AUX_USAGE_NONE;
978
979 switch (res->aux.usage) {
980 case ISL_AUX_USAGE_HIZ:
981 if (sample_with_hiz(devinfo, res))
982 return ISL_AUX_USAGE_HIZ;
983 break;
984
985 case ISL_AUX_USAGE_MCS:
986 return ISL_AUX_USAGE_MCS;
987
988 case ISL_AUX_USAGE_CCS_D:
989 case ISL_AUX_USAGE_CCS_E:
990 /* If we don't have any unresolved color, report an aux usage of
991 * ISL_AUX_USAGE_NONE. This way, texturing won't even look at the
992 * aux surface and we can save some bandwidth.
993 */
994 if (!has_color_unresolved(res, 0, INTEL_REMAINING_LEVELS,
995 0, INTEL_REMAINING_LAYERS))
996 return ISL_AUX_USAGE_NONE;
997
998 if (can_texture_with_ccs(devinfo, &ice->dbg, res, view_format))
999 return ISL_AUX_USAGE_CCS_E;
1000 break;
1001
1002 default:
1003 break;
1004 }
1005
1006 return ISL_AUX_USAGE_NONE;
1007 }
1008
1009 static bool
1010 isl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b)
1011 {
1012 /* On gen8 and earlier, the hardware was only capable of handling 0/1 clear
1013 * values so sRGB curve application was a no-op for all fast-clearable
1014 * formats.
1015 *
1016 * On gen9+, the hardware supports arbitrary clear values. For sRGB clear
1017 * values, the hardware interprets the floats, not as what would be
1018 * returned from the sampler (or written by the shader), but as being
1019 * between format conversion and sRGB curve application. This means that
1020 * we can switch between sRGB and UNORM without having to whack the clear
1021 * color.
1022 */
1023 return isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b);
1024 }
1025
1026 void
1027 iris_resource_prepare_texture(struct iris_context *ice,
1028 struct iris_resource *res,
1029 enum isl_format view_format,
1030 uint32_t start_level, uint32_t num_levels,
1031 uint32_t start_layer, uint32_t num_layers,
1032 enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits)
1033 {
1034 enum isl_aux_usage aux_usage =
1035 iris_resource_texture_aux_usage(ice, res, view_format, astc5x5_wa_bits);
1036
1037 bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE;
1038
1039 /* Clear color is specified as ints or floats and the conversion is done by
1040 * the sampler. If we have a texture view, we would have to perform the
1041 * clear color conversion manually. Just disable clear color.
1042 */
1043 if (!isl_formats_are_fast_clear_compatible(res->surf.format, view_format))
1044 clear_supported = false;
1045
1046 iris_resource_prepare_access(ice, res, start_level, num_levels,
1047 start_layer, num_layers,
1048 aux_usage, clear_supported);
1049 }
1050
1051 void
1052 iris_resource_prepare_image(struct iris_context *ice,
1053 struct iris_resource *res)
1054 {
1055 /* The data port doesn't understand any compression */
1056 iris_resource_prepare_access(ice, res, 0, INTEL_REMAINING_LEVELS,
1057 0, INTEL_REMAINING_LAYERS,
1058 ISL_AUX_USAGE_NONE, false);
1059 }
1060
1061 enum isl_aux_usage
1062 iris_resource_render_aux_usage(struct iris_context *ice,
1063 struct iris_resource *res,
1064 enum isl_format render_format,
1065 bool blend_enabled,
1066 bool draw_aux_disabled)
1067 {
1068 struct iris_screen *screen = (void *) ice->ctx.screen;
1069 struct gen_device_info *devinfo = &screen->devinfo;
1070
1071 if (draw_aux_disabled)
1072 return ISL_AUX_USAGE_NONE;
1073
1074 switch (res->aux.usage) {
1075 case ISL_AUX_USAGE_MCS:
1076 return ISL_AUX_USAGE_MCS;
1077
1078 case ISL_AUX_USAGE_CCS_D:
1079 case ISL_AUX_USAGE_CCS_E:
1080 /* Gen9+ hardware technically supports non-0/1 clear colors with sRGB
1081 * formats. However, there are issues with blending where it doesn't
1082 * properly apply the sRGB curve to the clear color when blending.
1083 */
1084 /* XXX:
1085 if (devinfo->gen >= 9 && blend_enabled &&
1086 isl_format_is_srgb(render_format) &&
1087 !isl_color_value_is_zero_one(res->fast_clear_color, render_format))
1088 return ISL_AUX_USAGE_NONE;
1089 */
1090
1091 if (res->aux.usage == ISL_AUX_USAGE_CCS_E &&
1092 format_ccs_e_compat_with_resource(devinfo, res, render_format))
1093 return ISL_AUX_USAGE_CCS_E;
1094
1095 /* Otherwise, we have to fall back to CCS_D */
1096 return ISL_AUX_USAGE_CCS_D;
1097
1098 default:
1099 return ISL_AUX_USAGE_NONE;
1100 }
1101 }
1102
1103 void
1104 iris_resource_prepare_render(struct iris_context *ice,
1105 struct iris_resource *res, uint32_t level,
1106 uint32_t start_layer, uint32_t layer_count,
1107 enum isl_aux_usage aux_usage)
1108 {
1109 iris_resource_prepare_access(ice, res, level, 1, start_layer, layer_count,
1110 aux_usage, aux_usage != ISL_AUX_USAGE_NONE);
1111 }
1112
1113 void
1114 iris_resource_finish_render(struct iris_context *ice,
1115 struct iris_resource *res, uint32_t level,
1116 uint32_t start_layer, uint32_t layer_count,
1117 enum isl_aux_usage aux_usage)
1118 {
1119 iris_resource_finish_write(ice, res, level, start_layer, layer_count,
1120 aux_usage);
1121 }
1122
1123 void
1124 iris_resource_prepare_depth(struct iris_context *ice,
1125 struct iris_resource *res, uint32_t level,
1126 uint32_t start_layer, uint32_t layer_count)
1127 {
1128 iris_resource_prepare_access(ice, res, level, 1, start_layer, layer_count,
1129 res->aux.usage, res->aux.bo != NULL);
1130 }
1131
1132 void
1133 iris_resource_finish_depth(struct iris_context *ice,
1134 struct iris_resource *res, uint32_t level,
1135 uint32_t start_layer, uint32_t layer_count,
1136 bool depth_written)
1137 {
1138 if (depth_written) {
1139 iris_resource_finish_write(ice, res, level, start_layer, layer_count,
1140 res->aux.usage);
1141 }
1142 }