548a081fe226c8744de096d71c8914a32c1af473
[mesa.git] / src / mesa / drivers / dri / i965 / intel_mipmap_tree.c
1 /*
2 * Copyright 2006 VMware, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial portions
15 * of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 #include <GL/gl.h>
27 #include <GL/internal/dri_interface.h>
28
29 #include "intel_batchbuffer.h"
30 #include "intel_image.h"
31 #include "intel_mipmap_tree.h"
32 #include "intel_tex.h"
33 #include "intel_blit.h"
34 #include "intel_fbo.h"
35
36 #include "brw_blorp.h"
37 #include "brw_context.h"
38 #include "brw_state.h"
39
40 #include "main/enums.h"
41 #include "main/fbobject.h"
42 #include "main/formats.h"
43 #include "main/glformats.h"
44 #include "main/texcompress_etc.h"
45 #include "main/teximage.h"
46 #include "main/streaming-load-memcpy.h"
47 #include "x86/common_x86_asm.h"
48
49 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
50
51 static void *intel_miptree_map_raw(struct brw_context *brw,
52 struct intel_mipmap_tree *mt,
53 GLbitfield mode);
54
55 static void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt);
56
57 static bool
58 intel_miptree_alloc_aux(struct brw_context *brw,
59 struct intel_mipmap_tree *mt);
60
61 static bool
62 is_mcs_supported(const struct brw_context *brw, mesa_format format,
63 uint32_t layout_flags)
64 {
65 /* Prior to Gen7, all MSAA surfaces used IMS layout. */
66 if (brw->gen < 7)
67 return false;
68
69 /* In Gen7, IMS layout is only used for depth and stencil buffers. */
70 switch (_mesa_get_format_base_format(format)) {
71 case GL_DEPTH_COMPONENT:
72 case GL_STENCIL_INDEX:
73 case GL_DEPTH_STENCIL:
74 return false;
75 default:
76 /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
77 *
78 * This field must be set to 0 for all SINT MSRTs when all RT channels
79 * are not written
80 *
81 * In practice this means that we have to disable MCS for all signed
82 * integer MSAA buffers. The alternative, to disable MCS only when one
83 * of the render target channels is disabled, is impractical because it
84 * would require converting between CMS and UMS MSAA layouts on the fly,
85 * which is expensive.
86 */
87 if (brw->gen == 7 && _mesa_get_format_datatype(format) == GL_INT) {
88 return false;
89 } else if (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) {
90 /* We can't use the CMS layout because it uses an aux buffer, the MCS
91 * buffer. So fallback to UMS, which is identical to CMS without the
92 * MCS. */
93 return false;
94 } else {
95 return true;
96 }
97 }
98 }
99
100 static bool
101 intel_tiling_supports_ccs(const struct brw_context *brw,
102 enum isl_tiling tiling)
103 {
104 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
105 * Target(s)", beneath the "Fast Color Clear" bullet (p326):
106 *
107 * - Support is limited to tiled render targets.
108 *
109 * Gen9 changes the restriction to Y-tile only.
110 */
111 if (brw->gen >= 9)
112 return tiling == ISL_TILING_Y0;
113 else if (brw->gen >= 7)
114 return tiling != ISL_TILING_LINEAR;
115 else
116 return false;
117 }
118
119 /**
120 * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
121 * can be used. This doesn't (and should not) inspect any of the properties of
122 * the miptree's BO.
123 *
124 * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
125 * beneath the "Fast Color Clear" bullet (p326):
126 *
127 * - Support is for non-mip-mapped and non-array surface types only.
128 *
129 * And then later, on p327:
130 *
131 * - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
132 * 64bpp, and 128bpp.
133 *
134 * From the Skylake documentation, it is made clear that X-tiling is no longer
135 * supported:
136 *
137 * - MCS and Lossless compression is supported for TiledY/TileYs/TileYf
138 * non-MSRTs only.
139 */
static bool
intel_miptree_supports_ccs(struct brw_context *brw,
                           const struct intel_mipmap_tree *mt)
{
   /* MCS support does not exist prior to Gen7 */
   if (brw->gen < 7)
      return false;

   /* This function applies only to non-multisampled render targets. */
   if (mt->surf.samples > 1)
      return false;

   /* MCS is only supported for color buffers */
   switch (_mesa_get_format_base_format(mt->format)) {
   case GL_DEPTH_COMPONENT:
   case GL_DEPTH_STENCIL:
   case GL_STENCIL_INDEX:
      return false;
   }

   /* Only 32bpp, 64bpp and 128bpp render target formats qualify (see the
    * PRM citation below).
    */
   if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
      return false;

   const bool mip_mapped = mt->first_level != 0 || mt->last_level != 0;
   const bool arrayed = mt->surf.logical_level0_px.array_len > 1 ||
                        mt->surf.logical_level0_px.depth > 1;

   if (arrayed) {
      /* Multisample surfaces with the CMS layout are not layered surfaces,
       * yet still have physical_depth0 > 1. Assert that we don't
       * accidentally reject a multisampled surface here. We should have
       * rejected it earlier by explicitly checking the sample count.
       */
      assert(mt->surf.samples == 1);
   }

   /* Handle the hardware restrictions...
    *
    * All GENs have the following restriction: "MCS buffer for non-MSRT is
    * supported only for RT formats 32bpp, 64bpp, and 128bpp."
    *
    * From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652: (Color Clear of
    * Non-MultiSampler Render Target Restrictions) Support is for
    * non-mip-mapped and non-array surface types only.
    *
    * From the BDW PRM Volume 7: 3D-Media-GPGPU, page 649: (Color Clear of
    * Non-MultiSampler Render Target Restriction). Mip-mapped and arrayed
    * surfaces are supported with MCS buffer layout with these alignments in
    * the RT space: Horizontal Alignment = 256 and Vertical Alignment = 128.
    *
    * From the SKL PRM Volume 7: 3D-Media-GPGPU, page 632: (Color Clear of
    * Non-MultiSampler Render Target Restriction). Mip-mapped and arrayed
    * surfaces are supported with MCS buffer layout with these alignments in
    * the RT space: Horizontal Alignment = 128 and Vertical Alignment = 64.
    */
   if (brw->gen < 8 && (mip_mapped || arrayed))
      return false;

   /* There's no point in using an MCS buffer if the surface isn't in a
    * renderable format.
    */
   if (!brw->mesa_format_supports_render[mt->format])
      return false;

   if (brw->gen >= 9) {
      /* Gen9+ decides per isl format; sRGB formats are mapped to their
       * linear equivalent first (fast clear works on the linear view).
       */
      mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
      const enum isl_format isl_format =
         brw_isl_format_for_mesa_format(linear_format);
      return isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format);
   } else
      return true;
}
212
213 static bool
214 intel_tiling_supports_hiz(const struct brw_context *brw,
215 enum isl_tiling tiling)
216 {
217 if (brw->gen < 6)
218 return false;
219
220 return tiling == ISL_TILING_Y0;
221 }
222
223 static bool
224 intel_miptree_supports_hiz(const struct brw_context *brw,
225 const struct intel_mipmap_tree *mt)
226 {
227 if (!brw->has_hiz)
228 return false;
229
230 switch (mt->format) {
231 case MESA_FORMAT_Z_FLOAT32:
232 case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
233 case MESA_FORMAT_Z24_UNORM_X8_UINT:
234 case MESA_FORMAT_Z24_UNORM_S8_UINT:
235 case MESA_FORMAT_Z_UNORM16:
236 return true;
237 default:
238 return false;
239 }
240 }
241
242 static bool
243 intel_miptree_supports_ccs_e(struct brw_context *brw,
244 const struct intel_mipmap_tree *mt)
245 {
246 if (brw->gen < 9)
247 return false;
248
249 /* For now compression is only enabled for integer formats even though
250 * there exist supported floating point formats also. This is a heuristic
251 * decision based on current public benchmarks. In none of the cases these
252 * formats provided any improvement but a few cases were seen to regress.
253 * Hence these are left to to be enabled in the future when they are known
254 * to improve things.
255 */
256 if (_mesa_get_format_datatype(mt->format) == GL_FLOAT)
257 return false;
258
259 if (!intel_miptree_supports_ccs(brw, mt))
260 return false;
261
262 /* Fast clear can be also used to clear srgb surfaces by using equivalent
263 * linear format. This trick, however, can't be extended to be used with
264 * lossless compression and therefore a check is needed to see if the format
265 * really is linear.
266 */
267 return _mesa_get_srgb_format_linear(mt->format) == mt->format;
268 }
269
270 /**
271 * Determine depth format corresponding to a depth+stencil format,
272 * for separate stencil.
273 */
274 mesa_format
275 intel_depth_format_for_depthstencil_format(mesa_format format) {
276 switch (format) {
277 case MESA_FORMAT_Z24_UNORM_S8_UINT:
278 return MESA_FORMAT_Z24_UNORM_X8_UINT;
279 case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
280 return MESA_FORMAT_Z_FLOAT32;
281 default:
282 return format;
283 }
284 }
285
286 static bool
287 create_mapping_table(GLenum target, unsigned first_level, unsigned last_level,
288 unsigned depth0, struct intel_mipmap_level *table)
289 {
290 for (unsigned level = first_level; level <= last_level; level++) {
291 const unsigned d =
292 target == GL_TEXTURE_3D ? minify(depth0, level) : depth0;
293
294 table[level].slice = calloc(d, sizeof(*table[0].slice));
295 if (!table[level].slice)
296 goto unwind;
297 }
298
299 return true;
300
301 unwind:
302 for (unsigned level = first_level; level <= last_level; level++)
303 free(table[level].slice);
304
305 return false;
306 }
307
/**
 * Report whether a depth/stencil miptree should carry its stencil bits in a
 * separate MESA_FORMAT_S_UINT8 miptree (see make_separate_stencil_surface).
 * The checks are ordered: BO-wrapped miptrees and non-packed formats never
 * split, then hardware policy decides.
 */
static bool
needs_separate_stencil(const struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       mesa_format format, uint32_t layout_flags)
{

   /* Miptrees wrapped around an externally provided BO keep that BO's
    * packed layout.
    */
   if (layout_flags & MIPTREE_LAYOUT_FOR_BO)
      return false;

   if (_mesa_get_format_base_format(format) != GL_DEPTH_STENCIL)
      return false;

   if (brw->must_use_separate_stencil)
      return true;

   /* Otherwise only split when HiZ can also be used for this miptree. */
   return brw->has_separate_stencil &&
      intel_miptree_supports_hiz(brw, mt);
}
326
327 /**
328 * Choose the aux usage for this miptree. This function must be called fairly
329 * late in the miptree create process after we have a tiling.
330 */
static void
intel_miptree_choose_aux_usage(struct brw_context *brw,
                               struct intel_mipmap_tree *mt)
{
   /* Callers must not have chosen an aux usage already. */
   assert(mt->aux_usage == ISL_AUX_USAGE_NONE);

   /* Callers that want aux disabled skip this function entirely (they check
    * MIPTREE_LAYOUT_DISABLE_AUX themselves), so pass no layout flags here.
    */
   const unsigned no_flags = 0;
   if (mt->surf.samples > 1 && is_mcs_supported(brw, mt->format, no_flags)) {
      assert(mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
      mt->aux_usage = ISL_AUX_USAGE_MCS;
   } else if (intel_tiling_supports_ccs(brw, mt->surf.tiling) &&
              intel_miptree_supports_ccs(brw, mt)) {
      /* Prefer lossless compression (CCS_E) when supported and not disabled
       * via the DEBUG_NO_RBC debug flag; otherwise fast-clear only (CCS_D).
       */
      if (!unlikely(INTEL_DEBUG & DEBUG_NO_RBC) &&
          intel_miptree_supports_ccs_e(brw, mt)) {
         mt->aux_usage = ISL_AUX_USAGE_CCS_E;
      } else {
         mt->aux_usage = ISL_AUX_USAGE_CCS_D;
      }
   } else if (intel_tiling_supports_hiz(brw, mt->surf.tiling) &&
              intel_miptree_supports_hiz(brw, mt)) {
      mt->aux_usage = ISL_AUX_USAGE_HIZ;
   }

   /* We can do fast-clear on all auxiliary surface types that are
    * allocated through the normal texture creation paths.
    */
   if (mt->aux_usage != ISL_AUX_USAGE_NONE)
      mt->supports_fast_clear = true;
}
360
361
362 /**
363 * Choose an appropriate uncompressed format for a requested
364 * compressed format, if unsupported.
365 */
366 mesa_format
367 intel_lower_compressed_format(struct brw_context *brw, mesa_format format)
368 {
369 /* No need to lower ETC formats on these platforms,
370 * they are supported natively.
371 */
372 if (brw->gen >= 8 || brw->is_baytrail)
373 return format;
374
375 switch (format) {
376 case MESA_FORMAT_ETC1_RGB8:
377 return MESA_FORMAT_R8G8B8X8_UNORM;
378 case MESA_FORMAT_ETC2_RGB8:
379 return MESA_FORMAT_R8G8B8X8_UNORM;
380 case MESA_FORMAT_ETC2_SRGB8:
381 case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
382 case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
383 return MESA_FORMAT_B8G8R8A8_SRGB;
384 case MESA_FORMAT_ETC2_RGBA8_EAC:
385 case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
386 return MESA_FORMAT_R8G8B8A8_UNORM;
387 case MESA_FORMAT_ETC2_R11_EAC:
388 return MESA_FORMAT_R_UNORM16;
389 case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
390 return MESA_FORMAT_R_SNORM16;
391 case MESA_FORMAT_ETC2_RG11_EAC:
392 return MESA_FORMAT_R16G16_UNORM;
393 case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
394 return MESA_FORMAT_R16G16_SNORM;
395 default:
396 /* Non ETC1 / ETC2 format */
397 return format;
398 }
399 }
400
401 static unsigned
402 get_num_logical_layers(const struct intel_mipmap_tree *mt, unsigned level)
403 {
404 if (mt->surf.dim == ISL_SURF_DIM_3D)
405 return minify(mt->surf.logical_level0_px.depth, level);
406 else
407 return mt->surf.logical_level0_px.array_len;
408 }
409
410 static unsigned
411 get_num_phys_layers(const struct isl_surf *surf, unsigned level)
412 {
413 /* In case of physical dimensions one needs to consider also the layout.
414 * See isl_calc_phys_level0_extent_sa().
415 */
416 if (surf->dim != ISL_SURF_DIM_3D)
417 return surf->phys_level0_sa.array_len;
418
419 if (surf->dim_layout == ISL_DIM_LAYOUT_GEN4_2D)
420 return minify(surf->phys_level0_sa.array_len, level);
421
422 return minify(surf->phys_level0_sa.depth, level);
423 }
424
/** \brief Assert that the level and layer are valid for the miptree. */
void
intel_miptree_check_level_layer(const struct intel_mipmap_tree *mt,
                                uint32_t level,
                                uint32_t layer)
{
   /* Keep the parameters "used" in release builds, where the asserts
    * below compile away.
    */
   (void) mt;
   (void) level;
   (void) layer;

   assert(level >= mt->first_level);
   assert(level <= mt->last_level);
   assert(layer < get_num_phys_layers(&mt->surf, level));
}
439
/**
 * Build the mt->aux_state map: an array, indexed by level, of pointers into
 * per-layer isl_aux_state arrays, all initialized to \p initial.
 * Returns NULL on allocation failure. Free with free_aux_state_map().
 */
static enum isl_aux_state **
create_aux_state_map(struct intel_mipmap_tree *mt,
                     enum isl_aux_state initial)
{
   const uint32_t levels = mt->last_level + 1;

   uint32_t total_slices = 0;
   for (uint32_t level = 0; level < levels; level++)
      total_slices += get_num_logical_layers(mt, level);

   const size_t per_level_array_size = levels * sizeof(enum isl_aux_state *);

   /* We're going to allocate a single chunk of data for both the per-level
    * reference array and the arrays of aux_state. This makes cleanup
    * significantly easier.
    */
   const size_t total_size = per_level_array_size +
                             total_slices * sizeof(enum isl_aux_state);
   void *data = malloc(total_size);
   if (data == NULL)
      return NULL;

   /* Per-level pointer array at the front, per-slice states packed after
    * it. (void-pointer arithmetic here is a GNU C extension.)
    */
   enum isl_aux_state **per_level_arr = data;
   enum isl_aux_state *s = data + per_level_array_size;
   for (uint32_t level = 0; level < levels; level++) {
      per_level_arr[level] = s;
      const unsigned level_layers = get_num_logical_layers(mt, level);
      for (uint32_t a = 0; a < level_layers; a++)
         *(s++) = initial;
   }
   /* s must land exactly at the end of the chunk. */
   assert((void *)s == data + total_size);

   return per_level_arr;
}
474
static void
free_aux_state_map(enum isl_aux_state **state)
{
   /* The per-level pointer array and all per-slice states live in one
    * allocation (see create_aux_state_map), so a single free suffices.
    */
   free(state);
}
480
481 static bool
482 need_to_retile_as_linear(struct brw_context *brw, unsigned row_pitch,
483 enum isl_tiling tiling, unsigned samples)
484 {
485 if (samples > 1)
486 return false;
487
488 if (tiling == ISL_TILING_LINEAR)
489 return false;
490
491 /* If the width is much smaller than a tile, don't bother tiling. */
492 if (row_pitch < 64)
493 return true;
494
495 if (ALIGN(row_pitch, 512) >= 32768) {
496 perf_debug("row pitch %u too large to blit, falling back to untiled",
497 row_pitch);
498 return true;
499 }
500
501 return false;
502 }
503
504 static bool
505 need_to_retile_as_x(const struct brw_context *brw, uint64_t size,
506 enum isl_tiling tiling)
507 {
508 /* If the BO is too large to fit in the aperture, we need to use the
509 * BLT engine to support it. Prior to Sandybridge, the BLT paths can't
510 * handle Y-tiling, so we need to fall back to X.
511 */
512 if (brw->gen < 6 && size >= brw->max_gtt_map_object_size &&
513 tiling == ISL_TILING_Y0)
514 return true;
515
516 return false;
517 }
518
/**
 * Core miptree constructor: computes the layout via isl and either wraps
 * the caller-supplied \p bo or allocates a fresh one.
 *
 * \param row_pitch  byte pitch to force, or 0 to let isl choose.
 * \param bo         existing buffer to wrap, or NULL to allocate. The
 *                   pointer is stored as-is; referencing \p bo is the
 *                   caller's responsibility.
 * \return the new miptree, or NULL on failure.
 */
static struct intel_mipmap_tree *
make_surface(struct brw_context *brw, GLenum target, mesa_format format,
             unsigned first_level, unsigned last_level,
             unsigned width0, unsigned height0, unsigned depth0,
             unsigned num_samples, isl_tiling_flags_t tiling_flags,
             isl_surf_usage_flags_t isl_usage_flags, uint32_t alloc_flags,
             unsigned row_pitch, struct brw_bo *bo)
{
   /* calloc zeroes mt->level, which create_mapping_table's unwind path and
    * later NULL checks rely on.
    */
   struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
   if (!mt)
      return NULL;

   if (!create_mapping_table(target, first_level, last_level, depth0,
                             mt->level)) {
      free(mt);
      return NULL;
   }

   mt->refcount = 1;

   if (target == GL_TEXTURE_CUBE_MAP ||
       target == GL_TEXTURE_CUBE_MAP_ARRAY)
      isl_usage_flags |= ISL_SURF_USAGE_CUBE_BIT;

   DBG("%s: %s %s %ux %u:%u:%u %d..%d <-- %p\n",
       __func__,
       _mesa_enum_to_string(target),
       _mesa_get_format_name(format),
       num_samples, width0, height0, depth0,
       first_level, last_level, mt);

   /* For 3D textures depth0 is the surface depth; for everything else it is
    * the array length.
    */
   struct isl_surf_init_info init_info = {
      .dim = get_isl_surf_dim(target),
      .format = translate_tex_format(brw, format, false),
      .width = width0,
      .height = height0,
      .depth = target == GL_TEXTURE_3D ? depth0 : 1,
      .levels = last_level - first_level + 1,
      .array_len = target == GL_TEXTURE_3D ? 1 : depth0,
      .samples = num_samples,
      .row_pitch = row_pitch,
      .usage = isl_usage_flags,
      .tiling_flags = tiling_flags,
   };

   if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
      goto fail;

   /* In case caller doesn't specifically request Y-tiling (needed
    * unconditionally for depth), check for corner cases needing special
    * treatment.
    */
   if (tiling_flags & ~ISL_TILING_Y0_BIT) {
      if (need_to_retile_as_linear(brw, mt->surf.row_pitch,
                                   mt->surf.tiling, mt->surf.samples)) {
         init_info.tiling_flags = 1u << ISL_TILING_LINEAR;
         if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
            goto fail;
      } else if (need_to_retile_as_x(brw, mt->surf.size, mt->surf.tiling)) {
         init_info.tiling_flags = 1u << ISL_TILING_X;
         if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
            goto fail;
      }
   }

   /* In case of linear the buffer gets padded by fixed 64 bytes and therefore
    * the size may not be multiple of row_pitch.
    * See isl_apply_surface_padding().
    */
   if (mt->surf.tiling != ISL_TILING_LINEAR)
      assert(mt->surf.size % mt->surf.row_pitch == 0);

   if (!bo) {
      mt->bo = brw_bo_alloc_tiled(brw->bufmgr, "isl-miptree",
                                  mt->surf.size,
                                  isl_tiling_to_i915_tiling(
                                     mt->surf.tiling),
                                  mt->surf.row_pitch, alloc_flags);
      if (!mt->bo)
         goto fail;
   } else {
      mt->bo = bo;
   }

   mt->first_level = first_level;
   mt->last_level = last_level;
   mt->target = target;
   mt->format = format;
   mt->aux_state = NULL;
   mt->cpp = isl_format_get_layout(mt->surf.format)->bpb / 8;
   mt->compressed = _mesa_is_format_compressed(format);

   return mt;

fail:
   intel_miptree_release(&mt);
   return NULL;
}
617
618 static bool
619 make_separate_stencil_surface(struct brw_context *brw,
620 struct intel_mipmap_tree *mt)
621 {
622 mt->stencil_mt = make_surface(brw, mt->target, MESA_FORMAT_S_UINT8,
623 0, mt->surf.levels - 1,
624 mt->surf.logical_level0_px.width,
625 mt->surf.logical_level0_px.height,
626 mt->surf.dim == ISL_SURF_DIM_3D ?
627 mt->surf.logical_level0_px.depth :
628 mt->surf.logical_level0_px.array_len,
629 mt->surf.samples, ISL_TILING_W_BIT,
630 ISL_SURF_USAGE_STENCIL_BIT |
631 ISL_SURF_USAGE_TEXTURE_BIT,
632 BO_ALLOC_FOR_RENDER, 0, NULL);
633
634 if (!mt->stencil_mt)
635 return false;
636
637 mt->stencil_mt->r8stencil_needs_update = true;
638
639 return true;
640 }
641
642 static bool
643 force_linear_tiling(uint32_t layout_flags)
644 {
645 /* ANY includes NONE and Y bit. */
646 if (layout_flags & MIPTREE_LAYOUT_TILING_Y)
647 return false;
648
649 return layout_flags & MIPTREE_LAYOUT_TILING_NONE;
650 }
651
652 static struct intel_mipmap_tree *
653 miptree_create(struct brw_context *brw,
654 GLenum target,
655 mesa_format format,
656 GLuint first_level,
657 GLuint last_level,
658 GLuint width0,
659 GLuint height0,
660 GLuint depth0,
661 GLuint num_samples,
662 uint32_t layout_flags)
663 {
664 if (format == MESA_FORMAT_S_UINT8)
665 return make_surface(brw, target, format, first_level, last_level,
666 width0, height0, depth0, num_samples,
667 ISL_TILING_W_BIT,
668 ISL_SURF_USAGE_STENCIL_BIT |
669 ISL_SURF_USAGE_TEXTURE_BIT,
670 BO_ALLOC_FOR_RENDER,
671 0,
672 NULL);
673
674 const GLenum base_format = _mesa_get_format_base_format(format);
675 if ((base_format == GL_DEPTH_COMPONENT ||
676 base_format == GL_DEPTH_STENCIL) &&
677 !force_linear_tiling(layout_flags)) {
678 /* Fix up the Z miptree format for how we're splitting out separate
679 * stencil. Gen7 expects there to be no stencil bits in its depth buffer.
680 */
681 const mesa_format depth_only_format =
682 intel_depth_format_for_depthstencil_format(format);
683 struct intel_mipmap_tree *mt = make_surface(
684 brw, target, brw->gen >= 6 ? depth_only_format : format,
685 first_level, last_level,
686 width0, height0, depth0, num_samples, ISL_TILING_Y0_BIT,
687 ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT,
688 BO_ALLOC_FOR_RENDER, 0, NULL);
689
690 if (needs_separate_stencil(brw, mt, format, layout_flags) &&
691 !make_separate_stencil_surface(brw, mt)) {
692 intel_miptree_release(&mt);
693 return NULL;
694 }
695
696 if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
697 intel_miptree_choose_aux_usage(brw, mt);
698
699 return mt;
700 }
701
702 mesa_format tex_format = format;
703 mesa_format etc_format = MESA_FORMAT_NONE;
704 uint32_t alloc_flags = 0;
705
706 format = intel_lower_compressed_format(brw, format);
707
708 etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
709
710 assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0);
711 if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD)
712 alloc_flags |= BO_ALLOC_FOR_RENDER;
713
714 isl_tiling_flags_t tiling_flags = force_linear_tiling(layout_flags) ?
715 ISL_TILING_LINEAR_BIT : ISL_TILING_ANY_MASK;
716
717 /* TODO: This used to be because there wasn't BLORP to handle Y-tiling. */
718 if (brw->gen < 6)
719 tiling_flags &= ~ISL_TILING_Y0_BIT;
720
721 struct intel_mipmap_tree *mt = make_surface(
722 brw, target, format,
723 first_level, last_level,
724 width0, height0, depth0,
725 num_samples, tiling_flags,
726 ISL_SURF_USAGE_RENDER_TARGET_BIT |
727 ISL_SURF_USAGE_TEXTURE_BIT,
728 alloc_flags, 0, NULL);
729 if (!mt)
730 return NULL;
731
732 mt->etc_format = etc_format;
733
734 if (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT)
735 mt->bo->cache_coherent = false;
736
737 if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
738 intel_miptree_choose_aux_usage(brw, mt);
739
740 return mt;
741 }
742
743 struct intel_mipmap_tree *
744 intel_miptree_create(struct brw_context *brw,
745 GLenum target,
746 mesa_format format,
747 GLuint first_level,
748 GLuint last_level,
749 GLuint width0,
750 GLuint height0,
751 GLuint depth0,
752 GLuint num_samples,
753 uint32_t layout_flags)
754 {
755 assert(num_samples > 0);
756
757 struct intel_mipmap_tree *mt = miptree_create(
758 brw, target, format,
759 first_level, last_level,
760 width0, height0, depth0, num_samples,
761 layout_flags);
762 if (!mt)
763 return NULL;
764
765 mt->offset = 0;
766
767 if (!intel_miptree_alloc_aux(brw, mt)) {
768 intel_miptree_release(&mt);
769 return NULL;
770 }
771
772 return mt;
773 }
774
775 struct intel_mipmap_tree *
776 intel_miptree_create_for_bo(struct brw_context *brw,
777 struct brw_bo *bo,
778 mesa_format format,
779 uint32_t offset,
780 uint32_t width,
781 uint32_t height,
782 uint32_t depth,
783 int pitch,
784 uint32_t layout_flags)
785 {
786 struct intel_mipmap_tree *mt;
787 uint32_t tiling, swizzle;
788 const GLenum target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
789 const GLenum base_format = _mesa_get_format_base_format(format);
790
791 if ((base_format == GL_DEPTH_COMPONENT ||
792 base_format == GL_DEPTH_STENCIL)) {
793 const mesa_format depth_only_format =
794 intel_depth_format_for_depthstencil_format(format);
795 mt = make_surface(brw, target,
796 brw->gen >= 6 ? depth_only_format : format,
797 0, 0, width, height, depth, 1, ISL_TILING_Y0_BIT,
798 ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT,
799 BO_ALLOC_FOR_RENDER, pitch, bo);
800
801 brw_bo_reference(bo);
802
803 if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
804 intel_miptree_choose_aux_usage(brw, mt);
805
806 return mt;
807 } else if (format == MESA_FORMAT_S_UINT8) {
808 mt = make_surface(brw, target, MESA_FORMAT_S_UINT8,
809 0, 0, width, height, depth, 1,
810 ISL_TILING_W_BIT,
811 ISL_SURF_USAGE_STENCIL_BIT |
812 ISL_SURF_USAGE_TEXTURE_BIT,
813 BO_ALLOC_FOR_RENDER, pitch, bo);
814 if (!mt)
815 return NULL;
816
817 assert(bo->size >= mt->surf.size);
818
819 brw_bo_reference(bo);
820 return mt;
821 }
822
823 brw_bo_get_tiling(bo, &tiling, &swizzle);
824
825 /* Nothing will be able to use this miptree with the BO if the offset isn't
826 * aligned.
827 */
828 if (tiling != I915_TILING_NONE)
829 assert(offset % 4096 == 0);
830
831 /* miptrees can't handle negative pitch. If you need flipping of images,
832 * that's outside of the scope of the mt.
833 */
834 assert(pitch >= 0);
835
836 /* The BO already has a tiling format and we shouldn't confuse the lower
837 * layers by making it try to find a tiling format again.
838 */
839 assert((layout_flags & MIPTREE_LAYOUT_TILING_ANY) == 0);
840 assert((layout_flags & MIPTREE_LAYOUT_TILING_NONE) == 0);
841
842 mt = make_surface(brw, target, format,
843 0, 0, width, height, depth, 1,
844 1lu << isl_tiling_from_i915_tiling(tiling),
845 ISL_SURF_USAGE_RENDER_TARGET_BIT |
846 ISL_SURF_USAGE_TEXTURE_BIT,
847 0, pitch, bo);
848 if (!mt)
849 return NULL;
850
851 brw_bo_reference(bo);
852 mt->bo = bo;
853 mt->offset = offset;
854
855 if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
856 intel_miptree_choose_aux_usage(brw, mt);
857
858 return mt;
859 }
860
861 static struct intel_mipmap_tree *
862 miptree_create_for_planar_image(struct brw_context *brw,
863 __DRIimage *image, GLenum target)
864 {
865 struct intel_image_format *f = image->planar_format;
866 struct intel_mipmap_tree *planar_mt = NULL;
867
868 for (int i = 0; i < f->nplanes; i++) {
869 const int index = f->planes[i].buffer_index;
870 const uint32_t dri_format = f->planes[i].dri_format;
871 const mesa_format format = driImageFormatToGLFormat(dri_format);
872 const uint32_t width = image->width >> f->planes[i].width_shift;
873 const uint32_t height = image->height >> f->planes[i].height_shift;
874
875 /* Disable creation of the texture's aux buffers because the driver
876 * exposes no EGL API to manage them. That is, there is no API for
877 * resolving the aux buffer's content to the main buffer nor for
878 * invalidating the aux buffer's content.
879 */
880 struct intel_mipmap_tree *mt =
881 intel_miptree_create_for_bo(brw, image->bo, format,
882 image->offsets[index],
883 width, height, 1,
884 image->strides[index],
885 MIPTREE_LAYOUT_DISABLE_AUX);
886 if (mt == NULL)
887 return NULL;
888
889 mt->target = target;
890
891 if (i == 0)
892 planar_mt = mt;
893 else
894 planar_mt->plane[i - 1] = mt;
895 }
896
897 return planar_mt;
898 }
899
900 struct intel_mipmap_tree *
901 intel_miptree_create_for_dri_image(struct brw_context *brw,
902 __DRIimage *image, GLenum target,
903 enum isl_colorspace colorspace,
904 bool is_winsys_image)
905 {
906 if (image->planar_format && image->planar_format->nplanes > 0) {
907 assert(colorspace == ISL_COLORSPACE_NONE ||
908 colorspace == ISL_COLORSPACE_YUV);
909 return miptree_create_for_planar_image(brw, image, target);
910 }
911
912 mesa_format format = image->format;
913 switch (colorspace) {
914 case ISL_COLORSPACE_NONE:
915 /* Keep the image format unmodified */
916 break;
917
918 case ISL_COLORSPACE_LINEAR:
919 format =_mesa_get_srgb_format_linear(format);
920 break;
921
922 case ISL_COLORSPACE_SRGB:
923 format =_mesa_get_linear_format_srgb(format);
924 break;
925
926 default:
927 unreachable("Inalid colorspace for non-planar image");
928 }
929
930 if (!brw->ctx.TextureFormatSupported[format]) {
931 /* The texture storage paths in core Mesa detect if the driver does not
932 * support the user-requested format, and then searches for a
933 * fallback format. The DRIimage code bypasses core Mesa, though. So we
934 * do the fallbacks here for important formats.
935 *
936 * We must support DRM_FOURCC_XBGR8888 textures because the Android
937 * framework produces HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces, which
938 * the Chrome OS compositor consumes as dma_buf EGLImages.
939 */
940 format = _mesa_format_fallback_rgbx_to_rgba(format);
941 }
942
943 if (!brw->ctx.TextureFormatSupported[format])
944 return NULL;
945
946 /* If this image comes in from a window system, we have different
947 * requirements than if it comes in via an EGL import operation. Window
948 * system images can use any form of auxiliary compression we wish because
949 * they get "flushed" before being handed off to the window system and we
950 * have the opportunity to do resolves. Window system buffers also may be
951 * used for scanout so we need to flag that appropriately.
952 */
953 const uint32_t mt_layout_flags =
954 is_winsys_image ? MIPTREE_LAYOUT_FOR_SCANOUT : MIPTREE_LAYOUT_DISABLE_AUX;
955
956 /* Disable creation of the texture's aux buffers because the driver exposes
957 * no EGL API to manage them. That is, there is no API for resolving the aux
958 * buffer's content to the main buffer nor for invalidating the aux buffer's
959 * content.
960 */
961 struct intel_mipmap_tree *mt =
962 intel_miptree_create_for_bo(brw, image->bo, format,
963 image->offset, image->width, image->height, 1,
964 image->pitch, mt_layout_flags);
965 if (mt == NULL)
966 return NULL;
967
968 mt->target = target;
969 mt->level[0].level_x = image->tile_x;
970 mt->level[0].level_y = image->tile_y;
971
972 /* From "OES_EGL_image" error reporting. We report GL_INVALID_OPERATION
973 * for EGL images from non-tile aligned sufaces in gen4 hw and earlier which has
974 * trouble resolving back to destination image due to alignment issues.
975 */
976 if (!brw->has_surface_tile_offset) {
977 uint32_t draw_x, draw_y;
978 intel_miptree_get_tile_offsets(mt, 0, 0, &draw_x, &draw_y);
979
980 if (draw_x != 0 || draw_y != 0) {
981 _mesa_error(&brw->ctx, GL_INVALID_OPERATION, __func__);
982 intel_miptree_release(&mt);
983 return NULL;
984 }
985 }
986
987 if (!intel_miptree_alloc_aux(brw, mt)) {
988 intel_miptree_release(&mt);
989 return NULL;
990 }
991
992 return mt;
993 }
994
995 /**
996 * For a singlesample renderbuffer, this simply wraps the given BO with a
997 * miptree.
998 *
999 * For a multisample renderbuffer, this wraps the window system's
1000 * (singlesample) BO with a singlesample miptree attached to the
1001 * intel_renderbuffer, then creates a multisample miptree attached to irb->mt
1002 * that will contain the actual rendering (which is lazily resolved to
1003 * irb->singlesample_mt).
1004 */
bool
intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
                                         struct intel_renderbuffer *irb,
                                         struct intel_mipmap_tree *singlesample_mt,
                                         uint32_t width, uint32_t height,
                                         uint32_t pitch)
{
   struct intel_mipmap_tree *multisample_mt = NULL;
   struct gl_renderbuffer *rb = &irb->Base.Base;
   mesa_format format = rb->Format;
   const unsigned num_samples = MAX2(rb->NumSamples, 1);

   /* Only the front and back buffers, which are color buffers, are allocated
    * through the image loader.
    */
   assert(_mesa_get_format_base_format(format) == GL_RGB ||
          _mesa_get_format_base_format(format) == GL_RGBA);

   assert(singlesample_mt);

   if (num_samples == 1) {
      /* Single-sampled: render directly into the window-system miptree. */
      intel_miptree_release(&irb->mt);
      irb->mt = singlesample_mt;

      assert(!irb->singlesample_mt);
   } else {
      /* Multisampled: keep the window-system buffer as the resolve target
       * and render into a separate multisample miptree.
       */
      intel_miptree_release(&irb->singlesample_mt);
      irb->singlesample_mt = singlesample_mt;

      /* (Re)create the multisample miptree only when missing or when the
       * window was resized.
       */
      if (!irb->mt ||
          irb->mt->surf.logical_level0_px.width != width ||
          irb->mt->surf.logical_level0_px.height != height) {
         multisample_mt = intel_miptree_create_for_renderbuffer(intel,
                                                                format,
                                                                width,
                                                                height,
                                                                num_samples);
         if (!multisample_mt)
            goto fail;

         irb->need_downsample = false;
         intel_miptree_release(&irb->mt);
         irb->mt = multisample_mt;
      }
   }
   return true;

fail:
   intel_miptree_release(&irb->mt);
   return false;
}
1056
1057 struct intel_mipmap_tree*
1058 intel_miptree_create_for_renderbuffer(struct brw_context *brw,
1059 mesa_format format,
1060 uint32_t width,
1061 uint32_t height,
1062 uint32_t num_samples)
1063 {
1064 struct intel_mipmap_tree *mt;
1065 uint32_t depth = 1;
1066 GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
1067 const uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
1068 MIPTREE_LAYOUT_TILING_ANY;
1069
1070 mt = intel_miptree_create(brw, target, format, 0, 0,
1071 width, height, depth, num_samples,
1072 layout_flags);
1073 if (!mt)
1074 goto fail;
1075
1076 return mt;
1077
1078 fail:
1079 intel_miptree_release(&mt);
1080 return NULL;
1081 }
1082
1083 void
1084 intel_miptree_reference(struct intel_mipmap_tree **dst,
1085 struct intel_mipmap_tree *src)
1086 {
1087 if (*dst == src)
1088 return;
1089
1090 intel_miptree_release(dst);
1091
1092 if (src) {
1093 src->refcount++;
1094 DBG("%s %p refcount now %d\n", __func__, src, src->refcount);
1095 }
1096
1097 *dst = src;
1098 }
1099
1100 static void
1101 intel_miptree_aux_buffer_free(struct intel_miptree_aux_buffer *aux_buf)
1102 {
1103 if (aux_buf == NULL)
1104 return;
1105
1106 brw_bo_unreference(aux_buf->bo);
1107
1108 free(aux_buf);
1109 }
1110
/**
 * Drop a reference to the given miptree (no-op when *mt is NULL) and set
 * the caller's pointer to NULL.
 *
 * When the last reference goes away, the miptree's BO, separate stencil
 * trees, aux buffers (HiZ/MCS), the aux-state map, planar siblings, and
 * the per-level slice arrays are all freed with it.
 */
void
intel_miptree_release(struct intel_mipmap_tree **mt)
{
   if (!*mt)
      return;

   DBG("%s %p refcount will be %d\n", __func__, *mt, (*mt)->refcount - 1);
   if (--(*mt)->refcount <= 0) {
      GLuint i;

      DBG("%s deleting %p\n", __func__, *mt);

      brw_bo_unreference((*mt)->bo);
      /* Separate stencil and its R8 shadow copy are refcounted miptrees. */
      intel_miptree_release(&(*mt)->stencil_mt);
      intel_miptree_release(&(*mt)->r8stencil_mt);
      /* Aux buffers are plain structs, not refcounted. */
      intel_miptree_aux_buffer_free((*mt)->hiz_buf);
      intel_miptree_aux_buffer_free((*mt)->mcs_buf);
      free_aux_state_map((*mt)->aux_state);

      /* Extra planes of planar images, if any. */
      intel_miptree_release(&(*mt)->plane[0]);
      intel_miptree_release(&(*mt)->plane[1]);

      for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
         free((*mt)->level[i].slice);
      }

      free(*mt);
   }
   *mt = NULL;
}
1141
1142
1143 void
1144 intel_get_image_dims(struct gl_texture_image *image,
1145 int *width, int *height, int *depth)
1146 {
1147 switch (image->TexObject->Target) {
1148 case GL_TEXTURE_1D_ARRAY:
1149 /* For a 1D Array texture the OpenGL API will treat the image height as
1150 * the number of array slices. For Intel hardware, we treat the 1D array
1151 * as a 2D Array with a height of 1. So, here we want to swap image
1152 * height and depth.
1153 */
1154 assert(image->Depth == 1);
1155 *width = image->Width;
1156 *height = 1;
1157 *depth = image->Height;
1158 break;
1159 case GL_TEXTURE_CUBE_MAP:
1160 /* For Cube maps, the mesa/main api layer gives us a depth of 1 even
1161 * though we really have 6 slices.
1162 */
1163 assert(image->Depth == 1);
1164 *width = image->Width;
1165 *height = image->Height;
1166 *depth = 6;
1167 break;
1168 default:
1169 *width = image->Width;
1170 *height = image->Height;
1171 *depth = image->Depth;
1172 break;
1173 }
1174 }
1175
1176 /**
1177 * Can the image be pulled into a unified mipmap tree? This mirrors
1178 * the completeness test in a lot of ways.
1179 *
1180 * Not sure whether I want to pass gl_texture_image here.
1181 */
1182 bool
1183 intel_miptree_match_image(struct intel_mipmap_tree *mt,
1184 struct gl_texture_image *image)
1185 {
1186 struct intel_texture_image *intelImage = intel_texture_image(image);
1187 GLuint level = intelImage->base.Base.Level;
1188 int width, height, depth;
1189
1190 /* glTexImage* choose the texture object based on the target passed in, and
1191 * objects can't change targets over their lifetimes, so this should be
1192 * true.
1193 */
1194 assert(image->TexObject->Target == mt->target);
1195
1196 mesa_format mt_format = mt->format;
1197 if (mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT && mt->stencil_mt)
1198 mt_format = MESA_FORMAT_Z24_UNORM_S8_UINT;
1199 if (mt->format == MESA_FORMAT_Z_FLOAT32 && mt->stencil_mt)
1200 mt_format = MESA_FORMAT_Z32_FLOAT_S8X24_UINT;
1201 if (mt->etc_format != MESA_FORMAT_NONE)
1202 mt_format = mt->etc_format;
1203
1204 if (image->TexFormat != mt_format)
1205 return false;
1206
1207 intel_get_image_dims(image, &width, &height, &depth);
1208
1209 if (mt->target == GL_TEXTURE_CUBE_MAP)
1210 depth = 6;
1211
1212 if (level >= mt->surf.levels)
1213 return false;
1214
1215 const unsigned level_depth =
1216 mt->surf.dim == ISL_SURF_DIM_3D ?
1217 minify(mt->surf.logical_level0_px.depth, level) :
1218 mt->surf.logical_level0_px.array_len;
1219
1220 return width == minify(mt->surf.logical_level0_px.width, level) &&
1221 height == minify(mt->surf.logical_level0_px.height, level) &&
1222 depth == level_depth &&
1223 MAX2(image->NumSamples, 1) == mt->surf.samples;
1224 }
1225
/**
 * Compute the x/y offset (in surface elements) within the miptree's BO of
 * the image at the given \p level / \p slice.
 */
void
intel_miptree_get_image_offset(const struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               GLuint *x, GLuint *y)
{
   if (level == 0 && slice == 0) {
      /* Level/slice zero may carry a non-zero stored offset when the
       * miptree wraps a single slice of an imported buffer object.
       */
      *x = mt->level[0].level_x;
      *y = mt->level[0].level_y;
      return;
   }

   uint32_t x_offset_sa, y_offset_sa;

   /* Miptree itself can have an offset only if it represents a single
    * slice in an imported buffer object.
    * See intel_miptree_create_for_dri_image().
    */
   assert(mt->level[0].level_x == 0);
   assert(mt->level[0].level_y == 0);

   /* The given level is relative to level zero, while the miptree may
    * represent just a subset of all levels starting from 'first_level'.
    */
   assert(level >= mt->first_level);
   level -= mt->first_level;

   /* For 3D surfaces the slice selects a depth (z) plane; for 2D/array
    * surfaces it selects the array layer.
    */
   const unsigned z = mt->surf.dim == ISL_SURF_DIM_3D ? slice : 0;
   slice = mt->surf.dim == ISL_SURF_DIM_3D ? 0 : slice;
   isl_surf_get_image_offset_el(&mt->surf, level, slice, z,
                                &x_offset_sa, &y_offset_sa);

   *x = x_offset_sa;
   *y = y_offset_sa;
}
1260
1261
1262 /**
1263 * This function computes the tile_w (in bytes) and tile_h (in rows) of
1264 * different tiling patterns. If the BO is untiled, tile_w is set to cpp
1265 * and tile_h is set to 1.
1266 */
1267 void
1268 intel_get_tile_dims(enum isl_tiling tiling, uint32_t cpp,
1269 uint32_t *tile_w, uint32_t *tile_h)
1270 {
1271 switch (tiling) {
1272 case ISL_TILING_X:
1273 *tile_w = 512;
1274 *tile_h = 8;
1275 break;
1276 case ISL_TILING_Y0:
1277 *tile_w = 128;
1278 *tile_h = 32;
1279 break;
1280 case ISL_TILING_LINEAR:
1281 *tile_w = cpp;
1282 *tile_h = 1;
1283 break;
1284 default:
1285 unreachable("not reached");
1286 }
1287 }
1288
1289
1290 /**
1291 * This function computes masks that may be used to select the bits of the X
1292 * and Y coordinates that indicate the offset within a tile. If the BO is
1293 * untiled, the masks are set to 0.
1294 */
1295 void
1296 intel_get_tile_masks(enum isl_tiling tiling, uint32_t cpp,
1297 uint32_t *mask_x, uint32_t *mask_y)
1298 {
1299 uint32_t tile_w_bytes, tile_h;
1300
1301 intel_get_tile_dims(tiling, cpp, &tile_w_bytes, &tile_h);
1302
1303 *mask_x = tile_w_bytes / cpp - 1;
1304 *mask_y = tile_h - 1;
1305 }
1306
1307 /**
1308 * Compute the offset (in bytes) from the start of the BO to the given x
1309 * and y coordinate. For tiled BOs, caller must ensure that x and y are
1310 * multiples of the tile size.
1311 */
1312 uint32_t
1313 intel_miptree_get_aligned_offset(const struct intel_mipmap_tree *mt,
1314 uint32_t x, uint32_t y)
1315 {
1316 int cpp = mt->cpp;
1317 uint32_t pitch = mt->surf.row_pitch;
1318
1319 switch (mt->surf.tiling) {
1320 default:
1321 unreachable("not reached");
1322 case ISL_TILING_LINEAR:
1323 return y * pitch + x * cpp;
1324 case ISL_TILING_X:
1325 assert((x % (512 / cpp)) == 0);
1326 assert((y % 8) == 0);
1327 return y * pitch + x / (512 / cpp) * 4096;
1328 case ISL_TILING_Y0:
1329 assert((x % (128 / cpp)) == 0);
1330 assert((y % 32) == 0);
1331 return y * pitch + x / (128 / cpp) * 4096;
1332 }
1333 }
1334
1335 /**
1336 * Rendering with tiled buffers requires that the base address of the buffer
1337 * be aligned to a page boundary. For renderbuffers, and sometimes with
1338 * textures, we may want the surface to point at a texture image level that
1339 * isn't at a page boundary.
1340 *
1341 * This function returns an appropriately-aligned base offset
1342 * according to the tiling restrictions, plus any required x/y offset
1343 * from there.
1344 */
1345 uint32_t
1346 intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt,
1347 GLuint level, GLuint slice,
1348 uint32_t *tile_x,
1349 uint32_t *tile_y)
1350 {
1351 uint32_t x, y;
1352 uint32_t mask_x, mask_y;
1353
1354 intel_get_tile_masks(mt->surf.tiling, mt->cpp, &mask_x, &mask_y);
1355 intel_miptree_get_image_offset(mt, level, slice, &x, &y);
1356
1357 *tile_x = x & mask_x;
1358 *tile_y = y & mask_y;
1359
1360 return intel_miptree_get_aligned_offset(mt, x & ~mask_x, y & ~mask_y);
1361 }
1362
/**
 * CPU copy of one miptree slice to another, by mapping both slices and
 * memcpy'ing row by row. Recurses to copy the separate stencil tree when
 * the destination has one.
 */
static void
intel_miptree_copy_slice_sw(struct brw_context *brw,
                            struct intel_mipmap_tree *src_mt,
                            unsigned src_level, unsigned src_layer,
                            struct intel_mipmap_tree *dst_mt,
                            unsigned dst_level, unsigned dst_layer,
                            unsigned width, unsigned height)
{
   void *src, *dst;
   ptrdiff_t src_stride, dst_stride;
   const unsigned cpp = (isl_format_get_layout(dst_mt->surf.format)->bpb / 8);

   intel_miptree_map(brw, src_mt,
                     src_level, src_layer,
                     0, 0,
                     width, height,
                     GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
                     &src, &src_stride);

   intel_miptree_map(brw, dst_mt,
                     dst_level, dst_layer,
                     0, 0,
                     width, height,
                     GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
                     BRW_MAP_DIRECT_BIT,
                     &dst, &dst_stride);

   DBG("sw blit %s mt %p %p/%"PRIdPTR" -> %s mt %p %p/%"PRIdPTR" (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src, src_stride,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst, dst_stride,
       width, height);

   /* If both mappings are densely packed, copy everything in one memcpy;
    * otherwise copy row by row, advancing by each mapping's stride.
    * (Arithmetic on void* is a GCC extension used throughout Mesa.)
    */
   int row_size = cpp * width;
   if (src_stride == row_size &&
       dst_stride == row_size) {
      memcpy(dst, src, row_size * height);
   } else {
      for (int i = 0; i < height; i++) {
         memcpy(dst, src, row_size);
         dst += dst_stride;
         src += src_stride;
      }
   }

   intel_miptree_unmap(brw, dst_mt, dst_level, dst_layer);
   intel_miptree_unmap(brw, src_mt, src_level, src_layer);

   /* Don't forget to copy the stencil data over, too. We could have skipped
    * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
    * shuffling the two data sources in/out of temporary storage instead of
    * the direct mapping we get this way.
    */
   if (dst_mt->stencil_mt) {
      assert(src_mt->stencil_mt);
      intel_miptree_copy_slice_sw(brw,
                                  src_mt->stencil_mt, src_level, src_layer,
                                  dst_mt->stencil_mt, dst_level, dst_layer,
                                  width, height);
   }
}
1425
/**
 * Copy one slice of src_mt into the corresponding slice of dst_mt,
 * preferring the blitter and falling back to a CPU copy when the blit is
 * impossible (separate stencil) or fails.
 */
void
intel_miptree_copy_slice(struct brw_context *brw,
                         struct intel_mipmap_tree *src_mt,
                         unsigned src_level, unsigned src_layer,
                         struct intel_mipmap_tree *dst_mt,
                         unsigned dst_level, unsigned dst_layer)

{
   mesa_format format = src_mt->format;
   unsigned width = minify(src_mt->surf.phys_level0_sa.width,
                           src_level - src_mt->first_level);
   unsigned height = minify(src_mt->surf.phys_level0_sa.height,
                            src_level - src_mt->first_level);

   assert(src_layer < get_num_phys_layers(&src_mt->surf,
                                          src_level - src_mt->first_level));

   assert(src_mt->format == dst_mt->format);

   /* Compressed formats copy whole blocks, so convert the pixel size to a
    * (rounded-up) block count.
    */
   if (dst_mt->compressed) {
      unsigned int i, j;
      _mesa_get_format_block_size(dst_mt->format, &i, &j);
      height = ALIGN_NPOT(height, j) / j;
      width = ALIGN_NPOT(width, i) / i;
   }

   /* If it's a packed depth/stencil buffer with separate stencil, the blit
    * below won't apply since we can't do the depth's Y tiling or the
    * stencil's W tiling in the blitter.
    */
   if (src_mt->stencil_mt) {
      intel_miptree_copy_slice_sw(brw,
                                  src_mt, src_level, src_layer,
                                  dst_mt, dst_level, dst_layer,
                                  width, height);
      return;
   }

   uint32_t dst_x, dst_y, src_x, src_y;
   intel_miptree_get_image_offset(dst_mt, dst_level, dst_layer,
                                  &dst_x, &dst_y);
   intel_miptree_get_image_offset(src_mt, src_level, src_layer,
                                  &src_x, &src_y);

   DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src_x, src_y, src_mt->surf.row_pitch,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst_x, dst_y, dst_mt->surf.row_pitch,
       width, height);

   /* Try the blitter first; on failure, fall back to the (slow but always
    * correct) software path.
    */
   if (!intel_miptree_blit(brw,
                           src_mt, src_level, src_layer, 0, 0, false,
                           dst_mt, dst_level, dst_layer, 0, 0, false,
                           width, height, GL_COPY)) {
      perf_debug("miptree validate blit for %s failed\n",
                 _mesa_get_format_name(format));

      intel_miptree_copy_slice_sw(brw,
                                  src_mt, src_level, src_layer,
                                  dst_mt, dst_level, dst_layer,
                                  width, height);
   }
}
1490
1491 /**
1492 * Copies the image's current data to the given miptree, and associates that
1493 * miptree with the image.
1494 *
1495 * If \c invalidate is true, then the actual image data does not need to be
1496 * copied, but the image still needs to be associated to the new miptree (this
1497 * is set to true if we're about to clear the image).
1498 */
1499 void
1500 intel_miptree_copy_teximage(struct brw_context *brw,
1501 struct intel_texture_image *intelImage,
1502 struct intel_mipmap_tree *dst_mt,
1503 bool invalidate)
1504 {
1505 struct intel_mipmap_tree *src_mt = intelImage->mt;
1506 struct intel_texture_object *intel_obj =
1507 intel_texture_object(intelImage->base.Base.TexObject);
1508 int level = intelImage->base.Base.Level;
1509 const unsigned face = intelImage->base.Base.Face;
1510 unsigned start_layer, end_layer;
1511
1512 if (intel_obj->base.Target == GL_TEXTURE_1D_ARRAY) {
1513 assert(face == 0);
1514 assert(intelImage->base.Base.Height);
1515 start_layer = 0;
1516 end_layer = intelImage->base.Base.Height - 1;
1517 } else if (face > 0) {
1518 start_layer = face;
1519 end_layer = face;
1520 } else {
1521 assert(intelImage->base.Base.Depth);
1522 start_layer = 0;
1523 end_layer = intelImage->base.Base.Depth - 1;
1524 }
1525
1526 if (!invalidate) {
1527 for (unsigned i = start_layer; i <= end_layer; i++) {
1528 intel_miptree_copy_slice(brw,
1529 src_mt, level, i,
1530 dst_mt, level, i);
1531 }
1532 }
1533
1534 intel_miptree_reference(&intelImage->mt, dst_mt);
1535 intel_obj->needs_validate = true;
1536 }
1537
1538 static void
1539 intel_miptree_init_mcs(struct brw_context *brw,
1540 struct intel_mipmap_tree *mt,
1541 int init_value)
1542 {
1543 assert(mt->mcs_buf != NULL);
1544
1545 /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
1546 *
1547 * When MCS buffer is enabled and bound to MSRT, it is required that it
1548 * is cleared prior to any rendering.
1549 *
1550 * Since we don't use the MCS buffer for any purpose other than rendering,
1551 * it makes sense to just clear it immediately upon allocation.
1552 *
1553 * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
1554 */
1555 void *map = brw_bo_map(brw, mt->mcs_buf->bo, MAP_WRITE);
1556 if (unlikely(map == NULL)) {
1557 fprintf(stderr, "Failed to map mcs buffer into GTT\n");
1558 brw_bo_unreference(mt->mcs_buf->bo);
1559 free(mt->mcs_buf);
1560 return;
1561 }
1562 void *data = map;
1563 memset(data, init_value, mt->mcs_buf->size);
1564 brw_bo_unmap(mt->mcs_buf->bo);
1565 }
1566
1567 static struct intel_miptree_aux_buffer *
1568 intel_alloc_aux_buffer(struct brw_context *brw,
1569 const char *name,
1570 const struct isl_surf *aux_surf,
1571 uint32_t alloc_flags,
1572 struct intel_mipmap_tree *mt)
1573 {
1574 struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
1575 if (!buf)
1576 return false;
1577
1578 buf->size = aux_surf->size;
1579 buf->pitch = aux_surf->row_pitch;
1580 buf->qpitch = isl_surf_get_array_pitch_sa_rows(aux_surf);
1581
1582 /* ISL has stricter set of alignment rules then the drm allocator.
1583 * Therefore one can pass the ISL dimensions in terms of bytes instead of
1584 * trying to recalculate based on different format block sizes.
1585 */
1586 buf->bo = brw_bo_alloc_tiled(brw->bufmgr, name, buf->size,
1587 I915_TILING_Y, buf->pitch, alloc_flags);
1588 if (!buf->bo) {
1589 free(buf);
1590 return NULL;
1591 }
1592
1593 buf->surf = *aux_surf;
1594
1595 return buf;
1596 }
1597
/**
 * Allocate the MCS buffer (and aux-state map) for a multisampled miptree
 * and initialize it to the all-1's clear value.
 *
 * Returns false if the aux-state map or MCS BO could not be allocated.
 * NOTE(review): if intel_miptree_init_mcs() fails to map the new buffer it
 * tears down mt->mcs_buf, yet this function still returns true — verify
 * callers tolerate that.
 */
static bool
intel_miptree_alloc_mcs(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
                        GLuint num_samples)
{
   assert(brw->gen >= 7); /* MCS only used on Gen7+ */
   assert(mt->mcs_buf == NULL);
   assert(mt->aux_usage == ISL_AUX_USAGE_MCS);

   /* Multisampled miptrees are only supported for single level. */
   assert(mt->first_level == 0);
   enum isl_aux_state **aux_state =
      create_aux_state_map(mt, ISL_AUX_STATE_CLEAR);
   if (!aux_state)
      return false;

   struct isl_surf temp_mcs_surf;

   MAYBE_UNUSED bool ok =
      isl_surf_get_mcs_surf(&brw->isl_dev, &mt->surf, &temp_mcs_surf);
   assert(ok);

   /* Buffer needs to be initialised requiring the buffer to be immediately
    * mapped to cpu space for writing. Therefore do not use the gpu access
    * flag which can cause an unnecessary delay if the backing pages happened
    * to be just used by the GPU.
    */
   const uint32_t alloc_flags = 0;
   mt->mcs_buf = intel_alloc_aux_buffer(brw, "mcs-miptree",
                                        &temp_mcs_surf, alloc_flags, mt);
   if (!mt->mcs_buf) {
      free(aux_state);
      return false;
   }

   mt->aux_state = aux_state;

   /* All-1's is the "fully cleared" MCS encoding; see init_mcs. */
   intel_miptree_init_mcs(brw, mt, 0xFF);

   return true;
}
1639
/**
 * Allocate the CCS buffer (and aux-state map) for a single-sampled color
 * miptree using CCS_D or CCS_E compression.
 *
 * Returns false if the surface cannot have CCS or if allocation fails.
 */
bool
intel_miptree_alloc_ccs(struct brw_context *brw,
                        struct intel_mipmap_tree *mt)
{
   assert(mt->mcs_buf == NULL);
   assert(mt->aux_usage == ISL_AUX_USAGE_CCS_E ||
          mt->aux_usage == ISL_AUX_USAGE_CCS_D);

   struct isl_surf temp_ccs_surf;

   if (!isl_surf_get_ccs_surf(&brw->isl_dev, &mt->surf, &temp_ccs_surf, 0))
      return false;

   assert(temp_ccs_surf.size &&
          (temp_ccs_surf.size % temp_ccs_surf.row_pitch == 0));

   enum isl_aux_state **aux_state =
      create_aux_state_map(mt, ISL_AUX_STATE_PASS_THROUGH);
   if (!aux_state)
      return false;

   /* When CCS_E is used, we need to ensure that the CCS starts off in a valid
    * state. From the Sky Lake PRM, "MCS Buffer for Render Target(s)":
    *
    *    "If Software wants to enable Color Compression without Fast clear,
    *    Software needs to initialize MCS with zeros."
    *
    * A CCS value of 0 indicates that the corresponding block is in the
    * pass-through state which is what we want.
    *
    * For CCS_D, on the other hand, we don't care as we're about to perform a
    * fast-clear operation. In that case, being hot in caches more useful.
    */
   const uint32_t alloc_flags = mt->aux_usage == ISL_AUX_USAGE_CCS_E ?
                                BO_ALLOC_ZEROED : BO_ALLOC_FOR_RENDER;
   mt->mcs_buf = intel_alloc_aux_buffer(brw, "ccs-miptree",
                                        &temp_ccs_surf, alloc_flags, mt);
   if (!mt->mcs_buf) {
      free(aux_state);
      return false;
   }

   mt->aux_state = aux_state;

   return true;
}
1686
1687 /**
1688 * Helper for intel_miptree_alloc_hiz() that sets
1689 * \c mt->level[level].has_hiz. Return true if and only if
1690 * \c has_hiz was set.
1691 */
1692 static bool
1693 intel_miptree_level_enable_hiz(struct brw_context *brw,
1694 struct intel_mipmap_tree *mt,
1695 uint32_t level)
1696 {
1697 assert(mt->hiz_buf);
1698 assert(mt->surf.size > 0);
1699
1700 if (brw->gen >= 8 || brw->is_haswell) {
1701 uint32_t width = minify(mt->surf.phys_level0_sa.width, level);
1702 uint32_t height = minify(mt->surf.phys_level0_sa.height, level);
1703
1704 /* Disable HiZ for LOD > 0 unless the width is 8 aligned
1705 * and the height is 4 aligned. This allows our HiZ support
1706 * to fulfill Haswell restrictions for HiZ ops. For LOD == 0,
1707 * we can grow the width & height to allow the HiZ op to
1708 * force the proper size alignments.
1709 */
1710 if (level > 0 && ((width & 7) || (height & 3))) {
1711 DBG("mt %p level %d: HiZ DISABLED\n", mt, level);
1712 return false;
1713 }
1714 }
1715
1716 DBG("mt %p level %d: HiZ enabled\n", mt, level);
1717 mt->level[level].has_hiz = true;
1718 return true;
1719 }
1720
/**
 * Allocate the HiZ buffer (and aux-state map) for a depth miptree and
 * enable HiZ on every level that meets the hardware alignment rules.
 *
 * Returns false if the aux-state map or HiZ BO could not be allocated.
 */
bool
intel_miptree_alloc_hiz(struct brw_context *brw,
                        struct intel_mipmap_tree *mt)
{
   assert(mt->hiz_buf == NULL);
   assert(mt->aux_usage == ISL_AUX_USAGE_HIZ);

   /* HiZ contents start out undefined until the first depth clear/render. */
   enum isl_aux_state **aux_state =
      create_aux_state_map(mt, ISL_AUX_STATE_AUX_INVALID);
   if (!aux_state)
      return false;

   struct isl_surf temp_hiz_surf;

   MAYBE_UNUSED bool ok =
      isl_surf_get_hiz_surf(&brw->isl_dev, &mt->surf, &temp_hiz_surf);
   assert(ok);

   const uint32_t alloc_flags = BO_ALLOC_FOR_RENDER;
   mt->hiz_buf = intel_alloc_aux_buffer(brw, "hiz-miptree",
                                        &temp_hiz_surf, alloc_flags, mt);

   if (!mt->hiz_buf) {
      free(aux_state);
      return false;
   }

   /* Per-level enable, subject to Haswell/Gen8 alignment restrictions. */
   for (unsigned level = mt->first_level; level <= mt->last_level; ++level)
      intel_miptree_level_enable_hiz(brw, mt, level);

   mt->aux_state = aux_state;

   return true;
}
1755
1756
1757 /**
1758 * Allocate the initial aux surface for a miptree based on mt->aux_usage
1759 *
1760 * Since MCS, HiZ, and CCS_E can compress more than just clear color, we
1761 * create the auxiliary surfaces up-front. CCS_D, on the other hand, can only
1762 * compress clear color so we wait until an actual fast-clear to allocate it.
1763 */
1764 static bool
1765 intel_miptree_alloc_aux(struct brw_context *brw,
1766 struct intel_mipmap_tree *mt)
1767 {
1768 switch (mt->aux_usage) {
1769 case ISL_AUX_USAGE_NONE:
1770 return true;
1771
1772 case ISL_AUX_USAGE_HIZ:
1773 assert(!_mesa_is_format_color_format(mt->format));
1774 if (!intel_miptree_alloc_hiz(brw, mt))
1775 return false;
1776 return true;
1777
1778 case ISL_AUX_USAGE_MCS:
1779 assert(_mesa_is_format_color_format(mt->format));
1780 assert(mt->surf.samples > 1);
1781 if (!intel_miptree_alloc_mcs(brw, mt, mt->surf.samples))
1782 return false;
1783 return true;
1784
1785 case ISL_AUX_USAGE_CCS_D:
1786 /* Since CCS_D can only compress clear color so we wait until an actual
1787 * fast-clear to allocate it.
1788 */
1789 return true;
1790
1791 case ISL_AUX_USAGE_CCS_E:
1792 assert(_mesa_is_format_color_format(mt->format));
1793 assert(mt->surf.samples == 1);
1794 if (!intel_miptree_alloc_ccs(brw, mt))
1795 return false;
1796 return true;
1797 }
1798
1799 unreachable("Invalid aux usage");
1800 }
1801
1802
1803 /**
1804 * Can the miptree sample using the hiz buffer?
1805 */
1806 bool
1807 intel_miptree_sample_with_hiz(struct brw_context *brw,
1808 struct intel_mipmap_tree *mt)
1809 {
1810 /* It's unclear how well supported sampling from the hiz buffer is on GEN8,
1811 * so keep things conservative for now and never enable it unless we're SKL+.
1812 */
1813 if (brw->gen < 9) {
1814 return false;
1815 }
1816
1817 if (!mt->hiz_buf) {
1818 return false;
1819 }
1820
1821 /* It seems the hardware won't fallback to the depth buffer if some of the
1822 * mipmap levels aren't available in the HiZ buffer. So we need all levels
1823 * of the texture to be HiZ enabled.
1824 */
1825 for (unsigned level = 0; level < mt->surf.levels; ++level) {
1826 if (!intel_miptree_level_has_hiz(mt, level))
1827 return false;
1828 }
1829
1830 /* If compressed multisampling is enabled, then we use it for the auxiliary
1831 * buffer instead.
1832 *
1833 * From the BDW PRM (Volume 2d: Command Reference: Structures
1834 * RENDER_SURFACE_STATE.AuxiliarySurfaceMode):
1835 *
1836 * "If this field is set to AUX_HIZ, Number of Multisamples must be
1837 * MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D.
1838 *
1839 * There is no such blurb for 1D textures, but there is sufficient evidence
1840 * that this is broken on SKL+.
1841 */
1842 return (mt->surf.samples == 1 &&
1843 mt->target != GL_TEXTURE_3D &&
1844 mt->target != GL_TEXTURE_1D /* gen9+ restriction */);
1845 }
1846
1847 /**
1848 * Does the miptree slice have hiz enabled?
1849 */
1850 bool
1851 intel_miptree_level_has_hiz(const struct intel_mipmap_tree *mt, uint32_t level)
1852 {
1853 intel_miptree_check_level_layer(mt, level, 0);
1854 return mt->level[level].has_hiz;
1855 }
1856
1857 static inline uint32_t
1858 miptree_level_range_length(const struct intel_mipmap_tree *mt,
1859 uint32_t start_level, uint32_t num_levels)
1860 {
1861 assert(start_level >= mt->first_level);
1862 assert(start_level <= mt->last_level);
1863
1864 if (num_levels == INTEL_REMAINING_LAYERS)
1865 num_levels = mt->last_level - start_level + 1;
1866 /* Check for overflow */
1867 assert(start_level + num_levels >= start_level);
1868 assert(start_level + num_levels <= mt->last_level + 1);
1869
1870 return num_levels;
1871 }
1872
1873 static inline uint32_t
1874 miptree_layer_range_length(const struct intel_mipmap_tree *mt, uint32_t level,
1875 uint32_t start_layer, uint32_t num_layers)
1876 {
1877 assert(level <= mt->last_level);
1878
1879 const uint32_t total_num_layers = get_num_logical_layers(mt, level);
1880 assert(start_layer < total_num_layers);
1881 if (num_layers == INTEL_REMAINING_LAYERS)
1882 num_layers = total_num_layers - start_layer;
1883 /* Check for overflow */
1884 assert(start_layer + num_layers >= start_layer);
1885 assert(start_layer + num_layers <= total_num_layers);
1886
1887 return num_layers;
1888 }
1889
1890 bool
1891 intel_miptree_has_color_unresolved(const struct intel_mipmap_tree *mt,
1892 unsigned start_level, unsigned num_levels,
1893 unsigned start_layer, unsigned num_layers)
1894 {
1895 assert(_mesa_is_format_color_format(mt->format));
1896
1897 if (!mt->mcs_buf)
1898 return false;
1899
1900 /* Clamp the level range to fit the miptree */
1901 num_levels = miptree_level_range_length(mt, start_level, num_levels);
1902
1903 for (uint32_t l = 0; l < num_levels; l++) {
1904 const uint32_t level = start_level + l;
1905 const uint32_t level_layers =
1906 miptree_layer_range_length(mt, level, start_layer, num_layers);
1907 for (unsigned a = 0; a < level_layers; a++) {
1908 enum isl_aux_state aux_state =
1909 intel_miptree_get_aux_state(mt, level, start_layer + a);
1910 assert(aux_state != ISL_AUX_STATE_AUX_INVALID);
1911 if (aux_state != ISL_AUX_STATE_PASS_THROUGH)
1912 return true;
1913 }
1914 }
1915
1916 return false;
1917 }
1918
/* Debug-build sanity checks that fast-clear state is only present where the
 * hardware generation supports it; compiles to nothing without asserts.
 */
static void
intel_miptree_check_color_resolve(const struct brw_context *brw,
                                  const struct intel_mipmap_tree *mt,
                                  unsigned level, unsigned layer)
{

   if (!mt->mcs_buf)
      return;

   /* Fast color clear is supported for mipmapped surfaces only on Gen8+. */
   assert(brw->gen >= 8 ||
          (level == 0 && mt->first_level == 0 && mt->last_level == 0));

   /* Compression of arrayed msaa surfaces is supported. */
   if (mt->surf.samples > 1)
      return;

   /* Fast color clear is supported for non-msaa arrays only on Gen8+. */
   assert(brw->gen >= 8 ||
          (layer == 0 &&
           mt->surf.logical_level0_px.depth == 1 &&
           mt->surf.logical_level0_px.array_len == 1));

   /* Silence unused-parameter warnings in NDEBUG builds. */
   (void)level;
   (void)layer;
}
1945
1946 static enum blorp_fast_clear_op
1947 get_ccs_d_resolve_op(enum isl_aux_state aux_state,
1948 enum isl_aux_usage aux_usage,
1949 bool fast_clear_supported)
1950 {
1951 assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_CCS_D);
1952
1953 const bool ccs_supported = aux_usage == ISL_AUX_USAGE_CCS_D;
1954
1955 assert(ccs_supported == fast_clear_supported);
1956
1957 switch (aux_state) {
1958 case ISL_AUX_STATE_CLEAR:
1959 case ISL_AUX_STATE_PARTIAL_CLEAR:
1960 if (!ccs_supported)
1961 return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
1962 else
1963 return BLORP_FAST_CLEAR_OP_NONE;
1964
1965 case ISL_AUX_STATE_PASS_THROUGH:
1966 return BLORP_FAST_CLEAR_OP_NONE;
1967
1968 case ISL_AUX_STATE_RESOLVED:
1969 case ISL_AUX_STATE_AUX_INVALID:
1970 case ISL_AUX_STATE_COMPRESSED_CLEAR:
1971 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
1972 break;
1973 }
1974
1975 unreachable("Invalid aux state for CCS_D");
1976 }
1977
/* Pick the resolve operation needed before accessing a CCS_E surface with
 * the given aux usage: a full resolve when the consumer can't read CCS at
 * all, a partial resolve when it can read compression but not clear color.
 */
static enum blorp_fast_clear_op
get_ccs_e_resolve_op(enum isl_aux_state aux_state,
                     enum isl_aux_usage aux_usage,
                     bool fast_clear_supported)
{
   /* CCS_E surfaces can be accessed as CCS_D if we're careful. */
   assert(aux_usage == ISL_AUX_USAGE_NONE ||
          aux_usage == ISL_AUX_USAGE_CCS_D ||
          aux_usage == ISL_AUX_USAGE_CCS_E);

   if (aux_usage == ISL_AUX_USAGE_CCS_D)
      assert(fast_clear_supported);

   switch (aux_state) {
   case ISL_AUX_STATE_CLEAR:
   case ISL_AUX_STATE_PARTIAL_CLEAR:
      /* Clear color present: nothing to do if the consumer understands it,
       * a partial resolve if it only understands compression, otherwise a
       * full resolve.
       */
      if (fast_clear_supported)
         return BLORP_FAST_CLEAR_OP_NONE;
      else if (aux_usage == ISL_AUX_USAGE_CCS_E)
         return BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
      else
         return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;

   case ISL_AUX_STATE_COMPRESSED_CLEAR:
      /* Compressed data plus clear color. */
      if (aux_usage != ISL_AUX_USAGE_CCS_E)
         return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
      else if (!fast_clear_supported)
         return BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
      else
         return BLORP_FAST_CLEAR_OP_NONE;

   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      /* Compressed data only: fine for any CCS_E consumer. */
      if (aux_usage != ISL_AUX_USAGE_CCS_E)
         return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
      else
         return BLORP_FAST_CLEAR_OP_NONE;

   case ISL_AUX_STATE_PASS_THROUGH:
      return BLORP_FAST_CLEAR_OP_NONE;

   case ISL_AUX_STATE_RESOLVED:
   case ISL_AUX_STATE_AUX_INVALID:
      break;
   }

   unreachable("Invalid aux state for CCS_E");
}
2025
/* Perform whatever color resolve is required before accessing one
 * level/layer of a CCS surface with the given aux usage, then update the
 * tracked aux state to reflect the resolve that was done.
 */
static void
intel_miptree_prepare_ccs_access(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 uint32_t level, uint32_t layer,
                                 enum isl_aux_usage aux_usage,
                                 bool fast_clear_supported)
{
   enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, layer);

   /* The required op depends on whether the miptree itself is CCS_E or
    * CCS_D; the helpers also validate the requested aux_usage.
    */
   enum blorp_fast_clear_op resolve_op;
   if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
      resolve_op = get_ccs_e_resolve_op(aux_state, aux_usage,
                                        fast_clear_supported);
   } else {
      assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D);
      resolve_op = get_ccs_d_resolve_op(aux_state, aux_usage,
                                        fast_clear_supported);
   }

   if (resolve_op != BLORP_FAST_CLEAR_OP_NONE) {
      intel_miptree_check_color_resolve(brw, mt, level, layer);
      brw_blorp_resolve_color(brw, mt, level, layer, resolve_op);

      switch (resolve_op) {
      case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
         /* The CCS full resolve operation destroys the CCS and sets it to the
          * pass-through state. (You can also think of this as being both a
          * resolve and an ambiguate in one operation.)
          */
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_PASS_THROUGH);
         break;

      case BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL:
         /* A partial resolve removes clear color but leaves compression. */
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
         break;

      default:
         unreachable("Invalid resolve op");
      }
   }
}
2069
/* Update the per-slice aux-state tracking after a write with the given aux
 * usage has completed on a CCS_D/CCS_E miptree slice.  No GPU work happens
 * here; we only record what state the write left the CCS in.
 */
static void
intel_miptree_finish_ccs_write(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               uint32_t level, uint32_t layer,
                               enum isl_aux_usage aux_usage)
{
   assert(aux_usage == ISL_AUX_USAGE_NONE ||
          aux_usage == ISL_AUX_USAGE_CCS_D ||
          aux_usage == ISL_AUX_USAGE_CCS_E);

   enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, layer);

   if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
      switch (aux_state) {
      case ISL_AUX_STATE_CLEAR:
      case ISL_AUX_STATE_PARTIAL_CLEAR:
         /* An aux-less (NONE) write to a cleared slice would have triggered a
          * resolve in prepare_ccs_access, so only compressed usages land here.
          */
         assert(aux_usage == ISL_AUX_USAGE_CCS_E ||
                aux_usage == ISL_AUX_USAGE_CCS_D);

         if (aux_usage == ISL_AUX_USAGE_CCS_E) {
            /* A compressed write mixes compressed data into clear blocks. */
            intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                        ISL_AUX_STATE_COMPRESSED_CLEAR);
         } else if (aux_state != ISL_AUX_STATE_PARTIAL_CLEAR) {
            /* A CCS_D write fast-clears only part of the slice; skip the
             * update if we are already tracked as PARTIAL_CLEAR.
             */
            intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                        ISL_AUX_STATE_PARTIAL_CLEAR);
         }
         break;

      case ISL_AUX_STATE_COMPRESSED_CLEAR:
      case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
         /* Only compressed writes keep these states valid. */
         assert(aux_usage == ISL_AUX_USAGE_CCS_E);
         break; /* Nothing to do */

      case ISL_AUX_STATE_PASS_THROUGH:
         if (aux_usage == ISL_AUX_USAGE_CCS_E) {
            /* Compressed data was written; the CCS is live again. */
            intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                        ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
         } else {
            /* Nothing to do */
         }
         break;

      case ISL_AUX_STATE_RESOLVED:
      case ISL_AUX_STATE_AUX_INVALID:
         unreachable("Invalid aux state for CCS_E");
      }
   } else {
      assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D);
      /* CCS_D is a bit simpler */
      switch (aux_state) {
      case ISL_AUX_STATE_CLEAR:
         assert(aux_usage == ISL_AUX_USAGE_CCS_D);
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_PARTIAL_CLEAR);
         break;

      case ISL_AUX_STATE_PARTIAL_CLEAR:
         assert(aux_usage == ISL_AUX_USAGE_CCS_D);
         break; /* Nothing to do */

      case ISL_AUX_STATE_PASS_THROUGH:
         /* Nothing to do */
         break;

      case ISL_AUX_STATE_COMPRESSED_CLEAR:
      case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      case ISL_AUX_STATE_RESOLVED:
      case ISL_AUX_STATE_AUX_INVALID:
         unreachable("Invalid aux state for CCS_D");
      }
   }
}
2142
2143 static void
2144 intel_miptree_prepare_mcs_access(struct brw_context *brw,
2145 struct intel_mipmap_tree *mt,
2146 uint32_t layer,
2147 enum isl_aux_usage aux_usage,
2148 bool fast_clear_supported)
2149 {
2150 assert(aux_usage == ISL_AUX_USAGE_MCS);
2151
2152 switch (intel_miptree_get_aux_state(mt, 0, layer)) {
2153 case ISL_AUX_STATE_CLEAR:
2154 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2155 if (!fast_clear_supported) {
2156 brw_blorp_mcs_partial_resolve(brw, mt, layer, 1);
2157 intel_miptree_set_aux_state(brw, mt, 0, layer, 1,
2158 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2159 }
2160 break;
2161
2162 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2163 break; /* Nothing to do */
2164
2165 case ISL_AUX_STATE_RESOLVED:
2166 case ISL_AUX_STATE_PASS_THROUGH:
2167 case ISL_AUX_STATE_AUX_INVALID:
2168 case ISL_AUX_STATE_PARTIAL_CLEAR:
2169 unreachable("Invalid aux state for MCS");
2170 }
2171 }
2172
2173 static void
2174 intel_miptree_finish_mcs_write(struct brw_context *brw,
2175 struct intel_mipmap_tree *mt,
2176 uint32_t layer,
2177 enum isl_aux_usage aux_usage)
2178 {
2179 assert(aux_usage == ISL_AUX_USAGE_MCS);
2180
2181 switch (intel_miptree_get_aux_state(mt, 0, layer)) {
2182 case ISL_AUX_STATE_CLEAR:
2183 intel_miptree_set_aux_state(brw, mt, 0, layer, 1,
2184 ISL_AUX_STATE_COMPRESSED_CLEAR);
2185 break;
2186
2187 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2188 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2189 break; /* Nothing to do */
2190
2191 case ISL_AUX_STATE_RESOLVED:
2192 case ISL_AUX_STATE_PASS_THROUGH:
2193 case ISL_AUX_STATE_AUX_INVALID:
2194 case ISL_AUX_STATE_PARTIAL_CLEAR:
2195 unreachable("Invalid aux state for MCS");
2196 }
2197 }
2198
/* Execute whatever HiZ operation (depth resolve or HiZ resolve/ambiguate) is
 * required before the given depth slice can be accessed with the requested
 * aux usage, then record the resulting aux state.
 */
static void
intel_miptree_prepare_hiz_access(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 uint32_t level, uint32_t layer,
                                 enum isl_aux_usage aux_usage,
                                 bool fast_clear_supported)
{
   /* Depth is accessed either with HiZ or with no aux at all. */
   assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_HIZ);

   enum blorp_hiz_op hiz_op = BLORP_HIZ_OP_NONE;
   switch (intel_miptree_get_aux_state(mt, level, layer)) {
   case ISL_AUX_STATE_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_CLEAR:
      /* Clear values live in the HiZ buffer; resolve them into the depth
       * surface unless the access is HiZ-aware and handles fast clears.
       */
      if (aux_usage != ISL_AUX_USAGE_HIZ || !fast_clear_supported)
         hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
      break;

   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      /* Non-HiZ access cannot read compressed depth directly. */
      if (aux_usage != ISL_AUX_USAGE_HIZ)
         hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
      break;

   case ISL_AUX_STATE_PASS_THROUGH:
   case ISL_AUX_STATE_RESOLVED:
      /* The depth surface is valid on its own; any access works. */
      break;

   case ISL_AUX_STATE_AUX_INVALID:
      /* The HiZ data is stale; rebuild it before a HiZ access. */
      if (aux_usage == ISL_AUX_USAGE_HIZ)
         hiz_op = BLORP_HIZ_OP_HIZ_RESOLVE;
      break;

   case ISL_AUX_STATE_PARTIAL_CLEAR:
      unreachable("Invalid HiZ state");
   }

   if (hiz_op != BLORP_HIZ_OP_NONE) {
      intel_hiz_exec(brw, mt, level, layer, 1, hiz_op);

      /* Record the state the operation left us in. */
      switch (hiz_op) {
      case BLORP_HIZ_OP_DEPTH_RESOLVE:
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_RESOLVED);
         break;

      case BLORP_HIZ_OP_HIZ_RESOLVE:
         /* The HiZ resolve operation is actually an ambiguate */
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_PASS_THROUGH);
         break;

      default:
         unreachable("Invalid HiZ op");
      }
   }
}
2254
/* Update the per-slice aux-state tracking after a depth write that used the
 * given aux usage.  No GPU work is performed here.
 */
static void
intel_miptree_finish_hiz_write(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               uint32_t level, uint32_t layer,
                               enum isl_aux_usage aux_usage)
{
   assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_HIZ);

   switch (intel_miptree_get_aux_state(mt, level, layer)) {
   case ISL_AUX_STATE_CLEAR:
      /* A non-HiZ write to a cleared slice would have been preceded by a
       * depth resolve, so only HiZ writes can land here.
       */
      assert(aux_usage == ISL_AUX_USAGE_HIZ);
      intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                  ISL_AUX_STATE_COMPRESSED_CLEAR);
      break;

   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_CLEAR:
      assert(aux_usage == ISL_AUX_USAGE_HIZ);
      break; /* Nothing to do */

   case ISL_AUX_STATE_RESOLVED:
      if (aux_usage == ISL_AUX_USAGE_HIZ) {
         /* HiZ tracked the write, so both buffers stay consistent. */
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
      } else {
         /* The depth surface changed behind HiZ's back; its data is stale. */
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_AUX_INVALID);
      }
      break;

   case ISL_AUX_STATE_PASS_THROUGH:
      if (aux_usage == ISL_AUX_USAGE_HIZ) {
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
      }
      break;

   case ISL_AUX_STATE_AUX_INVALID:
      /* HiZ is already known-stale; a HiZ write here would be invalid. */
      assert(aux_usage != ISL_AUX_USAGE_HIZ);
      break;

   case ISL_AUX_STATE_PARTIAL_CLEAR:
      unreachable("Invalid HiZ state");
   }
}
2300
/**
 * Prepare a range of miptree slices for an access with the given aux usage,
 * dispatching to the MCS/CCS/HiZ-specific helper for each slice.
 *
 * \param start_level/num_levels  level range (num_levels may be
 *                                INTEL_REMAINING_LEVELS)
 * \param start_layer/num_layers  layer range (num_layers may be
 *                                INTEL_REMAINING_LAYERS)
 * \param aux_usage               aux usage of the upcoming access
 * \param fast_clear_supported    whether the access handles fast-clear blocks
 */
void
intel_miptree_prepare_access(struct brw_context *brw,
                             struct intel_mipmap_tree *mt,
                             uint32_t start_level, uint32_t num_levels,
                             uint32_t start_layer, uint32_t num_layers,
                             enum isl_aux_usage aux_usage,
                             bool fast_clear_supported)
{
   /* Clamp the range to the levels that actually exist. */
   num_levels = miptree_level_range_length(mt, start_level, num_levels);

   switch (mt->aux_usage) {
   case ISL_AUX_USAGE_NONE:
      /* Nothing to do */
      break;

   case ISL_AUX_USAGE_MCS:
      assert(mt->mcs_buf);
      /* Multisampled surfaces only have a single level. */
      assert(start_level == 0 && num_levels == 1);
      const uint32_t level_layers =
         miptree_layer_range_length(mt, 0, start_layer, num_layers);
      for (uint32_t a = 0; a < level_layers; a++) {
         intel_miptree_prepare_mcs_access(brw, mt, start_layer + a,
                                          aux_usage, fast_clear_supported);
      }
      break;

   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      /* The CCS data also lives in mcs_buf; without it there is nothing to
       * resolve.
       */
      if (!mt->mcs_buf)
         return;

      for (uint32_t l = 0; l < num_levels; l++) {
         const uint32_t level = start_level + l;
         const uint32_t level_layers =
            miptree_layer_range_length(mt, level, start_layer, num_layers);
         for (uint32_t a = 0; a < level_layers; a++) {
            intel_miptree_prepare_ccs_access(brw, mt, level,
                                             start_layer + a,
                                             aux_usage, fast_clear_supported);
         }
      }
      break;

   case ISL_AUX_USAGE_HIZ:
      assert(mt->hiz_buf);
      for (uint32_t l = 0; l < num_levels; l++) {
         const uint32_t level = start_level + l;
         /* Some levels may have no HiZ; skip them. */
         if (!intel_miptree_level_has_hiz(mt, level))
            continue;

         const uint32_t level_layers =
            miptree_layer_range_length(mt, level, start_layer, num_layers);
         for (uint32_t a = 0; a < level_layers; a++) {
            intel_miptree_prepare_hiz_access(brw, mt, level, start_layer + a,
                                             aux_usage, fast_clear_supported);
         }
      }
      break;

   default:
      unreachable("Invalid aux usage");
   }
}
2364
2365 void
2366 intel_miptree_finish_write(struct brw_context *brw,
2367 struct intel_mipmap_tree *mt, uint32_t level,
2368 uint32_t start_layer, uint32_t num_layers,
2369 enum isl_aux_usage aux_usage)
2370 {
2371 num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers);
2372
2373 switch (mt->aux_usage) {
2374 case ISL_AUX_USAGE_NONE:
2375 /* Nothing to do */
2376 break;
2377
2378 case ISL_AUX_USAGE_MCS:
2379 assert(mt->mcs_buf);
2380 for (uint32_t a = 0; a < num_layers; a++) {
2381 intel_miptree_finish_mcs_write(brw, mt, start_layer + a,
2382 aux_usage);
2383 }
2384 break;
2385
2386 case ISL_AUX_USAGE_CCS_D:
2387 case ISL_AUX_USAGE_CCS_E:
2388 if (!mt->mcs_buf)
2389 return;
2390
2391 for (uint32_t a = 0; a < num_layers; a++) {
2392 intel_miptree_finish_ccs_write(brw, mt, level, start_layer + a,
2393 aux_usage);
2394 }
2395 break;
2396
2397 case ISL_AUX_USAGE_HIZ:
2398 if (!intel_miptree_level_has_hiz(mt, level))
2399 return;
2400
2401 for (uint32_t a = 0; a < num_layers; a++) {
2402 intel_miptree_finish_hiz_write(brw, mt, level, start_layer + a,
2403 aux_usage);
2404 }
2405 break;
2406
2407 default:
2408 unreachable("Invavlid aux usage");
2409 }
2410 }
2411
/**
 * Return the tracked aux state of a single miptree slice.
 *
 * Sanity-checks that the miptree actually carries aux data for the slice:
 * color surfaces must have an mcs_buf (MCS or CCS), depth surfaces must have
 * HiZ at the given level, and stencil never has aux state at all.
 */
enum isl_aux_state
intel_miptree_get_aux_state(const struct intel_mipmap_tree *mt,
                            uint32_t level, uint32_t layer)
{
   intel_miptree_check_level_layer(mt, level, layer);

   if (_mesa_is_format_color_format(mt->format)) {
      assert(mt->mcs_buf != NULL);
      /* Single-sampled (CCS) or array-layout multisampled (MCS) only. */
      assert(mt->surf.samples == 1 ||
             mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
   } else if (mt->format == MESA_FORMAT_S_UINT8) {
      unreachable("Cannot get aux state for stencil");
   } else {
      assert(intel_miptree_level_has_hiz(mt, level));
   }

   return mt->aux_state[level][layer];
}
2430
2431 void
2432 intel_miptree_set_aux_state(struct brw_context *brw,
2433 struct intel_mipmap_tree *mt, uint32_t level,
2434 uint32_t start_layer, uint32_t num_layers,
2435 enum isl_aux_state aux_state)
2436 {
2437 num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers);
2438
2439 if (_mesa_is_format_color_format(mt->format)) {
2440 assert(mt->mcs_buf != NULL);
2441 assert(mt->surf.samples == 1 ||
2442 mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
2443 } else if (mt->format == MESA_FORMAT_S_UINT8) {
2444 unreachable("Cannot get aux state for stencil");
2445 } else {
2446 assert(intel_miptree_level_has_hiz(mt, level));
2447 }
2448
2449 for (unsigned a = 0; a < num_layers; a++)
2450 mt->aux_state[level][start_layer + a] = aux_state;
2451 }
2452
2453 /* On Gen9 color buffers may be compressed by the hardware (lossless
2454 * compression). There are, however, format restrictions and care needs to be
2455 * taken that the sampler engine is capable for re-interpreting a buffer with
2456 * format different the buffer was originally written with.
2457 *
2458 * For example, SRGB formats are not compressible and the sampler engine isn't
2459 * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying
2460 * color buffer needs to be resolved so that the sampling surface can be
2461 * sampled as non-compressed (i.e., without the auxiliary MCS buffer being
2462 * set).
2463 */
2464 static bool
2465 can_texture_with_ccs(struct brw_context *brw,
2466 struct intel_mipmap_tree *mt,
2467 enum isl_format view_format)
2468 {
2469 if (mt->aux_usage != ISL_AUX_USAGE_CCS_E)
2470 return false;
2471
2472 if (!isl_formats_are_ccs_e_compatible(&brw->screen->devinfo,
2473 mt->surf.format, view_format)) {
2474 perf_debug("Incompatible sampling format (%s) for rbc (%s)\n",
2475 isl_format_get_layout(view_format)->name,
2476 _mesa_get_format_name(mt->format));
2477 return false;
2478 }
2479
2480 return true;
2481 }
2482
/**
 * Determine which aux usage, if any, may be used when sampling from this
 * miptree through a view with the given format.  Returns
 * ISL_AUX_USAGE_NONE when the surface must be sampled uncompressed.
 */
enum isl_aux_usage
intel_miptree_texture_aux_usage(struct brw_context *brw,
                                struct intel_mipmap_tree *mt,
                                enum isl_format view_format)
{
   switch (mt->aux_usage) {
   case ISL_AUX_USAGE_HIZ:
      /* Sampling with HiZ is only allowed when the helper reports the
       * hardware/surface combination supports it.
       */
      if (intel_miptree_sample_with_hiz(brw, mt))
         return ISL_AUX_USAGE_HIZ;
      break;

   case ISL_AUX_USAGE_MCS:
      /* Multisampled surfaces are always sampled through their MCS. */
      return ISL_AUX_USAGE_MCS;

   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      /* can_texture_with_ccs() returns false unless mt->aux_usage is CCS_E,
       * so a CCS_D surface always falls through to NONE here.
       */
      if (mt->mcs_buf && can_texture_with_ccs(brw, mt, view_format))
         return ISL_AUX_USAGE_CCS_E;
      break;

   default:
      break;
   }

   return ISL_AUX_USAGE_NONE;
}
2509
2510 static bool
2511 isl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b)
2512 {
2513 /* On gen8 and earlier, the hardware was only capable of handling 0/1 clear
2514 * values so sRGB curve application was a no-op for all fast-clearable
2515 * formats.
2516 *
2517 * On gen9+, the hardware supports arbitrary clear values. For sRGB clear
2518 * values, the hardware interprets the floats, not as what would be
2519 * returned from the sampler (or written by the shader), but as being
2520 * between format conversion and sRGB curve application. This means that
2521 * we can switch between sRGB and UNORM without having to whack the clear
2522 * color.
2523 */
2524 return isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b);
2525 }
2526
2527 static void
2528 intel_miptree_prepare_texture_slices(struct brw_context *brw,
2529 struct intel_mipmap_tree *mt,
2530 enum isl_format view_format,
2531 uint32_t start_level, uint32_t num_levels,
2532 uint32_t start_layer, uint32_t num_layers,
2533 bool *aux_supported_out)
2534 {
2535 enum isl_aux_usage aux_usage =
2536 intel_miptree_texture_aux_usage(brw, mt, view_format);
2537 bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE;
2538
2539 /* Clear color is specified as ints or floats and the conversion is done by
2540 * the sampler. If we have a texture view, we would have to perform the
2541 * clear color conversion manually. Just disable clear color.
2542 */
2543 if (!isl_formats_are_fast_clear_compatible(mt->surf.format, view_format))
2544 clear_supported = false;
2545
2546 intel_miptree_prepare_access(brw, mt, start_level, num_levels,
2547 start_layer, num_layers,
2548 aux_usage, clear_supported);
2549 if (aux_supported_out)
2550 *aux_supported_out = aux_usage != ISL_AUX_USAGE_NONE;
2551 }
2552
/**
 * Prepare every level and layer of the miptree for sampling through
 * view_format; *aux_supported_out (if non-NULL) reports whether any aux
 * usage remains enabled.
 */
void
intel_miptree_prepare_texture(struct brw_context *brw,
                              struct intel_mipmap_tree *mt,
                              enum isl_format view_format,
                              bool *aux_supported_out)
{
   intel_miptree_prepare_texture_slices(brw, mt, view_format,
                                        0, INTEL_REMAINING_LEVELS,
                                        0, INTEL_REMAINING_LAYERS,
                                        aux_supported_out);
}
2564
2565 void
2566 intel_miptree_prepare_image(struct brw_context *brw,
2567 struct intel_mipmap_tree *mt)
2568 {
2569 /* The data port doesn't understand any compression */
2570 intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
2571 0, INTEL_REMAINING_LAYERS,
2572 ISL_AUX_USAGE_NONE, false);
2573 }
2574
2575 void
2576 intel_miptree_prepare_fb_fetch(struct brw_context *brw,
2577 struct intel_mipmap_tree *mt, uint32_t level,
2578 uint32_t start_layer, uint32_t num_layers)
2579 {
2580 intel_miptree_prepare_texture_slices(brw, mt, mt->surf.format, level, 1,
2581 start_layer, num_layers, NULL);
2582 }
2583
2584 enum isl_aux_usage
2585 intel_miptree_render_aux_usage(struct brw_context *brw,
2586 struct intel_mipmap_tree *mt,
2587 bool srgb_enabled)
2588 {
2589 switch (mt->aux_usage) {
2590 case ISL_AUX_USAGE_MCS:
2591 assert(mt->mcs_buf);
2592 return ISL_AUX_USAGE_MCS;
2593
2594 case ISL_AUX_USAGE_CCS_D:
2595 /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of
2596 * the single-sampled color renderbuffers because the CCS buffer isn't
2597 * supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is
2598 * enabled because otherwise the surface state will be programmed with
2599 * the linear equivalent format anyway.
2600 */
2601 if (srgb_enabled &&
2602 _mesa_get_srgb_format_linear(mt->format) != mt->format) {
2603 return ISL_AUX_USAGE_NONE;
2604 } else if (!mt->mcs_buf) {
2605 return ISL_AUX_USAGE_NONE;
2606 } else {
2607 return ISL_AUX_USAGE_CCS_D;
2608 }
2609
2610 case ISL_AUX_USAGE_CCS_E: {
2611 /* Lossless compression is not supported for SRGB formats, it
2612 * should be impossible to get here with such surfaces.
2613 */
2614 assert(!srgb_enabled ||
2615 _mesa_get_srgb_format_linear(mt->format) == mt->format);
2616
2617 return ISL_AUX_USAGE_CCS_E;
2618 }
2619
2620 default:
2621 return ISL_AUX_USAGE_NONE;
2622 }
2623 }
2624
2625 void
2626 intel_miptree_prepare_render(struct brw_context *brw,
2627 struct intel_mipmap_tree *mt, uint32_t level,
2628 uint32_t start_layer, uint32_t layer_count,
2629 bool srgb_enabled)
2630 {
2631 enum isl_aux_usage aux_usage =
2632 intel_miptree_render_aux_usage(brw, mt, srgb_enabled);
2633 intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
2634 aux_usage, aux_usage != ISL_AUX_USAGE_NONE);
2635 }
2636
2637 void
2638 intel_miptree_finish_render(struct brw_context *brw,
2639 struct intel_mipmap_tree *mt, uint32_t level,
2640 uint32_t start_layer, uint32_t layer_count,
2641 bool srgb_enabled)
2642 {
2643 assert(_mesa_is_format_color_format(mt->format));
2644
2645 enum isl_aux_usage aux_usage =
2646 intel_miptree_render_aux_usage(brw, mt, srgb_enabled);
2647 intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
2648 aux_usage);
2649 }
2650
2651 void
2652 intel_miptree_prepare_depth(struct brw_context *brw,
2653 struct intel_mipmap_tree *mt, uint32_t level,
2654 uint32_t start_layer, uint32_t layer_count)
2655 {
2656 intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
2657 mt->aux_usage, mt->hiz_buf != NULL);
2658 }
2659
2660 void
2661 intel_miptree_finish_depth(struct brw_context *brw,
2662 struct intel_mipmap_tree *mt, uint32_t level,
2663 uint32_t start_layer, uint32_t layer_count,
2664 bool depth_written)
2665 {
2666 if (depth_written) {
2667 intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
2668 mt->hiz_buf != NULL);
2669 }
2670 }
2671
/**
 * Make it possible to share the BO backing the given miptree with another
 * process or another miptree.
 *
 * Fast color clears are unsafe with shared buffers, so we need to resolve and
 * then discard the MCS buffer, if present.  We also set the no_ccs flag to
 * ensure that no MCS buffer gets allocated in the future.
 *
 * HiZ is similarly unsafe with shared buffers.
 */
void
intel_miptree_make_shareable(struct brw_context *brw,
                             struct intel_mipmap_tree *mt)
{
   /* MCS buffers are also used for multisample buffers, but we can't resolve
    * away a multisample MCS buffer because it's an integral part of how the
    * pixel data is stored.  Fortunately this code path should never be
    * reached for multisample buffers.
    */
   assert(mt->surf.msaa_layout == ISL_MSAA_LAYOUT_NONE ||
          mt->surf.samples == 1);

   /* Resolve everything to plain, uncompressed data first. */
   intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
                                0, INTEL_REMAINING_LAYERS,
                                ISL_AUX_USAGE_NONE, false);

   if (mt->mcs_buf) {
      brw_bo_unreference(mt->mcs_buf->bo);
      free(mt->mcs_buf);
      mt->mcs_buf = NULL;

      /* Any pending MCS/CCS operations are no longer needed. Trying to
       * execute any will likely crash due to the missing aux buffer. So let's
       * delete all pending ops.
       */
      free(mt->aux_state);
      mt->aux_state = NULL;
   }

   if (mt->hiz_buf) {
      intel_miptree_aux_buffer_free(mt->hiz_buf);
      mt->hiz_buf = NULL;

      for (uint32_t l = mt->first_level; l <= mt->last_level; ++l) {
         mt->level[l].has_hiz = false;
      }

      /* Any pending HiZ operations are no longer needed. Trying to execute
       * any will likely crash due to the missing aux buffer. So let's delete
       * all pending ops.  (If aux_state was already freed above, this second
       * free() of NULL is a harmless no-op.)
       */
      free(mt->aux_state);
      mt->aux_state = NULL;
   }

   mt->aux_usage = ISL_AUX_USAGE_NONE;
}
2729
2730
/**
 * \brief Compute the byte offset of pixel (x, y) within a W-tiled stencil
 * buffer.
 *
 * The stencil buffer is W tiled.  Since the GTT is incapable of W fencing,
 * the tile layout must be decoded in software.
 *
 * See
 *  - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
 *    Format.
 *  - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
 *
 * Even though the returned offset is always positive, the return type is
 * signed due to
 *    commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
 *    mesa: Fix return type of _mesa_get_format_bytes() (#37351)
 */
static intptr_t
intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
{
   const uint32_t tile_size = 4096;
   const uint32_t tile_width = 64;
   const uint32_t tile_height = 64;
   /* Two rows of tiles are interleaved within the buffer. */
   const uint32_t row_size = 64 * stride / 2;

   /* Which tile contains the byte, and where the byte sits inside it. */
   const uint32_t tile_x = x / tile_width;
   const uint32_t tile_y = y / tile_height;
   const uint32_t byte_x = x % tile_width;
   const uint32_t byte_y = y % tile_height;

   /* W-major swizzle: interleave the low bits of byte_x and byte_y, with
    * byte_y contributing the even bit positions and byte_x the odd ones.
    */
   uintptr_t u = tile_y * row_size
               + tile_x * tile_size
               + 512 * (byte_x / 8)
               +  64 * (byte_y / 8)
               +  32 * ((byte_y / 4) % 2)
               +  16 * ((byte_x / 4) % 2)
               +   8 * ((byte_y / 2) % 2)
               +   4 * ((byte_x / 2) % 2)
               +   2 * (byte_y % 2)
               +   1 * (byte_x % 2);

   /* Adjust for bit6 swizzling: in every other 512-byte sub-block, flip a
    * 64-byte half depending on the row of sub-blocks.
    */
   if (swizzled && ((byte_x / 8) % 2) == 1)
      u += (((byte_y / 8) % 2) == 0) ? 64 : (uintptr_t) -64;

   return u;
}
2786
2787 void
2788 intel_miptree_updownsample(struct brw_context *brw,
2789 struct intel_mipmap_tree *src,
2790 struct intel_mipmap_tree *dst)
2791 {
2792 unsigned src_w = src->surf.logical_level0_px.width;
2793 unsigned src_h = src->surf.logical_level0_px.height;
2794 unsigned dst_w = dst->surf.logical_level0_px.width;
2795 unsigned dst_h = dst->surf.logical_level0_px.height;
2796
2797 brw_blorp_blit_miptrees(brw,
2798 src, 0 /* level */, 0 /* layer */,
2799 src->format, SWIZZLE_XYZW,
2800 dst, 0 /* level */, 0 /* layer */, dst->format,
2801 0, 0, src_w, src_h,
2802 0, 0, dst_w, dst_h,
2803 GL_NEAREST, false, false /*mirror x, y*/,
2804 false, false);
2805
2806 if (src->stencil_mt) {
2807 src_w = src->stencil_mt->surf.logical_level0_px.width;
2808 src_h = src->stencil_mt->surf.logical_level0_px.height;
2809 dst_w = dst->stencil_mt->surf.logical_level0_px.width;
2810 dst_h = dst->stencil_mt->surf.logical_level0_px.height;
2811
2812 brw_blorp_blit_miptrees(brw,
2813 src->stencil_mt, 0 /* level */, 0 /* layer */,
2814 src->stencil_mt->format, SWIZZLE_XYZW,
2815 dst->stencil_mt, 0 /* level */, 0 /* layer */,
2816 dst->stencil_mt->format,
2817 0, 0, src_w, src_h,
2818 0, 0, dst_w, dst_h,
2819 GL_NEAREST, false, false /*mirror x, y*/,
2820 false, false /* decode/encode srgb */);
2821 }
2822 }
2823
/**
 * Keep the R8_UINT shadow copy of a stencil miptree up to date.
 *
 * On Gen7, an r8stencil_mt shadow is (lazily) allocated and refreshed from
 * the real stencil data whenever it is flagged out of date; Gen8+ does not
 * need the shadow and returns early.
 */
void
intel_update_r8stencil(struct brw_context *brw,
                       struct intel_mipmap_tree *mt)
{
   assert(brw->gen >= 7);
   /* mt may be the stencil miptree itself or a depth miptree that carries a
    * separate stencil miptree.
    */
   struct intel_mipmap_tree *src =
      mt->format == MESA_FORMAT_S_UINT8 ? mt : mt->stencil_mt;
   if (!src || brw->gen >= 8 || !src->r8stencil_needs_update)
      return;

   assert(src->surf.size > 0);

   if (!mt->r8stencil_mt) {
      assert(brw->gen > 6); /* Handle MIPTREE_LAYOUT_GEN6_HIZ_STENCIL */
      /* Allocate the shadow with the same dimensions as the source but with
       * an R8_UINT format the sampler can read directly.
       */
      mt->r8stencil_mt = make_surface(
                            brw,
                            src->target,
                            MESA_FORMAT_R_UINT8,
                            src->first_level, src->last_level,
                            src->surf.logical_level0_px.width,
                            src->surf.logical_level0_px.height,
                            src->surf.dim == ISL_SURF_DIM_3D ?
                               src->surf.logical_level0_px.depth :
                               src->surf.logical_level0_px.array_len,
                            src->surf.samples,
                            ISL_TILING_Y0_BIT,
                            ISL_SURF_USAGE_TEXTURE_BIT,
                            BO_ALLOC_FOR_RENDER, 0, NULL);
      assert(mt->r8stencil_mt);
   }

   struct intel_mipmap_tree *dst = mt->r8stencil_mt;

   /* Copy every level and layer from the real stencil data. */
   for (int level = src->first_level; level <= src->last_level; level++) {
      const unsigned depth = src->surf.dim == ISL_SURF_DIM_3D ?
         minify(src->surf.phys_level0_sa.depth, level) :
         src->surf.phys_level0_sa.array_len;

      for (unsigned layer = 0; layer < depth; layer++) {
         brw_blorp_copy_miptrees(brw,
                                 src, level, layer,
                                 dst, level, layer,
                                 0, 0, 0, 0,
                                 minify(src->surf.logical_level0_px.width,
                                        level),
                                 minify(src->surf.logical_level0_px.height,
                                        level));
      }
   }

   brw_render_cache_set_check_flush(brw, dst->bo);
   src->r8stencil_needs_update = false;
}
2877
2878 static void *
2879 intel_miptree_map_raw(struct brw_context *brw,
2880 struct intel_mipmap_tree *mt,
2881 GLbitfield mode)
2882 {
2883 struct brw_bo *bo = mt->bo;
2884
2885 if (brw_batch_references(&brw->batch, bo))
2886 intel_batchbuffer_flush(brw);
2887
2888 return brw_bo_map(brw, bo, mode);
2889 }
2890
/* Undo intel_miptree_map_raw() by unmapping the miptree's BO. */
static void
intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
{
   brw_bo_unmap(mt->bo);
}
2896
/* Map a miptree region directly through the (fence-detiled) GTT mapping and
 * fill in map->ptr/map->stride.  map->ptr is left NULL if the raw mapping
 * fails.
 */
static void
intel_miptree_map_gtt(struct brw_context *brw,
                      struct intel_mipmap_tree *mt,
                      struct intel_miptree_map *map,
                      unsigned int level, unsigned int slice)
{
   unsigned int bw, bh;
   void *base;
   unsigned int image_x, image_y;
   intptr_t x = map->x;
   intptr_t y = map->y;

   /* For compressed formats, the stride is the number of bytes per
    * row of blocks.  intel_miptree_get_image_offset() already does
    * the divide.
    */
   _mesa_get_format_block_size(mt->format, &bw, &bh);
   assert(y % bh == 0);
   assert(x % bw == 0);
   y /= bh;
   x /= bw;

   base = intel_miptree_map_raw(brw, mt, map->mode);

   if (base == NULL)
      map->ptr = NULL;
   else {
      /* Position of this miptree within its (possibly shared) BO. */
      base += mt->offset;

      /* Note that in the case of cube maps, the caller must have passed the
       * slice number referencing the face.
      */
      intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
      x += image_x;
      y += image_y;

      map->stride = mt->surf.row_pitch;
      map->ptr = base + y * map->stride + x * mt->cpp;
   }

   DBG("%s: %d,%d %dx%d from mt %p (%s) "
       "%"PRIiPTR",%"PRIiPTR" = %p/%d\n", __func__,
       map->x, map->y, map->w, map->h,
       mt, _mesa_get_format_name(mt->format),
       x, y, map->ptr, map->stride);
}
2943
/* Undo intel_miptree_map_gtt() by releasing the raw BO mapping. */
static void
intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
{
   intel_miptree_unmap_raw(mt);
}
2949
/* Map a miptree region by blitting it into an untiled temporary miptree and
 * mapping that.  On failure, map->ptr is NULL and map->stride is 0.
 */
static void
intel_miptree_map_blit(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       struct intel_miptree_map *map,
                       unsigned int level, unsigned int slice)
{
   /* Single-level, single-slice, untiled staging surface of the mapped
    * rectangle's size.
    */
   map->linear_mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
                                         /* first_level */ 0,
                                         /* last_level */ 0,
                                         map->w, map->h, 1,
                                         /* samples */ 1,
                                         MIPTREE_LAYOUT_TILING_NONE);

   if (!map->linear_mt) {
      fprintf(stderr, "Failed to allocate blit temporary\n");
      goto fail;
   }
   map->stride = map->linear_mt->surf.row_pitch;

   /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
    * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
    * invalidate is set, since we'll be writing the whole rectangle from our
    * temporary buffer back out.
    */
   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
      if (!intel_miptree_copy(brw,
                              mt, level, slice, map->x, map->y,
                              map->linear_mt, 0, 0, 0, 0,
                              map->w, map->h)) {
         fprintf(stderr, "Failed to blit\n");
         goto fail;
      }
   }

   map->ptr = intel_miptree_map_raw(brw, map->linear_mt, map->mode);

   DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
       map->x, map->y, map->w, map->h,
       mt, _mesa_get_format_name(mt->format),
       level, slice, map->ptr, map->stride);

   return;

fail:
   intel_miptree_release(&map->linear_mt);
   map->ptr = NULL;
   map->stride = 0;
}
2998
/* Finish a blit-based map: if it was writable, blit the staging copy back
 * into place, then release the staging miptree.
 */
static void
intel_miptree_unmap_blit(struct brw_context *brw,
                         struct intel_mipmap_tree *mt,
                         struct intel_miptree_map *map,
                         unsigned int level,
                         unsigned int slice)
{
   /* NOTE(review): ctx looks unused here, but the WARN_ONCE macro below may
    * reference a local named ctx — confirm against the macro definition in
    * brw_context.h before removing.
    */
   struct gl_context *ctx = &brw->ctx;

   intel_miptree_unmap_raw(map->linear_mt);

   if (map->mode & GL_MAP_WRITE_BIT) {
      bool ok = intel_miptree_copy(brw,
                                   map->linear_mt, 0, 0, 0, 0,
                                   mt, level, slice, map->x, map->y,
                                   map->w, map->h);
      WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
   }

   intel_miptree_release(&map->linear_mt);
}
3020
3021 /**
3022 * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
3023 */
3024 #if defined(USE_SSE41)
3025 static void
3026 intel_miptree_map_movntdqa(struct brw_context *brw,
3027 struct intel_mipmap_tree *mt,
3028 struct intel_miptree_map *map,
3029 unsigned int level, unsigned int slice)
3030 {
3031 assert(map->mode & GL_MAP_READ_BIT);
3032 assert(!(map->mode & GL_MAP_WRITE_BIT));
3033
3034 DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
3035 map->x, map->y, map->w, map->h,
3036 mt, _mesa_get_format_name(mt->format),
3037 level, slice, map->ptr, map->stride);
3038
3039 /* Map the original image */
3040 uint32_t image_x;
3041 uint32_t image_y;
3042 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3043 image_x += map->x;
3044 image_y += map->y;
3045
3046 void *src = intel_miptree_map_raw(brw, mt, map->mode);
3047 if (!src)
3048 return;
3049
3050 src += mt->offset;
3051
3052 src += image_y * mt->surf.row_pitch;
3053 src += image_x * mt->cpp;
3054
3055 /* Due to the pixel offsets for the particular image being mapped, our
3056 * src pointer may not be 16-byte aligned. However, if the pitch is
3057 * divisible by 16, then the amount by which it's misaligned will remain
3058 * consistent from row to row.
3059 */
3060 assert((mt->surf.row_pitch % 16) == 0);
3061 const int misalignment = ((uintptr_t) src) & 15;
3062
3063 /* Create an untiled temporary buffer for the mapping. */
3064 const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w);
3065
3066 map->stride = ALIGN(misalignment + width_bytes, 16);
3067
3068 map->buffer = _mesa_align_malloc(map->stride * map->h, 16);
3069 /* Offset the destination so it has the same misalignment as src. */
3070 map->ptr = map->buffer + misalignment;
3071
3072 assert((((uintptr_t) map->ptr) & 15) == misalignment);
3073
3074 for (uint32_t y = 0; y < map->h; y++) {
3075 void *dst_ptr = map->ptr + y * map->stride;
3076 void *src_ptr = src + y * mt->surf.row_pitch;
3077
3078 _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
3079 }
3080
3081 intel_miptree_unmap_raw(mt);
3082 }
3083
3084 static void
3085 intel_miptree_unmap_movntdqa(struct brw_context *brw,
3086 struct intel_mipmap_tree *mt,
3087 struct intel_miptree_map *map,
3088 unsigned int level,
3089 unsigned int slice)
3090 {
3091 _mesa_align_free(map->buffer);
3092 map->buffer = NULL;
3093 map->ptr = NULL;
3094 }
3095 #endif
3096
3097 static void
3098 intel_miptree_map_s8(struct brw_context *brw,
3099 struct intel_mipmap_tree *mt,
3100 struct intel_miptree_map *map,
3101 unsigned int level, unsigned int slice)
3102 {
3103 map->stride = map->w;
3104 map->buffer = map->ptr = malloc(map->stride * map->h);
3105 if (!map->buffer)
3106 return;
3107
3108 /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no
3109 * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
3110 * invalidate is set, since we'll be writing the whole rectangle from our
3111 * temporary buffer back out.
3112 */
3113 if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
3114 uint8_t *untiled_s8_map = map->ptr;
3115 uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_READ_BIT);
3116 unsigned int image_x, image_y;
3117
3118 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3119
3120 for (uint32_t y = 0; y < map->h; y++) {
3121 for (uint32_t x = 0; x < map->w; x++) {
3122 ptrdiff_t offset = intel_offset_S8(mt->surf.row_pitch,
3123 x + image_x + map->x,
3124 y + image_y + map->y,
3125 brw->has_swizzling);
3126 untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
3127 }
3128 }
3129
3130 intel_miptree_unmap_raw(mt);
3131
3132 DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__,
3133 map->x, map->y, map->w, map->h,
3134 mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
3135 } else {
3136 DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
3137 map->x, map->y, map->w, map->h,
3138 mt, map->ptr, map->stride);
3139 }
3140 }
3141
3142 static void
3143 intel_miptree_unmap_s8(struct brw_context *brw,
3144 struct intel_mipmap_tree *mt,
3145 struct intel_miptree_map *map,
3146 unsigned int level,
3147 unsigned int slice)
3148 {
3149 if (map->mode & GL_MAP_WRITE_BIT) {
3150 unsigned int image_x, image_y;
3151 uint8_t *untiled_s8_map = map->ptr;
3152 uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT);
3153
3154 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3155
3156 for (uint32_t y = 0; y < map->h; y++) {
3157 for (uint32_t x = 0; x < map->w; x++) {
3158 ptrdiff_t offset = intel_offset_S8(mt->surf.row_pitch,
3159 image_x + x + map->x,
3160 image_y + y + map->y,
3161 brw->has_swizzling);
3162 tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
3163 }
3164 }
3165
3166 intel_miptree_unmap_raw(mt);
3167 }
3168
3169 free(map->buffer);
3170 }
3171
3172 static void
3173 intel_miptree_map_etc(struct brw_context *brw,
3174 struct intel_mipmap_tree *mt,
3175 struct intel_miptree_map *map,
3176 unsigned int level,
3177 unsigned int slice)
3178 {
3179 assert(mt->etc_format != MESA_FORMAT_NONE);
3180 if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
3181 assert(mt->format == MESA_FORMAT_R8G8B8X8_UNORM);
3182 }
3183
3184 assert(map->mode & GL_MAP_WRITE_BIT);
3185 assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
3186
3187 map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
3188 map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
3189 map->w, map->h, 1));
3190 map->ptr = map->buffer;
3191 }
3192
3193 static void
3194 intel_miptree_unmap_etc(struct brw_context *brw,
3195 struct intel_mipmap_tree *mt,
3196 struct intel_miptree_map *map,
3197 unsigned int level,
3198 unsigned int slice)
3199 {
3200 uint32_t image_x;
3201 uint32_t image_y;
3202 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3203
3204 image_x += map->x;
3205 image_y += map->y;
3206
3207 uint8_t *dst = intel_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT)
3208 + image_y * mt->surf.row_pitch
3209 + image_x * mt->cpp;
3210
3211 if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
3212 _mesa_etc1_unpack_rgba8888(dst, mt->surf.row_pitch,
3213 map->ptr, map->stride,
3214 map->w, map->h);
3215 else
3216 _mesa_unpack_etc2_format(dst, mt->surf.row_pitch,
3217 map->ptr, map->stride,
3218 map->w, map->h, mt->etc_format);
3219
3220 intel_miptree_unmap_raw(mt);
3221 free(map->buffer);
3222 }
3223
/**
 * Mapping function for packed depth/stencil miptrees backed by real separate
 * miptrees for depth and stencil.
 *
 * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
 * separate from the depth buffer.  Yet at the GL API level, we have to expose
 * packed depth/stencil textures and FBO attachments, and Mesa core expects to
 * be able to map that memory for texture storage and glReadPixels-type
 * operations.  We give Mesa core that access by mallocing a temporary and
 * copying the data between the actual backing store and the temporary.
 */
static void
intel_miptree_map_depthstencil(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               struct intel_miptree_map *map,
                               unsigned int level, unsigned int slice)
{
   struct intel_mipmap_tree *z_mt = mt;
   struct intel_mipmap_tree *s_mt = mt->stencil_mt;
   /* Z_FLOAT32 means the packed GL view is 8 bytes/pixel (Z32F + S8X24);
    * otherwise it's a 4-byte/pixel packed Z24/S8 word.
    */
   bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
   int packed_bpp = map_z32f_x24s8 ? 8 : 4;

   map->stride = map->w * packed_bpp;
   map->buffer = map->ptr = malloc(map->stride * map->h);
   if (!map->buffer)
      return;

   /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no
    * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
    * invalidate is set, since we'll be writing the whole rectangle from our
    * temporary buffer back out.
    */
   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
      uint32_t *packed_map = map->ptr;
      uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_READ_BIT);
      uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_READ_BIT);
      unsigned int s_image_x, s_image_y;
      unsigned int z_image_x, z_image_y;

      /* The Z and S miptrees may place the same (level, slice) at different
       * offsets, so look each up independently.
       */
      intel_miptree_get_image_offset(s_mt, level, slice,
                                     &s_image_x, &s_image_y);
      intel_miptree_get_image_offset(z_mt, level, slice,
                                     &z_image_x, &z_image_y);

      for (uint32_t y = 0; y < map->h; y++) {
         for (uint32_t x = 0; x < map->w; x++) {
            int map_x = map->x + x, map_y = map->y + y;
            /* Stencil is W-tiled (optionally bit6-swizzled), so its byte
             * offset must be computed per pixel.
             */
            ptrdiff_t s_offset = intel_offset_S8(s_mt->surf.row_pitch,
                                                 map_x + s_image_x,
                                                 map_y + s_image_y,
                                                 brw->has_swizzling);
            /* Depth is indexed as 32-bit pixels: row pitch in dwords. */
            ptrdiff_t z_offset = ((map_y + z_image_y) *
                                  (z_mt->surf.row_pitch / 4) +
                                  (map_x + z_image_x));
            uint8_t s = s_map[s_offset];
            uint32_t z = z_map[z_offset];

            if (map_z32f_x24s8) {
               /* Z32F_S8X24: dword 0 is depth, dword 1 holds stencil in
                * its low byte.
                */
               packed_map[(y * map->w + x) * 2 + 0] = z;
               packed_map[(y * map->w + x) * 2 + 1] = s;
            } else {
               /* S8_Z24 packing: stencil in the top byte, depth below. */
               packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
            }
         }
      }

      intel_miptree_unmap_raw(s_mt);
      intel_miptree_unmap_raw(z_mt);

      DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
          __func__,
          map->x, map->y, map->w, map->h,
          z_mt, map->x + z_image_x, map->y + z_image_y,
          s_mt, map->x + s_image_x, map->y + s_image_y,
          map->ptr, map->stride);
   } else {
      DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
          map->x, map->y, map->w, map->h,
          mt, map->ptr, map->stride);
   }
}
3305
/**
 * Counterpart to intel_miptree_map_depthstencil(): for writable maps,
 * scatters the packed depth/stencil temporary back into the separate
 * Z and S miptrees, then frees the temporary.
 */
static void
intel_miptree_unmap_depthstencil(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 struct intel_miptree_map *map,
                                 unsigned int level,
                                 unsigned int slice)
{
   struct intel_mipmap_tree *z_mt = mt;
   struct intel_mipmap_tree *s_mt = mt->stencil_mt;
   bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;

   /* Read-only maps have nothing to write back. */
   if (map->mode & GL_MAP_WRITE_BIT) {
      uint32_t *packed_map = map->ptr;
      uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_WRITE_BIT);
      uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_WRITE_BIT);
      unsigned int s_image_x, s_image_y;
      unsigned int z_image_x, z_image_y;

      intel_miptree_get_image_offset(s_mt, level, slice,
                                     &s_image_x, &s_image_y);
      intel_miptree_get_image_offset(z_mt, level, slice,
                                     &z_image_x, &z_image_y);

      for (uint32_t y = 0; y < map->h; y++) {
         for (uint32_t x = 0; x < map->w; x++) {
            /* Per-pixel W-tiled (optionally swizzled) stencil offset. */
            ptrdiff_t s_offset = intel_offset_S8(s_mt->surf.row_pitch,
                                                 x + s_image_x + map->x,
                                                 y + s_image_y + map->y,
                                                 brw->has_swizzling);
            /* Depth indexed as 32-bit pixels: row pitch in dwords. */
            ptrdiff_t z_offset = ((y + z_image_y + map->y) *
                                  (z_mt->surf.row_pitch / 4) +
                                  (x + z_image_x + map->x));

            if (map_z32f_x24s8) {
               z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
               s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
            } else {
               uint32_t packed = packed_map[y * map->w + x];
               s_map[s_offset] = packed >> 24;
               /* NOTE(review): the stencil byte remains in the top byte of
                * the stored depth word; presumably the depth format's X8
                * bits are don't-care — confirm against the Z format used.
                */
               z_map[z_offset] = packed;
            }
         }
      }

      intel_miptree_unmap_raw(s_mt);
      intel_miptree_unmap_raw(z_mt);

      DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
          __func__,
          map->x, map->y, map->w, map->h,
          z_mt, _mesa_get_format_name(z_mt->format),
          map->x + z_image_x, map->y + z_image_y,
          s_mt, map->x + s_image_x, map->y + s_image_y,
          map->ptr, map->stride);
   }

   free(map->buffer);
}
3364
3365 /**
3366 * Create and attach a map to the miptree at (level, slice). Return the
3367 * attached map.
3368 */
3369 static struct intel_miptree_map*
3370 intel_miptree_attach_map(struct intel_mipmap_tree *mt,
3371 unsigned int level,
3372 unsigned int slice,
3373 unsigned int x,
3374 unsigned int y,
3375 unsigned int w,
3376 unsigned int h,
3377 GLbitfield mode)
3378 {
3379 struct intel_miptree_map *map = calloc(1, sizeof(*map));
3380
3381 if (!map)
3382 return NULL;
3383
3384 assert(mt->level[level].slice[slice].map == NULL);
3385 mt->level[level].slice[slice].map = map;
3386
3387 map->mode = mode;
3388 map->x = x;
3389 map->y = y;
3390 map->w = w;
3391 map->h = h;
3392
3393 return map;
3394 }
3395
3396 /**
3397 * Release the map at (level, slice).
3398 */
3399 static void
3400 intel_miptree_release_map(struct intel_mipmap_tree *mt,
3401 unsigned int level,
3402 unsigned int slice)
3403 {
3404 struct intel_miptree_map **map;
3405
3406 map = &mt->level[level].slice[slice].map;
3407 free(*map);
3408 *map = NULL;
3409 }
3410
3411 static bool
3412 can_blit_slice(struct intel_mipmap_tree *mt,
3413 unsigned int level, unsigned int slice)
3414 {
3415 /* See intel_miptree_blit() for details on the 32k pitch limit. */
3416 if (mt->surf.row_pitch >= 32768)
3417 return false;
3418
3419 return true;
3420 }
3421
3422 static bool
3423 use_intel_mipree_map_blit(struct brw_context *brw,
3424 struct intel_mipmap_tree *mt,
3425 GLbitfield mode,
3426 unsigned int level,
3427 unsigned int slice)
3428 {
3429 if (brw->has_llc &&
3430 /* It's probably not worth swapping to the blit ring because of
3431 * all the overhead involved.
3432 */
3433 !(mode & GL_MAP_WRITE_BIT) &&
3434 !mt->compressed &&
3435 (mt->surf.tiling == ISL_TILING_X ||
3436 /* Prior to Sandybridge, the blitter can't handle Y tiling */
3437 (brw->gen >= 6 && mt->surf.tiling == ISL_TILING_Y0) ||
3438 /* Fast copy blit on skl+ supports all tiling formats. */
3439 brw->gen >= 9) &&
3440 can_blit_slice(mt, level, slice))
3441 return true;
3442
3443 if (mt->surf.tiling != ISL_TILING_LINEAR &&
3444 mt->bo->size >= brw->max_gtt_map_object_size) {
3445 assert(can_blit_slice(mt, level, slice));
3446 return true;
3447 }
3448
3449 return false;
3450 }
3451
/**
 * Map a rectangle (x, y, w, h) of the given miptree slice for CPU access,
 * returning a pointer and stride in *out_ptr / *out_stride.  On failure,
 * *out_ptr is NULL and *out_stride is 0.
 *
 * Parameter \a out_stride has type ptrdiff_t not because the buffer stride may
 * exceed 32 bits but to diminish the likelihood subtle bugs in pointer
 * arithmetic overflow.
 *
 * If you call this function and use \a out_stride, then you're doing pointer
 * arithmetic on \a out_ptr. The type of \a out_stride doesn't prevent all
 * bugs. The caller must still take care to avoid 32-bit overflow errors in
 * all arithmetic expressions that contain buffer offsets and pixel sizes,
 * which usually have type uint32_t or GLuint.
 */
void
intel_miptree_map(struct brw_context *brw,
                  struct intel_mipmap_tree *mt,
                  unsigned int level,
                  unsigned int slice,
                  unsigned int x,
                  unsigned int y,
                  unsigned int w,
                  unsigned int h,
                  GLbitfield mode,
                  void **out_ptr,
                  ptrdiff_t *out_stride)
{
   struct intel_miptree_map *map;

   /* Multisampled surfaces can't be mapped directly. */
   assert(mt->surf.samples == 1);

   map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
   if (!map){
      *out_ptr = NULL;
      *out_stride = 0;
      return;
   }

   /* Prepare the underlying storage for raw CPU access (marking it
    * written when the map requests write access).
    */
   intel_miptree_access_raw(brw, mt, level, slice,
                            map->mode & GL_MAP_WRITE_BIT);

   /* Dispatch to the mapping strategy matching the surface; each sets
    * map->ptr/map->stride (map->ptr stays NULL on failure).
    */
   if (mt->format == MESA_FORMAT_S_UINT8) {
      /* Stencil needs CPU detiling of W-tiled data. */
      intel_miptree_map_s8(brw, mt, map, level, slice);
   } else if (mt->etc_format != MESA_FORMAT_NONE &&
              !(mode & BRW_MAP_DIRECT_BIT)) {
      /* ETC textures are stored decompressed; expose an ETC shadow copy. */
      intel_miptree_map_etc(brw, mt, map, level, slice);
   } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
      /* Packed depth/stencil view over separate Z and S miptrees. */
      intel_miptree_map_depthstencil(brw, mt, map, level, slice);
   } else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) {
      intel_miptree_map_blit(brw, mt, map, level, slice);
#if defined(USE_SSE41)
   } else if (!(mode & GL_MAP_WRITE_BIT) &&
              !mt->compressed && cpu_has_sse4_1 &&
              (mt->surf.row_pitch % 16 == 0)) {
      /* Read-only maps can use a MOVNTDQA streaming copy. */
      intel_miptree_map_movntdqa(brw, mt, map, level, slice);
#endif
   } else {
      intel_miptree_map_gtt(brw, mt, map, level, slice);
   }

   *out_ptr = map->ptr;
   *out_stride = map->stride;

   /* A NULL ptr means the chosen strategy failed; drop the map record. */
   if (map->ptr == NULL)
      intel_miptree_release_map(mt, level, slice);
}
3515
/**
 * Unmap a slice previously mapped with intel_miptree_map(): writes back
 * any CPU-side temporary, then releases the map record.  A no-op when the
 * map failed (and was already released) at map time.
 */
void
intel_miptree_unmap(struct brw_context *brw,
                    struct intel_mipmap_tree *mt,
                    unsigned int level,
                    unsigned int slice)
{
   struct intel_miptree_map *map = mt->level[level].slice[slice].map;

   assert(mt->surf.samples == 1);

   if (!map)
      return;

   DBG("%s: mt %p (%s) level %d slice %d\n", __func__,
       mt, _mesa_get_format_name(mt->format), level, slice);

   /* Dispatch mirrors intel_miptree_map(): pick the matching unmap path. */
   if (mt->format == MESA_FORMAT_S_UINT8) {
      intel_miptree_unmap_s8(brw, mt, map, level, slice);
   } else if (mt->etc_format != MESA_FORMAT_NONE &&
              !(map->mode & BRW_MAP_DIRECT_BIT)) {
      intel_miptree_unmap_etc(brw, mt, map, level, slice);
   } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
      intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
   } else if (map->linear_mt) {
      /* A linear temporary was blitted in at map time; blit it back. */
      intel_miptree_unmap_blit(brw, mt, map, level, slice);
#if defined(USE_SSE41)
   } else if (map->buffer && cpu_has_sse4_1) {
      intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
#endif
   } else {
      intel_miptree_unmap_gtt(mt);
   }

   intel_miptree_release_map(mt, level, slice);
}
3551
3552 enum isl_surf_dim
3553 get_isl_surf_dim(GLenum target)
3554 {
3555 switch (target) {
3556 case GL_TEXTURE_1D:
3557 case GL_TEXTURE_1D_ARRAY:
3558 return ISL_SURF_DIM_1D;
3559
3560 case GL_TEXTURE_2D:
3561 case GL_TEXTURE_2D_ARRAY:
3562 case GL_TEXTURE_RECTANGLE:
3563 case GL_TEXTURE_CUBE_MAP:
3564 case GL_TEXTURE_CUBE_MAP_ARRAY:
3565 case GL_TEXTURE_2D_MULTISAMPLE:
3566 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
3567 case GL_TEXTURE_EXTERNAL_OES:
3568 return ISL_SURF_DIM_2D;
3569
3570 case GL_TEXTURE_3D:
3571 return ISL_SURF_DIM_3D;
3572 }
3573
3574 unreachable("Invalid texture target");
3575 }
3576
3577 enum isl_dim_layout
3578 get_isl_dim_layout(const struct gen_device_info *devinfo,
3579 enum isl_tiling tiling, GLenum target)
3580 {
3581 switch (target) {
3582 case GL_TEXTURE_1D:
3583 case GL_TEXTURE_1D_ARRAY:
3584 return (devinfo->gen >= 9 && tiling == ISL_TILING_LINEAR ?
3585 ISL_DIM_LAYOUT_GEN9_1D : ISL_DIM_LAYOUT_GEN4_2D);
3586
3587 case GL_TEXTURE_2D:
3588 case GL_TEXTURE_2D_ARRAY:
3589 case GL_TEXTURE_RECTANGLE:
3590 case GL_TEXTURE_2D_MULTISAMPLE:
3591 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
3592 case GL_TEXTURE_EXTERNAL_OES:
3593 return ISL_DIM_LAYOUT_GEN4_2D;
3594
3595 case GL_TEXTURE_CUBE_MAP:
3596 case GL_TEXTURE_CUBE_MAP_ARRAY:
3597 return (devinfo->gen == 4 ? ISL_DIM_LAYOUT_GEN4_3D :
3598 ISL_DIM_LAYOUT_GEN4_2D);
3599
3600 case GL_TEXTURE_3D:
3601 return (devinfo->gen >= 9 ?
3602 ISL_DIM_LAYOUT_GEN4_2D : ISL_DIM_LAYOUT_GEN4_3D);
3603 }
3604
3605 unreachable("Invalid texture target");
3606 }
3607
3608 enum isl_aux_usage
3609 intel_miptree_get_aux_isl_usage(const struct brw_context *brw,
3610 const struct intel_mipmap_tree *mt)
3611 {
3612 if (mt->hiz_buf)
3613 return ISL_AUX_USAGE_HIZ;
3614
3615 if (!mt->mcs_buf)
3616 return ISL_AUX_USAGE_NONE;
3617
3618 return mt->aux_usage;
3619 }