intel/isl: Add an aux state for "partial clear"
src/mesa/drivers/dri/i965/intel_mipmap_tree.c
/*
 * Copyright 2006 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <GL/gl.h>
#include <GL/internal/dri_interface.h>

#include "intel_batchbuffer.h"
#include "intel_image.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
#include "intel_blit.h"
#include "intel_fbo.h"

#include "brw_blorp.h"
#include "brw_context.h"
#include "brw_state.h"

#include "main/enums.h"
#include "main/fbobject.h"
#include "main/formats.h"
#include "main/glformats.h"
#include "main/texcompress_etc.h"
#include "main/teximage.h"
#include "main/streaming-load-memcpy.h"
#include "x86/common_x86_asm.h"

#define FILE_DEBUG_FLAG DEBUG_MIPTREE

static void *intel_miptree_map_raw(struct brw_context *brw,
                                   struct intel_mipmap_tree *mt,
                                   GLbitfield mode);

static void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt);

static bool
intel_miptree_alloc_aux(struct brw_context *brw,
                        struct intel_mipmap_tree *mt);

static bool
is_mcs_supported(const struct brw_context *brw, mesa_format format,
                 uint32_t layout_flags)
{
   /* Prior to Gen7, all MSAA surfaces used IMS layout. */
   if (brw->gen < 7)
      return false;

   /* In Gen7, IMS layout is only used for depth and stencil buffers. */
   switch (_mesa_get_format_base_format(format)) {
   case GL_DEPTH_COMPONENT:
   case GL_STENCIL_INDEX:
   case GL_DEPTH_STENCIL:
      return false;
   default:
      /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
       *
       *    This field must be set to 0 for all SINT MSRTs when all RT
       *    channels are not written
       *
       * In practice this means that we have to disable MCS for all signed
       * integer MSAA buffers. The alternative, to disable MCS only when one
       * of the render target channels is disabled, is impractical because it
       * would require converting between CMS and UMS MSAA layouts on the fly,
       * which is expensive.
       */
      if (brw->gen == 7 && _mesa_get_format_datatype(format) == GL_INT) {
         return false;
      } else if (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) {
         /* We can't use the CMS layout because it uses an aux buffer, the MCS
          * buffer. So fall back to UMS, which is identical to CMS without the
          * MCS.
          */
         return false;
      } else {
         return true;
      }
   }
}

static bool
intel_tiling_supports_ccs(const struct brw_context *brw,
                          enum isl_tiling tiling)
{
   /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
    * Target(s)", beneath the "Fast Color Clear" bullet (p326):
    *
    *     - Support is limited to tiled render targets.
    *
    * Gen9 changes the restriction to Y-tile only.
    */
   if (brw->gen >= 9)
      return tiling == ISL_TILING_Y0;
   else if (brw->gen >= 7)
      return tiling != ISL_TILING_LINEAR;
   else
      return false;
}

/**
 * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
 * can be used. This doesn't (and should not) inspect any of the properties of
 * the miptree's BO.
 *
 * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 * beneath the "Fast Color Clear" bullet (p326):
 *
 *     - Support is for non-mip-mapped and non-array surface types only.
 *
 * And then later, on p327:
 *
 *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
 *       64bpp, and 128bpp.
 *
 * From the Skylake documentation, it is made clear that X-tiling is no longer
 * supported:
 *
 *     - MCS and Lossless compression is supported for TiledY/TileYs/TileYf
 *       non-MSRTs only.
 */
static bool
intel_miptree_supports_ccs(struct brw_context *brw,
                           const struct intel_mipmap_tree *mt)
{
   /* MCS support does not exist prior to Gen7 */
   if (brw->gen < 7)
      return false;

   /* This function applies only to non-multisampled render targets. */
   if (mt->surf.samples > 1)
      return false;

   /* MCS is only supported for color buffers */
   switch (_mesa_get_format_base_format(mt->format)) {
   case GL_DEPTH_COMPONENT:
   case GL_DEPTH_STENCIL:
   case GL_STENCIL_INDEX:
      return false;
   }

   if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
      return false;

   const bool mip_mapped = mt->first_level != 0 || mt->last_level != 0;
   const bool arrayed = mt->surf.logical_level0_px.array_len > 1 ||
                        mt->surf.logical_level0_px.depth > 1;

   if (arrayed) {
      /* Multisample surfaces with the CMS layout are not layered surfaces,
       * yet still have physical_depth0 > 1. Assert that we don't
       * accidentally reject a multisampled surface here. We should have
       * rejected it earlier by explicitly checking the sample count.
       */
      assert(mt->surf.samples == 1);
   }

   /* Handle the hardware restrictions...
    *
    * All GENs have the following restriction: "MCS buffer for non-MSRT is
    * supported only for RT formats 32bpp, 64bpp, and 128bpp."
    *
    * From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652: (Color Clear of
    * Non-MultiSampler Render Target Restrictions) Support is for
    * non-mip-mapped and non-array surface types only.
    *
    * From the BDW PRM Volume 7: 3D-Media-GPGPU, page 649: (Color Clear of
    * Non-MultiSampler Render Target Restriction). Mip-mapped and arrayed
    * surfaces are supported with MCS buffer layout with these alignments in
    * the RT space: Horizontal Alignment = 256 and Vertical Alignment = 128.
    *
    * From the SKL PRM Volume 7: 3D-Media-GPGPU, page 632: (Color Clear of
    * Non-MultiSampler Render Target Restriction). Mip-mapped and arrayed
    * surfaces are supported with MCS buffer layout with these alignments in
    * the RT space: Horizontal Alignment = 128 and Vertical Alignment = 64.
    */
   if (brw->gen < 8 && (mip_mapped || arrayed))
      return false;

   /* There's no point in using an MCS buffer if the surface isn't in a
    * renderable format.
    */
   if (!brw->mesa_format_supports_render[mt->format])
      return false;

   if (brw->gen >= 9) {
      mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
      const enum isl_format isl_format =
         brw_isl_format_for_mesa_format(linear_format);
      return isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format);
   } else
      return true;
}

static bool
intel_tiling_supports_hiz(const struct brw_context *brw,
                          enum isl_tiling tiling)
{
   if (brw->gen < 6)
      return false;

   return tiling == ISL_TILING_Y0;
}

static bool
intel_miptree_supports_hiz(const struct brw_context *brw,
                           const struct intel_mipmap_tree *mt)
{
   if (!brw->has_hiz)
      return false;

   switch (mt->format) {
   case MESA_FORMAT_Z_FLOAT32:
   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
   case MESA_FORMAT_Z24_UNORM_X8_UINT:
   case MESA_FORMAT_Z24_UNORM_S8_UINT:
   case MESA_FORMAT_Z_UNORM16:
      return true;
   default:
      return false;
   }
}

static bool
intel_miptree_supports_ccs_e(struct brw_context *brw,
                             const struct intel_mipmap_tree *mt)
{
   if (brw->gen < 9)
      return false;

   /* For now compression is only enabled for integer formats even though
    * some supported floating-point formats exist as well. This is a
    * heuristic decision based on current public benchmarks: none of the
    * benchmarked cases showed any improvement for these formats, and a few
    * were seen to regress. Hence these are left to be enabled in the future
    * when they are known to improve things.
    */
   if (_mesa_get_format_datatype(mt->format) == GL_FLOAT)
      return false;

   if (!intel_miptree_supports_ccs(brw, mt))
      return false;

   /* Fast clear can also be used to clear srgb surfaces by using the
    * equivalent linear format. This trick, however, can't be extended to
    * lossless compression, and therefore a check is needed to see if the
    * format really is linear.
    */
   return _mesa_get_srgb_format_linear(mt->format) == mt->format;
}

/**
 * Determine depth format corresponding to a depth+stencil format,
 * for separate stencil.
 */
mesa_format
intel_depth_format_for_depthstencil_format(mesa_format format) {
   switch (format) {
   case MESA_FORMAT_Z24_UNORM_S8_UINT:
      return MESA_FORMAT_Z24_UNORM_X8_UINT;
   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
      return MESA_FORMAT_Z_FLOAT32;
   default:
      return format;
   }
}

static bool
create_mapping_table(GLenum target, unsigned first_level, unsigned last_level,
                     unsigned depth0, struct intel_mipmap_level *table)
{
   for (unsigned level = first_level; level <= last_level; level++) {
      const unsigned d =
         target == GL_TEXTURE_3D ? minify(depth0, level) : depth0;

      table[level].slice = calloc(d, sizeof(*table[0].slice));
      if (!table[level].slice)
         goto unwind;
   }

   return true;

 unwind:
   for (unsigned level = first_level; level <= last_level; level++)
      free(table[level].slice);

   return false;
}

static bool
needs_separate_stencil(const struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       mesa_format format, uint32_t layout_flags)
{
   if (layout_flags & MIPTREE_LAYOUT_FOR_BO)
      return false;

   if (_mesa_get_format_base_format(format) != GL_DEPTH_STENCIL)
      return false;

   if (brw->must_use_separate_stencil)
      return true;

   return brw->has_separate_stencil &&
          intel_miptree_supports_hiz(brw, mt);
}

/**
 * Choose the aux usage for this miptree. This function must be called fairly
 * late in the miptree create process after we have a tiling.
 */
static void
intel_miptree_choose_aux_usage(struct brw_context *brw,
                               struct intel_mipmap_tree *mt)
{
   assert(mt->aux_usage == ISL_AUX_USAGE_NONE);

   const unsigned no_flags = 0;
   if (mt->surf.samples > 1 && is_mcs_supported(brw, mt->format, no_flags)) {
      assert(mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
      mt->aux_usage = ISL_AUX_USAGE_MCS;
   } else if (intel_tiling_supports_ccs(brw, mt->surf.tiling) &&
              intel_miptree_supports_ccs(brw, mt)) {
      if (!unlikely(INTEL_DEBUG & DEBUG_NO_RBC) &&
          intel_miptree_supports_ccs_e(brw, mt)) {
         mt->aux_usage = ISL_AUX_USAGE_CCS_E;
      } else {
         mt->aux_usage = ISL_AUX_USAGE_CCS_D;
      }
   } else if (intel_tiling_supports_hiz(brw, mt->surf.tiling) &&
              intel_miptree_supports_hiz(brw, mt)) {
      mt->aux_usage = ISL_AUX_USAGE_HIZ;
   }

   /* We can do fast-clear on all auxiliary surface types that are
    * allocated through the normal texture creation paths.
    */
   if (mt->aux_usage != ISL_AUX_USAGE_NONE)
      mt->supports_fast_clear = true;
}


/**
 * Choose an appropriate uncompressed format for a requested
 * compressed format, if unsupported.
 */
mesa_format
intel_lower_compressed_format(struct brw_context *brw, mesa_format format)
{
   /* No need to lower ETC formats on these platforms,
    * they are supported natively.
    */
   if (brw->gen >= 8 || brw->is_baytrail)
      return format;

   switch (format) {
   case MESA_FORMAT_ETC1_RGB8:
      return MESA_FORMAT_R8G8B8X8_UNORM;
   case MESA_FORMAT_ETC2_RGB8:
      return MESA_FORMAT_R8G8B8X8_UNORM;
   case MESA_FORMAT_ETC2_SRGB8:
   case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
   case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
      return MESA_FORMAT_B8G8R8A8_SRGB;
   case MESA_FORMAT_ETC2_RGBA8_EAC:
   case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
      return MESA_FORMAT_R8G8B8A8_UNORM;
   case MESA_FORMAT_ETC2_R11_EAC:
      return MESA_FORMAT_R_UNORM16;
   case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
      return MESA_FORMAT_R_SNORM16;
   case MESA_FORMAT_ETC2_RG11_EAC:
      return MESA_FORMAT_R16G16_UNORM;
   case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
      return MESA_FORMAT_R16G16_SNORM;
   default:
      /* Non ETC1 / ETC2 format */
      return format;
   }
}

static unsigned
get_num_logical_layers(const struct intel_mipmap_tree *mt, unsigned level)
{
   if (mt->surf.dim == ISL_SURF_DIM_3D)
      return minify(mt->surf.logical_level0_px.depth, level);
   else
      return mt->surf.logical_level0_px.array_len;
}

static unsigned
get_num_phys_layers(const struct isl_surf *surf, unsigned level)
{
   /* For physical dimensions one must also take the layout into account.
    * See isl_calc_phys_level0_extent_sa().
    */
   if (surf->dim != ISL_SURF_DIM_3D)
      return surf->phys_level0_sa.array_len;

   if (surf->dim_layout == ISL_DIM_LAYOUT_GEN4_2D)
      return minify(surf->phys_level0_sa.array_len, level);

   return minify(surf->phys_level0_sa.depth, level);
}

/** \brief Assert that the level and layer are valid for the miptree. */
void
intel_miptree_check_level_layer(const struct intel_mipmap_tree *mt,
                                uint32_t level,
                                uint32_t layer)
{
   (void) mt;
   (void) level;
   (void) layer;

   assert(level >= mt->first_level);
   assert(level <= mt->last_level);
   assert(layer < get_num_phys_layers(&mt->surf, level));
}

static enum isl_aux_state **
create_aux_state_map(struct intel_mipmap_tree *mt,
                     enum isl_aux_state initial)
{
   const uint32_t levels = mt->last_level + 1;

   uint32_t total_slices = 0;
   for (uint32_t level = 0; level < levels; level++)
      total_slices += get_num_logical_layers(mt, level);

   const size_t per_level_array_size = levels * sizeof(enum isl_aux_state *);

   /* We're going to allocate a single chunk of data for both the per-level
    * reference array and the arrays of aux_state. This makes cleanup
    * significantly easier.
    */
   const size_t total_size = per_level_array_size +
                             total_slices * sizeof(enum isl_aux_state);
   void *data = malloc(total_size);
   if (data == NULL)
      return NULL;

   enum isl_aux_state **per_level_arr = data;
   enum isl_aux_state *s = data + per_level_array_size;
   for (uint32_t level = 0; level < levels; level++) {
      per_level_arr[level] = s;
      const unsigned level_layers = get_num_logical_layers(mt, level);
      for (uint32_t a = 0; a < level_layers; a++)
         *(s++) = initial;
   }
   assert((void *)s == data + total_size);

   return per_level_arr;
}
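
/* A minimal sketch of the resulting layout (illustrative, not part of the
 * driver code): for a miptree with two levels of 4 and 2 logical layers,
 * the single malloc'ed chunk looks like
 *
 *    [ptr level0][ptr level1][s s s s][s s]
 *     |           |           ^        ^
 *     +-----------|-----------+        |
 *                 +--------------------+
 *
 * so a slice's state is read or written as mt->aux_state[level][layer],
 * and a single free() of the per-level array releases everything.
 */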

static void
free_aux_state_map(enum isl_aux_state **state)
{
   free(state);
}

static bool
need_to_retile_as_linear(struct brw_context *brw, unsigned row_pitch,
                         enum isl_tiling tiling, unsigned samples)
{
   if (samples > 1)
      return false;

   if (tiling == ISL_TILING_LINEAR)
      return false;

   /* If the width is much smaller than a tile, don't bother tiling. */
   if (row_pitch < 64)
      return true;

   if (ALIGN(row_pitch, 512) >= 32768) {
      perf_debug("row pitch %u too large to blit, falling back to untiled",
                 row_pitch);
      return true;
   }

   return false;
}

static bool
need_to_retile_as_x(const struct brw_context *brw, uint64_t size,
                    enum isl_tiling tiling)
{
   /* If the BO is too large to fit in the aperture, we need to use the
    * BLT engine to support it. Prior to Sandybridge, the BLT paths can't
    * handle Y-tiling, so we need to fall back to X.
    */
   if (brw->gen < 6 && size >= brw->max_gtt_map_object_size &&
       tiling == ISL_TILING_Y0)
      return true;

   return false;
}

static struct intel_mipmap_tree *
make_surface(struct brw_context *brw, GLenum target, mesa_format format,
             unsigned first_level, unsigned last_level,
             unsigned width0, unsigned height0, unsigned depth0,
             unsigned num_samples, isl_tiling_flags_t tiling_flags,
             isl_surf_usage_flags_t isl_usage_flags, uint32_t alloc_flags,
             unsigned row_pitch, struct brw_bo *bo)
{
   struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
   if (!mt)
      return NULL;

   if (!create_mapping_table(target, first_level, last_level, depth0,
                             mt->level)) {
      free(mt);
      return NULL;
   }

   mt->refcount = 1;

   if (target == GL_TEXTURE_CUBE_MAP ||
       target == GL_TEXTURE_CUBE_MAP_ARRAY)
      isl_usage_flags |= ISL_SURF_USAGE_CUBE_BIT;

   DBG("%s: %s %s %ux %u:%u:%u %d..%d <-- %p\n",
       __func__,
       _mesa_enum_to_string(target),
       _mesa_get_format_name(format),
       num_samples, width0, height0, depth0,
       first_level, last_level, mt);

   struct isl_surf_init_info init_info = {
      .dim = get_isl_surf_dim(target),
      .format = translate_tex_format(brw, format, false),
      .width = width0,
      .height = height0,
      .depth = target == GL_TEXTURE_3D ? depth0 : 1,
      .levels = last_level - first_level + 1,
      .array_len = target == GL_TEXTURE_3D ? 1 : depth0,
      .samples = num_samples,
      .row_pitch = row_pitch,
      .usage = isl_usage_flags,
      .tiling_flags = tiling_flags,
   };

   if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
      goto fail;

   /* If the caller didn't specifically request Y-tiling (needed
    * unconditionally for depth), check for corner cases needing special
    * treatment.
    */
   if (tiling_flags & ~ISL_TILING_Y0_BIT) {
      if (need_to_retile_as_linear(brw, mt->surf.row_pitch,
                                   mt->surf.tiling, mt->surf.samples)) {
         init_info.tiling_flags = 1u << ISL_TILING_LINEAR;
         if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
            goto fail;
      } else if (need_to_retile_as_x(brw, mt->surf.size, mt->surf.tiling)) {
         init_info.tiling_flags = 1u << ISL_TILING_X;
         if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
            goto fail;
      }
   }

   /* In the linear case the buffer gets padded by a fixed 64 bytes, so the
    * size may not be a multiple of row_pitch.
    * See isl_apply_surface_padding().
    */
   if (mt->surf.tiling != ISL_TILING_LINEAR)
      assert(mt->surf.size % mt->surf.row_pitch == 0);

   if (!bo) {
      mt->bo = brw_bo_alloc_tiled(brw->bufmgr, "isl-miptree",
                                  mt->surf.size,
                                  isl_tiling_to_i915_tiling(
                                     mt->surf.tiling),
                                  mt->surf.row_pitch, alloc_flags);
      if (!mt->bo)
         goto fail;
   } else {
      mt->bo = bo;
   }

   mt->first_level = first_level;
   mt->last_level = last_level;
   mt->target = target;
   mt->format = format;
   mt->aux_state = NULL;
   mt->cpp = isl_format_get_layout(mt->surf.format)->bpb / 8;
   mt->compressed = _mesa_is_format_compressed(format);

   return mt;

fail:
   intel_miptree_release(&mt);
   return NULL;
}
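
/* A hypothetical usage sketch (the parameter values are illustrative, not
 * taken from any real caller): creating a single-level, single-sampled 2D
 * RGBA8 render target and letting ISL pick any tiling would look like
 *
 *    struct intel_mipmap_tree *mt =
 *       make_surface(brw, GL_TEXTURE_2D, MESA_FORMAT_R8G8B8A8_UNORM,
 *                    0, 0,                  // first/last level
 *                    256, 256, 1,           // width, height, depth
 *                    1, ISL_TILING_ANY_MASK,
 *                    ISL_SURF_USAGE_RENDER_TARGET_BIT |
 *                    ISL_SURF_USAGE_TEXTURE_BIT,
 *                    BO_ALLOC_FOR_RENDER, 0, NULL);
 *
 * Passing bo == NULL makes the function allocate a fresh BO sized from the
 * ISL surface; passing an existing BO (as the create_for_bo paths below do)
 * wraps it instead.
 */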

static bool
make_separate_stencil_surface(struct brw_context *brw,
                              struct intel_mipmap_tree *mt)
{
   mt->stencil_mt = make_surface(brw, mt->target, MESA_FORMAT_S_UINT8,
                                 0, mt->surf.levels - 1,
                                 mt->surf.logical_level0_px.width,
                                 mt->surf.logical_level0_px.height,
                                 mt->surf.dim == ISL_SURF_DIM_3D ?
                                    mt->surf.logical_level0_px.depth :
                                    mt->surf.logical_level0_px.array_len,
                                 mt->surf.samples, ISL_TILING_W_BIT,
                                 ISL_SURF_USAGE_STENCIL_BIT |
                                 ISL_SURF_USAGE_TEXTURE_BIT,
                                 BO_ALLOC_FOR_RENDER, 0, NULL);

   if (!mt->stencil_mt)
      return false;

   mt->stencil_mt->r8stencil_needs_update = true;

   return true;
}

static bool
force_linear_tiling(uint32_t layout_flags)
{
   /* ANY includes NONE and Y bit. */
   if (layout_flags & MIPTREE_LAYOUT_TILING_Y)
      return false;

   return layout_flags & MIPTREE_LAYOUT_TILING_NONE;
}

static struct intel_mipmap_tree *
miptree_create(struct brw_context *brw,
               GLenum target,
               mesa_format format,
               GLuint first_level,
               GLuint last_level,
               GLuint width0,
               GLuint height0,
               GLuint depth0,
               GLuint num_samples,
               uint32_t layout_flags)
{
   if (format == MESA_FORMAT_S_UINT8)
      return make_surface(brw, target, format, first_level, last_level,
                          width0, height0, depth0, num_samples,
                          ISL_TILING_W_BIT,
                          ISL_SURF_USAGE_STENCIL_BIT |
                          ISL_SURF_USAGE_TEXTURE_BIT,
                          BO_ALLOC_FOR_RENDER,
                          0,
                          NULL);

   const GLenum base_format = _mesa_get_format_base_format(format);
   if ((base_format == GL_DEPTH_COMPONENT ||
        base_format == GL_DEPTH_STENCIL) &&
       !force_linear_tiling(layout_flags)) {
      /* Fix up the Z miptree format for how we're splitting out separate
       * stencil. Gen7 expects there to be no stencil bits in its depth buffer.
       */
      const mesa_format depth_only_format =
         intel_depth_format_for_depthstencil_format(format);
      struct intel_mipmap_tree *mt = make_surface(
         brw, target, brw->gen >= 6 ? depth_only_format : format,
         first_level, last_level,
         width0, height0, depth0, num_samples, ISL_TILING_Y0_BIT,
         ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT,
         BO_ALLOC_FOR_RENDER, 0, NULL);

      if (needs_separate_stencil(brw, mt, format, layout_flags) &&
          !make_separate_stencil_surface(brw, mt)) {
         intel_miptree_release(&mt);
         return NULL;
      }

      if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
         intel_miptree_choose_aux_usage(brw, mt);

      return mt;
   }

   mesa_format tex_format = format;
   mesa_format etc_format = MESA_FORMAT_NONE;
   uint32_t alloc_flags = 0;

   format = intel_lower_compressed_format(brw, format);

   etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;

   assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0);
   if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD)
      alloc_flags |= BO_ALLOC_FOR_RENDER;

   isl_tiling_flags_t tiling_flags = force_linear_tiling(layout_flags) ?
      ISL_TILING_LINEAR_BIT : ISL_TILING_ANY_MASK;

   /* TODO: This used to be because there wasn't BLORP to handle Y-tiling. */
   if (brw->gen < 6)
      tiling_flags &= ~ISL_TILING_Y0_BIT;

   struct intel_mipmap_tree *mt = make_surface(
      brw, target, format,
      first_level, last_level,
      width0, height0, depth0,
      num_samples, tiling_flags,
      ISL_SURF_USAGE_RENDER_TARGET_BIT |
      ISL_SURF_USAGE_TEXTURE_BIT,
      alloc_flags, 0, NULL);
   if (!mt)
      return NULL;

   mt->etc_format = etc_format;

   if (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT)
      mt->bo->cache_coherent = false;

   if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
      intel_miptree_choose_aux_usage(brw, mt);

   return mt;
}

struct intel_mipmap_tree *
intel_miptree_create(struct brw_context *brw,
                     GLenum target,
                     mesa_format format,
                     GLuint first_level,
                     GLuint last_level,
                     GLuint width0,
                     GLuint height0,
                     GLuint depth0,
                     GLuint num_samples,
                     uint32_t layout_flags)
{
   assert(num_samples > 0);

   struct intel_mipmap_tree *mt = miptree_create(
      brw, target, format,
      first_level, last_level,
      width0, height0, depth0, num_samples,
      layout_flags);
   if (!mt)
      return NULL;

   mt->offset = 0;

   if (!intel_miptree_alloc_aux(brw, mt)) {
      intel_miptree_release(&mt);
      return NULL;
   }

   return mt;
}

struct intel_mipmap_tree *
intel_miptree_create_for_bo(struct brw_context *brw,
                            struct brw_bo *bo,
                            mesa_format format,
                            uint32_t offset,
                            uint32_t width,
                            uint32_t height,
                            uint32_t depth,
                            int pitch,
                            uint32_t layout_flags)
{
   struct intel_mipmap_tree *mt;
   uint32_t tiling, swizzle;
   const GLenum target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
   const GLenum base_format = _mesa_get_format_base_format(format);

   if ((base_format == GL_DEPTH_COMPONENT ||
        base_format == GL_DEPTH_STENCIL)) {
      const mesa_format depth_only_format =
         intel_depth_format_for_depthstencil_format(format);
      mt = make_surface(brw, target,
                        brw->gen >= 6 ? depth_only_format : format,
                        0, 0, width, height, depth, 1, ISL_TILING_Y0_BIT,
                        ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT,
                        BO_ALLOC_FOR_RENDER, pitch, bo);

      brw_bo_reference(bo);

      if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
         intel_miptree_choose_aux_usage(brw, mt);

      return mt;
   } else if (format == MESA_FORMAT_S_UINT8) {
      mt = make_surface(brw, target, MESA_FORMAT_S_UINT8,
                        0, 0, width, height, depth, 1,
                        ISL_TILING_W_BIT,
                        ISL_SURF_USAGE_STENCIL_BIT |
                        ISL_SURF_USAGE_TEXTURE_BIT,
                        BO_ALLOC_FOR_RENDER, pitch, bo);
      if (!mt)
         return NULL;

      assert(bo->size >= mt->surf.size);

      brw_bo_reference(bo);
      return mt;
   }

   brw_bo_get_tiling(bo, &tiling, &swizzle);

   /* Nothing will be able to use this miptree with the BO if the offset isn't
    * aligned.
    */
   if (tiling != I915_TILING_NONE)
      assert(offset % 4096 == 0);

   /* miptrees can't handle negative pitch. If you need flipping of images,
    * that's outside of the scope of the mt.
    */
   assert(pitch >= 0);

   /* The BO already has a tiling format and we shouldn't confuse the lower
    * layers by making it try to find a tiling format again.
    */
   assert((layout_flags & MIPTREE_LAYOUT_TILING_ANY) == 0);
   assert((layout_flags & MIPTREE_LAYOUT_TILING_NONE) == 0);

   mt = make_surface(brw, target, format,
                     0, 0, width, height, depth, 1,
                     1lu << isl_tiling_from_i915_tiling(tiling),
                     ISL_SURF_USAGE_RENDER_TARGET_BIT |
                     ISL_SURF_USAGE_TEXTURE_BIT,
                     0, pitch, bo);
   if (!mt)
      return NULL;

   brw_bo_reference(bo);
   mt->bo = bo;
   mt->offset = offset;

   if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
      intel_miptree_choose_aux_usage(brw, mt);

   return mt;
}

static struct intel_mipmap_tree *
miptree_create_for_planar_image(struct brw_context *brw,
                                __DRIimage *image, GLenum target)
{
   struct intel_image_format *f = image->planar_format;
   struct intel_mipmap_tree *planar_mt = NULL;

   for (int i = 0; i < f->nplanes; i++) {
      const int index = f->planes[i].buffer_index;
      const uint32_t dri_format = f->planes[i].dri_format;
      const mesa_format format = driImageFormatToGLFormat(dri_format);
      const uint32_t width = image->width >> f->planes[i].width_shift;
      const uint32_t height = image->height >> f->planes[i].height_shift;

      /* Disable creation of the texture's aux buffers because the driver
       * exposes no EGL API to manage them. That is, there is no API for
       * resolving the aux buffer's content to the main buffer nor for
       * invalidating the aux buffer's content.
       */
      struct intel_mipmap_tree *mt =
         intel_miptree_create_for_bo(brw, image->bo, format,
                                     image->offsets[index],
                                     width, height, 1,
                                     image->strides[index],
                                     MIPTREE_LAYOUT_DISABLE_AUX);
      if (mt == NULL)
         return NULL;

      mt->target = target;

      if (i == 0)
         planar_mt = mt;
      else
         planar_mt->plane[i - 1] = mt;
   }

   return planar_mt;
}

struct intel_mipmap_tree *
intel_miptree_create_for_dri_image(struct brw_context *brw,
                                   __DRIimage *image, GLenum target,
                                   enum isl_colorspace colorspace,
                                   bool is_winsys_image)
{
   if (image->planar_format && image->planar_format->nplanes > 0) {
      assert(colorspace == ISL_COLORSPACE_NONE ||
             colorspace == ISL_COLORSPACE_YUV);
      return miptree_create_for_planar_image(brw, image, target);
   }

   mesa_format format = image->format;
   switch (colorspace) {
   case ISL_COLORSPACE_NONE:
      /* Keep the image format unmodified */
      break;

   case ISL_COLORSPACE_LINEAR:
      format = _mesa_get_srgb_format_linear(format);
      break;

   case ISL_COLORSPACE_SRGB:
      format = _mesa_get_linear_format_srgb(format);
      break;

   default:
      unreachable("Invalid colorspace for non-planar image");
   }

   if (!brw->ctx.TextureFormatSupported[format]) {
      /* The texture storage paths in core Mesa detect if the driver does not
       * support the user-requested format, and then search for a fallback
       * format. The DRIimage code bypasses core Mesa, though. So we do the
       * fallbacks here for important formats.
       *
       * We must support DRM_FOURCC_XBGR8888 textures because the Android
       * framework produces HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces, which
       * the Chrome OS compositor consumes as dma_buf EGLImages.
       */
      format = _mesa_format_fallback_rgbx_to_rgba(format);
   }

   if (!brw->ctx.TextureFormatSupported[format])
      return NULL;

   /* If this image comes in from a window system, we have different
    * requirements than if it comes in via an EGL import operation. Window
    * system images can use any form of auxiliary compression we wish because
    * they get "flushed" before being handed off to the window system and we
    * have the opportunity to do resolves. Window system buffers also may be
    * used for scanout so we need to flag that appropriately.
    */
   const uint32_t mt_layout_flags =
      is_winsys_image ? MIPTREE_LAYOUT_FOR_SCANOUT : MIPTREE_LAYOUT_DISABLE_AUX;

   /* Disable creation of the texture's aux buffers because the driver exposes
    * no EGL API to manage them. That is, there is no API for resolving the aux
    * buffer's content to the main buffer nor for invalidating the aux buffer's
    * content.
    */
   struct intel_mipmap_tree *mt =
      intel_miptree_create_for_bo(brw, image->bo, format,
                                  image->offset, image->width, image->height, 1,
                                  image->pitch, mt_layout_flags);
   if (mt == NULL)
      return NULL;

   mt->target = target;
   mt->level[0].level_x = image->tile_x;
   mt->level[0].level_y = image->tile_y;

   /* From "OES_EGL_image" error reporting. We report GL_INVALID_OPERATION
    * for EGL images from non-tile-aligned surfaces in gen4 hw and earlier,
    * which have trouble resolving back to the destination image due to
    * alignment issues.
    */
   if (!brw->has_surface_tile_offset) {
      uint32_t draw_x, draw_y;
      intel_miptree_get_tile_offsets(mt, 0, 0, &draw_x, &draw_y);

      if (draw_x != 0 || draw_y != 0) {
         _mesa_error(&brw->ctx, GL_INVALID_OPERATION, __func__);
         intel_miptree_release(&mt);
         return NULL;
      }
   }

   if (!intel_miptree_alloc_aux(brw, mt)) {
      intel_miptree_release(&mt);
      return NULL;
   }

   return mt;
}

/**
 * For a singlesample renderbuffer, this simply wraps the given BO with a
 * miptree.
 *
 * For a multisample renderbuffer, this wraps the window system's
 * (singlesample) BO with a singlesample miptree attached to the
 * intel_renderbuffer, then creates a multisample miptree attached to irb->mt
 * that will contain the actual rendering (which is lazily resolved to
 * irb->singlesample_mt).
 */
bool
intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
                                         struct intel_renderbuffer *irb,
                                         struct intel_mipmap_tree *singlesample_mt,
                                         uint32_t width, uint32_t height,
                                         uint32_t pitch)
{
   struct intel_mipmap_tree *multisample_mt = NULL;
   struct gl_renderbuffer *rb = &irb->Base.Base;
   mesa_format format = rb->Format;
   const unsigned num_samples = MAX2(rb->NumSamples, 1);

   /* Only the front and back buffers, which are color buffers, are allocated
    * through the image loader.
    */
   assert(_mesa_get_format_base_format(format) == GL_RGB ||
          _mesa_get_format_base_format(format) == GL_RGBA);

   assert(singlesample_mt);

   if (num_samples == 1) {
      intel_miptree_release(&irb->mt);
      irb->mt = singlesample_mt;

      assert(!irb->singlesample_mt);
   } else {
      intel_miptree_release(&irb->singlesample_mt);
      irb->singlesample_mt = singlesample_mt;

      if (!irb->mt ||
          irb->mt->surf.logical_level0_px.width != width ||
          irb->mt->surf.logical_level0_px.height != height) {
         multisample_mt = intel_miptree_create_for_renderbuffer(intel,
                                                                format,
                                                                width,
                                                                height,
                                                                num_samples);
         if (!multisample_mt)
            goto fail;

         irb->need_downsample = false;
         intel_miptree_release(&irb->mt);
         irb->mt = multisample_mt;
      }
   }
   return true;

fail:
   intel_miptree_release(&irb->mt);
   return false;
}

struct intel_mipmap_tree *
intel_miptree_create_for_renderbuffer(struct brw_context *brw,
                                      mesa_format format,
                                      uint32_t width,
                                      uint32_t height,
                                      uint32_t num_samples)
{
   struct intel_mipmap_tree *mt;
   uint32_t depth = 1;
   GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
   const uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
                                 MIPTREE_LAYOUT_TILING_ANY;

   mt = intel_miptree_create(brw, target, format, 0, 0,
                             width, height, depth, num_samples,
                             layout_flags);
   if (!mt)
      goto fail;

   return mt;

fail:
   intel_miptree_release(&mt);
   return NULL;
}

void
intel_miptree_reference(struct intel_mipmap_tree **dst,
                        struct intel_mipmap_tree *src)
{
   if (*dst == src)
      return;

   intel_miptree_release(dst);

   if (src) {
      src->refcount++;
      DBG("%s %p refcount now %d\n", __func__, src, src->refcount);
   }

   *dst = src;
}

static void
intel_miptree_aux_buffer_free(struct intel_miptree_aux_buffer *aux_buf)
{
   if (aux_buf == NULL)
      return;

   brw_bo_unreference(aux_buf->bo);

   free(aux_buf);
}

void
intel_miptree_release(struct intel_mipmap_tree **mt)
{
   if (!*mt)
      return;

   DBG("%s %p refcount will be %d\n", __func__, *mt, (*mt)->refcount - 1);
   if (--(*mt)->refcount <= 0) {
      GLuint i;

      DBG("%s deleting %p\n", __func__, *mt);

      brw_bo_unreference((*mt)->bo);
      intel_miptree_release(&(*mt)->stencil_mt);
      intel_miptree_release(&(*mt)->r8stencil_mt);
      intel_miptree_aux_buffer_free((*mt)->hiz_buf);
      intel_miptree_aux_buffer_free((*mt)->mcs_buf);
      free_aux_state_map((*mt)->aux_state);

      intel_miptree_release(&(*mt)->plane[0]);
      intel_miptree_release(&(*mt)->plane[1]);

      for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
         free((*mt)->level[i].slice);
      }

      free(*mt);
   }
   *mt = NULL;
}


void
intel_get_image_dims(struct gl_texture_image *image,
                     int *width, int *height, int *depth)
{
   switch (image->TexObject->Target) {
   case GL_TEXTURE_1D_ARRAY:
      /* For a 1D Array texture the OpenGL API will treat the image height as
       * the number of array slices. For Intel hardware, we treat the 1D array
       * as a 2D Array with a height of 1. So, here we want to swap image
       * height and depth.
       */
      assert(image->Depth == 1);
      *width = image->Width;
      *height = 1;
      *depth = image->Height;
      break;
   case GL_TEXTURE_CUBE_MAP:
      /* For Cube maps, the mesa/main api layer gives us a depth of 1 even
       * though we really have 6 slices.
       */
      assert(image->Depth == 1);
      *width = image->Width;
      *height = image->Height;
      *depth = 6;
      break;
   default:
      *width = image->Width;
      *height = image->Height;
      *depth = image->Depth;
      break;
   }
}
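
/* A worked example of the remapping above (illustrative only): a 1D array
 * texture uploaded as GL_TEXTURE_1D_ARRAY with Width == 256 and
 * Height == 64 (64 slices) comes back from this helper as
 * width = 256, height = 1, depth = 64, matching how the hardware lays out
 * 1D arrays as 2D arrays of height 1.
 */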

/**
 * Can the image be pulled into a unified mipmap tree? This mirrors
 * the completeness test in a lot of ways.
 *
 * Not sure whether I want to pass gl_texture_image here.
 */
bool
intel_miptree_match_image(struct intel_mipmap_tree *mt,
                          struct gl_texture_image *image)
{
   struct intel_texture_image *intelImage = intel_texture_image(image);
   GLuint level = intelImage->base.Base.Level;
   int width, height, depth;

   /* glTexImage* choose the texture object based on the target passed in, and
    * objects can't change targets over their lifetimes, so this should be
    * true.
    */
   assert(image->TexObject->Target == mt->target);

   mesa_format mt_format = mt->format;
   if (mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT && mt->stencil_mt)
      mt_format = MESA_FORMAT_Z24_UNORM_S8_UINT;
   if (mt->format == MESA_FORMAT_Z_FLOAT32 && mt->stencil_mt)
      mt_format = MESA_FORMAT_Z32_FLOAT_S8X24_UINT;
   if (mt->etc_format != MESA_FORMAT_NONE)
      mt_format = mt->etc_format;

   if (image->TexFormat != mt_format)
      return false;

   intel_get_image_dims(image, &width, &height, &depth);

   if (mt->target == GL_TEXTURE_CUBE_MAP)
      depth = 6;

   if (level >= mt->surf.levels)
      return false;

   const unsigned level_depth =
      mt->surf.dim == ISL_SURF_DIM_3D ?
         minify(mt->surf.logical_level0_px.depth, level) :
         mt->surf.logical_level0_px.array_len;

   return width == minify(mt->surf.logical_level0_px.width, level) &&
          height == minify(mt->surf.logical_level0_px.height, level) &&
          depth == level_depth &&
          MAX2(image->NumSamples, 1) == mt->surf.samples;
}

void
intel_miptree_get_image_offset(const struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               GLuint *x, GLuint *y)
{
   if (level == 0 && slice == 0) {
      *x = mt->level[0].level_x;
      *y = mt->level[0].level_y;
      return;
   }

   uint32_t x_offset_sa, y_offset_sa;

   /* The miptree itself can have an offset only if it represents a single
    * slice in an imported buffer object.
    * See intel_miptree_create_for_dri_image().
    */
   assert(mt->level[0].level_x == 0);
   assert(mt->level[0].level_y == 0);

   /* The given level is relative to level zero, while the miptree may
    * represent only a subset of all levels starting from 'first_level'.
    */
   assert(level >= mt->first_level);
   level -= mt->first_level;

   const unsigned z = mt->surf.dim == ISL_SURF_DIM_3D ? slice : 0;
   slice = mt->surf.dim == ISL_SURF_DIM_3D ? 0 : slice;
   isl_surf_get_image_offset_el(&mt->surf, level, slice, z,
                                &x_offset_sa, &y_offset_sa);

   *x = x_offset_sa;
   *y = y_offset_sa;
}


/**
 * This function computes the tile_w (in bytes) and tile_h (in rows) of
 * different tiling patterns. If the BO is untiled, tile_w is set to cpp
 * and tile_h is set to 1.
 */
void
intel_get_tile_dims(enum isl_tiling tiling, uint32_t cpp,
                    uint32_t *tile_w, uint32_t *tile_h)
{
   switch (tiling) {
   case ISL_TILING_X:
      *tile_w = 512;
      *tile_h = 8;
      break;
   case ISL_TILING_Y0:
      *tile_w = 128;
      *tile_h = 32;
      break;
   case ISL_TILING_LINEAR:
      *tile_w = cpp;
      *tile_h = 1;
      break;
   default:
      unreachable("not reached");
   }
}


/**
 * This function computes masks that may be used to select the bits of the X
 * and Y coordinates that indicate the offset within a tile. If the BO is
 * untiled, the masks are set to 0.
 */
void
intel_get_tile_masks(enum isl_tiling tiling, uint32_t cpp,
                     uint32_t *mask_x, uint32_t *mask_y)
{
   uint32_t tile_w_bytes, tile_h;

   intel_get_tile_dims(tiling, cpp, &tile_w_bytes, &tile_h);

   *mask_x = tile_w_bytes / cpp - 1;
   *mask_y = tile_h - 1;
}
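
/* A worked example, purely illustrative: for an X-tiled surface with
 * cpp == 4, intel_get_tile_dims() reports a tile of 512 bytes x 8 rows,
 * i.e. 128x8 pixels, so intel_get_tile_masks() yields
 * mask_x = 512/4 - 1 = 127 and mask_y = 7. For Y-tiling (128 bytes x 32
 * rows) the same cpp gives mask_x = 31 and mask_y = 31. A coordinate's
 * intra-tile part is then (x & mask_x, y & mask_y) and its tile-aligned
 * part is (x & ~mask_x, y & ~mask_y).
 */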

/**
 * Compute the offset (in bytes) from the start of the BO to the given x
 * and y coordinate. For tiled BOs, caller must ensure that x and y are
 * multiples of the tile size.
 */
uint32_t
intel_miptree_get_aligned_offset(const struct intel_mipmap_tree *mt,
                                 uint32_t x, uint32_t y)
{
   int cpp = mt->cpp;
   uint32_t pitch = mt->surf.row_pitch;

   switch (mt->surf.tiling) {
   default:
      unreachable("not reached");
   case ISL_TILING_LINEAR:
      return y * pitch + x * cpp;
   case ISL_TILING_X:
      assert((x % (512 / cpp)) == 0);
      assert((y % 8) == 0);
      return y * pitch + x / (512 / cpp) * 4096;
   case ISL_TILING_Y0:
      assert((x % (128 / cpp)) == 0);
      assert((y % 32) == 0);
      return y * pitch + x / (128 / cpp) * 4096;
   }
}
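
/* A worked example (hypothetical numbers, not from any caller): for an
 * X-tiled miptree with cpp == 4 and row_pitch == 2048, the tile-aligned
 * coordinate (x, y) = (256, 16) satisfies the asserts above
 * (256 % (512/4) == 0 and 16 % 8 == 0) and maps to
 *
 *    16 * 2048 + 256 / (512 / 4) * 4096 = 32768 + 2 * 4096 = 40960 bytes
 *
 * from the start of the BO.
 */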

/**
 * Rendering with tiled buffers requires that the base address of the buffer
 * be aligned to a page boundary. For renderbuffers, and sometimes with
 * textures, we may want the surface to point at a texture image level that
 * isn't at a page boundary.
 *
 * This function returns an appropriately-aligned base offset
 * according to the tiling restrictions, plus any required x/y offset
 * from there.
 */
uint32_t
intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               uint32_t *tile_x,
                               uint32_t *tile_y)
{
   uint32_t x, y;
   uint32_t mask_x, mask_y;

   intel_get_tile_masks(mt->surf.tiling, mt->cpp, &mask_x, &mask_y);
   intel_miptree_get_image_offset(mt, level, slice, &x, &y);

   *tile_x = x & mask_x;
   *tile_y = y & mask_y;

   return intel_miptree_get_aligned_offset(mt, x & ~mask_x, y & ~mask_y);
}
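
/* Tying the two helpers together with hypothetical numbers: if a level of
 * an X-tiled, cpp == 4 miptree starts at image offset (x, y) = (260, 42),
 * then mask_x = 127 and mask_y = 7, so the caller gets back
 * tile_x = 260 & 127 = 4 and tile_y = 42 & 7 = 2, and the returned base
 * offset is intel_miptree_get_aligned_offset(mt, 256, 40), which is
 * page-aligned as the tiled-rendering restriction above requires.
 */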

static void
intel_miptree_copy_slice_sw(struct brw_context *brw,
                            struct intel_mipmap_tree *src_mt,
                            unsigned src_level, unsigned src_layer,
                            struct intel_mipmap_tree *dst_mt,
                            unsigned dst_level, unsigned dst_layer,
                            unsigned width, unsigned height)
{
   void *src, *dst;
   ptrdiff_t src_stride, dst_stride;
   const unsigned cpp = (isl_format_get_layout(dst_mt->surf.format)->bpb / 8);

   intel_miptree_map(brw, src_mt,
                     src_level, src_layer,
                     0, 0,
                     width, height,
                     GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
                     &src, &src_stride);

   intel_miptree_map(brw, dst_mt,
                     dst_level, dst_layer,
                     0, 0,
                     width, height,
                     GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
                     BRW_MAP_DIRECT_BIT,
                     &dst, &dst_stride);

   DBG("sw blit %s mt %p %p/%"PRIdPTR" -> %s mt %p %p/%"PRIdPTR" (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src, src_stride,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst, dst_stride,
       width, height);

   int row_size = cpp * width;
   if (src_stride == row_size &&
       dst_stride == row_size) {
      memcpy(dst, src, row_size * height);
   } else {
      for (int i = 0; i < height; i++) {
         memcpy(dst, src, row_size);
         dst += dst_stride;
         src += src_stride;
      }
   }

   intel_miptree_unmap(brw, dst_mt, dst_level, dst_layer);
   intel_miptree_unmap(brw, src_mt, src_level, src_layer);

   /* Don't forget to copy the stencil data over, too. We could have skipped
    * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
    * shuffling the two data sources in/out of temporary storage instead of
    * the direct mapping we get this way.
    */
   if (dst_mt->stencil_mt) {
      assert(src_mt->stencil_mt);
      intel_miptree_copy_slice_sw(brw,
                                  src_mt->stencil_mt, src_level, src_layer,
                                  dst_mt->stencil_mt, dst_level, dst_layer,
                                  width, height);
   }
}

void
intel_miptree_copy_slice(struct brw_context *brw,
                         struct intel_mipmap_tree *src_mt,
                         unsigned src_level, unsigned src_layer,
                         struct intel_mipmap_tree *dst_mt,
                         unsigned dst_level, unsigned dst_layer)
{
   mesa_format format = src_mt->format;
   unsigned width = minify(src_mt->surf.phys_level0_sa.width,
                           src_level - src_mt->first_level);
   unsigned height = minify(src_mt->surf.phys_level0_sa.height,
                            src_level - src_mt->first_level);

   assert(src_layer < get_num_phys_layers(&src_mt->surf,
                                          src_level - src_mt->first_level));

   assert(src_mt->format == dst_mt->format);

   if (dst_mt->compressed) {
      unsigned int i, j;
      _mesa_get_format_block_size(dst_mt->format, &i, &j);
      height = ALIGN_NPOT(height, j) / j;
      width = ALIGN_NPOT(width, i) / i;
   }

   /* If it's a packed depth/stencil buffer with separate stencil, the blit
    * below won't apply since we can't do the depth's Y tiling or the
    * stencil's W tiling in the blitter.
    */
   if (src_mt->stencil_mt) {
      intel_miptree_copy_slice_sw(brw,
                                  src_mt, src_level, src_layer,
                                  dst_mt, dst_level, dst_layer,
                                  width, height);
      return;
   }

   uint32_t dst_x, dst_y, src_x, src_y;
   intel_miptree_get_image_offset(dst_mt, dst_level, dst_layer,
                                  &dst_x, &dst_y);
   intel_miptree_get_image_offset(src_mt, src_level, src_layer,
                                  &src_x, &src_y);

   DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src_x, src_y, src_mt->surf.row_pitch,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst_x, dst_y, dst_mt->surf.row_pitch,
       width, height);

   if (!intel_miptree_blit(brw,
                           src_mt, src_level, src_layer, 0, 0, false,
                           dst_mt, dst_level, dst_layer, 0, 0, false,
                           width, height, GL_COPY)) {
      perf_debug("miptree validate blit for %s failed\n",
                 _mesa_get_format_name(format));

      intel_miptree_copy_slice_sw(brw,
                                  src_mt, src_level, src_layer,
                                  dst_mt, dst_level, dst_layer,
                                  width, height);
   }
}

/**
 * Copies the image's current data to the given miptree, and associates that
 * miptree with the image.
 *
 * If \c invalidate is true, then the actual image data does not need to be
 * copied, but the image still needs to be associated to the new miptree (this
 * is set to true if we're about to clear the image).
 */
void
intel_miptree_copy_teximage(struct brw_context *brw,
                            struct intel_texture_image *intelImage,
                            struct intel_mipmap_tree *dst_mt,
                            bool invalidate)
{
   struct intel_mipmap_tree *src_mt = intelImage->mt;
   struct intel_texture_object *intel_obj =
      intel_texture_object(intelImage->base.Base.TexObject);
   int level = intelImage->base.Base.Level;
   const unsigned face = intelImage->base.Base.Face;
   unsigned start_layer, end_layer;

   if (intel_obj->base.Target == GL_TEXTURE_1D_ARRAY) {
      assert(face == 0);
      assert(intelImage->base.Base.Height);
      start_layer = 0;
      end_layer = intelImage->base.Base.Height - 1;
   } else if (face > 0) {
      start_layer = face;
      end_layer = face;
   } else {
      assert(intelImage->base.Base.Depth);
      start_layer = 0;
      end_layer = intelImage->base.Base.Depth - 1;
   }

   if (!invalidate) {
      for (unsigned i = start_layer; i <= end_layer; i++) {
         intel_miptree_copy_slice(brw,
                                  src_mt, level, i,
                                  dst_mt, level, i);
      }
   }

   intel_miptree_reference(&intelImage->mt, dst_mt);
   intel_obj->needs_validate = true;
}

static void
intel_miptree_init_mcs(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       int init_value)
{
   assert(mt->mcs_buf != NULL);

   /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
    *
    *     When MCS buffer is enabled and bound to MSRT, it is required that it
    *     is cleared prior to any rendering.
    *
    * Since we don't use the MCS buffer for any purpose other than rendering,
    * it makes sense to just clear it immediately upon allocation.
    *
    * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
    */
   void *map = brw_bo_map(brw, mt->mcs_buf->bo, MAP_WRITE);
   if (unlikely(map == NULL)) {
      fprintf(stderr, "Failed to map mcs buffer into GTT\n");
      brw_bo_unreference(mt->mcs_buf->bo);
      free(mt->mcs_buf);
      return;
   }
   void *data = map;
   memset(data, init_value, mt->mcs_buf->size);
   brw_bo_unmap(mt->mcs_buf->bo);
}

static struct intel_miptree_aux_buffer *
intel_alloc_aux_buffer(struct brw_context *brw,
                       const char *name,
                       const struct isl_surf *aux_surf,
                       uint32_t alloc_flags,
                       struct intel_mipmap_tree *mt)
{
   struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
   if (!buf)
      return NULL;

   buf->size = aux_surf->size;
   buf->pitch = aux_surf->row_pitch;
   buf->qpitch = isl_surf_get_array_pitch_sa_rows(aux_surf);

   /* ISL has a stricter set of alignment rules than the drm allocator.
    * Therefore one can pass the ISL dimensions in terms of bytes instead of
    * trying to recalculate based on different format block sizes.
    */
   buf->bo = brw_bo_alloc_tiled(brw->bufmgr, name, buf->size,
                                I915_TILING_Y, buf->pitch, alloc_flags);
   if (!buf->bo) {
      free(buf);
      return NULL;
   }

   buf->surf = *aux_surf;

   return buf;
}

static bool
intel_miptree_alloc_mcs(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
                        GLuint num_samples)
{
   assert(brw->gen >= 7); /* MCS only used on Gen7+ */
   assert(mt->mcs_buf == NULL);
   assert(mt->aux_usage == ISL_AUX_USAGE_MCS);

   /* Multisampled miptrees are only supported with a single miplevel. */
   assert(mt->first_level == 0);
   enum isl_aux_state **aux_state =
      create_aux_state_map(mt, ISL_AUX_STATE_CLEAR);
   if (!aux_state)
      return false;

   struct isl_surf temp_mcs_surf;

   MAYBE_UNUSED bool ok =
      isl_surf_get_mcs_surf(&brw->isl_dev, &mt->surf, &temp_mcs_surf);
   assert(ok);

   /* The buffer needs to be initialised, which requires mapping it into CPU
    * space for writing immediately. Therefore do not use the GPU-access
    * flag, which can cause an unnecessary delay if the backing pages happen
    * to have just been used by the GPU.
    */
   const uint32_t alloc_flags = 0;
   mt->mcs_buf = intel_alloc_aux_buffer(brw, "mcs-miptree",
                                        &temp_mcs_surf, alloc_flags, mt);
   if (!mt->mcs_buf) {
      free(aux_state);
      return false;
   }

   mt->aux_state = aux_state;

   intel_miptree_init_mcs(brw, mt, 0xFF);

   return true;
}

bool
intel_miptree_alloc_ccs(struct brw_context *brw,
                        struct intel_mipmap_tree *mt)
{
   assert(mt->mcs_buf == NULL);
   assert(mt->aux_usage == ISL_AUX_USAGE_CCS_E ||
          mt->aux_usage == ISL_AUX_USAGE_CCS_D);

   struct isl_surf temp_ccs_surf;

   if (!isl_surf_get_ccs_surf(&brw->isl_dev, &mt->surf, &temp_ccs_surf, 0))
      return false;

   assert(temp_ccs_surf.size &&
          (temp_ccs_surf.size % temp_ccs_surf.row_pitch == 0));

   enum isl_aux_state **aux_state =
      create_aux_state_map(mt, ISL_AUX_STATE_PASS_THROUGH);
   if (!aux_state)
      return false;

   /* When CCS_E is used, we need to ensure that the CCS starts off in a valid
    * state. From the Sky Lake PRM, "MCS Buffer for Render Target(s)":
    *
    *    "If Software wants to enable Color Compression without Fast clear,
    *     Software needs to initialize MCS with zeros."
    *
    * A CCS value of 0 indicates that the corresponding block is in the
    * pass-through state, which is what we want.
    *
    * For CCS_D, on the other hand, we don't care as we're about to perform a
    * fast-clear operation. In that case, being hot in caches is more useful.
    */
   const uint32_t alloc_flags = mt->aux_usage == ISL_AUX_USAGE_CCS_E ?
      BO_ALLOC_ZEROED : BO_ALLOC_FOR_RENDER;
   mt->mcs_buf = intel_alloc_aux_buffer(brw, "ccs-miptree",
                                        &temp_ccs_surf, alloc_flags, mt);
   if (!mt->mcs_buf) {
      free(aux_state);
      return false;
   }

   mt->aux_state = aux_state;

   return true;
}

/**
 * Helper for intel_miptree_alloc_hiz() that sets
 * \c mt->level[level].has_hiz. Return true if and only if
 * \c has_hiz was set.
 */
static bool
intel_miptree_level_enable_hiz(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               uint32_t level)
{
   assert(mt->hiz_buf);
   assert(mt->surf.size > 0);

   if (brw->gen >= 8 || brw->is_haswell) {
      uint32_t width = minify(mt->surf.phys_level0_sa.width, level);
      uint32_t height = minify(mt->surf.phys_level0_sa.height, level);

      /* Disable HiZ for LOD > 0 unless the width is 8 aligned
       * and the height is 4 aligned. This allows our HiZ support
       * to fulfill Haswell restrictions for HiZ ops. For LOD == 0,
       * we can grow the width & height to allow the HiZ op to
       * force the proper size alignments.
       */
      if (level > 0 && ((width & 7) || (height & 3))) {
         DBG("mt %p level %d: HiZ DISABLED\n", mt, level);
         return false;
      }
   }

   DBG("mt %p level %d: HiZ enabled\n", mt, level);
   mt->level[level].has_hiz = true;
   return true;
}

bool
intel_miptree_alloc_hiz(struct brw_context *brw,
                        struct intel_mipmap_tree *mt)
{
   assert(mt->hiz_buf == NULL);
   assert(mt->aux_usage == ISL_AUX_USAGE_HIZ);

   enum isl_aux_state **aux_state =
      create_aux_state_map(mt, ISL_AUX_STATE_AUX_INVALID);
   if (!aux_state)
      return false;

   struct isl_surf temp_hiz_surf;

   MAYBE_UNUSED bool ok =
      isl_surf_get_hiz_surf(&brw->isl_dev, &mt->surf, &temp_hiz_surf);
   assert(ok);

   const uint32_t alloc_flags = BO_ALLOC_FOR_RENDER;
   mt->hiz_buf = intel_alloc_aux_buffer(brw, "hiz-miptree",
                                        &temp_hiz_surf, alloc_flags, mt);

   if (!mt->hiz_buf) {
      free(aux_state);
      return false;
   }

   for (unsigned level = mt->first_level; level <= mt->last_level; ++level)
      intel_miptree_level_enable_hiz(brw, mt, level);

   mt->aux_state = aux_state;

   return true;
}


/**
 * Allocate the initial aux surface for a miptree based on mt->aux_usage
 *
 * Since MCS, HiZ, and CCS_E can compress more than just clear color, we
 * create the auxiliary surfaces up-front. CCS_D, on the other hand, can only
 * compress clear color so we wait until an actual fast-clear to allocate it.
 */
static bool
intel_miptree_alloc_aux(struct brw_context *brw,
                        struct intel_mipmap_tree *mt)
{
   switch (mt->aux_usage) {
   case ISL_AUX_USAGE_NONE:
      return true;

   case ISL_AUX_USAGE_HIZ:
      assert(!_mesa_is_format_color_format(mt->format));
      if (!intel_miptree_alloc_hiz(brw, mt))
         return false;
      return true;

   case ISL_AUX_USAGE_MCS:
      assert(_mesa_is_format_color_format(mt->format));
      assert(mt->surf.samples > 1);
      if (!intel_miptree_alloc_mcs(brw, mt, mt->surf.samples))
         return false;
      return true;

   case ISL_AUX_USAGE_CCS_D:
      /* CCS_D can only compress clear color, so we wait until an actual
       * fast-clear to allocate it.
       */
      return true;

   case ISL_AUX_USAGE_CCS_E:
      assert(_mesa_is_format_color_format(mt->format));
      assert(mt->surf.samples == 1);
      if (!intel_miptree_alloc_ccs(brw, mt))
         return false;
      return true;
   }

   unreachable("Invalid aux usage");
}
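
/* Summarizing the allocators above (informational, derived from the code in
 * this file): each aux usage starts its per-slice isl_aux_state map in a
 * different state.
 *
 *    ISL_AUX_USAGE_MCS   -> ISL_AUX_STATE_CLEAR        (memset to 0xff)
 *    ISL_AUX_USAGE_CCS_E -> ISL_AUX_STATE_PASS_THROUGH (BO_ALLOC_ZEROED)
 *    ISL_AUX_USAGE_CCS_D -> allocated lazily at the first fast-clear
 *    ISL_AUX_USAGE_HIZ   -> ISL_AUX_STATE_AUX_INVALID  (uninitialized)
 */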
1801
1802
1803 /**
1804 * Can the miptree sample using the hiz buffer?
1805 */
1806 bool
1807 intel_miptree_sample_with_hiz(struct brw_context *brw,
1808 struct intel_mipmap_tree *mt)
1809 {
1810 /* It's unclear how well supported sampling from the hiz buffer is on GEN8,
1811 * so keep things conservative for now and never enable it unless we're SKL+.
1812 */
1813 if (brw->gen < 9) {
1814 return false;
1815 }
1816
1817 if (!mt->hiz_buf) {
1818 return false;
1819 }
1820
1821 /* It seems the hardware won't fallback to the depth buffer if some of the
1822 * mipmap levels aren't available in the HiZ buffer. So we need all levels
1823 * of the texture to be HiZ enabled.
1824 */
1825 for (unsigned level = 0; level < mt->surf.levels; ++level) {
1826 if (!intel_miptree_level_has_hiz(mt, level))
1827 return false;
1828 }
1829
1830 /* If compressed multisampling is enabled, then we use it for the auxiliary
1831 * buffer instead.
1832 *
1833 * From the BDW PRM (Volume 2d: Command Reference: Structures
1834 * RENDER_SURFACE_STATE.AuxiliarySurfaceMode):
1835 *
1836 * "If this field is set to AUX_HIZ, Number of Multisamples must be
1837 * MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
1838 *
1839 * There is no such blurb for 1D textures, but there is sufficient evidence
1840 * that this is broken on SKL+.
1841 */
1842 return (mt->surf.samples == 1 &&
1843 mt->target != GL_TEXTURE_3D &&
1844 mt->target != GL_TEXTURE_1D /* gen9+ restriction */);
1845 }
1846
1847 /**
1848 * Does the miptree slice have hiz enabled?
1849 */
1850 bool
1851 intel_miptree_level_has_hiz(const struct intel_mipmap_tree *mt, uint32_t level)
1852 {
1853 intel_miptree_check_level_layer(mt, level, 0);
1854 return mt->level[level].has_hiz;
1855 }
1856
1857 bool
1858 intel_miptree_has_color_unresolved(const struct intel_mipmap_tree *mt,
1859 unsigned start_level, unsigned num_levels,
1860 unsigned start_layer, unsigned num_layers)
1861 {
1862 assert(_mesa_is_format_color_format(mt->format));
1863
1864 if (!mt->mcs_buf)
1865 return false;
1866
1867 /* Clamp the level range to fit the miptree */
1868 assert(start_level + num_levels >= start_level);
1869 const uint32_t last_level =
1870 MIN2(mt->last_level, start_level + num_levels - 1);
1871 start_level = MAX2(mt->first_level, start_level);
1872 num_levels = last_level - start_level + 1;
1873
1874 for (uint32_t level = start_level; level <= last_level; level++) {
1875 uint32_t level_layers = get_num_phys_layers(&mt->surf, level);
1876
1877 level_layers = MIN2(num_layers, level_layers);
1878
1879 for (unsigned a = 0; a < level_layers; a++) {
1880 enum isl_aux_state aux_state =
1881 intel_miptree_get_aux_state(mt, level, start_layer + a);
1882 assert(aux_state != ISL_AUX_STATE_AUX_INVALID);
1883 if (aux_state != ISL_AUX_STATE_PASS_THROUGH)
1884 return true;
1885 }
1886 }
1887
1888 return false;
1889 }
1890
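/* Editorial example of the clamping above: for a miptree with first_level = 0
 * and last_level = 4, a query of (start_level = 2, num_levels = 10) is
 * clamped to last_level = MIN2(4, 2 + 10 - 1) = 4, so only levels 2..4 are
 * scanned.
 */
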
1891 static void
1892 intel_miptree_check_color_resolve(const struct brw_context *brw,
1893 const struct intel_mipmap_tree *mt,
1894 unsigned level, unsigned layer)
1895 {
1897 if (!mt->mcs_buf)
1898 return;
1899
1900 /* Fast color clear is supported for mipmapped surfaces only on Gen8+. */
1901 assert(brw->gen >= 8 ||
1902 (level == 0 && mt->first_level == 0 && mt->last_level == 0));
1903
1904 /* Compression of arrayed msaa surfaces is supported. */
1905 if (mt->surf.samples > 1)
1906 return;
1907
1908 /* Fast color clear is supported for non-msaa arrays only on Gen8+. */
1909 assert(brw->gen >= 8 ||
1910 (layer == 0 &&
1911 mt->surf.logical_level0_px.depth == 1 &&
1912 mt->surf.logical_level0_px.array_len == 1));
1913
1914 (void)level;
1915 (void)layer;
1916 }
1917
1918 static enum blorp_fast_clear_op
1919 get_ccs_d_resolve_op(enum isl_aux_state aux_state,
1920 enum isl_aux_usage aux_usage,
1921 bool fast_clear_supported)
1922 {
1923 assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_CCS_D);
1924
1925 const bool ccs_supported = aux_usage == ISL_AUX_USAGE_CCS_D;
1926
1927 assert(ccs_supported == fast_clear_supported);
1928
1929 switch (aux_state) {
1930 case ISL_AUX_STATE_CLEAR:
1931 case ISL_AUX_STATE_COMPRESSED_CLEAR:
1932 if (!ccs_supported)
1933 return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
1934 else
1935 return BLORP_FAST_CLEAR_OP_NONE;
1936
1937 case ISL_AUX_STATE_PASS_THROUGH:
1938 return BLORP_FAST_CLEAR_OP_NONE;
1939
1940 case ISL_AUX_STATE_PARTIAL_CLEAR:
1941 case ISL_AUX_STATE_RESOLVED:
1942 case ISL_AUX_STATE_AUX_INVALID:
1943 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
1944 break;
1945 }
1946
1947 unreachable("Invalid aux state for CCS_D");
1948 }
1949
1950 static enum blorp_fast_clear_op
1951 get_ccs_e_resolve_op(enum isl_aux_state aux_state,
1952 enum isl_aux_usage aux_usage,
1953 bool fast_clear_supported)
1954 {
1955 assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_CCS_E);
1956
1957 const bool ccs_supported = aux_usage == ISL_AUX_USAGE_CCS_E;
1958
1959 switch (aux_state) {
1960 case ISL_AUX_STATE_CLEAR:
1961 case ISL_AUX_STATE_COMPRESSED_CLEAR:
1962 if (!ccs_supported)
1963 return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
1964 else if (!fast_clear_supported)
1965 return BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
1966 else
1967 return BLORP_FAST_CLEAR_OP_NONE;
1968
1969 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
1970 if (!ccs_supported)
1971 return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
1972 else
1973 return BLORP_FAST_CLEAR_OP_NONE;
1974
1975 case ISL_AUX_STATE_PASS_THROUGH:
1976 return BLORP_FAST_CLEAR_OP_NONE;
1977
1978 case ISL_AUX_STATE_PARTIAL_CLEAR:
1979 case ISL_AUX_STATE_RESOLVED:
1980 case ISL_AUX_STATE_AUX_INVALID:
1981 break;
1982 }
1983
1984 unreachable("Invalid aux state for CCS_E");
1985 }
1986
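/* A worked example (editorial) of the decision logic above: accessing a
 * CCS_E surface without aux (e.g. through an incompatible view format) forces
 * a full resolve of a compressed-clear slice, while a CCS_E-capable access
 * that merely can't handle fast-clear blocks only needs a partial resolve:
 *
 *    get_ccs_e_resolve_op(ISL_AUX_STATE_COMPRESSED_CLEAR,
 *                         ISL_AUX_USAGE_NONE, false)
 *       == BLORP_FAST_CLEAR_OP_RESOLVE_FULL
 *    get_ccs_e_resolve_op(ISL_AUX_STATE_COMPRESSED_CLEAR,
 *                         ISL_AUX_USAGE_CCS_E, false)
 *       == BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL
 *    get_ccs_e_resolve_op(ISL_AUX_STATE_PASS_THROUGH,
 *                         ISL_AUX_USAGE_NONE, false)
 *       == BLORP_FAST_CLEAR_OP_NONE
 */
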
1987 static void
1988 intel_miptree_prepare_ccs_access(struct brw_context *brw,
1989 struct intel_mipmap_tree *mt,
1990 uint32_t level, uint32_t layer,
1991 enum isl_aux_usage aux_usage,
1992 bool fast_clear_supported)
1993 {
1994 enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, layer);
1995
1996 enum blorp_fast_clear_op resolve_op;
1997 if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
1998 resolve_op = get_ccs_e_resolve_op(aux_state, aux_usage,
1999 fast_clear_supported);
2000 } else {
2001 assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D);
2002 resolve_op = get_ccs_d_resolve_op(aux_state, aux_usage,
2003 fast_clear_supported);
2004 }
2005
2006 if (resolve_op != BLORP_FAST_CLEAR_OP_NONE) {
2007 intel_miptree_check_color_resolve(brw, mt, level, layer);
2008 brw_blorp_resolve_color(brw, mt, level, layer, resolve_op);
2009
2010 switch (resolve_op) {
2011 case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
2012 /* The CCS full resolve operation destroys the CCS and sets it to the
2013 * pass-through state. (You can also think of this as being both a
2014 * resolve and an ambiguate in one operation.)
2015 */
2016 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2017 ISL_AUX_STATE_PASS_THROUGH);
2018 break;
2019
2020 case BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL:
2021 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2022 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2023 break;
2024
2025 default:
2026 unreachable("Invalid resolve op");
2027 }
2028 }
2029 }
2030
2031 static void
2032 intel_miptree_finish_ccs_write(struct brw_context *brw,
2033 struct intel_mipmap_tree *mt,
2034 uint32_t level, uint32_t layer,
2035 enum isl_aux_usage aux_usage)
2036 {
2037 assert(aux_usage == ISL_AUX_USAGE_NONE ||
2038 aux_usage == ISL_AUX_USAGE_CCS_D ||
2039 aux_usage == ISL_AUX_USAGE_CCS_E);
2040
2041 enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, layer);
2042
2043 if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
2044 switch (aux_state) {
2045 case ISL_AUX_STATE_CLEAR:
2046 assert(aux_usage == ISL_AUX_USAGE_CCS_E);
2047 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2048 ISL_AUX_STATE_COMPRESSED_CLEAR);
2049 break;
2050
2051 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2052 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2053 assert(aux_usage == ISL_AUX_USAGE_CCS_E);
2054 break; /* Nothing to do */
2055
2056 case ISL_AUX_STATE_PASS_THROUGH:
2057 if (aux_usage == ISL_AUX_USAGE_CCS_E) {
2058 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2059 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2060 } else {
2061 /* Nothing to do */
2062 }
2063 break;
2064
2065 case ISL_AUX_STATE_PARTIAL_CLEAR:
2066 case ISL_AUX_STATE_RESOLVED:
2067 case ISL_AUX_STATE_AUX_INVALID:
2068 unreachable("Invalid aux state for CCS_E");
2069 }
2070 } else {
2071 assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D);
2072 /* CCS_D is a bit simpler */
2073 switch (aux_state) {
2074 case ISL_AUX_STATE_CLEAR:
2075 assert(aux_usage == ISL_AUX_USAGE_CCS_D);
2076 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2077 ISL_AUX_STATE_COMPRESSED_CLEAR);
2078 break;
2079
2080 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2081 assert(aux_usage == ISL_AUX_USAGE_CCS_D);
2082 break; /* Nothing to do */
2083
2084 case ISL_AUX_STATE_PASS_THROUGH:
2085 /* Nothing to do */
2086 break;
2087
2088 case ISL_AUX_STATE_PARTIAL_CLEAR:
2089 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2090 case ISL_AUX_STATE_RESOLVED:
2091 case ISL_AUX_STATE_AUX_INVALID:
2092 unreachable("Invalid aux state for CCS_D");
2093 }
2094 }
2095 }
2096
2097 static void
2098 intel_miptree_prepare_mcs_access(struct brw_context *brw,
2099 struct intel_mipmap_tree *mt,
2100 uint32_t layer,
2101 enum isl_aux_usage aux_usage,
2102 bool fast_clear_supported)
2103 {
2104 assert(aux_usage == ISL_AUX_USAGE_MCS);
2105
2106 switch (intel_miptree_get_aux_state(mt, 0, layer)) {
2107 case ISL_AUX_STATE_CLEAR:
2108 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2109 if (!fast_clear_supported) {
2110 brw_blorp_mcs_partial_resolve(brw, mt, layer, 1);
2111 intel_miptree_set_aux_state(brw, mt, 0, layer, 1,
2112 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2113 }
2114 break;
2115
2116 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2117 break; /* Nothing to do */
2118
2119 case ISL_AUX_STATE_RESOLVED:
2120 case ISL_AUX_STATE_PASS_THROUGH:
2121 case ISL_AUX_STATE_AUX_INVALID:
2122 case ISL_AUX_STATE_PARTIAL_CLEAR:
2123 unreachable("Invalid aux state for MCS");
2124 }
2125 }
2126
2127 static void
2128 intel_miptree_finish_mcs_write(struct brw_context *brw,
2129 struct intel_mipmap_tree *mt,
2130 uint32_t layer,
2131 enum isl_aux_usage aux_usage)
2132 {
2133 assert(aux_usage == ISL_AUX_USAGE_MCS);
2134
2135 switch (intel_miptree_get_aux_state(mt, 0, layer)) {
2136 case ISL_AUX_STATE_CLEAR:
2137 intel_miptree_set_aux_state(brw, mt, 0, layer, 1,
2138 ISL_AUX_STATE_COMPRESSED_CLEAR);
2139 break;
2140
2141 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2142 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2143 break; /* Nothing to do */
2144
2145 case ISL_AUX_STATE_RESOLVED:
2146 case ISL_AUX_STATE_PASS_THROUGH:
2147 case ISL_AUX_STATE_AUX_INVALID:
2148 case ISL_AUX_STATE_PARTIAL_CLEAR:
2149 unreachable("Invalid aux state for MCS");
2150 }
2151 }
2152
2153 static void
2154 intel_miptree_prepare_hiz_access(struct brw_context *brw,
2155 struct intel_mipmap_tree *mt,
2156 uint32_t level, uint32_t layer,
2157 enum isl_aux_usage aux_usage,
2158 bool fast_clear_supported)
2159 {
2160 assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_HIZ);
2161
2162 enum blorp_hiz_op hiz_op = BLORP_HIZ_OP_NONE;
2163 switch (intel_miptree_get_aux_state(mt, level, layer)) {
2164 case ISL_AUX_STATE_CLEAR:
2165 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2166 if (aux_usage != ISL_AUX_USAGE_HIZ || !fast_clear_supported)
2167 hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
2168 break;
2169
2170 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2171 if (aux_usage != ISL_AUX_USAGE_HIZ)
2172 hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
2173 break;
2174
2175 case ISL_AUX_STATE_PASS_THROUGH:
2176 case ISL_AUX_STATE_RESOLVED:
2177 break;
2178
2179 case ISL_AUX_STATE_AUX_INVALID:
2180 if (aux_usage == ISL_AUX_USAGE_HIZ)
2181 hiz_op = BLORP_HIZ_OP_HIZ_RESOLVE;
2182 break;
2183
2184 case ISL_AUX_STATE_PARTIAL_CLEAR:
2185 unreachable("Invalid HiZ state");
2186 }
2187
2188 if (hiz_op != BLORP_HIZ_OP_NONE) {
2189 intel_hiz_exec(brw, mt, level, layer, 1, hiz_op);
2190
2191 switch (hiz_op) {
2192 case BLORP_HIZ_OP_DEPTH_RESOLVE:
2193 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2194 ISL_AUX_STATE_RESOLVED);
2195 break;
2196
2197 case BLORP_HIZ_OP_HIZ_RESOLVE:
2198 /* The HiZ resolve operation is actually an ambiguate */
2199 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2200 ISL_AUX_STATE_PASS_THROUGH);
2201 break;
2202
2203 default:
2204 unreachable("Invalid HiZ op");
2205 }
2206 }
2207 }
2208
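/* Editorial trace of the state machine above, assuming a slice whose HiZ
 * data was invalidated by a non-HiZ depth write (ISL_AUX_STATE_AUX_INVALID):
 * preparing it for HiZ rendering runs a HiZ resolve (an ambiguate) and lands
 * in the pass-through state, from which a subsequent HiZ write moves it to
 * compressed-no-clear via intel_miptree_finish_hiz_write():
 *
 *    intel_miptree_prepare_hiz_access(brw, mt, level, layer,
 *                                     ISL_AUX_USAGE_HIZ, true);
 *    assert(intel_miptree_get_aux_state(mt, level, layer) ==
 *           ISL_AUX_STATE_PASS_THROUGH);
 */
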
2209 static void
2210 intel_miptree_finish_hiz_write(struct brw_context *brw,
2211 struct intel_mipmap_tree *mt,
2212 uint32_t level, uint32_t layer,
2213 enum isl_aux_usage aux_usage)
2214 {
2215 assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_HIZ);
2216
2217 switch (intel_miptree_get_aux_state(mt, level, layer)) {
2218 case ISL_AUX_STATE_CLEAR:
2219 assert(aux_usage == ISL_AUX_USAGE_HIZ);
2220 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2221 ISL_AUX_STATE_COMPRESSED_CLEAR);
2222 break;
2223
2224 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2225 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2226 assert(aux_usage == ISL_AUX_USAGE_HIZ);
2227 break; /* Nothing to do */
2228
2229 case ISL_AUX_STATE_RESOLVED:
2230 if (aux_usage == ISL_AUX_USAGE_HIZ) {
2231 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2232 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2233 } else {
2234 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2235 ISL_AUX_STATE_AUX_INVALID);
2236 }
2237 break;
2238
2239 case ISL_AUX_STATE_PASS_THROUGH:
2240 if (aux_usage == ISL_AUX_USAGE_HIZ) {
2241 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2242 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2243 }
2244 break;
2245
2246 case ISL_AUX_STATE_AUX_INVALID:
2247 assert(aux_usage != ISL_AUX_USAGE_HIZ);
2248 break;
2249
2250 case ISL_AUX_STATE_PARTIAL_CLEAR:
2251 unreachable("Invalid HiZ state");
2252 }
2253 }
2254
2255 static inline uint32_t
2256 miptree_level_range_length(const struct intel_mipmap_tree *mt,
2257 uint32_t start_level, uint32_t num_levels)
2258 {
2259 assert(start_level >= mt->first_level);
2260 assert(start_level <= mt->last_level);
2261
2262 if (num_levels == INTEL_REMAINING_LAYERS)
2263 num_levels = mt->last_level - start_level + 1;
2264 /* Check for overflow */
2265 assert(start_level + num_levels >= start_level);
2266 assert(start_level + num_levels <= mt->last_level + 1);
2267
2268 return num_levels;
2269 }
2270
2271 static inline uint32_t
2272 miptree_layer_range_length(const struct intel_mipmap_tree *mt, uint32_t level,
2273 uint32_t start_layer, uint32_t num_layers)
2274 {
2275 assert(level <= mt->last_level);
2276
2277 const uint32_t total_num_layers = get_num_logical_layers(mt, level);
2278 assert(start_layer < total_num_layers);
2279 if (num_layers == INTEL_REMAINING_LAYERS)
2280 num_layers = total_num_layers - start_layer;
2281 /* Check for overflow */
2282 assert(start_layer + num_layers >= start_layer);
2283 assert(start_layer + num_layers <= total_num_layers);
2284
2285 return num_layers;
2286 }
2287
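/* Editorial example: for a 2D array texture with 8 layers at the given
 * level, miptree_layer_range_length(mt, level, 5, INTEL_REMAINING_LAYERS)
 * returns 8 - 5 = 3, while an explicit (start_layer = 5, num_layers = 4)
 * trips the range assertions because 5 + 4 > 8.
 */
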
2288 void
2289 intel_miptree_prepare_access(struct brw_context *brw,
2290 struct intel_mipmap_tree *mt,
2291 uint32_t start_level, uint32_t num_levels,
2292 uint32_t start_layer, uint32_t num_layers,
2293 enum isl_aux_usage aux_usage,
2294 bool fast_clear_supported)
2295 {
2296 num_levels = miptree_level_range_length(mt, start_level, num_levels);
2297
2298 switch (mt->aux_usage) {
2299 case ISL_AUX_USAGE_NONE:
2300 /* Nothing to do */
2301 break;
2302
2303 case ISL_AUX_USAGE_MCS:
2304 assert(mt->mcs_buf);
2305 assert(start_level == 0 && num_levels == 1);
2306 const uint32_t level_layers =
2307 miptree_layer_range_length(mt, 0, start_layer, num_layers);
2308 for (uint32_t a = 0; a < level_layers; a++) {
2309 intel_miptree_prepare_mcs_access(brw, mt, start_layer + a,
2310 aux_usage, fast_clear_supported);
2311 }
2312 break;
2313
2314 case ISL_AUX_USAGE_CCS_D:
2315 case ISL_AUX_USAGE_CCS_E:
2316 if (!mt->mcs_buf)
2317 return;
2318
2319 for (uint32_t l = 0; l < num_levels; l++) {
2320 const uint32_t level = start_level + l;
2321 const uint32_t level_layers =
2322 miptree_layer_range_length(mt, level, start_layer, num_layers);
2323 for (uint32_t a = 0; a < level_layers; a++) {
2324 intel_miptree_prepare_ccs_access(brw, mt, level,
2325 start_layer + a,
2326 aux_usage, fast_clear_supported);
2327 }
2328 }
2329 break;
2330
2331 case ISL_AUX_USAGE_HIZ:
2332 assert(mt->hiz_buf);
2333 for (uint32_t l = 0; l < num_levels; l++) {
2334 const uint32_t level = start_level + l;
2335 if (!intel_miptree_level_has_hiz(mt, level))
2336 continue;
2337
2338 const uint32_t level_layers =
2339 miptree_layer_range_length(mt, level, start_layer, num_layers);
2340 for (uint32_t a = 0; a < level_layers; a++) {
2341 intel_miptree_prepare_hiz_access(brw, mt, level, start_layer + a,
2342 aux_usage, fast_clear_supported);
2343 }
2344 }
2345 break;
2346
2347 default:
2348 unreachable("Invalid aux usage");
2349 }
2350 }
2351
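/* Editorial usage sketch: a CPU access with no aux support resolves the
 * slice first and, if it writes, reports the write as uncompressed (roughly
 * the pattern intel_miptree_access_raw() wraps for the map paths below):
 *
 *    intel_miptree_prepare_access(brw, mt, level, 1, slice, 1,
 *                                 ISL_AUX_USAGE_NONE, false);
 *    ... CPU reads/writes through a mapping ...
 *    intel_miptree_finish_write(brw, mt, level, slice, 1,
 *                               ISL_AUX_USAGE_NONE);
 */
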
2352 void
2353 intel_miptree_finish_write(struct brw_context *brw,
2354 struct intel_mipmap_tree *mt, uint32_t level,
2355 uint32_t start_layer, uint32_t num_layers,
2356 enum isl_aux_usage aux_usage)
2357 {
2358 num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers);
2359
2360 switch (mt->aux_usage) {
2361 case ISL_AUX_USAGE_NONE:
2362 /* Nothing to do */
2363 break;
2364
2365 case ISL_AUX_USAGE_MCS:
2366 assert(mt->mcs_buf);
2367 for (uint32_t a = 0; a < num_layers; a++) {
2368 intel_miptree_finish_mcs_write(brw, mt, start_layer + a,
2369 aux_usage);
2370 }
2371 break;
2372
2373 case ISL_AUX_USAGE_CCS_D:
2374 case ISL_AUX_USAGE_CCS_E:
2375 if (!mt->mcs_buf)
2376 return;
2377
2378 for (uint32_t a = 0; a < num_layers; a++) {
2379 intel_miptree_finish_ccs_write(brw, mt, level, start_layer + a,
2380 aux_usage);
2381 }
2382 break;
2383
2384 case ISL_AUX_USAGE_HIZ:
2385 if (!intel_miptree_level_has_hiz(mt, level))
2386 return;
2387
2388 for (uint32_t a = 0; a < num_layers; a++) {
2389 intel_miptree_finish_hiz_write(brw, mt, level, start_layer + a,
2390 aux_usage);
2391 }
2392 break;
2393
2394 default:
2395 unreachable("Invavlid aux usage");
2396 }
2397 }
2398
2399 enum isl_aux_state
2400 intel_miptree_get_aux_state(const struct intel_mipmap_tree *mt,
2401 uint32_t level, uint32_t layer)
2402 {
2403 intel_miptree_check_level_layer(mt, level, layer);
2404
2405 if (_mesa_is_format_color_format(mt->format)) {
2406 assert(mt->mcs_buf != NULL);
2407 assert(mt->surf.samples == 1 ||
2408 mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
2409 } else if (mt->format == MESA_FORMAT_S_UINT8) {
2410 unreachable("Cannot get aux state for stencil");
2411 } else {
2412 assert(intel_miptree_level_has_hiz(mt, level));
2413 }
2414
2415 return mt->aux_state[level][layer];
2416 }
2417
2418 void
2419 intel_miptree_set_aux_state(struct brw_context *brw,
2420 struct intel_mipmap_tree *mt, uint32_t level,
2421 uint32_t start_layer, uint32_t num_layers,
2422 enum isl_aux_state aux_state)
2423 {
2424 num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers);
2425
2426 if (_mesa_is_format_color_format(mt->format)) {
2427 assert(mt->mcs_buf != NULL);
2428 assert(mt->surf.samples == 1 ||
2429 mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
2430 } else if (mt->format == MESA_FORMAT_S_UINT8) {
2431 unreachable("Cannot get aux state for stencil");
2432 } else {
2433 assert(intel_miptree_level_has_hiz(mt, level));
2434 }
2435
2436 for (unsigned a = 0; a < num_layers; a++)
2437 mt->aux_state[level][start_layer + a] = aux_state;
2438 }
2439
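/* Editorial example: a blorp fast clear of a full level would mark all of
 * its layers as being in the clear state:
 *
 *    intel_miptree_set_aux_state(brw, mt, level, 0, INTEL_REMAINING_LAYERS,
 *                                ISL_AUX_STATE_CLEAR);
 */
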
2440 /* On Gen9 color buffers may be compressed by the hardware (lossless
2441 * compression). There are, however, format restrictions and care needs to be
2442 * taken that the sampler engine is capable of re-interpreting a buffer with
2443 * a format different from the one it was originally written with.
2444 *
2445 * For example, SRGB formats are not compressible and the sampler engine isn't
2446 * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying
2447 * color buffer needs to be resolved so that the sampling surface can be
2448 * sampled as non-compressed (i.e., without the auxiliary MCS buffer being
2449 * set).
2450 */
2451 static bool
2452 can_texture_with_ccs(struct brw_context *brw,
2453 struct intel_mipmap_tree *mt,
2454 mesa_format view_format)
2455 {
2456 if (mt->aux_usage != ISL_AUX_USAGE_CCS_E)
2457 return false;
2458
2459 enum isl_format isl_mt_format = brw_isl_format_for_mesa_format(mt->format);
2460 enum isl_format isl_view_format = brw_isl_format_for_mesa_format(view_format);
2461
2462 if (!isl_formats_are_ccs_e_compatible(&brw->screen->devinfo,
2463 isl_mt_format, isl_view_format)) {
2464 perf_debug("Incompatible sampling format (%s) for rbc (%s)\n",
2465 _mesa_get_format_name(view_format),
2466 _mesa_get_format_name(mt->format));
2467 return false;
2468 }
2469
2470 return true;
2471 }
2472
2473 enum isl_aux_usage
2474 intel_miptree_texture_aux_usage(struct brw_context *brw,
2475 struct intel_mipmap_tree *mt,
2476 enum isl_format view_format)
2477 {
2478 switch (mt->aux_usage) {
2479 case ISL_AUX_USAGE_HIZ:
2480 if (intel_miptree_sample_with_hiz(brw, mt))
2481 return ISL_AUX_USAGE_HIZ;
2482 break;
2483
2484 case ISL_AUX_USAGE_MCS:
2485 return ISL_AUX_USAGE_MCS;
2486
2487 case ISL_AUX_USAGE_CCS_D:
2488 case ISL_AUX_USAGE_CCS_E:
2489 if (mt->mcs_buf && can_texture_with_ccs(brw, mt, view_format))
2490 return ISL_AUX_USAGE_CCS_E;
2491 break;
2492
2493 default:
2494 break;
2495 }
2496
2497 return ISL_AUX_USAGE_NONE;
2498 }
2499
2500 static void
2501 intel_miptree_prepare_texture_slices(struct brw_context *brw,
2502 struct intel_mipmap_tree *mt,
2503 mesa_format view_format,
2504 uint32_t start_level, uint32_t num_levels,
2505 uint32_t start_layer, uint32_t num_layers,
2506 bool *aux_supported_out)
2507 {
2508 enum isl_aux_usage aux_usage =
2509 intel_miptree_texture_aux_usage(brw, mt, view_format);
2510 bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE;
2511
2512 /* Clear color is specified as ints or floats and the conversion is done by
2513 * the sampler. If we have a texture view, we would have to perform the
2514 * clear color conversion manually. Just disable clear color.
2515 */
2516 if (mt->format != view_format)
2517 clear_supported = false;
2518
2519 intel_miptree_prepare_access(brw, mt, start_level, num_levels,
2520 start_layer, num_layers,
2521 aux_usage, clear_supported);
2522 if (aux_supported_out)
2523 *aux_supported_out = aux_usage != ISL_AUX_USAGE_NONE;
2524 }
2525
2526 void
2527 intel_miptree_prepare_texture(struct brw_context *brw,
2528 struct intel_mipmap_tree *mt,
2529 mesa_format view_format,
2530 bool *aux_supported_out)
2531 {
2532 intel_miptree_prepare_texture_slices(brw, mt, view_format,
2533 0, INTEL_REMAINING_LEVELS,
2534 0, INTEL_REMAINING_LAYERS,
2535 aux_supported_out);
2536 }
2537
2538 void
2539 intel_miptree_prepare_image(struct brw_context *brw,
2540 struct intel_mipmap_tree *mt)
2541 {
2542 /* The data port doesn't understand any compression */
2543 intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
2544 0, INTEL_REMAINING_LAYERS,
2545 ISL_AUX_USAGE_NONE, false);
2546 }
2547
2548 void
2549 intel_miptree_prepare_fb_fetch(struct brw_context *brw,
2550 struct intel_mipmap_tree *mt, uint32_t level,
2551 uint32_t start_layer, uint32_t num_layers)
2552 {
2553 intel_miptree_prepare_texture_slices(brw, mt, mt->format, level, 1,
2554 start_layer, num_layers, NULL);
2555 }
2556
2557 enum isl_aux_usage
2558 intel_miptree_render_aux_usage(struct brw_context *brw,
2559 struct intel_mipmap_tree *mt,
2560 bool srgb_enabled)
2561 {
2562 switch (mt->aux_usage) {
2563 case ISL_AUX_USAGE_MCS:
2564 assert(mt->mcs_buf);
2565 return ISL_AUX_USAGE_MCS;
2566
2567 case ISL_AUX_USAGE_CCS_D:
2568 /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of
2569 * the single-sampled color renderbuffers because the CCS buffer isn't
2570 * supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is
2571 * enabled because otherwise the surface state will be programmed with
2572 * the linear equivalent format anyway.
2573 */
2574 if (srgb_enabled &&
2575 _mesa_get_srgb_format_linear(mt->format) != mt->format) {
2576 return ISL_AUX_USAGE_NONE;
2577 } else if (!mt->mcs_buf) {
2578 return ISL_AUX_USAGE_NONE;
2579 } else {
2580 return ISL_AUX_USAGE_CCS_D;
2581 }
2582
2583 case ISL_AUX_USAGE_CCS_E: {
2584 /* Lossless compression is not supported for SRGB formats, so it
2585 * should be impossible to get here with such surfaces.
2586 */
2587 assert(!srgb_enabled ||
2588 _mesa_get_srgb_format_linear(mt->format) == mt->format);
2589
2590 return ISL_AUX_USAGE_CCS_E;
2591 }
2592
2593 default:
2594 return ISL_AUX_USAGE_NONE;
2595 }
2596 }
2597
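/* Editorial example, assuming an sRGB-format renderbuffer whose CCS_D
 * buffer has already been allocated by a fast clear:
 *
 *    intel_miptree_render_aux_usage(brw, mt, true)  == ISL_AUX_USAGE_NONE
 *    intel_miptree_render_aux_usage(brw, mt, false) == ISL_AUX_USAGE_CCS_D
 *
 * With GL_FRAMEBUFFER_SRGB enabled the sRGB format itself is programmed and
 * CCS can't back it; with it disabled the linear equivalent is used and
 * CCS_D remains available.
 */
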
2598 void
2599 intel_miptree_prepare_render(struct brw_context *brw,
2600 struct intel_mipmap_tree *mt, uint32_t level,
2601 uint32_t start_layer, uint32_t layer_count,
2602 bool srgb_enabled)
2603 {
2604 enum isl_aux_usage aux_usage =
2605 intel_miptree_render_aux_usage(brw, mt, srgb_enabled);
2606 intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
2607 aux_usage, aux_usage != ISL_AUX_USAGE_NONE);
2608 }
2609
2610 void
2611 intel_miptree_finish_render(struct brw_context *brw,
2612 struct intel_mipmap_tree *mt, uint32_t level,
2613 uint32_t start_layer, uint32_t layer_count,
2614 bool srgb_enabled)
2615 {
2616 assert(_mesa_is_format_color_format(mt->format));
2617
2618 enum isl_aux_usage aux_usage =
2619 intel_miptree_render_aux_usage(brw, mt, srgb_enabled);
2620 intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
2621 aux_usage);
2622 }
2623
2624 void
2625 intel_miptree_prepare_depth(struct brw_context *brw,
2626 struct intel_mipmap_tree *mt, uint32_t level,
2627 uint32_t start_layer, uint32_t layer_count)
2628 {
2629 intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
2630 mt->aux_usage, mt->hiz_buf != NULL);
2631 }
2632
2633 void
2634 intel_miptree_finish_depth(struct brw_context *brw,
2635 struct intel_mipmap_tree *mt, uint32_t level,
2636 uint32_t start_layer, uint32_t layer_count,
2637 bool depth_written)
2638 {
2639 if (depth_written) {
2640 intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
2641 mt->hiz_buf ? ISL_AUX_USAGE_HIZ : ISL_AUX_USAGE_NONE);
2642 }
2643 }
2644
2645 /**
2646 * Make it possible to share the BO backing the given miptree with another
2647 * process or another miptree.
2648 *
2649 * Fast color clears are unsafe with shared buffers, so we need to resolve and
2650 * then discard the MCS buffer, if present. We also set aux_usage to
2651 * ISL_AUX_USAGE_NONE so that no aux buffer gets allocated in the future.
2652 *
2653 * HiZ is similarly unsafe with shared buffers.
2654 */
2655 void
2656 intel_miptree_make_shareable(struct brw_context *brw,
2657 struct intel_mipmap_tree *mt)
2658 {
2659 /* MCS buffers are also used for multisample buffers, but we can't resolve
2660 * away a multisample MCS buffer because it's an integral part of how the
2661 * pixel data is stored. Fortunately this code path should never be
2662 * reached for multisample buffers.
2663 */
2664 assert(mt->surf.msaa_layout == ISL_MSAA_LAYOUT_NONE ||
2665 mt->surf.samples == 1);
2666
2667 intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
2668 0, INTEL_REMAINING_LAYERS,
2669 ISL_AUX_USAGE_NONE, false);
2670
2671 if (mt->mcs_buf) {
2672 brw_bo_unreference(mt->mcs_buf->bo);
2673 free(mt->mcs_buf);
2674 mt->mcs_buf = NULL;
2675
2676 /* Any pending MCS/CCS operations are no longer needed. Trying to
2677 * execute any will likely crash due to the missing aux buffer. So let's
2678 * delete all pending ops.
2679 */
2680 free(mt->aux_state);
2681 mt->aux_state = NULL;
2682 }
2683
2684 if (mt->hiz_buf) {
2685 intel_miptree_aux_buffer_free(mt->hiz_buf);
2686 mt->hiz_buf = NULL;
2687
2688 for (uint32_t l = mt->first_level; l <= mt->last_level; ++l) {
2689 mt->level[l].has_hiz = false;
2690 }
2691
2692 /* Any pending HiZ operations are no longer needed. Trying to execute
2693 * any will likely crash due to the missing aux buffer. So let's delete
2694 * all pending ops.
2695 */
2696 free(mt->aux_state);
2697 mt->aux_state = NULL;
2698 }
2699
2700 mt->aux_usage = ISL_AUX_USAGE_NONE;
2701 }
2702
2703
2704 /**
2705 * \brief Get pointer offset into stencil buffer.
2706 *
2707 * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
2708 * must decode the tile's layout in software.
2709 *
2710 * See
2711 * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
2712 * Format.
2713 * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
2714 *
2715 * Even though the returned offset is always positive, the return type is
2716 * signed due to
2717 * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
2718 * mesa: Fix return type of _mesa_get_format_bytes() (#37351)
2719 */
2720 static intptr_t
2721 intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
2722 {
2723 uint32_t tile_size = 4096;
2724 uint32_t tile_width = 64;
2725 uint32_t tile_height = 64;
2726 uint32_t row_size = 64 * stride / 2; /* Two rows are interleaved. */
2727
2728 uint32_t tile_x = x / tile_width;
2729 uint32_t tile_y = y / tile_height;
2730
2731 /* The byte's address relative to the tile's base address. */
2732 uint32_t byte_x = x % tile_width;
2733 uint32_t byte_y = y % tile_height;
2734
2735 uintptr_t u = tile_y * row_size
2736 + tile_x * tile_size
2737 + 512 * (byte_x / 8)
2738 + 64 * (byte_y / 8)
2739 + 32 * ((byte_y / 4) % 2)
2740 + 16 * ((byte_x / 4) % 2)
2741 + 8 * ((byte_y / 2) % 2)
2742 + 4 * ((byte_x / 2) % 2)
2743 + 2 * (byte_y % 2)
2744 + 1 * (byte_x % 2);
2745
2746 if (swizzled) {
2747 /* adjust for bit6 swizzling */
2748 if (((byte_x / 8) % 2) == 1) {
2749 if (((byte_y / 8) % 2) == 0) {
2750 u += 64;
2751 } else {
2752 u -= 64;
2753 }
2754 }
2755 }
2756
2757 return u;
2758 }
2759
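/* Worked example (editorial): intel_offset_S8(512, 7, 4, false). The byte
 * lies in tile (0, 0) at byte_x = 7, byte_y = 4, so only four terms of the
 * sum are non-zero:
 *
 *    u = 32 * ((4 / 4) % 2)   -> 32
 *      + 16 * ((7 / 4) % 2)   -> 16
 *      +  4 * ((7 / 2) % 2)   ->  4
 *      +  1 * (7 % 2)         ->  1
 *      = 53
 */
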
2760 void
2761 intel_miptree_updownsample(struct brw_context *brw,
2762 struct intel_mipmap_tree *src,
2763 struct intel_mipmap_tree *dst)
2764 {
2765 unsigned src_w = src->surf.logical_level0_px.width;
2766 unsigned src_h = src->surf.logical_level0_px.height;
2767 unsigned dst_w = dst->surf.logical_level0_px.width;
2768 unsigned dst_h = dst->surf.logical_level0_px.height;
2769
2770 brw_blorp_blit_miptrees(brw,
2771 src, 0 /* level */, 0 /* layer */,
2772 src->format, SWIZZLE_XYZW,
2773 dst, 0 /* level */, 0 /* layer */, dst->format,
2774 0, 0, src_w, src_h,
2775 0, 0, dst_w, dst_h,
2776 GL_NEAREST, false, false /*mirror x, y*/,
2777 false, false);
2778
2779 if (src->stencil_mt) {
2780 src_w = src->stencil_mt->surf.logical_level0_px.width;
2781 src_h = src->stencil_mt->surf.logical_level0_px.height;
2782 dst_w = dst->stencil_mt->surf.logical_level0_px.width;
2783 dst_h = dst->stencil_mt->surf.logical_level0_px.height;
2784
2785 brw_blorp_blit_miptrees(brw,
2786 src->stencil_mt, 0 /* level */, 0 /* layer */,
2787 src->stencil_mt->format, SWIZZLE_XYZW,
2788 dst->stencil_mt, 0 /* level */, 0 /* layer */,
2789 dst->stencil_mt->format,
2790 0, 0, src_w, src_h,
2791 0, 0, dst_w, dst_h,
2792 GL_NEAREST, false, false /*mirror x, y*/,
2793 false, false /* decode/encode srgb */);
2794 }
2795 }
2796
2797 void
2798 intel_update_r8stencil(struct brw_context *brw,
2799 struct intel_mipmap_tree *mt)
2800 {
2801 assert(brw->gen >= 7);
2802 struct intel_mipmap_tree *src =
2803 mt->format == MESA_FORMAT_S_UINT8 ? mt : mt->stencil_mt;
2804 if (!src || brw->gen >= 8 || !src->r8stencil_needs_update)
2805 return;
2806
2807 assert(src->surf.size > 0);
2808
2809 if (!mt->r8stencil_mt) {
2810 assert(brw->gen > 6); /* Handle MIPTREE_LAYOUT_GEN6_HIZ_STENCIL */
2811 mt->r8stencil_mt = make_surface(
2812 brw,
2813 src->target,
2814 MESA_FORMAT_R_UINT8,
2815 src->first_level, src->last_level,
2816 src->surf.logical_level0_px.width,
2817 src->surf.logical_level0_px.height,
2818 src->surf.dim == ISL_SURF_DIM_3D ?
2819 src->surf.logical_level0_px.depth :
2820 src->surf.logical_level0_px.array_len,
2821 src->surf.samples,
2822 ISL_TILING_Y0_BIT,
2823 ISL_SURF_USAGE_TEXTURE_BIT,
2824 BO_ALLOC_FOR_RENDER, 0, NULL);
2825 assert(mt->r8stencil_mt);
2826 }
2827
2828 struct intel_mipmap_tree *dst = mt->r8stencil_mt;
2829
2830 for (int level = src->first_level; level <= src->last_level; level++) {
2831 const unsigned depth = src->surf.dim == ISL_SURF_DIM_3D ?
2832 minify(src->surf.phys_level0_sa.depth, level) :
2833 src->surf.phys_level0_sa.array_len;
2834
2835 for (unsigned layer = 0; layer < depth; layer++) {
2836 brw_blorp_copy_miptrees(brw,
2837 src, level, layer,
2838 dst, level, layer,
2839 0, 0, 0, 0,
2840 minify(src->surf.logical_level0_px.width,
2841 level),
2842 minify(src->surf.logical_level0_px.height,
2843 level));
2844 }
2845 }
2846
2847 brw_render_cache_set_check_flush(brw, dst->bo);
2848 src->r8stencil_needs_update = false;
2849 }
2850
2851 static void *
2852 intel_miptree_map_raw(struct brw_context *brw,
2853 struct intel_mipmap_tree *mt,
2854 GLbitfield mode)
2855 {
2856 struct brw_bo *bo = mt->bo;
2857
2858 if (brw_batch_references(&brw->batch, bo))
2859 intel_batchbuffer_flush(brw);
2860
2861 return brw_bo_map(brw, bo, mode);
2862 }
2863
2864 static void
2865 intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
2866 {
2867 brw_bo_unmap(mt->bo);
2868 }
2869
2870 static void
2871 intel_miptree_map_gtt(struct brw_context *brw,
2872 struct intel_mipmap_tree *mt,
2873 struct intel_miptree_map *map,
2874 unsigned int level, unsigned int slice)
2875 {
2876 unsigned int bw, bh;
2877 void *base;
2878 unsigned int image_x, image_y;
2879 intptr_t x = map->x;
2880 intptr_t y = map->y;
2881
2882 /* For compressed formats, the stride is the number of bytes per
2883 * row of blocks. intel_miptree_get_image_offset() already does
2884 * the divide.
2885 */
2886 _mesa_get_format_block_size(mt->format, &bw, &bh);
2887 assert(y % bh == 0);
2888 assert(x % bw == 0);
2889 y /= bh;
2890 x /= bw;
2891
2892 base = intel_miptree_map_raw(brw, mt, map->mode);
2893
2894 if (base == NULL)
2895 map->ptr = NULL;
2896 else {
2897 base += mt->offset;
2898
2899 /* Note that in the case of cube maps, the caller must have passed the
2900 * slice number referencing the face.
2901 */
2902 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2903 x += image_x;
2904 y += image_y;
2905
2906 map->stride = mt->surf.row_pitch;
2907 map->ptr = base + y * map->stride + x * mt->cpp;
2908 }
2909
2910 DBG("%s: %d,%d %dx%d from mt %p (%s) "
2911 "%"PRIiPTR",%"PRIiPTR" = %p/%d\n", __func__,
2912 map->x, map->y, map->w, map->h,
2913 mt, _mesa_get_format_name(mt->format),
2914 x, y, map->ptr, map->stride);
2915 }
2916
2917 static void
2918 intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
2919 {
2920 intel_miptree_unmap_raw(mt);
2921 }
2922
2923 static void
2924 intel_miptree_map_blit(struct brw_context *brw,
2925 struct intel_mipmap_tree *mt,
2926 struct intel_miptree_map *map,
2927 unsigned int level, unsigned int slice)
2928 {
2929 map->linear_mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
2930 /* first_level */ 0,
2931 /* last_level */ 0,
2932 map->w, map->h, 1,
2933 /* samples */ 1,
2934 MIPTREE_LAYOUT_TILING_NONE);
2935
2936 if (!map->linear_mt) {
2937 fprintf(stderr, "Failed to allocate blit temporary\n");
2938 goto fail;
2939 }
2940 map->stride = map->linear_mt->surf.row_pitch;
2941
2942 /* At least one of READ_BIT or WRITE_BIT is set. READ_BIT implies no
2943 * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
2944 * invalidate is set, since we'll be writing the whole rectangle from our
2945 * temporary buffer back out.
2946 */
2947 if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
2948 if (!intel_miptree_copy(brw,
2949 mt, level, slice, map->x, map->y,
2950 map->linear_mt, 0, 0, 0, 0,
2951 map->w, map->h)) {
2952 fprintf(stderr, "Failed to blit\n");
2953 goto fail;
2954 }
2955 }
2956
2957 map->ptr = intel_miptree_map_raw(brw, map->linear_mt, map->mode);
2958
2959 DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
2960 map->x, map->y, map->w, map->h,
2961 mt, _mesa_get_format_name(mt->format),
2962 level, slice, map->ptr, map->stride);
2963
2964 return;
2965
2966 fail:
2967 intel_miptree_release(&map->linear_mt);
2968 map->ptr = NULL;
2969 map->stride = 0;
2970 }
2971
2972 static void
2973 intel_miptree_unmap_blit(struct brw_context *brw,
2974 struct intel_mipmap_tree *mt,
2975 struct intel_miptree_map *map,
2976 unsigned int level,
2977 unsigned int slice)
2978 {
2979 struct gl_context *ctx = &brw->ctx;
2980
2981 intel_miptree_unmap_raw(map->linear_mt);
2982
2983 if (map->mode & GL_MAP_WRITE_BIT) {
2984 bool ok = intel_miptree_copy(brw,
2985 map->linear_mt, 0, 0, 0, 0,
2986 mt, level, slice, map->x, map->y,
2987 map->w, map->h);
2988 WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
2989 }
2990
2991 intel_miptree_release(&map->linear_mt);
2992 }
2993
2994 /**
2995 * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
2996 */
2997 #if defined(USE_SSE41)
2998 static void
2999 intel_miptree_map_movntdqa(struct brw_context *brw,
3000 struct intel_mipmap_tree *mt,
3001 struct intel_miptree_map *map,
3002 unsigned int level, unsigned int slice)
3003 {
3004 assert(map->mode & GL_MAP_READ_BIT);
3005 assert(!(map->mode & GL_MAP_WRITE_BIT));
3006
3007 DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
3008 map->x, map->y, map->w, map->h,
3009 mt, _mesa_get_format_name(mt->format),
3010 level, slice, map->ptr, map->stride);
3011
3012 /* Map the original image */
3013 uint32_t image_x;
3014 uint32_t image_y;
3015 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3016 image_x += map->x;
3017 image_y += map->y;
3018
3019 void *src = intel_miptree_map_raw(brw, mt, map->mode);
3020 if (!src)
3021 return;
3022
3023 src += mt->offset;
3024
3025 src += image_y * mt->surf.row_pitch;
3026 src += image_x * mt->cpp;
3027
3028 /* Due to the pixel offsets for the particular image being mapped, our
3029 * src pointer may not be 16-byte aligned. However, if the pitch is
3030 * divisible by 16, then the amount by which it's misaligned will remain
3031 * consistent from row to row.
3032 */
3033 assert((mt->surf.row_pitch % 16) == 0);
3034 const int misalignment = ((uintptr_t) src) & 15;
3035
3036 /* Create an untiled temporary buffer for the mapping. */
3037 const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w);
3038
3039 map->stride = ALIGN(misalignment + width_bytes, 16);
3040
3041 map->buffer = _mesa_align_malloc(map->stride * map->h, 16);
3042 /* Offset the destination so it has the same misalignment as src. */
3043 map->ptr = map->buffer + misalignment;
3044
3045 assert((((uintptr_t) map->ptr) & 15) == misalignment);
3046
3047 for (uint32_t y = 0; y < map->h; y++) {
3048 void *dst_ptr = map->ptr + y * map->stride;
3049 void *src_ptr = src + y * mt->surf.row_pitch;
3050
3051 _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
3052 }
3053
3054 intel_miptree_unmap_raw(mt);
3055 }
3056
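/* Editorial example of the misalignment handling above: with cpp = 4 and an
 * effective image origin of x = 35, src starts 140 bytes into its row, and
 * 140 & 15 = 12. A 64-pixel-wide map then gets
 * stride = ALIGN(12 + 256, 16) = 272 and map->ptr = map->buffer + 12, so
 * each destination row is misaligned by exactly the same 12 bytes as its
 * source row and the streaming copy stays consistent row to row.
 */
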
3057 static void
3058 intel_miptree_unmap_movntdqa(struct brw_context *brw,
3059 struct intel_mipmap_tree *mt,
3060 struct intel_miptree_map *map,
3061 unsigned int level,
3062 unsigned int slice)
3063 {
3064 _mesa_align_free(map->buffer);
3065 map->buffer = NULL;
3066 map->ptr = NULL;
3067 }
3068 #endif
3069
3070 static void
3071 intel_miptree_map_s8(struct brw_context *brw,
3072 struct intel_mipmap_tree *mt,
3073 struct intel_miptree_map *map,
3074 unsigned int level, unsigned int slice)
3075 {
3076 map->stride = map->w;
3077 map->buffer = map->ptr = malloc(map->stride * map->h);
3078 if (!map->buffer)
3079 return;
3080
3081 /* At least one of READ_BIT or WRITE_BIT is set. READ_BIT implies no
3082 * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
3083 * invalidate is set, since we'll be writing the whole rectangle from our
3084 * temporary buffer back out.
3085 */
3086 if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
3087 uint8_t *untiled_s8_map = map->ptr;
3088 uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_READ_BIT);
3089 unsigned int image_x, image_y;
3090
3091 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3092
3093 for (uint32_t y = 0; y < map->h; y++) {
3094 for (uint32_t x = 0; x < map->w; x++) {
3095 ptrdiff_t offset = intel_offset_S8(mt->surf.row_pitch,
3096 x + image_x + map->x,
3097 y + image_y + map->y,
3098 brw->has_swizzling);
3099 untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
3100 }
3101 }
3102
3103 intel_miptree_unmap_raw(mt);
3104
3105 DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__,
3106 map->x, map->y, map->w, map->h,
3107 mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
3108 } else {
3109 DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
3110 map->x, map->y, map->w, map->h,
3111 mt, map->ptr, map->stride);
3112 }
3113 }
3114
3115 static void
3116 intel_miptree_unmap_s8(struct brw_context *brw,
3117 struct intel_mipmap_tree *mt,
3118 struct intel_miptree_map *map,
3119 unsigned int level,
3120 unsigned int slice)
3121 {
3122 if (map->mode & GL_MAP_WRITE_BIT) {
3123 unsigned int image_x, image_y;
3124 uint8_t *untiled_s8_map = map->ptr;
3125 uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT);
3126
3127 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3128
3129 for (uint32_t y = 0; y < map->h; y++) {
3130 for (uint32_t x = 0; x < map->w; x++) {
3131 ptrdiff_t offset = intel_offset_S8(mt->surf.row_pitch,
3132 image_x + x + map->x,
3133 image_y + y + map->y,
3134 brw->has_swizzling);
3135 tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
3136 }
3137 }
3138
3139 intel_miptree_unmap_raw(mt);
3140 }
3141
3142 free(map->buffer);
3143 }
3144
3145 static void
3146 intel_miptree_map_etc(struct brw_context *brw,
3147 struct intel_mipmap_tree *mt,
3148 struct intel_miptree_map *map,
3149 unsigned int level,
3150 unsigned int slice)
3151 {
3152 assert(mt->etc_format != MESA_FORMAT_NONE);
3153 if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
3154 assert(mt->format == MESA_FORMAT_R8G8B8X8_UNORM);
3155 }
3156
3157 assert(map->mode & GL_MAP_WRITE_BIT);
3158 assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
3159
3160 map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
3161 map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
3162 map->w, map->h, 1));
3163 map->ptr = map->buffer;
3164 }
3165
3166 static void
3167 intel_miptree_unmap_etc(struct brw_context *brw,
3168 struct intel_mipmap_tree *mt,
3169 struct intel_miptree_map *map,
3170 unsigned int level,
3171 unsigned int slice)
3172 {
3173 uint32_t image_x;
3174 uint32_t image_y;
3175 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3176
3177 image_x += map->x;
3178 image_y += map->y;
3179
3180 uint8_t *dst = intel_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT)
3181 + image_y * mt->surf.row_pitch
3182 + image_x * mt->cpp;
3183
3184 if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
3185 _mesa_etc1_unpack_rgba8888(dst, mt->surf.row_pitch,
3186 map->ptr, map->stride,
3187 map->w, map->h);
3188 else
3189 _mesa_unpack_etc2_format(dst, mt->surf.row_pitch,
3190 map->ptr, map->stride,
3191 map->w, map->h, mt->etc_format);
3192
3193 intel_miptree_unmap_raw(mt);
3194 free(map->buffer);
3195 }
3196
3197 /**
3198 * Mapping function for packed depth/stencil miptrees backed by real separate
3199 * miptrees for depth and stencil.
3200 *
3201 * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
3202 * separate from the depth buffer. Yet at the GL API level, we have to expose
3203 * packed depth/stencil textures and FBO attachments, and Mesa core expects to
3204 * be able to map that memory for texture storage and glReadPixels-type
3205 * operations. We give Mesa core that access by mallocing a temporary and
3206 * copying the data between the actual backing store and the temporary.
3207 */
3208 static void
3209 intel_miptree_map_depthstencil(struct brw_context *brw,
3210 struct intel_mipmap_tree *mt,
3211 struct intel_miptree_map *map,
3212 unsigned int level, unsigned int slice)
3213 {
3214 struct intel_mipmap_tree *z_mt = mt;
3215 struct intel_mipmap_tree *s_mt = mt->stencil_mt;
3216 bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
3217 int packed_bpp = map_z32f_x24s8 ? 8 : 4;
3218
3219 map->stride = map->w * packed_bpp;
3220 map->buffer = map->ptr = malloc(map->stride * map->h);
3221 if (!map->buffer)
3222 return;
3223
3224 /* At least one of READ_BIT or WRITE_BIT is set. READ_BIT implies no
3225 * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
3226 * invalidate is set, since we'll be writing the whole rectangle from our
3227 * temporary buffer back out.
3228 */
3229 if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
3230 uint32_t *packed_map = map->ptr;
3231 uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_READ_BIT);
3232 uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_READ_BIT);
3233 unsigned int s_image_x, s_image_y;
3234 unsigned int z_image_x, z_image_y;
3235
3236 intel_miptree_get_image_offset(s_mt, level, slice,
3237 &s_image_x, &s_image_y);
3238 intel_miptree_get_image_offset(z_mt, level, slice,
3239 &z_image_x, &z_image_y);
3240
3241 for (uint32_t y = 0; y < map->h; y++) {
3242 for (uint32_t x = 0; x < map->w; x++) {
3243 int map_x = map->x + x, map_y = map->y + y;
3244 ptrdiff_t s_offset = intel_offset_S8(s_mt->surf.row_pitch,
3245 map_x + s_image_x,
3246 map_y + s_image_y,
3247 brw->has_swizzling);
3248 ptrdiff_t z_offset = ((map_y + z_image_y) *
3249 (z_mt->surf.row_pitch / 4) +
3250 (map_x + z_image_x));
3251 uint8_t s = s_map[s_offset];
3252 uint32_t z = z_map[z_offset];
3253
3254 if (map_z32f_x24s8) {
3255 packed_map[(y * map->w + x) * 2 + 0] = z;
3256 packed_map[(y * map->w + x) * 2 + 1] = s;
3257 } else {
3258 packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
3259 }
3260 }
3261 }
3262
3263 intel_miptree_unmap_raw(s_mt);
3264 intel_miptree_unmap_raw(z_mt);
3265
3266 DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
3267 __func__,
3268 map->x, map->y, map->w, map->h,
3269 z_mt, map->x + z_image_x, map->y + z_image_y,
3270 s_mt, map->x + s_image_x, map->y + s_image_y,
3271 map->ptr, map->stride);
3272 } else {
3273 DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
3274 map->x, map->y, map->w, map->h,
3275 mt, map->ptr, map->stride);
3276 }
3277 }
3278
3279 static void
3280 intel_miptree_unmap_depthstencil(struct brw_context *brw,
3281 struct intel_mipmap_tree *mt,
3282 struct intel_miptree_map *map,
3283 unsigned int level,
3284 unsigned int slice)
3285 {
3286 struct intel_mipmap_tree *z_mt = mt;
3287 struct intel_mipmap_tree *s_mt = mt->stencil_mt;
3288 bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
3289
3290 if (map->mode & GL_MAP_WRITE_BIT) {
3291 uint32_t *packed_map = map->ptr;
3292 uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_WRITE_BIT);
3293 uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_WRITE_BIT);
3294 unsigned int s_image_x, s_image_y;
3295 unsigned int z_image_x, z_image_y;
3296
3297 intel_miptree_get_image_offset(s_mt, level, slice,
3298 &s_image_x, &s_image_y);
3299 intel_miptree_get_image_offset(z_mt, level, slice,
3300 &z_image_x, &z_image_y);
3301
3302 for (uint32_t y = 0; y < map->h; y++) {
3303 for (uint32_t x = 0; x < map->w; x++) {
3304 ptrdiff_t s_offset = intel_offset_S8(s_mt->surf.row_pitch,
3305 x + s_image_x + map->x,
3306 y + s_image_y + map->y,
3307 brw->has_swizzling);
3308 ptrdiff_t z_offset = ((y + z_image_y + map->y) *
3309 (z_mt->surf.row_pitch / 4) +
3310 (x + z_image_x + map->x));
3311
3312 if (map_z32f_x24s8) {
3313 z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
3314 s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
3315 } else {
3316 uint32_t packed = packed_map[y * map->w + x];
3317 s_map[s_offset] = packed >> 24;
3318 z_map[z_offset] = packed;
3319 }
3320 }
3321 }
3322
3323 intel_miptree_unmap_raw(s_mt);
3324 intel_miptree_unmap_raw(z_mt);
3325
3326 DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
3327 __func__,
3328 map->x, map->y, map->w, map->h,
3329 z_mt, _mesa_get_format_name(z_mt->format),
3330 map->x + z_image_x, map->y + z_image_y,
3331 s_mt, map->x + s_image_x, map->y + s_image_y,
3332 map->ptr, map->stride);
3333 }
3334
3335 free(map->buffer);
3336 }
3337
3338 /**
3339 * Create and attach a map to the miptree at (level, slice). Return the
3340 * attached map.
3341 */
3342 static struct intel_miptree_map*
3343 intel_miptree_attach_map(struct intel_mipmap_tree *mt,
3344 unsigned int level,
3345 unsigned int slice,
3346 unsigned int x,
3347 unsigned int y,
3348 unsigned int w,
3349 unsigned int h,
3350 GLbitfield mode)
3351 {
3352 struct intel_miptree_map *map = calloc(1, sizeof(*map));
3353
3354 if (!map)
3355 return NULL;
3356
3357 assert(mt->level[level].slice[slice].map == NULL);
3358 mt->level[level].slice[slice].map = map;
3359
3360 map->mode = mode;
3361 map->x = x;
3362 map->y = y;
3363 map->w = w;
3364 map->h = h;
3365
3366 return map;
3367 }
3368
3369 /**
3370 * Release the map at (level, slice).
3371 */
3372 static void
3373 intel_miptree_release_map(struct intel_mipmap_tree *mt,
3374 unsigned int level,
3375 unsigned int slice)
3376 {
3377 struct intel_miptree_map **map;
3378
3379 map = &mt->level[level].slice[slice].map;
3380 free(*map);
3381 *map = NULL;
3382 }
3383
3384 static bool
3385 can_blit_slice(struct intel_mipmap_tree *mt,
3386 unsigned int level, unsigned int slice)
3387 {
3388 /* See intel_miptree_blit() for details on the 32k pitch limit. */
3389 if (mt->surf.row_pitch >= 32768)
3390 return false;
3391
3392 return true;
3393 }
3394
3395 static bool
3396 use_intel_miptree_map_blit(struct brw_context *brw,
3397 struct intel_mipmap_tree *mt,
3398 GLbitfield mode,
3399 unsigned int level,
3400 unsigned int slice)
3401 {
3402 if (brw->has_llc &&
3403 /* It's probably not worth swapping to the blit ring because of
3404 * all the overhead involved.
3405 */
3406 !(mode & GL_MAP_WRITE_BIT) &&
3407 !mt->compressed &&
3408 (mt->surf.tiling == ISL_TILING_X ||
3409 /* Prior to Sandybridge, the blitter can't handle Y tiling */
3410 (brw->gen >= 6 && mt->surf.tiling == ISL_TILING_Y0) ||
3411 /* Fast copy blit on skl+ supports all tiling formats. */
3412 brw->gen >= 9) &&
3413 can_blit_slice(mt, level, slice))
3414 return true;
3415
3416 if (mt->surf.tiling != ISL_TILING_LINEAR &&
3417 mt->bo->size >= brw->max_gtt_map_object_size) {
3418 assert(can_blit_slice(mt, level, slice));
3419 return true;
3420 }
3421
3422 return false;
3423 }
3424
3425 /**
3426 * Parameter \a out_stride has type ptrdiff_t not because the buffer stride may
3427 * exceed 32 bits but to diminish the likelihood of subtle bugs caused by
3428 * overflow in pointer arithmetic.
3429 *
3430 * If you call this function and use \a out_stride, then you're doing pointer
3431 * arithmetic on \a out_ptr. The type of \a out_stride doesn't prevent all
3432 * bugs. The caller must still take care to avoid 32-bit overflow errors in
3433 * all arithmetic expressions that contain buffer offsets and pixel sizes,
3434 * which usually have type uint32_t or GLuint.
3435 */
3436 void
3437 intel_miptree_map(struct brw_context *brw,
3438 struct intel_mipmap_tree *mt,
3439 unsigned int level,
3440 unsigned int slice,
3441 unsigned int x,
3442 unsigned int y,
3443 unsigned int w,
3444 unsigned int h,
3445 GLbitfield mode,
3446 void **out_ptr,
3447 ptrdiff_t *out_stride)
3448 {
3449 struct intel_miptree_map *map;
3450
3451 assert(mt->surf.samples == 1);
3452
3453 map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
3454 if (!map) {
3455 *out_ptr = NULL;
3456 *out_stride = 0;
3457 return;
3458 }
3459
3460 intel_miptree_access_raw(brw, mt, level, slice,
3461 map->mode & GL_MAP_WRITE_BIT);
3462
3463 if (mt->format == MESA_FORMAT_S_UINT8) {
3464 intel_miptree_map_s8(brw, mt, map, level, slice);
3465 } else if (mt->etc_format != MESA_FORMAT_NONE &&
3466 !(mode & BRW_MAP_DIRECT_BIT)) {
3467 intel_miptree_map_etc(brw, mt, map, level, slice);
3468 } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
3469 intel_miptree_map_depthstencil(brw, mt, map, level, slice);
3470 } else if (use_intel_miptree_map_blit(brw, mt, mode, level, slice)) {
3471 intel_miptree_map_blit(brw, mt, map, level, slice);
3472 #if defined(USE_SSE41)
3473 } else if (!(mode & GL_MAP_WRITE_BIT) &&
3474 !mt->compressed && cpu_has_sse4_1 &&
3475 (mt->surf.row_pitch % 16 == 0)) {
3476 intel_miptree_map_movntdqa(brw, mt, map, level, slice);
3477 #endif
3478 } else {
3479 intel_miptree_map_gtt(brw, mt, map, level, slice);
3480 }
3481
3482 *out_ptr = map->ptr;
3483 *out_stride = map->stride;
3484
3485 if (map->ptr == NULL)
3486 intel_miptree_release_map(mt, level, slice);
3487 }
3488
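/* Editorial usage sketch, assuming a simple RGBA8 2D miptree and
 * caller-provided src/src_stride (error handling elided):
 *
 *    void *ptr;
 *    ptrdiff_t stride;
 *    intel_miptree_map(brw, mt, level, slice, x, y, w, h,
 *                      GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,
 *                      &ptr, &stride);
 *    if (ptr) {
 *       for (unsigned row = 0; row < h; row++)
 *          memcpy((char *)ptr + row * stride,
 *                 src + row * src_stride, w * 4);
 *       intel_miptree_unmap(brw, mt, level, slice);
 *    }
 */
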
3489 void
3490 intel_miptree_unmap(struct brw_context *brw,
3491 struct intel_mipmap_tree *mt,
3492 unsigned int level,
3493 unsigned int slice)
3494 {
3495 struct intel_miptree_map *map = mt->level[level].slice[slice].map;
3496
3497 assert(mt->surf.samples == 1);
3498
3499 if (!map)
3500 return;
3501
3502 DBG("%s: mt %p (%s) level %d slice %d\n", __func__,
3503 mt, _mesa_get_format_name(mt->format), level, slice);
3504
3505 if (mt->format == MESA_FORMAT_S_UINT8) {
3506 intel_miptree_unmap_s8(brw, mt, map, level, slice);
3507 } else if (mt->etc_format != MESA_FORMAT_NONE &&
3508 !(map->mode & BRW_MAP_DIRECT_BIT)) {
3509 intel_miptree_unmap_etc(brw, mt, map, level, slice);
3510 } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
3511 intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
3512 } else if (map->linear_mt) {
3513 intel_miptree_unmap_blit(brw, mt, map, level, slice);
3514 #if defined(USE_SSE41)
3515 } else if (map->buffer && cpu_has_sse4_1) {
3516 intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
3517 #endif
3518 } else {
3519 intel_miptree_unmap_gtt(mt);
3520 }
3521
3522 intel_miptree_release_map(mt, level, slice);
3523 }
3524
3525 enum isl_surf_dim
3526 get_isl_surf_dim(GLenum target)
3527 {
3528 switch (target) {
3529 case GL_TEXTURE_1D:
3530 case GL_TEXTURE_1D_ARRAY:
3531 return ISL_SURF_DIM_1D;
3532
3533 case GL_TEXTURE_2D:
3534 case GL_TEXTURE_2D_ARRAY:
3535 case GL_TEXTURE_RECTANGLE:
3536 case GL_TEXTURE_CUBE_MAP:
3537 case GL_TEXTURE_CUBE_MAP_ARRAY:
3538 case GL_TEXTURE_2D_MULTISAMPLE:
3539 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
3540 case GL_TEXTURE_EXTERNAL_OES:
3541 return ISL_SURF_DIM_2D;
3542
3543 case GL_TEXTURE_3D:
3544 return ISL_SURF_DIM_3D;
3545 }
3546
3547 unreachable("Invalid texture target");
3548 }
3549
3550 enum isl_dim_layout
3551 get_isl_dim_layout(const struct gen_device_info *devinfo,
3552 enum isl_tiling tiling, GLenum target)
3553 {
3554 switch (target) {
3555 case GL_TEXTURE_1D:
3556 case GL_TEXTURE_1D_ARRAY:
3557 return (devinfo->gen >= 9 && tiling == ISL_TILING_LINEAR ?
3558 ISL_DIM_LAYOUT_GEN9_1D : ISL_DIM_LAYOUT_GEN4_2D);
3559
3560 case GL_TEXTURE_2D:
3561 case GL_TEXTURE_2D_ARRAY:
3562 case GL_TEXTURE_RECTANGLE:
3563 case GL_TEXTURE_2D_MULTISAMPLE:
3564 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
3565 case GL_TEXTURE_EXTERNAL_OES:
3566 return ISL_DIM_LAYOUT_GEN4_2D;
3567
3568 case GL_TEXTURE_CUBE_MAP:
3569 case GL_TEXTURE_CUBE_MAP_ARRAY:
3570 return (devinfo->gen == 4 ? ISL_DIM_LAYOUT_GEN4_3D :
3571 ISL_DIM_LAYOUT_GEN4_2D);
3572
3573 case GL_TEXTURE_3D:
3574 return (devinfo->gen >= 9 ?
3575 ISL_DIM_LAYOUT_GEN4_2D : ISL_DIM_LAYOUT_GEN4_3D);
3576 }
3577
3578 unreachable("Invalid texture target");
3579 }
3580
3581 enum isl_aux_usage
3582 intel_miptree_get_aux_isl_usage(const struct brw_context *brw,
3583 const struct intel_mipmap_tree *mt)
3584 {
3585 if (mt->hiz_buf)
3586 return ISL_AUX_USAGE_HIZ;
3587
3588 if (!mt->mcs_buf)
3589 return ISL_AUX_USAGE_NONE;
3590
3591 return mt->aux_usage;
3592 }