i965/miptree: Add support for partially resolving MCS
[mesa.git] src/mesa/drivers/dri/i965/intel_mipmap_tree.c
/*
 * Copyright 2006 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <GL/gl.h>
#include <GL/internal/dri_interface.h>

#include "intel_batchbuffer.h"
#include "intel_image.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
#include "intel_blit.h"
#include "intel_fbo.h"

#include "brw_blorp.h"
#include "brw_context.h"
#include "brw_state.h"

#include "main/enums.h"
#include "main/fbobject.h"
#include "main/formats.h"
#include "main/glformats.h"
#include "main/texcompress_etc.h"
#include "main/teximage.h"
#include "main/streaming-load-memcpy.h"
#include "x86/common_x86_asm.h"

#define FILE_DEBUG_FLAG DEBUG_MIPTREE

static void *intel_miptree_map_raw(struct brw_context *brw,
                                   struct intel_mipmap_tree *mt,
                                   GLbitfield mode);

static void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt);

static bool
intel_miptree_alloc_aux(struct brw_context *brw,
                        struct intel_mipmap_tree *mt);

static bool
is_mcs_supported(const struct brw_context *brw, mesa_format format,
                 uint32_t layout_flags)
{
   /* Prior to Gen7, all MSAA surfaces used IMS layout. */
   if (brw->gen < 7)
      return false;

   /* In Gen7, IMS layout is only used for depth and stencil buffers. */
   switch (_mesa_get_format_base_format(format)) {
   case GL_DEPTH_COMPONENT:
   case GL_STENCIL_INDEX:
   case GL_DEPTH_STENCIL:
      return false;
   default:
      /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
       *
       *   This field must be set to 0 for all SINT MSRTs when all RT channels
       *   are not written
       *
       * In practice this means that we have to disable MCS for all signed
       * integer MSAA buffers. The alternative, to disable MCS only when one
       * of the render target channels is disabled, is impractical because it
       * would require converting between CMS and UMS MSAA layouts on the fly,
       * which is expensive.
       */
      if (brw->gen == 7 && _mesa_get_format_datatype(format) == GL_INT) {
         return false;
      } else if (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) {
         /* We can't use the CMS layout because it uses an aux buffer, the MCS
          * buffer. So fallback to UMS, which is identical to CMS without the
          * MCS. */
         return false;
      } else {
         return true;
      }
   }
}

static bool
intel_tiling_supports_ccs(const struct brw_context *brw,
                          enum isl_tiling tiling)
{
   /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
    * Target(s)", beneath the "Fast Color Clear" bullet (p326):
    *
    *     - Support is limited to tiled render targets.
    *
    * Gen9 changes the restriction to Y-tile only.
    */
   if (brw->gen >= 9)
      return tiling == ISL_TILING_Y0;
   else if (brw->gen >= 7)
      return tiling != ISL_TILING_LINEAR;
   else
      return false;
}

/**
 * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
 * can be used. This doesn't (and should not) inspect any of the properties of
 * the miptree's BO.
 *
 * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 * beneath the "Fast Color Clear" bullet (p326):
 *
 *     - Support is for non-mip-mapped and non-array surface types only.
 *
 * And then later, on p327:
 *
 *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
 *       64bpp, and 128bpp.
 *
 * From the Skylake documentation, it is made clear that X-tiling is no longer
 * supported:
 *
 *     - MCS and Lossless compression is supported for TiledY/TileYs/TileYf
 *       non-MSRTs only.
 */
static bool
intel_miptree_supports_ccs(struct brw_context *brw,
                           const struct intel_mipmap_tree *mt)
{
   /* MCS support does not exist prior to Gen7 */
   if (brw->gen < 7)
      return false;

   /* This function applies only to non-multisampled render targets. */
   if (mt->surf.samples > 1)
      return false;

   /* MCS is only supported for color buffers */
   switch (_mesa_get_format_base_format(mt->format)) {
   case GL_DEPTH_COMPONENT:
   case GL_DEPTH_STENCIL:
   case GL_STENCIL_INDEX:
      return false;
   }

   if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
      return false;

   const bool mip_mapped = mt->first_level != 0 || mt->last_level != 0;
   const bool arrayed = mt->surf.logical_level0_px.array_len > 1 ||
                        mt->surf.logical_level0_px.depth > 1;

   if (arrayed) {
      /* Multisample surfaces with the CMS layout are not layered surfaces,
       * yet still have physical_depth0 > 1. Assert that we don't
       * accidentally reject a multisampled surface here. We should have
       * rejected it earlier by explicitly checking the sample count.
       */
      assert(mt->surf.samples == 1);
   }

   /* Handle the hardware restrictions...
    *
    * All GENs have the following restriction: "MCS buffer for non-MSRT is
    * supported only for RT formats 32bpp, 64bpp, and 128bpp."
    *
    * From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652: (Color Clear of
    * Non-MultiSampler Render Target Restrictions) Support is for
    * non-mip-mapped and non-array surface types only.
    *
    * From the BDW PRM Volume 7: 3D-Media-GPGPU, page 649: (Color Clear of
    * Non-MultiSampler Render Target Restriction). Mip-mapped and arrayed
    * surfaces are supported with MCS buffer layout with these alignments in
    * the RT space: Horizontal Alignment = 256 and Vertical Alignment = 128.
    *
    * From the SKL PRM Volume 7: 3D-Media-GPGPU, page 632: (Color Clear of
    * Non-MultiSampler Render Target Restriction). Mip-mapped and arrayed
    * surfaces are supported with MCS buffer layout with these alignments in
    * the RT space: Horizontal Alignment = 128 and Vertical Alignment = 64.
    */
   if (brw->gen < 8 && (mip_mapped || arrayed))
      return false;

   /* There's no point in using an MCS buffer if the surface isn't in a
    * renderable format.
    */
   if (!brw->mesa_format_supports_render[mt->format])
      return false;

   if (brw->gen >= 9) {
      mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
      const enum isl_format isl_format =
         brw_isl_format_for_mesa_format(linear_format);
      return isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format);
   } else
      return true;
}

static bool
intel_tiling_supports_hiz(const struct brw_context *brw,
                          enum isl_tiling tiling)
{
   if (brw->gen < 6)
      return false;

   return tiling == ISL_TILING_Y0;
}

static bool
intel_miptree_supports_hiz(const struct brw_context *brw,
                           const struct intel_mipmap_tree *mt)
{
   if (!brw->has_hiz)
      return false;

   switch (mt->format) {
   case MESA_FORMAT_Z_FLOAT32:
   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
   case MESA_FORMAT_Z24_UNORM_X8_UINT:
   case MESA_FORMAT_Z24_UNORM_S8_UINT:
   case MESA_FORMAT_Z_UNORM16:
      return true;
   default:
      return false;
   }
}

static bool
intel_miptree_supports_ccs_e(struct brw_context *brw,
                             const struct intel_mipmap_tree *mt)
{
   if (brw->gen < 9)
      return false;

   /* For now compression is only enabled for integer formats even though
    * supported floating point formats exist as well. This is a heuristic
    * decision based on current public benchmarks: in none of the cases did
    * these formats provide any improvement, and a few cases were seen to
    * regress. Hence these are left to be enabled in the future when they
    * are known to improve things.
    */
   if (_mesa_get_format_datatype(mt->format) == GL_FLOAT)
      return false;

   if (!intel_miptree_supports_ccs(brw, mt))
      return false;

   /* Fast clear can also be used to clear srgb surfaces by using the
    * equivalent linear format. This trick, however, can't be extended to
    * lossless compression, and therefore a check is needed to see if the
    * format really is linear.
    */
   return _mesa_get_srgb_format_linear(mt->format) == mt->format;
}

/**
 * Determine depth format corresponding to a depth+stencil format,
 * for separate stencil.
 */
mesa_format
intel_depth_format_for_depthstencil_format(mesa_format format) {
   switch (format) {
   case MESA_FORMAT_Z24_UNORM_S8_UINT:
      return MESA_FORMAT_Z24_UNORM_X8_UINT;
   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
      return MESA_FORMAT_Z_FLOAT32;
   default:
      return format;
   }
}
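
/* For example (restating the mapping above, not new behavior): a
 * MESA_FORMAT_Z24_UNORM_S8_UINT texture is stored as a
 * MESA_FORMAT_Z24_UNORM_X8_UINT depth-only miptree plus a separate
 * MESA_FORMAT_S_UINT8 stencil miptree; see make_separate_stencil_surface().
 */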

static bool
create_mapping_table(GLenum target, unsigned first_level, unsigned last_level,
                     unsigned depth0, struct intel_mipmap_level *table)
{
   for (unsigned level = first_level; level <= last_level; level++) {
      const unsigned d =
         target == GL_TEXTURE_3D ? minify(depth0, level) : depth0;

      table[level].slice = calloc(d, sizeof(*table[0].slice));
      if (!table[level].slice)
         goto unwind;
   }

   return true;

unwind:
   for (unsigned level = first_level; level <= last_level; level++)
      free(table[level].slice);

   return false;
}

static bool
needs_separate_stencil(const struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       mesa_format format, uint32_t layout_flags)
{
   if (layout_flags & MIPTREE_LAYOUT_FOR_BO)
      return false;

   if (_mesa_get_format_base_format(format) != GL_DEPTH_STENCIL)
      return false;

   if (brw->must_use_separate_stencil)
      return true;

   return brw->has_separate_stencil &&
          intel_miptree_supports_hiz(brw, mt);
}

/**
 * Choose the aux usage for this miptree. This function must be called fairly
 * late in the miptree create process after we have a tiling.
 */
static void
intel_miptree_choose_aux_usage(struct brw_context *brw,
                               struct intel_mipmap_tree *mt)
{
   assert(mt->aux_usage == ISL_AUX_USAGE_NONE);

   const unsigned no_flags = 0;
   if (mt->surf.samples > 1 && is_mcs_supported(brw, mt->format, no_flags)) {
      assert(mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
      mt->aux_usage = ISL_AUX_USAGE_MCS;
   } else if (intel_tiling_supports_ccs(brw, mt->surf.tiling) &&
              intel_miptree_supports_ccs(brw, mt)) {
      if (!unlikely(INTEL_DEBUG & DEBUG_NO_RBC) &&
          intel_miptree_supports_ccs_e(brw, mt)) {
         mt->aux_usage = ISL_AUX_USAGE_CCS_E;
      } else {
         mt->aux_usage = ISL_AUX_USAGE_CCS_D;
      }
   } else if (intel_tiling_supports_hiz(brw, mt->surf.tiling) &&
              intel_miptree_supports_hiz(brw, mt)) {
      mt->aux_usage = ISL_AUX_USAGE_HIZ;
   }

   /* We can do fast-clear on all auxiliary surface types that are
    * allocated through the normal texture creation paths.
    */
   if (mt->aux_usage != ISL_AUX_USAGE_NONE)
      mt->supports_fast_clear = true;
}


/**
 * Choose an appropriate uncompressed format for a requested
 * compressed format, if unsupported.
 */
mesa_format
intel_lower_compressed_format(struct brw_context *brw, mesa_format format)
{
   /* No need to lower ETC formats on these platforms,
    * they are supported natively.
    */
   if (brw->gen >= 8 || brw->is_baytrail)
      return format;

   switch (format) {
   case MESA_FORMAT_ETC1_RGB8:
      return MESA_FORMAT_R8G8B8X8_UNORM;
   case MESA_FORMAT_ETC2_RGB8:
      return MESA_FORMAT_R8G8B8X8_UNORM;
   case MESA_FORMAT_ETC2_SRGB8:
   case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
   case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
      return MESA_FORMAT_B8G8R8A8_SRGB;
   case MESA_FORMAT_ETC2_RGBA8_EAC:
   case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
      return MESA_FORMAT_R8G8B8A8_UNORM;
   case MESA_FORMAT_ETC2_R11_EAC:
      return MESA_FORMAT_R_UNORM16;
   case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
      return MESA_FORMAT_R_SNORM16;
   case MESA_FORMAT_ETC2_RG11_EAC:
      return MESA_FORMAT_R16G16_UNORM;
   case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
      return MESA_FORMAT_R16G16_SNORM;
   default:
      /* Non ETC1 / ETC2 format */
      return format;
   }
}

static unsigned
get_num_logical_layers(const struct intel_mipmap_tree *mt, unsigned level)
{
   if (mt->surf.dim == ISL_SURF_DIM_3D)
      return minify(mt->surf.logical_level0_px.depth, level);
   else
      return mt->surf.logical_level0_px.array_len;
}

static unsigned
get_num_phys_layers(const struct isl_surf *surf, unsigned level)
{
   /* For physical dimensions one also needs to consider the layout.
    * See isl_calc_phys_level0_extent_sa().
    */
   if (surf->dim != ISL_SURF_DIM_3D)
      return surf->phys_level0_sa.array_len;

   if (surf->dim_layout == ISL_DIM_LAYOUT_GEN4_2D)
      return minify(surf->phys_level0_sa.array_len, level);

   return minify(surf->phys_level0_sa.depth, level);
}
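
/* A sketch of the logical/physical distinction above, with made-up numbers:
 * a 3D surface with phys_level0_sa.depth == 16 laid out as
 * ISL_DIM_LAYOUT_GEN4_3D has minify(16, 2) == 4 physical layers at level 2,
 * while a 2D array with phys_level0_sa.array_len == 6 keeps all 6 layers at
 * every level.
 */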

/** \brief Assert that the level and layer are valid for the miptree. */
void
intel_miptree_check_level_layer(const struct intel_mipmap_tree *mt,
                                uint32_t level,
                                uint32_t layer)
{
   (void) mt;
   (void) level;
   (void) layer;

   assert(level >= mt->first_level);
   assert(level <= mt->last_level);
   assert(layer < get_num_phys_layers(&mt->surf, level));
}

static enum isl_aux_state **
create_aux_state_map(struct intel_mipmap_tree *mt,
                     enum isl_aux_state initial)
{
   const uint32_t levels = mt->last_level + 1;

   uint32_t total_slices = 0;
   for (uint32_t level = 0; level < levels; level++)
      total_slices += get_num_logical_layers(mt, level);

   const size_t per_level_array_size = levels * sizeof(enum isl_aux_state *);

   /* We're going to allocate a single chunk of data for both the per-level
    * reference array and the arrays of aux_state. This makes cleanup
    * significantly easier.
    */
   const size_t total_size = per_level_array_size +
                             total_slices * sizeof(enum isl_aux_state);
   void *data = malloc(total_size);
   if (data == NULL)
      return NULL;

   enum isl_aux_state **per_level_arr = data;
   enum isl_aux_state *s = data + per_level_array_size;
   for (uint32_t level = 0; level < levels; level++) {
      per_level_arr[level] = s;
      const unsigned level_layers = get_num_logical_layers(mt, level);
      for (uint32_t a = 0; a < level_layers; a++)
         *(s++) = initial;
   }
   assert((void *)s == data + total_size);

   return per_level_arr;
}
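
/* Usage sketch for the map above (a hypothetical caller, not a new code
 * path): the returned pointer behaves like a jagged 2D array, so every
 * logical layer of every level can be indexed directly, and a single free()
 * of the outer pointer releases the whole allocation:
 *
 *    enum isl_aux_state **map =
 *       create_aux_state_map(mt, ISL_AUX_STATE_PASS_THROUGH);
 *    if (map) {
 *       map[0][0] = ISL_AUX_STATE_CLEAR;
 *       free_aux_state_map(map);
 *    }
 */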

static void
free_aux_state_map(enum isl_aux_state **state)
{
   free(state);
}

static bool
need_to_retile_as_linear(struct brw_context *brw, unsigned row_pitch,
                         enum isl_tiling tiling, unsigned samples)
{
   if (samples > 1)
      return false;

   if (tiling == ISL_TILING_LINEAR)
      return false;

   /* If the width is much smaller than a tile, don't bother tiling. */
   if (row_pitch < 64)
      return true;

   if (ALIGN(row_pitch, 512) >= 32768) {
      perf_debug("row pitch %u too large to blit, falling back to untiled",
                 row_pitch);
      return true;
   }

   return false;
}
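
/* Worked example of the blit limit above, assuming a hypothetical 8192px
 * wide RGBA8 surface: row_pitch = 8192 * 4 = 32768 bytes, and
 * ALIGN(32768, 512) == 32768 >= 32768, so we fall back to linear. A 4096px
 * surface (row_pitch = 16384 bytes) stays tiled.
 */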

static bool
need_to_retile_as_x(const struct brw_context *brw, uint64_t size,
                    enum isl_tiling tiling)
{
   /* If the BO is too large to fit in the aperture, we need to use the
    * BLT engine to support it. Prior to Sandybridge, the BLT paths can't
    * handle Y-tiling, so we need to fall back to X.
    */
   if (brw->gen < 6 && size >= brw->max_gtt_map_object_size &&
       tiling == ISL_TILING_Y0)
      return true;

   return false;
}

static struct intel_mipmap_tree *
make_surface(struct brw_context *brw, GLenum target, mesa_format format,
             unsigned first_level, unsigned last_level,
             unsigned width0, unsigned height0, unsigned depth0,
             unsigned num_samples, isl_tiling_flags_t tiling_flags,
             isl_surf_usage_flags_t isl_usage_flags, uint32_t alloc_flags,
             unsigned row_pitch, struct brw_bo *bo)
{
   struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
   if (!mt)
      return NULL;

   if (!create_mapping_table(target, first_level, last_level, depth0,
                             mt->level)) {
      free(mt);
      return NULL;
   }

   mt->refcount = 1;

   if (target == GL_TEXTURE_CUBE_MAP ||
       target == GL_TEXTURE_CUBE_MAP_ARRAY)
      isl_usage_flags |= ISL_SURF_USAGE_CUBE_BIT;

   DBG("%s: %s %s %ux %u:%u:%u %d..%d <-- %p\n",
       __func__,
       _mesa_enum_to_string(target),
       _mesa_get_format_name(format),
       num_samples, width0, height0, depth0,
       first_level, last_level, mt);

   struct isl_surf_init_info init_info = {
      .dim = get_isl_surf_dim(target),
      .format = translate_tex_format(brw, format, false),
      .width = width0,
      .height = height0,
      .depth = target == GL_TEXTURE_3D ? depth0 : 1,
      .levels = last_level - first_level + 1,
      .array_len = target == GL_TEXTURE_3D ? 1 : depth0,
      .samples = num_samples,
      .row_pitch = row_pitch,
      .usage = isl_usage_flags,
      .tiling_flags = tiling_flags,
   };

   if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
      goto fail;

   /* In case the caller doesn't specifically request Y-tiling (needed
    * unconditionally for depth), check for corner cases needing special
    * treatment.
    */
   if (tiling_flags & ~ISL_TILING_Y0_BIT) {
      if (need_to_retile_as_linear(brw, mt->surf.row_pitch,
                                   mt->surf.tiling, mt->surf.samples)) {
         init_info.tiling_flags = 1u << ISL_TILING_LINEAR;
         if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
            goto fail;
      } else if (need_to_retile_as_x(brw, mt->surf.size, mt->surf.tiling)) {
         init_info.tiling_flags = 1u << ISL_TILING_X;
         if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
            goto fail;
      }
   }

   /* In the linear case the buffer gets padded by a fixed 64 bytes and
    * therefore the size may not be a multiple of row_pitch.
    * See isl_apply_surface_padding().
    */
   if (mt->surf.tiling != ISL_TILING_LINEAR)
      assert(mt->surf.size % mt->surf.row_pitch == 0);

   if (!bo) {
      mt->bo = brw_bo_alloc_tiled(brw->bufmgr, "isl-miptree",
                                  mt->surf.size,
                                  isl_tiling_to_i915_tiling(
                                     mt->surf.tiling),
                                  mt->surf.row_pitch, alloc_flags);
      if (!mt->bo)
         goto fail;
   } else {
      mt->bo = bo;
   }

   mt->first_level = first_level;
   mt->last_level = last_level;
   mt->target = target;
   mt->format = format;
   mt->aux_state = NULL;
   mt->cpp = isl_format_get_layout(mt->surf.format)->bpb / 8;
   mt->compressed = _mesa_is_format_compressed(format);

   return mt;

fail:
   intel_miptree_release(&mt);
   return NULL;
}

static bool
make_separate_stencil_surface(struct brw_context *brw,
                              struct intel_mipmap_tree *mt)
{
   mt->stencil_mt = make_surface(brw, mt->target, MESA_FORMAT_S_UINT8,
                                 0, mt->surf.levels - 1,
                                 mt->surf.logical_level0_px.width,
                                 mt->surf.logical_level0_px.height,
                                 mt->surf.dim == ISL_SURF_DIM_3D ?
                                    mt->surf.logical_level0_px.depth :
                                    mt->surf.logical_level0_px.array_len,
                                 mt->surf.samples, ISL_TILING_W_BIT,
                                 ISL_SURF_USAGE_STENCIL_BIT |
                                 ISL_SURF_USAGE_TEXTURE_BIT,
                                 BO_ALLOC_FOR_RENDER, 0, NULL);

   if (!mt->stencil_mt)
      return false;

   mt->stencil_mt->r8stencil_needs_update = true;

   return true;
}

static bool
force_linear_tiling(uint32_t layout_flags)
{
   /* ANY includes NONE and Y bit. */
   if (layout_flags & MIPTREE_LAYOUT_TILING_Y)
      return false;

   return layout_flags & MIPTREE_LAYOUT_TILING_NONE;
}

static struct intel_mipmap_tree *
miptree_create(struct brw_context *brw,
               GLenum target,
               mesa_format format,
               GLuint first_level,
               GLuint last_level,
               GLuint width0,
               GLuint height0,
               GLuint depth0,
               GLuint num_samples,
               uint32_t layout_flags)
{
   if (format == MESA_FORMAT_S_UINT8)
      return make_surface(brw, target, format, first_level, last_level,
                          width0, height0, depth0, num_samples,
                          ISL_TILING_W_BIT,
                          ISL_SURF_USAGE_STENCIL_BIT |
                          ISL_SURF_USAGE_TEXTURE_BIT,
                          BO_ALLOC_FOR_RENDER,
                          0,
                          NULL);

   const GLenum base_format = _mesa_get_format_base_format(format);
   if ((base_format == GL_DEPTH_COMPONENT ||
        base_format == GL_DEPTH_STENCIL) &&
       !force_linear_tiling(layout_flags)) {
      /* Fix up the Z miptree format for how we're splitting out separate
       * stencil. Gen7 expects there to be no stencil bits in its depth buffer.
       */
      const mesa_format depth_only_format =
         intel_depth_format_for_depthstencil_format(format);
      struct intel_mipmap_tree *mt = make_surface(
         brw, target, brw->gen >= 6 ? depth_only_format : format,
         first_level, last_level,
         width0, height0, depth0, num_samples, ISL_TILING_Y0_BIT,
         ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT,
         BO_ALLOC_FOR_RENDER, 0, NULL);

      if (needs_separate_stencil(brw, mt, format, layout_flags) &&
          !make_separate_stencil_surface(brw, mt)) {
         intel_miptree_release(&mt);
         return NULL;
      }

      if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
         intel_miptree_choose_aux_usage(brw, mt);

      return mt;
   }

   mesa_format tex_format = format;
   mesa_format etc_format = MESA_FORMAT_NONE;
   uint32_t alloc_flags = 0;

   format = intel_lower_compressed_format(brw, format);

   etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;

   assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0);
   if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD)
      alloc_flags |= BO_ALLOC_FOR_RENDER;

   isl_tiling_flags_t tiling_flags = force_linear_tiling(layout_flags) ?
      ISL_TILING_LINEAR_BIT : ISL_TILING_ANY_MASK;

   /* TODO: This used to be because there wasn't BLORP to handle Y-tiling. */
   if (brw->gen < 6)
      tiling_flags &= ~ISL_TILING_Y0_BIT;

   struct intel_mipmap_tree *mt = make_surface(
      brw, target, format,
      first_level, last_level,
      width0, height0, depth0,
      num_samples, tiling_flags,
      ISL_SURF_USAGE_RENDER_TARGET_BIT |
      ISL_SURF_USAGE_TEXTURE_BIT,
      alloc_flags, 0, NULL);
   if (!mt)
      return NULL;

   mt->etc_format = etc_format;

   if (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT)
      mt->bo->cache_coherent = false;

   if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
      intel_miptree_choose_aux_usage(brw, mt);

   return mt;
}

struct intel_mipmap_tree *
intel_miptree_create(struct brw_context *brw,
                     GLenum target,
                     mesa_format format,
                     GLuint first_level,
                     GLuint last_level,
                     GLuint width0,
                     GLuint height0,
                     GLuint depth0,
                     GLuint num_samples,
                     uint32_t layout_flags)
{
   assert(num_samples > 0);

   struct intel_mipmap_tree *mt = miptree_create(
      brw, target, format,
      first_level, last_level,
      width0, height0, depth0, num_samples,
      layout_flags);
   if (!mt)
      return NULL;

   mt->offset = 0;

   if (!intel_miptree_alloc_aux(brw, mt)) {
      intel_miptree_release(&mt);
      return NULL;
   }

   return mt;
}

struct intel_mipmap_tree *
intel_miptree_create_for_bo(struct brw_context *brw,
                            struct brw_bo *bo,
                            mesa_format format,
                            uint32_t offset,
                            uint32_t width,
                            uint32_t height,
                            uint32_t depth,
                            int pitch,
                            uint32_t layout_flags)
{
   struct intel_mipmap_tree *mt;
   uint32_t tiling, swizzle;
   const GLenum target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
   const GLenum base_format = _mesa_get_format_base_format(format);

   if ((base_format == GL_DEPTH_COMPONENT ||
        base_format == GL_DEPTH_STENCIL)) {
      const mesa_format depth_only_format =
         intel_depth_format_for_depthstencil_format(format);
      mt = make_surface(brw, target,
                        brw->gen >= 6 ? depth_only_format : format,
                        0, 0, width, height, depth, 1, ISL_TILING_Y0_BIT,
                        ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT,
                        BO_ALLOC_FOR_RENDER, pitch, bo);

      brw_bo_reference(bo);

      if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
         intel_miptree_choose_aux_usage(brw, mt);

      return mt;
   } else if (format == MESA_FORMAT_S_UINT8) {
      mt = make_surface(brw, target, MESA_FORMAT_S_UINT8,
                        0, 0, width, height, depth, 1,
                        ISL_TILING_W_BIT,
                        ISL_SURF_USAGE_STENCIL_BIT |
                        ISL_SURF_USAGE_TEXTURE_BIT,
                        BO_ALLOC_FOR_RENDER, pitch, bo);
      if (!mt)
         return NULL;

      assert(bo->size >= mt->surf.size);

      brw_bo_reference(bo);
      return mt;
   }

   brw_bo_get_tiling(bo, &tiling, &swizzle);

   /* Nothing will be able to use this miptree with the BO if the offset isn't
    * aligned.
    */
   if (tiling != I915_TILING_NONE)
      assert(offset % 4096 == 0);

   /* miptrees can't handle negative pitch. If you need flipping of images,
    * that's outside of the scope of the mt.
    */
   assert(pitch >= 0);

   /* The BO already has a tiling format and we shouldn't confuse the lower
    * layers by making it try to find a tiling format again.
    */
   assert((layout_flags & MIPTREE_LAYOUT_TILING_ANY) == 0);
   assert((layout_flags & MIPTREE_LAYOUT_TILING_NONE) == 0);

   mt = make_surface(brw, target, format,
                     0, 0, width, height, depth, 1,
                     1lu << isl_tiling_from_i915_tiling(tiling),
                     ISL_SURF_USAGE_RENDER_TARGET_BIT |
                     ISL_SURF_USAGE_TEXTURE_BIT,
                     0, pitch, bo);
   if (!mt)
      return NULL;

   brw_bo_reference(bo);
   mt->bo = bo;
   mt->offset = offset;

   if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
      intel_miptree_choose_aux_usage(brw, mt);

   return mt;
}

static struct intel_mipmap_tree *
miptree_create_for_planar_image(struct brw_context *brw,
                                __DRIimage *image, GLenum target)
{
   struct intel_image_format *f = image->planar_format;
   struct intel_mipmap_tree *planar_mt = NULL;

   for (int i = 0; i < f->nplanes; i++) {
      const int index = f->planes[i].buffer_index;
      const uint32_t dri_format = f->planes[i].dri_format;
      const mesa_format format = driImageFormatToGLFormat(dri_format);
      const uint32_t width = image->width >> f->planes[i].width_shift;
      const uint32_t height = image->height >> f->planes[i].height_shift;

      /* Disable creation of the texture's aux buffers because the driver
       * exposes no EGL API to manage them. That is, there is no API for
       * resolving the aux buffer's content to the main buffer nor for
       * invalidating the aux buffer's content.
       */
      struct intel_mipmap_tree *mt =
         intel_miptree_create_for_bo(brw, image->bo, format,
                                     image->offsets[index],
                                     width, height, 1,
                                     image->strides[index],
                                     MIPTREE_LAYOUT_DISABLE_AUX);
      if (mt == NULL)
         return NULL;

      mt->target = target;

      if (i == 0)
         planar_mt = mt;
      else
         planar_mt->plane[i - 1] = mt;
   }

   return planar_mt;
}

struct intel_mipmap_tree *
intel_miptree_create_for_dri_image(struct brw_context *brw,
                                   __DRIimage *image, GLenum target,
                                   enum isl_colorspace colorspace,
                                   bool is_winsys_image)
{
   if (image->planar_format && image->planar_format->nplanes > 0) {
      assert(colorspace == ISL_COLORSPACE_NONE ||
             colorspace == ISL_COLORSPACE_YUV);
      return miptree_create_for_planar_image(brw, image, target);
   }

   mesa_format format = image->format;
   switch (colorspace) {
   case ISL_COLORSPACE_NONE:
      /* Keep the image format unmodified */
      break;

   case ISL_COLORSPACE_LINEAR:
      format = _mesa_get_srgb_format_linear(format);
      break;

   case ISL_COLORSPACE_SRGB:
      format = _mesa_get_linear_format_srgb(format);
      break;

   default:
      unreachable("Invalid colorspace for non-planar image");
   }

   if (!brw->ctx.TextureFormatSupported[format]) {
      /* The texture storage paths in core Mesa detect if the driver does not
       * support the user-requested format, and then search for a fallback
       * format. The DRIimage code bypasses core Mesa, though, so we do the
       * fallbacks here for important formats.
       *
       * We must support DRM_FOURCC_XBGR8888 textures because the Android
       * framework produces HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces, which
       * the Chrome OS compositor consumes as dma_buf EGLImages.
       */
      format = _mesa_format_fallback_rgbx_to_rgba(format);
   }

   if (!brw->ctx.TextureFormatSupported[format])
      return NULL;

   /* If this image comes in from a window system, we have different
    * requirements than if it comes in via an EGL import operation. Window
    * system images can use any form of auxiliary compression we wish because
    * they get "flushed" before being handed off to the window system and we
    * have the opportunity to do resolves. Window system buffers also may be
    * used for scanout so we need to flag that appropriately.
    */
   const uint32_t mt_layout_flags =
      is_winsys_image ? MIPTREE_LAYOUT_FOR_SCANOUT : MIPTREE_LAYOUT_DISABLE_AUX;

   /* Disable creation of the texture's aux buffers because the driver exposes
    * no EGL API to manage them. That is, there is no API for resolving the aux
    * buffer's content to the main buffer nor for invalidating the aux buffer's
    * content.
    */
   struct intel_mipmap_tree *mt =
      intel_miptree_create_for_bo(brw, image->bo, format,
                                  image->offset, image->width, image->height, 1,
                                  image->pitch, mt_layout_flags);
   if (mt == NULL)
      return NULL;

   mt->target = target;
   mt->level[0].level_x = image->tile_x;
   mt->level[0].level_y = image->tile_y;

   /* From "OES_EGL_image" error reporting. We report GL_INVALID_OPERATION
    * for EGL images from non-tile-aligned surfaces on gen4 and earlier
    * hardware, which has trouble resolving back to the destination image
    * due to alignment issues.
    */
   if (!brw->has_surface_tile_offset) {
      uint32_t draw_x, draw_y;
      intel_miptree_get_tile_offsets(mt, 0, 0, &draw_x, &draw_y);

      if (draw_x != 0 || draw_y != 0) {
         _mesa_error(&brw->ctx, GL_INVALID_OPERATION, __func__);
         intel_miptree_release(&mt);
         return NULL;
      }
   }

   if (!intel_miptree_alloc_aux(brw, mt)) {
      intel_miptree_release(&mt);
      return NULL;
   }

   return mt;
}

/**
 * For a singlesample renderbuffer, this simply wraps the given BO with a
 * miptree.
 *
 * For a multisample renderbuffer, this wraps the window system's
 * (singlesample) BO with a singlesample miptree attached to the
 * intel_renderbuffer, then creates a multisample miptree attached to irb->mt
 * that will contain the actual rendering (which is lazily resolved to
 * irb->singlesample_mt).
 */
bool
intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
                                         struct intel_renderbuffer *irb,
                                         struct intel_mipmap_tree *singlesample_mt,
                                         uint32_t width, uint32_t height,
                                         uint32_t pitch)
{
   struct intel_mipmap_tree *multisample_mt = NULL;
   struct gl_renderbuffer *rb = &irb->Base.Base;
   mesa_format format = rb->Format;
   const unsigned num_samples = MAX2(rb->NumSamples, 1);

   /* Only the front and back buffers, which are color buffers, are allocated
    * through the image loader.
    */
   assert(_mesa_get_format_base_format(format) == GL_RGB ||
          _mesa_get_format_base_format(format) == GL_RGBA);

   assert(singlesample_mt);

   if (num_samples == 1) {
      intel_miptree_release(&irb->mt);
      irb->mt = singlesample_mt;

      assert(!irb->singlesample_mt);
   } else {
      intel_miptree_release(&irb->singlesample_mt);
      irb->singlesample_mt = singlesample_mt;

      if (!irb->mt ||
          irb->mt->surf.logical_level0_px.width != width ||
          irb->mt->surf.logical_level0_px.height != height) {
         multisample_mt = intel_miptree_create_for_renderbuffer(intel,
                                                                format,
                                                                width,
                                                                height,
                                                                num_samples);
         if (!multisample_mt)
            goto fail;

         irb->need_downsample = false;
         intel_miptree_release(&irb->mt);
         irb->mt = multisample_mt;
      }
   }
   return true;

fail:
   intel_miptree_release(&irb->mt);
   return false;
}

struct intel_mipmap_tree*
intel_miptree_create_for_renderbuffer(struct brw_context *brw,
                                      mesa_format format,
                                      uint32_t width,
                                      uint32_t height,
                                      uint32_t num_samples)
{
   struct intel_mipmap_tree *mt;
   uint32_t depth = 1;
   GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
   const uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
                                 MIPTREE_LAYOUT_TILING_ANY;

   mt = intel_miptree_create(brw, target, format, 0, 0,
                             width, height, depth, num_samples,
                             layout_flags);
   if (!mt)
      goto fail;

   return mt;

fail:
   intel_miptree_release(&mt);
   return NULL;
}

void
intel_miptree_reference(struct intel_mipmap_tree **dst,
                        struct intel_mipmap_tree *src)
{
   if (*dst == src)
      return;

   intel_miptree_release(dst);

   if (src) {
      src->refcount++;
      DBG("%s %p refcount now %d\n", __func__, src, src->refcount);
   }

   *dst = src;
}
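
/* Ownership sketch (a hypothetical caller, not a new code path): miptree
 * pointers are always handed around through these helpers rather than by
 * plain assignment, so the refcount stays balanced:
 *
 *    struct intel_mipmap_tree *local = NULL;
 *    intel_miptree_reference(&local, irb->mt);   // refcount++
 *    ...use local...
 *    intel_miptree_release(&local);              // refcount--, frees at 0
 */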

static void
intel_miptree_aux_buffer_free(struct intel_miptree_aux_buffer *aux_buf)
{
   if (aux_buf == NULL)
      return;

   brw_bo_unreference(aux_buf->bo);

   free(aux_buf);
}

void
intel_miptree_release(struct intel_mipmap_tree **mt)
{
   if (!*mt)
      return;

   DBG("%s %p refcount will be %d\n", __func__, *mt, (*mt)->refcount - 1);
   if (--(*mt)->refcount <= 0) {
      GLuint i;

      DBG("%s deleting %p\n", __func__, *mt);

      brw_bo_unreference((*mt)->bo);
      intel_miptree_release(&(*mt)->stencil_mt);
      intel_miptree_release(&(*mt)->r8stencil_mt);
      intel_miptree_aux_buffer_free((*mt)->hiz_buf);
      intel_miptree_aux_buffer_free((*mt)->mcs_buf);
      free_aux_state_map((*mt)->aux_state);

      intel_miptree_release(&(*mt)->plane[0]);
      intel_miptree_release(&(*mt)->plane[1]);

      for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
         free((*mt)->level[i].slice);
      }

      free(*mt);
   }
   *mt = NULL;
}


void
intel_get_image_dims(struct gl_texture_image *image,
                     int *width, int *height, int *depth)
{
   switch (image->TexObject->Target) {
   case GL_TEXTURE_1D_ARRAY:
      /* For a 1D Array texture the OpenGL API will treat the image height as
       * the number of array slices. For Intel hardware, we treat the 1D array
       * as a 2D Array with a height of 1. So, here we want to swap image
       * height and depth.
       */
      assert(image->Depth == 1);
      *width = image->Width;
      *height = 1;
      *depth = image->Height;
      break;
   case GL_TEXTURE_CUBE_MAP:
      /* For Cube maps, the mesa/main api layer gives us a depth of 1 even
       * though we really have 6 slices.
       */
      assert(image->Depth == 1);
      *width = image->Width;
      *height = image->Height;
      *depth = 6;
      break;
   default:
      *width = image->Width;
      *height = image->Height;
      *depth = image->Depth;
      break;
   }
}

/**
 * Can the image be pulled into a unified mipmap tree? This mirrors
 * the completeness test in a lot of ways.
 *
 * Not sure whether I want to pass gl_texture_image here.
 */
bool
intel_miptree_match_image(struct intel_mipmap_tree *mt,
                          struct gl_texture_image *image)
{
   struct intel_texture_image *intelImage = intel_texture_image(image);
   GLuint level = intelImage->base.Base.Level;
   int width, height, depth;

   /* glTexImage* choose the texture object based on the target passed in, and
    * objects can't change targets over their lifetimes, so this should be
    * true.
    */
   assert(image->TexObject->Target == mt->target);

   mesa_format mt_format = mt->format;
   if (mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT && mt->stencil_mt)
      mt_format = MESA_FORMAT_Z24_UNORM_S8_UINT;
   if (mt->format == MESA_FORMAT_Z_FLOAT32 && mt->stencil_mt)
      mt_format = MESA_FORMAT_Z32_FLOAT_S8X24_UINT;
   if (mt->etc_format != MESA_FORMAT_NONE)
      mt_format = mt->etc_format;

   if (image->TexFormat != mt_format)
      return false;

   intel_get_image_dims(image, &width, &height, &depth);

   if (mt->target == GL_TEXTURE_CUBE_MAP)
      depth = 6;

   if (level >= mt->surf.levels)
      return false;

   const unsigned level_depth =
      mt->surf.dim == ISL_SURF_DIM_3D ?
         minify(mt->surf.logical_level0_px.depth, level) :
         mt->surf.logical_level0_px.array_len;

   return width == minify(mt->surf.logical_level0_px.width, level) &&
          height == minify(mt->surf.logical_level0_px.height, level) &&
          depth == level_depth &&
          MAX2(image->NumSamples, 1) == mt->surf.samples;
}

void
intel_miptree_get_image_offset(const struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               GLuint *x, GLuint *y)
{
   if (level == 0 && slice == 0) {
      *x = mt->level[0].level_x;
      *y = mt->level[0].level_y;
      return;
   }

   uint32_t x_offset_sa, y_offset_sa;

   /* Miptree itself can have an offset only if it represents a single
    * slice in an imported buffer object.
    * See intel_miptree_create_for_dri_image().
    */
   assert(mt->level[0].level_x == 0);
   assert(mt->level[0].level_y == 0);

   /* The given level is relative to level zero, while the miptree may
    * represent just a subset of all levels starting from 'first_level'.
    */
   assert(level >= mt->first_level);
   level -= mt->first_level;

   const unsigned z = mt->surf.dim == ISL_SURF_DIM_3D ? slice : 0;
   slice = mt->surf.dim == ISL_SURF_DIM_3D ? 0 : slice;
   isl_surf_get_image_offset_el(&mt->surf, level, slice, z,
                                &x_offset_sa, &y_offset_sa);

   *x = x_offset_sa;
   *y = y_offset_sa;
}


/**
 * This function computes the tile_w (in bytes) and tile_h (in rows) of
 * different tiling patterns. If the BO is untiled, tile_w is set to cpp
 * and tile_h is set to 1.
 */
void
intel_get_tile_dims(enum isl_tiling tiling, uint32_t cpp,
                    uint32_t *tile_w, uint32_t *tile_h)
{
   switch (tiling) {
   case ISL_TILING_X:
      *tile_w = 512;
      *tile_h = 8;
      break;
   case ISL_TILING_Y0:
      *tile_w = 128;
      *tile_h = 32;
      break;
   case ISL_TILING_LINEAR:
      *tile_w = cpp;
      *tile_h = 1;
      break;
   default:
      unreachable("not reached");
   }
}


/**
 * This function computes masks that may be used to select the bits of the X
 * and Y coordinates that indicate the offset within a tile. If the BO is
 * untiled, the masks are set to 0.
 */
void
intel_get_tile_masks(enum isl_tiling tiling, uint32_t cpp,
                     uint32_t *mask_x, uint32_t *mask_y)
{
   uint32_t tile_w_bytes, tile_h;

   intel_get_tile_dims(tiling, cpp, &tile_w_bytes, &tile_h);

   *mask_x = tile_w_bytes / cpp - 1;
   *mask_y = tile_h - 1;
}
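
/* Worked example, assuming a Y-tiled surface with cpp == 4: a Y tile is
 * 128 bytes x 32 rows, so mask_x = 128 / 4 - 1 = 31 and mask_y = 31;
 * (x & 31, y & 31) is then the position within a tile and
 * (x & ~31, y & ~31) the tile-aligned base.
 */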

/**
 * Compute the offset (in bytes) from the start of the BO to the given x
 * and y coordinate. For tiled BOs, caller must ensure that x and y are
 * multiples of the tile size.
 */
uint32_t
intel_miptree_get_aligned_offset(const struct intel_mipmap_tree *mt,
                                 uint32_t x, uint32_t y)
{
   int cpp = mt->cpp;
   uint32_t pitch = mt->surf.row_pitch;

   switch (mt->surf.tiling) {
   default:
      unreachable("not reached");
   case ISL_TILING_LINEAR:
      return y * pitch + x * cpp;
   case ISL_TILING_X:
      assert((x % (512 / cpp)) == 0);
      assert((y % 8) == 0);
      return y * pitch + x / (512 / cpp) * 4096;
   case ISL_TILING_Y0:
      assert((x % (128 / cpp)) == 0);
      assert((y % 32) == 0);
      return y * pitch + x / (128 / cpp) * 4096;
   }
}
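
/* Worked example for the Y-tiled case, with hypothetical numbers: for
 * cpp == 4, row_pitch == 1024 and tile-aligned (x, y) == (64, 32), the
 * offset is 32 * 1024 + 64 / (128 / 4) * 4096 = 32768 + 8192 = 40960
 * bytes, i.e. the start of the eleventh 4KB tile in the BO.
 */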

/**
 * Rendering with tiled buffers requires that the base address of the buffer
 * be aligned to a page boundary. For renderbuffers, and sometimes with
 * textures, we may want the surface to point at a texture image level that
 * isn't at a page boundary.
 *
 * This function returns an appropriately-aligned base offset
 * according to the tiling restrictions, plus any required x/y offset
 * from there.
 */
uint32_t
intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               uint32_t *tile_x,
                               uint32_t *tile_y)
{
   uint32_t x, y;
   uint32_t mask_x, mask_y;

   intel_get_tile_masks(mt->surf.tiling, mt->cpp, &mask_x, &mask_y);
   intel_miptree_get_image_offset(mt, level, slice, &x, &y);

   *tile_x = x & mask_x;
   *tile_y = y & mask_y;

   return intel_miptree_get_aligned_offset(mt, x & ~mask_x, y & ~mask_y);
}
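
/* Putting the two helpers above together (a sketch, not a separate code
 * path): for an image whose level/slice starts at (x, y) == (70, 40) on a
 * Y-tiled 32bpp surface, the masks are (31, 31), so *tile_x == 6 and
 * *tile_y == 8, and the returned base is
 * intel_miptree_get_aligned_offset(mt, 64, 32); SURFACE_STATE then points
 * at the aligned base with the small x/y offsets applied on top.
 */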

static void
intel_miptree_copy_slice_sw(struct brw_context *brw,
                            struct intel_mipmap_tree *src_mt,
                            unsigned src_level, unsigned src_layer,
                            struct intel_mipmap_tree *dst_mt,
                            unsigned dst_level, unsigned dst_layer,
                            unsigned width, unsigned height)
{
   void *src, *dst;
   ptrdiff_t src_stride, dst_stride;
   const unsigned cpp = (isl_format_get_layout(dst_mt->surf.format)->bpb / 8);

   intel_miptree_map(brw, src_mt,
                     src_level, src_layer,
                     0, 0,
                     width, height,
                     GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
                     &src, &src_stride);

   intel_miptree_map(brw, dst_mt,
                     dst_level, dst_layer,
                     0, 0,
                     width, height,
                     GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
                     BRW_MAP_DIRECT_BIT,
                     &dst, &dst_stride);

   DBG("sw blit %s mt %p %p/%"PRIdPTR" -> %s mt %p %p/%"PRIdPTR" (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src, src_stride,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst, dst_stride,
       width, height);

   int row_size = cpp * width;
   if (src_stride == row_size &&
       dst_stride == row_size) {
      memcpy(dst, src, row_size * height);
   } else {
      for (int i = 0; i < height; i++) {
         memcpy(dst, src, row_size);
         dst += dst_stride;
         src += src_stride;
      }
   }

   intel_miptree_unmap(brw, dst_mt, dst_level, dst_layer);
   intel_miptree_unmap(brw, src_mt, src_level, src_layer);

   /* Don't forget to copy the stencil data over, too. We could have skipped
    * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
    * shuffling the two data sources in/out of temporary storage instead of
    * the direct mapping we get this way.
    */
   if (dst_mt->stencil_mt) {
      assert(src_mt->stencil_mt);
      intel_miptree_copy_slice_sw(brw,
                                  src_mt->stencil_mt, src_level, src_layer,
                                  dst_mt->stencil_mt, dst_level, dst_layer,
                                  width, height);
   }
}

void
intel_miptree_copy_slice(struct brw_context *brw,
                         struct intel_mipmap_tree *src_mt,
                         unsigned src_level, unsigned src_layer,
                         struct intel_mipmap_tree *dst_mt,
                         unsigned dst_level, unsigned dst_layer)
{
   mesa_format format = src_mt->format;
   unsigned width = minify(src_mt->surf.phys_level0_sa.width,
                           src_level - src_mt->first_level);
   unsigned height = minify(src_mt->surf.phys_level0_sa.height,
                            src_level - src_mt->first_level);

   assert(src_layer < get_num_phys_layers(&src_mt->surf,
                                          src_level - src_mt->first_level));

   assert(src_mt->format == dst_mt->format);

   if (dst_mt->compressed) {
      unsigned int i, j;
      _mesa_get_format_block_size(dst_mt->format, &i, &j);
      height = ALIGN_NPOT(height, j) / j;
      width = ALIGN_NPOT(width, i) / i;
   }

   /* If it's a packed depth/stencil buffer with separate stencil, the blit
    * below won't apply since we can't do the depth's Y tiling or the
    * stencil's W tiling in the blitter.
    */
   if (src_mt->stencil_mt) {
      intel_miptree_copy_slice_sw(brw,
                                  src_mt, src_level, src_layer,
                                  dst_mt, dst_level, dst_layer,
                                  width, height);
      return;
   }

   uint32_t dst_x, dst_y, src_x, src_y;
   intel_miptree_get_image_offset(dst_mt, dst_level, dst_layer,
                                  &dst_x, &dst_y);
   intel_miptree_get_image_offset(src_mt, src_level, src_layer,
                                  &src_x, &src_y);

   DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src_x, src_y, src_mt->surf.row_pitch,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst_x, dst_y, dst_mt->surf.row_pitch,
       width, height);

   if (!intel_miptree_blit(brw,
                           src_mt, src_level, src_layer, 0, 0, false,
                           dst_mt, dst_level, dst_layer, 0, 0, false,
                           width, height, GL_COPY)) {
      perf_debug("miptree validate blit for %s failed\n",
                 _mesa_get_format_name(format));

      intel_miptree_copy_slice_sw(brw,
                                  src_mt, src_level, src_layer,
                                  dst_mt, dst_level, dst_layer,
                                  width, height);
   }
}

/**
 * Copies the image's current data to the given miptree, and associates that
 * miptree with the image.
 *
 * If \c invalidate is true, then the actual image data does not need to be
 * copied, but the image still needs to be associated to the new miptree (this
 * is set to true if we're about to clear the image).
 */
void
intel_miptree_copy_teximage(struct brw_context *brw,
                            struct intel_texture_image *intelImage,
                            struct intel_mipmap_tree *dst_mt,
                            bool invalidate)
{
   struct intel_mipmap_tree *src_mt = intelImage->mt;
   struct intel_texture_object *intel_obj =
      intel_texture_object(intelImage->base.Base.TexObject);
   int level = intelImage->base.Base.Level;
   const unsigned face = intelImage->base.Base.Face;
   unsigned start_layer, end_layer;

   if (intel_obj->base.Target == GL_TEXTURE_1D_ARRAY) {
      assert(face == 0);
      assert(intelImage->base.Base.Height);
      start_layer = 0;
      end_layer = intelImage->base.Base.Height - 1;
   } else if (face > 0) {
      start_layer = face;
      end_layer = face;
   } else {
      assert(intelImage->base.Base.Depth);
      start_layer = 0;
      end_layer = intelImage->base.Base.Depth - 1;
   }

   if (!invalidate) {
      for (unsigned i = start_layer; i <= end_layer; i++) {
         intel_miptree_copy_slice(brw,
                                  src_mt, level, i,
                                  dst_mt, level, i);
      }
   }

   intel_miptree_reference(&intelImage->mt, dst_mt);
   intel_obj->needs_validate = true;
}

static void
intel_miptree_init_mcs(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       int init_value)
{
   assert(mt->mcs_buf != NULL);

   /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
    *
    *     When MCS buffer is enabled and bound to MSRT, it is required that it
    *     is cleared prior to any rendering.
    *
    * Since we don't use the MCS buffer for any purpose other than rendering,
    * it makes sense to just clear it immediately upon allocation.
    *
    * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
    */
   void *map = brw_bo_map(brw, mt->mcs_buf->bo, MAP_WRITE);
   if (unlikely(map == NULL)) {
      fprintf(stderr, "Failed to map mcs buffer into GTT\n");
      brw_bo_unreference(mt->mcs_buf->bo);
      free(mt->mcs_buf);
      /* Don't leave a dangling pointer behind; callers check mt->mcs_buf. */
      mt->mcs_buf = NULL;
      return;
   }
   void *data = map;
   memset(data, init_value, mt->mcs_buf->size);
   brw_bo_unmap(mt->mcs_buf->bo);
}

static struct intel_miptree_aux_buffer *
intel_alloc_aux_buffer(struct brw_context *brw,
                       const char *name,
                       const struct isl_surf *aux_surf,
                       uint32_t alloc_flags,
                       struct intel_mipmap_tree *mt)
{
   struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
   if (!buf)
      return NULL;

   buf->size = aux_surf->size;
   buf->pitch = aux_surf->row_pitch;
   buf->qpitch = isl_surf_get_array_pitch_sa_rows(aux_surf);

   /* ISL has a stricter set of alignment rules than the drm allocator.
    * Therefore one can pass the ISL dimensions in terms of bytes instead of
    * trying to recalculate based on different format block sizes.
    */
   buf->bo = brw_bo_alloc_tiled(brw->bufmgr, name, buf->size,
                                I915_TILING_Y, buf->pitch, alloc_flags);
   if (!buf->bo) {
      free(buf);
      return NULL;
   }

   buf->surf = *aux_surf;

   return buf;
}

static bool
intel_miptree_alloc_mcs(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
                        GLuint num_samples)
{
   assert(brw->gen >= 7); /* MCS only used on Gen7+ */
   assert(mt->mcs_buf == NULL);
   assert(mt->aux_usage == ISL_AUX_USAGE_MCS);

   /* Multisampled miptrees are only supported for single level. */
   assert(mt->first_level == 0);
   enum isl_aux_state **aux_state =
      create_aux_state_map(mt, ISL_AUX_STATE_CLEAR);
   if (!aux_state)
      return false;

   struct isl_surf temp_mcs_surf;

   MAYBE_UNUSED bool ok =
      isl_surf_get_mcs_surf(&brw->isl_dev, &mt->surf, &temp_mcs_surf);
   assert(ok);

   /* The buffer needs to be initialized, which requires it to be mapped to
    * CPU space for writing immediately. Therefore do not use the GPU access
    * flag, which can cause an unnecessary delay if the backing pages
    * happened to be just used by the GPU.
    */
   const uint32_t alloc_flags = 0;
   mt->mcs_buf = intel_alloc_aux_buffer(brw, "mcs-miptree",
                                        &temp_mcs_surf, alloc_flags, mt);
   if (!mt->mcs_buf) {
      free(aux_state);
      return false;
   }

   mt->aux_state = aux_state;

   intel_miptree_init_mcs(brw, mt, 0xFF);

   return true;
}

bool
intel_miptree_alloc_ccs(struct brw_context *brw,
                        struct intel_mipmap_tree *mt)
{
   assert(mt->mcs_buf == NULL);
   assert(mt->aux_usage == ISL_AUX_USAGE_CCS_E ||
          mt->aux_usage == ISL_AUX_USAGE_CCS_D);

   struct isl_surf temp_ccs_surf;

   if (!isl_surf_get_ccs_surf(&brw->isl_dev, &mt->surf, &temp_ccs_surf, 0))
      return false;

   assert(temp_ccs_surf.size &&
          (temp_ccs_surf.size % temp_ccs_surf.row_pitch == 0));

   enum isl_aux_state **aux_state =
      create_aux_state_map(mt, ISL_AUX_STATE_PASS_THROUGH);
   if (!aux_state)
      return false;

   /* When CCS_E is used, we need to ensure that the CCS starts off in a valid
    * state. From the Sky Lake PRM, "MCS Buffer for Render Target(s)":
    *
    *    "If Software wants to enable Color Compression without Fast clear,
    *     Software needs to initialize MCS with zeros."
    *
    * A CCS value of 0 indicates that the corresponding block is in the
    * pass-through state which is what we want.
    *
    * For CCS_D, on the other hand, we don't care as we're about to perform a
    * fast-clear operation. In that case, being hot in caches is more useful.
    */
   const uint32_t alloc_flags = mt->aux_usage == ISL_AUX_USAGE_CCS_E ?
      BO_ALLOC_ZEROED : BO_ALLOC_FOR_RENDER;
   mt->mcs_buf = intel_alloc_aux_buffer(brw, "ccs-miptree",
                                        &temp_ccs_surf, alloc_flags, mt);
   if (!mt->mcs_buf) {
      free(aux_state);
      return false;
   }

   mt->aux_state = aux_state;

   return true;
}

/**
 * Helper for intel_miptree_alloc_hiz() that sets
 * \c mt->level[level].has_hiz. Return true if and only if
 * \c has_hiz was set.
 */
static bool
intel_miptree_level_enable_hiz(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               uint32_t level)
{
   assert(mt->hiz_buf);
   assert(mt->surf.size > 0);

   if (brw->gen >= 8 || brw->is_haswell) {
      uint32_t width = minify(mt->surf.phys_level0_sa.width, level);
      uint32_t height = minify(mt->surf.phys_level0_sa.height, level);

      /* Disable HiZ for LOD > 0 unless the width is 8 aligned
       * and the height is 4 aligned. This allows our HiZ support
       * to fulfill Haswell restrictions for HiZ ops. For LOD == 0,
       * we can grow the width & height to allow the HiZ op to
       * force the proper size alignments.
       */
      if (level > 0 && ((width & 7) || (height & 3))) {
         DBG("mt %p level %d: HiZ DISABLED\n", mt, level);
         return false;
      }
   }

   DBG("mt %p level %d: HiZ enabled\n", mt, level);
   mt->level[level].has_hiz = true;
   return true;
}

bool
intel_miptree_alloc_hiz(struct brw_context *brw,
                        struct intel_mipmap_tree *mt)
{
   assert(mt->hiz_buf == NULL);
   assert(mt->aux_usage == ISL_AUX_USAGE_HIZ);

   enum isl_aux_state **aux_state =
      create_aux_state_map(mt, ISL_AUX_STATE_AUX_INVALID);
   if (!aux_state)
      return false;

   struct isl_surf temp_hiz_surf;

   MAYBE_UNUSED bool ok =
      isl_surf_get_hiz_surf(&brw->isl_dev, &mt->surf, &temp_hiz_surf);
   assert(ok);

   const uint32_t alloc_flags = BO_ALLOC_FOR_RENDER;
   mt->hiz_buf = intel_alloc_aux_buffer(brw, "hiz-miptree",
                                        &temp_hiz_surf, alloc_flags, mt);

   if (!mt->hiz_buf) {
      free(aux_state);
      return false;
   }

   for (unsigned level = mt->first_level; level <= mt->last_level; ++level)
      intel_miptree_level_enable_hiz(brw, mt, level);

   mt->aux_state = aux_state;

   return true;
}


/**
 * Allocate the initial aux surface for a miptree based on mt->aux_usage
 *
 * Since MCS, HiZ, and CCS_E can compress more than just clear color, we
 * create the auxiliary surfaces up-front. CCS_D, on the other hand, can only
 * compress clear color so we wait until an actual fast-clear to allocate it.
 */
static bool
intel_miptree_alloc_aux(struct brw_context *brw,
                        struct intel_mipmap_tree *mt)
{
   switch (mt->aux_usage) {
   case ISL_AUX_USAGE_NONE:
      return true;

   case ISL_AUX_USAGE_HIZ:
      assert(!_mesa_is_format_color_format(mt->format));
      if (!intel_miptree_alloc_hiz(brw, mt))
         return false;
      return true;

   case ISL_AUX_USAGE_MCS:
      assert(_mesa_is_format_color_format(mt->format));
      assert(mt->surf.samples > 1);
      if (!intel_miptree_alloc_mcs(brw, mt, mt->surf.samples))
         return false;
      return true;

   case ISL_AUX_USAGE_CCS_D:
      /* CCS_D can only compress clear color, so we wait until an actual
       * fast-clear to allocate it.
       */
      return true;

   case ISL_AUX_USAGE_CCS_E:
      assert(_mesa_is_format_color_format(mt->format));
      assert(mt->surf.samples == 1);
      if (!intel_miptree_alloc_ccs(brw, mt))
         return false;
      return true;
   }

   unreachable("Invalid aux usage");
}


/**
 * Can the miptree sample using the hiz buffer?
 */
bool
intel_miptree_sample_with_hiz(struct brw_context *brw,
                              struct intel_mipmap_tree *mt)
{
   /* It's unclear how well supported sampling from the hiz buffer is on GEN8,
    * so keep things conservative for now and never enable it unless we're
    * SKL+.
    */
   if (brw->gen < 9) {
      return false;
   }

   if (!mt->hiz_buf) {
      return false;
   }

   /* It seems the hardware won't fall back to the depth buffer if some of
    * the mipmap levels aren't available in the HiZ buffer. So we need all
    * levels of the texture to be HiZ enabled.
    */
   for (unsigned level = 0; level < mt->surf.levels; ++level) {
      if (!intel_miptree_level_has_hiz(mt, level))
         return false;
   }

   /* If compressed multisampling is enabled, then we use it for the auxiliary
    * buffer instead.
    *
    * From the BDW PRM (Volume 2d: Command Reference: Structures
    * RENDER_SURFACE_STATE.AuxiliarySurfaceMode):
    *
1836 * "If this field is set to AUX_HIZ, Number of Multisamples must be
1837 * MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D.
1838 *
1839 * There is no such blurb for 1D textures, but there is sufficient evidence
1840 * that this is broken on SKL+.
1841 */
1842 return (mt->surf.samples == 1 &&
1843 mt->target != GL_TEXTURE_3D &&
1844 mt->target != GL_TEXTURE_1D /* gen9+ restriction */);
1845 }
1846
1847 /**
1848 * Does the miptree slice have hiz enabled?
1849 */
1850 bool
1851 intel_miptree_level_has_hiz(const struct intel_mipmap_tree *mt, uint32_t level)
1852 {
1853 intel_miptree_check_level_layer(mt, level, 0);
1854 return mt->level[level].has_hiz;
1855 }
1856
1857 bool
1858 intel_miptree_has_color_unresolved(const struct intel_mipmap_tree *mt,
1859 unsigned start_level, unsigned num_levels,
1860 unsigned start_layer, unsigned num_layers)
1861 {
1862 assert(_mesa_is_format_color_format(mt->format));
1863
1864 if (!mt->mcs_buf)
1865 return false;
1866
1867 /* Clamp the level range to fit the miptree */
1868 assert(start_level + num_levels >= start_level);
1869 const uint32_t last_level =
1870 MIN2(mt->last_level, start_level + num_levels - 1);
1871 start_level = MAX2(mt->first_level, start_level);
1872 num_levels = last_level - start_level + 1;
1873
1874 for (uint32_t level = start_level; level <= last_level; level++) {
1875 uint32_t level_layers = get_num_phys_layers(&mt->surf, level);
1876
1877 level_layers = MIN2(num_layers, level_layers);
1878
1879 for (unsigned a = 0; a < level_layers; a++) {
1880 enum isl_aux_state aux_state =
1881 intel_miptree_get_aux_state(mt, level, start_layer + a);
1882 assert(aux_state != ISL_AUX_STATE_AUX_INVALID);
1883 if (aux_state != ISL_AUX_STATE_PASS_THROUGH)
1884 return true;
1885 }
1886 }
1887
1888 return false;
1889 }
1890
1891 static void
1892 intel_miptree_check_color_resolve(const struct brw_context *brw,
1893 const struct intel_mipmap_tree *mt,
1894 unsigned level, unsigned layer)
1895 {
1896
1897 if (!mt->mcs_buf)
1898 return;
1899
1900 /* Fast color clear is supported for mipmapped surfaces only on Gen8+. */
1901 assert(brw->gen >= 8 ||
1902 (level == 0 && mt->first_level == 0 && mt->last_level == 0));
1903
1904 /* Compression of arrayed msaa surfaces is supported. */
1905 if (mt->surf.samples > 1)
1906 return;
1907
1908 /* Fast color clear is supported for non-msaa arrays only on Gen8+. */
1909 assert(brw->gen >= 8 ||
1910 (layer == 0 &&
1911 mt->surf.logical_level0_px.depth == 1 &&
1912 mt->surf.logical_level0_px.array_len == 1));
1913
1914 (void)level;
1915 (void)layer;
1916 }
1917
1918 static enum blorp_fast_clear_op
1919 get_ccs_d_resolve_op(enum isl_aux_state aux_state,
1920 bool ccs_supported, bool fast_clear_supported)
1921 {
1922 assert(ccs_supported == fast_clear_supported);
1923
1924 switch (aux_state) {
1925 case ISL_AUX_STATE_CLEAR:
1926 case ISL_AUX_STATE_COMPRESSED_CLEAR:
1927 if (!ccs_supported)
1928 return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
1929 else
1930 return BLORP_FAST_CLEAR_OP_NONE;
1931
1932 case ISL_AUX_STATE_PASS_THROUGH:
1933 return BLORP_FAST_CLEAR_OP_NONE;
1934
1935 case ISL_AUX_STATE_RESOLVED:
1936 case ISL_AUX_STATE_AUX_INVALID:
1937 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
1938 break;
1939 }
1940
1941 unreachable("Invalid aux state for CCS_D");
1942 }
1943
1944 static enum blorp_fast_clear_op
1945 get_ccs_e_resolve_op(enum isl_aux_state aux_state,
1946 bool ccs_supported, bool fast_clear_supported)
1947 {
1948 switch (aux_state) {
1949 case ISL_AUX_STATE_CLEAR:
1950 case ISL_AUX_STATE_COMPRESSED_CLEAR:
1951 if (!ccs_supported)
1952 return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
1953 else if (!fast_clear_supported)
1954 return BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
1955 else
1956 return BLORP_FAST_CLEAR_OP_NONE;
1957
1958 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
1959 if (!ccs_supported)
1960 return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
1961 else
1962 return BLORP_FAST_CLEAR_OP_NONE;
1963
1964 case ISL_AUX_STATE_PASS_THROUGH:
1965 return BLORP_FAST_CLEAR_OP_NONE;
1966
1967 case ISL_AUX_STATE_RESOLVED:
1968 case ISL_AUX_STATE_AUX_INVALID:
1969 break;
1970 }
1971
1972 unreachable("Invalid aux state for CCS_E");
1973 }
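
/*
 * Readability aid: the table below just restates the two switches above
 * (get_ccs_d_resolve_op() and get_ccs_e_resolve_op()); it adds no rules of
 * its own.  For CCS_E:
 *
 *    aux state                 | !ccs   | ccs, !fast_clear | ccs, fast_clear
 *    --------------------------+--------+------------------+----------------
 *    CLEAR / COMPRESSED_CLEAR  | FULL   | PARTIAL          | NONE
 *    COMPRESSED_NO_CLEAR       | FULL   | NONE             | NONE
 *    PASS_THROUGH              | NONE   | NONE             | NONE
 *
 * CCS_D behaves the same except that a partial resolve can never happen,
 * since ccs_supported and fast_clear_supported are asserted to match.
 */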

static void
intel_miptree_prepare_ccs_access(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 uint32_t level, uint32_t layer,
                                 bool aux_supported,
                                 bool fast_clear_supported)
{
   enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, layer);

   enum blorp_fast_clear_op resolve_op;
   if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
      resolve_op = get_ccs_e_resolve_op(aux_state, aux_supported,
                                        fast_clear_supported);
   } else {
      assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D);
      resolve_op = get_ccs_d_resolve_op(aux_state, aux_supported,
                                        fast_clear_supported);
   }

   if (resolve_op != BLORP_FAST_CLEAR_OP_NONE) {
      intel_miptree_check_color_resolve(brw, mt, level, layer);
      brw_blorp_resolve_color(brw, mt, level, layer, resolve_op);

      switch (resolve_op) {
      case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
         /* The CCS full resolve operation destroys the CCS and sets it to
          * the pass-through state.  (You can also think of this as being
          * both a resolve and an ambiguate in one operation.)
          */
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_PASS_THROUGH);
         break;

      case BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL:
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
         break;

      default:
         unreachable("Invalid resolve op");
      }
   }
}

static void
intel_miptree_finish_ccs_write(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               uint32_t level, uint32_t layer,
                               bool written_with_ccs)
{
   enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, layer);

   if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
      switch (aux_state) {
      case ISL_AUX_STATE_CLEAR:
         assert(written_with_ccs);
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_CLEAR);
         break;

      case ISL_AUX_STATE_COMPRESSED_CLEAR:
      case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
         assert(written_with_ccs);
         break; /* Nothing to do */

      case ISL_AUX_STATE_PASS_THROUGH:
         if (written_with_ccs) {
            intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                        ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
         } else {
            /* Nothing to do */
         }
         break;

      case ISL_AUX_STATE_RESOLVED:
      case ISL_AUX_STATE_AUX_INVALID:
         unreachable("Invalid aux state for CCS_E");
      }
   } else {
      assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D);
      /* CCS_D is a bit simpler */
      switch (aux_state) {
      case ISL_AUX_STATE_CLEAR:
         assert(written_with_ccs);
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_CLEAR);
         break;

      case ISL_AUX_STATE_COMPRESSED_CLEAR:
         assert(written_with_ccs);
         break; /* Nothing to do */

      case ISL_AUX_STATE_PASS_THROUGH:
         /* Nothing to do */
         break;

      case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      case ISL_AUX_STATE_RESOLVED:
      case ISL_AUX_STATE_AUX_INVALID:
         unreachable("Invalid aux state for CCS_D");
      }
   }
}

static void
intel_miptree_prepare_mcs_access(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 uint32_t layer,
                                 bool mcs_supported,
                                 bool fast_clear_supported)
{
   switch (intel_miptree_get_aux_state(mt, 0, layer)) {
   case ISL_AUX_STATE_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_CLEAR:
      assert(mcs_supported);
      if (!fast_clear_supported) {
         brw_blorp_mcs_partial_resolve(brw, mt, layer, 1);
         intel_miptree_set_aux_state(brw, mt, 0, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
      }
      break;

   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      assert(mcs_supported);
      break; /* Nothing to do */

   case ISL_AUX_STATE_RESOLVED:
   case ISL_AUX_STATE_PASS_THROUGH:
   case ISL_AUX_STATE_AUX_INVALID:
      unreachable("Invalid aux state for MCS");
   }
}
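
/*
 * Note (wording ours; behavior taken from the switch above): an MCS buffer
 * is never resolved away to PASS_THROUGH because it is an integral part of
 * the MSAA sample layout.  The only resolve ever needed is the partial one,
 * e.g. when a fast-cleared MSAA surface is about to be read by a unit that
 * understands MCS but not the clear color:
 *
 *    intel_miptree_prepare_mcs_access(brw, mt, layer, true, false);
 *
 * which moves CLEAR / COMPRESSED_CLEAR to COMPRESSED_NO_CLEAR.
 */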

static void
intel_miptree_finish_mcs_write(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               uint32_t layer,
                               bool written_with_mcs)
{
   switch (intel_miptree_get_aux_state(mt, 0, layer)) {
   case ISL_AUX_STATE_CLEAR:
      assert(written_with_mcs);
      intel_miptree_set_aux_state(brw, mt, 0, layer, 1,
                                  ISL_AUX_STATE_COMPRESSED_CLEAR);
      break;

   case ISL_AUX_STATE_COMPRESSED_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      assert(written_with_mcs);
      break; /* Nothing to do */

   case ISL_AUX_STATE_RESOLVED:
   case ISL_AUX_STATE_PASS_THROUGH:
   case ISL_AUX_STATE_AUX_INVALID:
      unreachable("Invalid aux state for MCS");
   }
}

static void
intel_miptree_prepare_hiz_access(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 uint32_t level, uint32_t layer,
                                 bool hiz_supported, bool fast_clear_supported)
{
   enum blorp_hiz_op hiz_op = BLORP_HIZ_OP_NONE;
   switch (intel_miptree_get_aux_state(mt, level, layer)) {
   case ISL_AUX_STATE_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_CLEAR:
      if (!hiz_supported || !fast_clear_supported)
         hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
      break;

   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
      if (!hiz_supported)
         hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
      break;

   case ISL_AUX_STATE_PASS_THROUGH:
   case ISL_AUX_STATE_RESOLVED:
      break;

   case ISL_AUX_STATE_AUX_INVALID:
      if (hiz_supported)
         hiz_op = BLORP_HIZ_OP_HIZ_RESOLVE;
      break;
   }

   if (hiz_op != BLORP_HIZ_OP_NONE) {
      intel_hiz_exec(brw, mt, level, layer, 1, hiz_op);

      switch (hiz_op) {
      case BLORP_HIZ_OP_DEPTH_RESOLVE:
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_RESOLVED);
         break;

      case BLORP_HIZ_OP_HIZ_RESOLVE:
         /* The HiZ resolve operation is actually an ambiguate */
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_PASS_THROUGH);
         break;

      default:
         unreachable("Invalid HiZ op");
      }
   }
}
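
/*
 * Readability aid derived from the switch in
 * intel_miptree_prepare_hiz_access() above; it adds no rules of its own:
 *
 *    aux state                 | triggers               | op
 *    --------------------------+------------------------+---------------
 *    CLEAR / COMPRESSED_CLEAR  | !hiz || !fast_clear    | DEPTH_RESOLVE
 *    COMPRESSED_NO_CLEAR       | !hiz                   | DEPTH_RESOLVE
 *    AUX_INVALID               | hiz                    | HIZ_RESOLVE
 *    PASS_THROUGH / RESOLVED   | never                  | none
 */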

static void
intel_miptree_finish_hiz_write(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               uint32_t level, uint32_t layer,
                               bool written_with_hiz)
{
   switch (intel_miptree_get_aux_state(mt, level, layer)) {
   case ISL_AUX_STATE_CLEAR:
      assert(written_with_hiz);
      intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                  ISL_AUX_STATE_COMPRESSED_CLEAR);
      break;

   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
   case ISL_AUX_STATE_COMPRESSED_CLEAR:
      assert(written_with_hiz);
      break; /* Nothing to do */

   case ISL_AUX_STATE_RESOLVED:
      if (written_with_hiz) {
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
      } else {
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_AUX_INVALID);
      }
      break;

   case ISL_AUX_STATE_PASS_THROUGH:
      if (written_with_hiz) {
         intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                     ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
      }
      break;

   case ISL_AUX_STATE_AUX_INVALID:
      assert(!written_with_hiz);
      break;
   }
}

static inline uint32_t
miptree_level_range_length(const struct intel_mipmap_tree *mt,
                           uint32_t start_level, uint32_t num_levels)
{
   assert(start_level >= mt->first_level);
   assert(start_level <= mt->last_level);

   if (num_levels == INTEL_REMAINING_LEVELS)
      num_levels = mt->last_level - start_level + 1;
   /* Check for overflow */
   assert(start_level + num_levels >= start_level);
   assert(start_level + num_levels <= mt->last_level + 1);

   return num_levels;
}

static inline uint32_t
miptree_layer_range_length(const struct intel_mipmap_tree *mt, uint32_t level,
                           uint32_t start_layer, uint32_t num_layers)
{
   assert(level <= mt->last_level);

   const uint32_t total_num_layers = get_num_logical_layers(mt, level);
   assert(start_layer < total_num_layers);
   if (num_layers == INTEL_REMAINING_LAYERS)
      num_layers = total_num_layers - start_layer;
   /* Check for overflow */
   assert(start_layer + num_layers >= start_layer);
   assert(start_layer + num_layers <= total_num_layers);

   return num_layers;
}
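
/*
 * Worked example (illustrative): if level 2 of a miptree has 6 logical
 * layers, then
 *
 *    miptree_layer_range_length(mt, 2, 4, INTEL_REMAINING_LAYERS) == 2
 *
 * i.e. an open-ended request starting at layer 4 is clamped to the two
 * layers (4 and 5) that actually remain at that level.
 */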

void
intel_miptree_prepare_access(struct brw_context *brw,
                             struct intel_mipmap_tree *mt,
                             uint32_t start_level, uint32_t num_levels,
                             uint32_t start_layer, uint32_t num_layers,
                             bool aux_supported, bool fast_clear_supported)
{
   num_levels = miptree_level_range_length(mt, start_level, num_levels);

   if (_mesa_is_format_color_format(mt->format)) {
      if (!mt->mcs_buf)
         return;

      if (mt->surf.samples > 1) {
         assert(start_level == 0 && num_levels == 1);
         const uint32_t level_layers =
            miptree_layer_range_length(mt, 0, start_layer, num_layers);
         for (uint32_t a = 0; a < level_layers; a++) {
            intel_miptree_prepare_mcs_access(brw, mt, start_layer + a,
                                             aux_supported,
                                             fast_clear_supported);
         }
      } else {
         for (uint32_t l = 0; l < num_levels; l++) {
            const uint32_t level = start_level + l;
            const uint32_t level_layers =
               miptree_layer_range_length(mt, level, start_layer, num_layers);
            for (uint32_t a = 0; a < level_layers; a++) {
               intel_miptree_prepare_ccs_access(brw, mt, level,
                                                start_layer + a, aux_supported,
                                                fast_clear_supported);
            }
         }
      }
   } else if (mt->format == MESA_FORMAT_S_UINT8) {
      /* Nothing to do for stencil */
   } else {
      if (!mt->hiz_buf)
         return;

      for (uint32_t l = 0; l < num_levels; l++) {
         const uint32_t level = start_level + l;
         if (!intel_miptree_level_has_hiz(mt, level))
            continue;

         const uint32_t level_layers =
            miptree_layer_range_length(mt, level, start_layer, num_layers);
         for (uint32_t a = 0; a < level_layers; a++) {
            intel_miptree_prepare_hiz_access(brw, mt, level, start_layer + a,
                                             aux_supported,
                                             fast_clear_supported);
         }
      }
   }
}
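
/*
 * Illustrative sketch only (this caller is hypothetical): before a unit
 * that understands neither compression nor fast-clear color reads level 1,
 * layer 0 of a color miptree, a caller would do
 *
 *    intel_miptree_prepare_access(brw, mt,
 *                                 1, 1,     (one level, starting at 1)
 *                                 0, 1,     (one layer, starting at 0)
 *                                 false,    (aux not supported by reader)
 *                                 false);   (clear color not supported)
 *
 * which performs whatever resolves are needed to bring those slices into a
 * state the reader can consume.
 */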

void
intel_miptree_finish_write(struct brw_context *brw,
                           struct intel_mipmap_tree *mt, uint32_t level,
                           uint32_t start_layer, uint32_t num_layers,
                           bool written_with_aux)
{
   num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers);

   if (_mesa_is_format_color_format(mt->format)) {
      if (!mt->mcs_buf)
         return;

      if (mt->surf.samples > 1) {
         assert(level == 0);
         for (uint32_t a = 0; a < num_layers; a++) {
            intel_miptree_finish_mcs_write(brw, mt, start_layer + a,
                                           written_with_aux);
         }
      } else {
         for (uint32_t a = 0; a < num_layers; a++) {
            intel_miptree_finish_ccs_write(brw, mt, level, start_layer + a,
                                           written_with_aux);
         }
      }
   } else if (mt->format == MESA_FORMAT_S_UINT8) {
      /* Nothing to do for stencil */
   } else {
      if (!intel_miptree_level_has_hiz(mt, level))
         return;

      for (uint32_t a = 0; a < num_layers; a++) {
         intel_miptree_finish_hiz_write(brw, mt, level, start_layer + a,
                                        written_with_aux);
      }
   }
}

enum isl_aux_state
intel_miptree_get_aux_state(const struct intel_mipmap_tree *mt,
                            uint32_t level, uint32_t layer)
{
   intel_miptree_check_level_layer(mt, level, layer);

   if (_mesa_is_format_color_format(mt->format)) {
      assert(mt->mcs_buf != NULL);
      assert(mt->surf.samples == 1 ||
             mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
   } else if (mt->format == MESA_FORMAT_S_UINT8) {
      unreachable("Cannot get aux state for stencil");
   } else {
      assert(intel_miptree_level_has_hiz(mt, level));
   }

   return mt->aux_state[level][layer];
}

void
intel_miptree_set_aux_state(struct brw_context *brw,
                            struct intel_mipmap_tree *mt, uint32_t level,
                            uint32_t start_layer, uint32_t num_layers,
                            enum isl_aux_state aux_state)
{
   num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers);

   if (_mesa_is_format_color_format(mt->format)) {
      assert(mt->mcs_buf != NULL);
      assert(mt->surf.samples == 1 ||
             mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
   } else if (mt->format == MESA_FORMAT_S_UINT8) {
      unreachable("Cannot set aux state for stencil");
   } else {
      assert(intel_miptree_level_has_hiz(mt, level));
   }

   for (unsigned a = 0; a < num_layers; a++)
      mt->aux_state[level][start_layer + a] = aux_state;
}

/* On Gen9 color buffers may be compressed by the hardware (lossless
 * compression).  There are, however, format restrictions and care needs to
 * be taken that the sampler engine is capable of re-interpreting a buffer
 * with a format different from the one it was originally written with.
 *
 * For example, SRGB formats are not compressible and the sampler engine
 * isn't capable of treating RGBA_UNORM as SRGB_ALPHA.  In such a case the
 * underlying color buffer needs to be resolved so that the sampling surface
 * can be sampled as non-compressed (i.e., without the auxiliary MCS buffer
 * being set).
 */
static bool
can_texture_with_ccs(struct brw_context *brw,
                     struct intel_mipmap_tree *mt,
                     mesa_format view_format)
{
   if (mt->aux_usage != ISL_AUX_USAGE_CCS_E)
      return false;

   enum isl_format isl_mt_format = brw_isl_format_for_mesa_format(mt->format);
   enum isl_format isl_view_format = brw_isl_format_for_mesa_format(view_format);

   if (!isl_formats_are_ccs_e_compatible(&brw->screen->devinfo,
                                         isl_mt_format, isl_view_format)) {
      perf_debug("Incompatible sampling format (%s) for rbc (%s)\n",
                 _mesa_get_format_name(view_format),
                 _mesa_get_format_name(mt->format));
      return false;
   }

   return true;
}
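
/*
 * Illustrative example of the check above (the view format is hypothetical
 * for this miptree): sampling an R8G8B8A8_UNORM miptree through an sRGB
 * view fails the isl_formats_are_ccs_e_compatible() test, so
 *
 *    can_texture_with_ccs(brw, mt, MESA_FORMAT_R8G8B8A8_SRGB)
 *
 * returns false and the callers below resolve the surface so it can be
 * sampled without aux.
 */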

static void
intel_miptree_prepare_texture_slices(struct brw_context *brw,
                                     struct intel_mipmap_tree *mt,
                                     mesa_format view_format,
                                     uint32_t start_level, uint32_t num_levels,
                                     uint32_t start_layer, uint32_t num_layers,
                                     bool *aux_supported_out)
{
   bool aux_supported, clear_supported;
   if (_mesa_is_format_color_format(mt->format)) {
      if (mt->surf.samples > 1) {
         aux_supported = clear_supported = true;
      } else {
         aux_supported = can_texture_with_ccs(brw, mt, view_format);

         /* Clear color is specified as ints or floats and the conversion is
          * done by the sampler.  If we have a texture view, we would have to
          * perform the clear color conversion manually.  Just disable clear
          * color.
          */
         clear_supported = aux_supported && (mt->format == view_format);
      }
   } else if (mt->format == MESA_FORMAT_S_UINT8) {
      aux_supported = clear_supported = false;
   } else {
      aux_supported = clear_supported = intel_miptree_sample_with_hiz(brw, mt);
   }

   intel_miptree_prepare_access(brw, mt, start_level, num_levels,
                                start_layer, num_layers,
                                aux_supported, clear_supported);
   if (aux_supported_out)
      *aux_supported_out = aux_supported;
}

void
intel_miptree_prepare_texture(struct brw_context *brw,
                              struct intel_mipmap_tree *mt,
                              mesa_format view_format,
                              bool *aux_supported_out)
{
   intel_miptree_prepare_texture_slices(brw, mt, view_format,
                                        0, INTEL_REMAINING_LEVELS,
                                        0, INTEL_REMAINING_LAYERS,
                                        aux_supported_out);
}

void
intel_miptree_prepare_image(struct brw_context *brw,
                            struct intel_mipmap_tree *mt)
{
   /* The data port doesn't understand any compression */
   intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
                                0, INTEL_REMAINING_LAYERS, false, false);
}

void
intel_miptree_prepare_fb_fetch(struct brw_context *brw,
                               struct intel_mipmap_tree *mt, uint32_t level,
                               uint32_t start_layer, uint32_t num_layers)
{
   intel_miptree_prepare_texture_slices(brw, mt, mt->format, level, 1,
                                        start_layer, num_layers, NULL);
}

void
intel_miptree_prepare_render(struct brw_context *brw,
                             struct intel_mipmap_tree *mt, uint32_t level,
                             uint32_t start_layer, uint32_t layer_count,
                             bool srgb_enabled)
{
   /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of
    * the single-sampled color renderbuffers because the CCS buffer isn't
    * supported for SRGB formats.  This only matters if FRAMEBUFFER_SRGB is
    * enabled because otherwise the surface state will be programmed with
    * the linear equivalent format anyway.
    */
   if (brw->gen == 9 && srgb_enabled && mt->surf.samples == 1 &&
       _mesa_get_srgb_format_linear(mt->format) != mt->format) {

      /* Lossless compression is not supported for SRGB formats, so it
       * should be impossible to get here with such surfaces.
       */
      assert(mt->aux_usage != ISL_AUX_USAGE_CCS_E);
      intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
                                   false, false);
   }
}

void
intel_miptree_finish_render(struct brw_context *brw,
                            struct intel_mipmap_tree *mt, uint32_t level,
                            uint32_t start_layer, uint32_t layer_count)
{
   assert(_mesa_is_format_color_format(mt->format));
   intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
                              mt->mcs_buf != NULL);
}

void
intel_miptree_prepare_depth(struct brw_context *brw,
                            struct intel_mipmap_tree *mt, uint32_t level,
                            uint32_t start_layer, uint32_t layer_count)
{
   intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
                                mt->hiz_buf != NULL, mt->hiz_buf != NULL);
}

void
intel_miptree_finish_depth(struct brw_context *brw,
                           struct intel_mipmap_tree *mt, uint32_t level,
                           uint32_t start_layer, uint32_t layer_count,
                           bool depth_written)
{
   if (depth_written) {
      intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
                                 mt->hiz_buf != NULL);
   }
}

/**
 * Make it possible to share the BO backing the given miptree with another
 * process or another miptree.
 *
 * Fast color clears are unsafe with shared buffers, so we need to resolve
 * and then discard the MCS buffer, if present.  We also set mt->aux_usage
 * to ISL_AUX_USAGE_NONE to ensure that no aux buffer gets allocated in the
 * future.
 *
 * HiZ is similarly unsafe with shared buffers.
 */
void
intel_miptree_make_shareable(struct brw_context *brw,
                             struct intel_mipmap_tree *mt)
{
   /* MCS buffers are also used for multisample buffers, but we can't resolve
    * away a multisample MCS buffer because it's an integral part of how the
    * pixel data is stored.  Fortunately this code path should never be
    * reached for multisample buffers.
    */
   assert(mt->surf.msaa_layout == ISL_MSAA_LAYOUT_NONE ||
          mt->surf.samples == 1);

   intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
                                0, INTEL_REMAINING_LAYERS, false, false);

   if (mt->mcs_buf) {
      brw_bo_unreference(mt->mcs_buf->bo);
      free(mt->mcs_buf);
      mt->mcs_buf = NULL;

      /* Any pending MCS/CCS operations are no longer needed.  Trying to
       * execute any will likely crash due to the missing aux buffer.  So
       * let's delete all pending ops.
       */
      free(mt->aux_state);
      mt->aux_state = NULL;
   }

   if (mt->hiz_buf) {
      intel_miptree_aux_buffer_free(mt->hiz_buf);
      mt->hiz_buf = NULL;

      for (uint32_t l = mt->first_level; l <= mt->last_level; ++l) {
         mt->level[l].has_hiz = false;
      }

      /* Any pending HiZ operations are no longer needed.  Trying to execute
       * any will likely crash due to the missing aux buffer.  So let's
       * delete all pending ops.
       */
      free(mt->aux_state);
      mt->aux_state = NULL;
   }

   mt->aux_usage = ISL_AUX_USAGE_NONE;
}


/**
 * \brief Get pointer offset into stencil buffer.
 *
 * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
 * must decode the tile's layout in software.
 *
 * See
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
 *     Format.
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
 *
 * Even though the returned offset is always positive, the return type is
 * signed due to
 *    commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
 *    mesa: Fix return type of _mesa_get_format_bytes() (#37351)
 */
static intptr_t
intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
{
   uint32_t tile_size = 4096;
   uint32_t tile_width = 64;
   uint32_t tile_height = 64;
   uint32_t row_size = 64 * stride / 2; /* Two rows are interleaved. */

   uint32_t tile_x = x / tile_width;
   uint32_t tile_y = y / tile_height;

   /* The byte's address relative to the tile's base address. */
   uint32_t byte_x = x % tile_width;
   uint32_t byte_y = y % tile_height;

   uintptr_t u = tile_y * row_size
               + tile_x * tile_size
               + 512 * (byte_x / 8)
               +  64 * (byte_y / 8)
               +  32 * ((byte_y / 4) % 2)
               +  16 * ((byte_x / 4) % 2)
               +   8 * ((byte_y / 2) % 2)
               +   4 * ((byte_x / 2) % 2)
               +   2 * (byte_y % 2)
               +   1 * (byte_x % 2);

   if (swizzled) {
      /* adjust for bit6 swizzling */
      if (((byte_x / 8) % 2) == 1) {
         if (((byte_y / 8) % 2) == 0) {
            u += 64;
         } else {
            u -= 64;
         }
      }
   }

   return u;
}
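
/*
 * Worked example for the decode above (illustrative numbers): with
 * stride = 128, x = 70, y = 3 and swizzled = false, we get tile_x = 1,
 * tile_y = 0, byte_x = 6, byte_y = 3, and row_size = 64 * 128 / 2 = 4096:
 *
 *    u = 0 * 4096      (tile_y * row_size)
 *      + 1 * 4096      (tile_x * tile_size)
 *      + 16 + 8 + 4 + 2
 *      = 4126
 *
 * i.e. the byte lands 30 bytes past the start of the second 4 KiB tile.
 */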

void
intel_miptree_updownsample(struct brw_context *brw,
                           struct intel_mipmap_tree *src,
                           struct intel_mipmap_tree *dst)
{
   unsigned src_w = src->surf.logical_level0_px.width;
   unsigned src_h = src->surf.logical_level0_px.height;
   unsigned dst_w = dst->surf.logical_level0_px.width;
   unsigned dst_h = dst->surf.logical_level0_px.height;

   brw_blorp_blit_miptrees(brw,
                           src, 0 /* level */, 0 /* layer */,
                           src->format, SWIZZLE_XYZW,
                           dst, 0 /* level */, 0 /* layer */, dst->format,
                           0, 0, src_w, src_h,
                           0, 0, dst_w, dst_h,
                           GL_NEAREST, false, false /*mirror x, y*/,
                           false, false);

   if (src->stencil_mt) {
      src_w = src->stencil_mt->surf.logical_level0_px.width;
      src_h = src->stencil_mt->surf.logical_level0_px.height;
      dst_w = dst->stencil_mt->surf.logical_level0_px.width;
      dst_h = dst->stencil_mt->surf.logical_level0_px.height;

      brw_blorp_blit_miptrees(brw,
                              src->stencil_mt, 0 /* level */, 0 /* layer */,
                              src->stencil_mt->format, SWIZZLE_XYZW,
                              dst->stencil_mt, 0 /* level */, 0 /* layer */,
                              dst->stencil_mt->format,
                              0, 0, src_w, src_h,
                              0, 0, dst_w, dst_h,
                              GL_NEAREST, false, false /*mirror x, y*/,
                              false, false /* decode/encode srgb */);
   }
}

void
intel_update_r8stencil(struct brw_context *brw,
                       struct intel_mipmap_tree *mt)
{
   assert(brw->gen >= 7);
   struct intel_mipmap_tree *src =
      mt->format == MESA_FORMAT_S_UINT8 ? mt : mt->stencil_mt;
   if (!src || brw->gen >= 8 || !src->r8stencil_needs_update)
      return;

   assert(src->surf.size > 0);

   if (!mt->r8stencil_mt) {
      assert(brw->gen > 6); /* Handle MIPTREE_LAYOUT_GEN6_HIZ_STENCIL */
      mt->r8stencil_mt = make_surface(
                            brw,
                            src->target,
                            MESA_FORMAT_R_UINT8,
                            src->first_level, src->last_level,
                            src->surf.logical_level0_px.width,
                            src->surf.logical_level0_px.height,
                            src->surf.dim == ISL_SURF_DIM_3D ?
                               src->surf.logical_level0_px.depth :
                               src->surf.logical_level0_px.array_len,
                            src->surf.samples,
                            ISL_TILING_Y0_BIT,
                            ISL_SURF_USAGE_TEXTURE_BIT,
                            BO_ALLOC_FOR_RENDER, 0, NULL);
      assert(mt->r8stencil_mt);
   }

   struct intel_mipmap_tree *dst = mt->r8stencil_mt;

   for (int level = src->first_level; level <= src->last_level; level++) {
      const unsigned depth = src->surf.dim == ISL_SURF_DIM_3D ?
         minify(src->surf.phys_level0_sa.depth, level) :
         src->surf.phys_level0_sa.array_len;

      for (unsigned layer = 0; layer < depth; layer++) {
         brw_blorp_copy_miptrees(brw,
                                 src, level, layer,
                                 dst, level, layer,
                                 0, 0, 0, 0,
                                 minify(src->surf.logical_level0_px.width,
                                        level),
                                 minify(src->surf.logical_level0_px.height,
                                        level));
      }
   }

   brw_render_cache_set_check_flush(brw, dst->bo);
   src->r8stencil_needs_update = false;
}

static void *
intel_miptree_map_raw(struct brw_context *brw,
                      struct intel_mipmap_tree *mt,
                      GLbitfield mode)
{
   struct brw_bo *bo = mt->bo;

   if (brw_batch_references(&brw->batch, bo))
      intel_batchbuffer_flush(brw);

   return brw_bo_map(brw, bo, mode);
}

static void
intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
{
   brw_bo_unmap(mt->bo);
}

static void
intel_miptree_map_gtt(struct brw_context *brw,
                      struct intel_mipmap_tree *mt,
                      struct intel_miptree_map *map,
                      unsigned int level, unsigned int slice)
{
   unsigned int bw, bh;
   void *base;
   unsigned int image_x, image_y;
   intptr_t x = map->x;
   intptr_t y = map->y;

   /* For compressed formats, the stride is the number of bytes per
    * row of blocks.  intel_miptree_get_image_offset() already does
    * the divide.
    */
   _mesa_get_format_block_size(mt->format, &bw, &bh);
   assert(y % bh == 0);
   assert(x % bw == 0);
   y /= bh;
   x /= bw;

   base = intel_miptree_map_raw(brw, mt, map->mode);

   if (base == NULL)
      map->ptr = NULL;
   else {
      base += mt->offset;

      /* Note that in the case of cube maps, the caller must have passed the
       * slice number referencing the face.
       */
      intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
      x += image_x;
      y += image_y;

      map->stride = mt->surf.row_pitch;
      map->ptr = base + y * map->stride + x * mt->cpp;
   }

   DBG("%s: %d,%d %dx%d from mt %p (%s) "
       "%"PRIiPTR",%"PRIiPTR" = %p/%d\n", __func__,
       map->x, map->y, map->w, map->h,
       mt, _mesa_get_format_name(mt->format),
       x, y, map->ptr, map->stride);
}

static void
intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
{
   intel_miptree_unmap_raw(mt);
}

static void
intel_miptree_map_blit(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       struct intel_miptree_map *map,
                       unsigned int level, unsigned int slice)
{
   map->linear_mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
                                         /* first_level */ 0,
                                         /* last_level */ 0,
                                         map->w, map->h, 1,
                                         /* samples */ 1,
                                         MIPTREE_LAYOUT_TILING_NONE);

   if (!map->linear_mt) {
      fprintf(stderr, "Failed to allocate blit temporary\n");
      goto fail;
   }
   map->stride = map->linear_mt->surf.row_pitch;

   /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies
    * no INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in
    * unless invalidate is set, since we'll be writing the whole rectangle
    * from our temporary buffer back out.
    */
   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
      if (!intel_miptree_copy(brw,
                              mt, level, slice, map->x, map->y,
                              map->linear_mt, 0, 0, 0, 0,
                              map->w, map->h)) {
         fprintf(stderr, "Failed to blit\n");
         goto fail;
      }
   }

   map->ptr = intel_miptree_map_raw(brw, map->linear_mt, map->mode);

   DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
       map->x, map->y, map->w, map->h,
       mt, _mesa_get_format_name(mt->format),
       level, slice, map->ptr, map->stride);

   return;

fail:
   intel_miptree_release(&map->linear_mt);
   map->ptr = NULL;
   map->stride = 0;
}

static void
intel_miptree_unmap_blit(struct brw_context *brw,
                         struct intel_mipmap_tree *mt,
                         struct intel_miptree_map *map,
                         unsigned int level,
                         unsigned int slice)
{
   struct gl_context *ctx = &brw->ctx;

   intel_miptree_unmap_raw(map->linear_mt);

   if (map->mode & GL_MAP_WRITE_BIT) {
      bool ok = intel_miptree_copy(brw,
                                   map->linear_mt, 0, 0, 0, 0,
                                   mt, level, slice, map->x, map->y,
                                   map->w, map->h);
      WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
   }

   intel_miptree_release(&map->linear_mt);
}

/**
 * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
 */
#if defined(USE_SSE41)
static void
intel_miptree_map_movntdqa(struct brw_context *brw,
                           struct intel_mipmap_tree *mt,
                           struct intel_miptree_map *map,
                           unsigned int level, unsigned int slice)
{
   assert(map->mode & GL_MAP_READ_BIT);
   assert(!(map->mode & GL_MAP_WRITE_BIT));

   DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
       map->x, map->y, map->w, map->h,
       mt, _mesa_get_format_name(mt->format),
       level, slice, map->ptr, map->stride);

   /* Map the original image */
   uint32_t image_x;
   uint32_t image_y;
   intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
   image_x += map->x;
   image_y += map->y;

   void *src = intel_miptree_map_raw(brw, mt, map->mode);
   if (!src)
      return;

   src += mt->offset;

   src += image_y * mt->surf.row_pitch;
   src += image_x * mt->cpp;

   /* Due to the pixel offsets for the particular image being mapped, our
    * src pointer may not be 16-byte aligned.  However, if the pitch is
    * divisible by 16, then the amount by which it's misaligned will remain
    * consistent from row to row.
    */
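   /* For example (illustrative): if the pitch is a multiple of 16 and the
    * image offset leaves src 8 bytes past a 16-byte boundary, then
    * misalignment below is 8 for every row, map->ptr ends up 8 bytes into
    * map->buffer, and each row copied by _mesa_streaming_load_memcpy() sees
    * the same relative alignment.
    */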
   assert((mt->surf.row_pitch % 16) == 0);
   const int misalignment = ((uintptr_t) src) & 15;

   /* Create an untiled temporary buffer for the mapping. */
   const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w);

   map->stride = ALIGN(misalignment + width_bytes, 16);

   map->buffer = _mesa_align_malloc(map->stride * map->h, 16);
   if (!map->buffer) {
      /* Leave map->ptr NULL so the caller releases the failed mapping. */
      intel_miptree_unmap_raw(mt);
      return;
   }

   /* Offset the destination so it has the same misalignment as src. */
   map->ptr = map->buffer + misalignment;

   assert((((uintptr_t) map->ptr) & 15) == misalignment);

   for (uint32_t y = 0; y < map->h; y++) {
      void *dst_ptr = map->ptr + y * map->stride;
      void *src_ptr = src + y * mt->surf.row_pitch;

      _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
   }

   intel_miptree_unmap_raw(mt);
}

static void
intel_miptree_unmap_movntdqa(struct brw_context *brw,
                             struct intel_mipmap_tree *mt,
                             struct intel_miptree_map *map,
                             unsigned int level,
                             unsigned int slice)
{
   _mesa_align_free(map->buffer);
   map->buffer = NULL;
   map->ptr = NULL;
}
#endif

static void
intel_miptree_map_s8(struct brw_context *brw,
                     struct intel_mipmap_tree *mt,
                     struct intel_miptree_map *map,
                     unsigned int level, unsigned int slice)
{
   map->stride = map->w;
   map->buffer = map->ptr = malloc(map->stride * map->h);
   if (!map->buffer)
      return;

   /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies
    * no INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in
    * unless invalidate is set, since we'll be writing the whole rectangle
    * from our temporary buffer back out.
    */
   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
      uint8_t *untiled_s8_map = map->ptr;
      uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_READ_BIT);
      unsigned int image_x, image_y;

      intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);

      for (uint32_t y = 0; y < map->h; y++) {
         for (uint32_t x = 0; x < map->w; x++) {
            ptrdiff_t offset = intel_offset_S8(mt->surf.row_pitch,
                                               x + image_x + map->x,
                                               y + image_y + map->y,
                                               brw->has_swizzling);
            untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
         }
      }

      intel_miptree_unmap_raw(mt);

      DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__,
          map->x, map->y, map->w, map->h,
          mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
   } else {
      DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
          map->x, map->y, map->w, map->h,
          mt, map->ptr, map->stride);
   }
}

static void
intel_miptree_unmap_s8(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       struct intel_miptree_map *map,
                       unsigned int level,
                       unsigned int slice)
{
   if (map->mode & GL_MAP_WRITE_BIT) {
      unsigned int image_x, image_y;
      uint8_t *untiled_s8_map = map->ptr;
      uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT);

      intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);

      for (uint32_t y = 0; y < map->h; y++) {
         for (uint32_t x = 0; x < map->w; x++) {
            ptrdiff_t offset = intel_offset_S8(mt->surf.row_pitch,
                                               image_x + x + map->x,
                                               image_y + y + map->y,
                                               brw->has_swizzling);
            tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
         }
      }

      intel_miptree_unmap_raw(mt);
   }

   free(map->buffer);
}

static void
intel_miptree_map_etc(struct brw_context *brw,
                      struct intel_mipmap_tree *mt,
                      struct intel_miptree_map *map,
                      unsigned int level,
                      unsigned int slice)
{
   assert(mt->etc_format != MESA_FORMAT_NONE);
   if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
      assert(mt->format == MESA_FORMAT_R8G8B8X8_UNORM);
   }

   assert(map->mode & GL_MAP_WRITE_BIT);
   assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);

   map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
   map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
                                                map->w, map->h, 1));
   map->ptr = map->buffer;
}

static void
intel_miptree_unmap_etc(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
                        struct intel_miptree_map *map,
                        unsigned int level,
                        unsigned int slice)
{
   uint32_t image_x;
   uint32_t image_y;
   intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);

   image_x += map->x;
   image_y += map->y;

   uint8_t *dst = intel_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT)
                + image_y * mt->surf.row_pitch
                + image_x * mt->cpp;

   if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
      _mesa_etc1_unpack_rgba8888(dst, mt->surf.row_pitch,
                                 map->ptr, map->stride,
                                 map->w, map->h);
   else
      _mesa_unpack_etc2_format(dst, mt->surf.row_pitch,
                               map->ptr, map->stride,
                               map->w, map->h, mt->etc_format);

   intel_miptree_unmap_raw(mt);
   free(map->buffer);
}

/**
 * Mapping function for packed depth/stencil miptrees backed by real separate
 * miptrees for depth and stencil.
 *
 * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
 * separate from the depth buffer.  Yet at the GL API level, we have to expose
 * packed depth/stencil textures and FBO attachments, and Mesa core expects to
 * be able to map that memory for texture storage and glReadPixels-type
 * operations.  We give Mesa core that access by mallocing a temporary and
 * copying the data between the actual backing store and the temporary.
 */
static void
intel_miptree_map_depthstencil(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               struct intel_miptree_map *map,
                               unsigned int level, unsigned int slice)
{
   struct intel_mipmap_tree *z_mt = mt;
   struct intel_mipmap_tree *s_mt = mt->stencil_mt;
   bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
   int packed_bpp = map_z32f_x24s8 ? 8 : 4;

   map->stride = map->w * packed_bpp;
   map->buffer = map->ptr = malloc(map->stride * map->h);
   if (!map->buffer)
      return;

   /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies
    * no INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in
    * unless invalidate is set, since we'll be writing the whole rectangle
    * from our temporary buffer back out.
    */
   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
      uint32_t *packed_map = map->ptr;
      uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_READ_BIT);
      uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_READ_BIT);
      unsigned int s_image_x, s_image_y;
      unsigned int z_image_x, z_image_y;

      intel_miptree_get_image_offset(s_mt, level, slice,
                                     &s_image_x, &s_image_y);
      intel_miptree_get_image_offset(z_mt, level, slice,
                                     &z_image_x, &z_image_y);

      for (uint32_t y = 0; y < map->h; y++) {
         for (uint32_t x = 0; x < map->w; x++) {
            int map_x = map->x + x, map_y = map->y + y;
            ptrdiff_t s_offset = intel_offset_S8(s_mt->surf.row_pitch,
                                                 map_x + s_image_x,
                                                 map_y + s_image_y,
                                                 brw->has_swizzling);
            ptrdiff_t z_offset = ((map_y + z_image_y) *
                                  (z_mt->surf.row_pitch / 4) +
                                  (map_x + z_image_x));
            uint8_t s = s_map[s_offset];
            uint32_t z = z_map[z_offset];

            if (map_z32f_x24s8) {
               packed_map[(y * map->w + x) * 2 + 0] = z;
               packed_map[(y * map->w + x) * 2 + 1] = s;
            } else {
               packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
            }
         }
      }

      intel_miptree_unmap_raw(s_mt);
      intel_miptree_unmap_raw(z_mt);

      DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
          __func__,
          map->x, map->y, map->w, map->h,
          z_mt, map->x + z_image_x, map->y + z_image_y,
          s_mt, map->x + s_image_x, map->y + s_image_y,
          map->ptr, map->stride);
   } else {
      DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
          map->x, map->y, map->w, map->h,
          mt, map->ptr, map->stride);
   }
}

static void
intel_miptree_unmap_depthstencil(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 struct intel_miptree_map *map,
                                 unsigned int level,
                                 unsigned int slice)
{
   struct intel_mipmap_tree *z_mt = mt;
   struct intel_mipmap_tree *s_mt = mt->stencil_mt;
   bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;

   if (map->mode & GL_MAP_WRITE_BIT) {
      uint32_t *packed_map = map->ptr;
      uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_WRITE_BIT);
      uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_WRITE_BIT);
      unsigned int s_image_x, s_image_y;
      unsigned int z_image_x, z_image_y;

      intel_miptree_get_image_offset(s_mt, level, slice,
                                     &s_image_x, &s_image_y);
      intel_miptree_get_image_offset(z_mt, level, slice,
                                     &z_image_x, &z_image_y);

      for (uint32_t y = 0; y < map->h; y++) {
         for (uint32_t x = 0; x < map->w; x++) {
            ptrdiff_t s_offset = intel_offset_S8(s_mt->surf.row_pitch,
                                                 x + s_image_x + map->x,
                                                 y + s_image_y + map->y,
                                                 brw->has_swizzling);
            ptrdiff_t z_offset = ((y + z_image_y + map->y) *
                                  (z_mt->surf.row_pitch / 4) +
                                  (x + z_image_x + map->x));

            if (map_z32f_x24s8) {
               z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
               s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
            } else {
               uint32_t packed = packed_map[y * map->w + x];
               s_map[s_offset] = packed >> 24;
               z_map[z_offset] = packed;
            }
         }
      }

      intel_miptree_unmap_raw(s_mt);
      intel_miptree_unmap_raw(z_mt);

      DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
          __func__,
          map->x, map->y, map->w, map->h,
          z_mt, _mesa_get_format_name(z_mt->format),
          map->x + z_image_x, map->y + z_image_y,
          s_mt, map->x + s_image_x, map->y + s_image_y,
          map->ptr, map->stride);
   }

   free(map->buffer);
}

/**
 * Create and attach a map to the miptree at (level, slice). Return the
 * attached map.
 */
static struct intel_miptree_map*
intel_miptree_attach_map(struct intel_mipmap_tree *mt,
                         unsigned int level,
                         unsigned int slice,
                         unsigned int x,
                         unsigned int y,
                         unsigned int w,
                         unsigned int h,
                         GLbitfield mode)
{
   struct intel_miptree_map *map = calloc(1, sizeof(*map));

   if (!map)
      return NULL;

   assert(mt->level[level].slice[slice].map == NULL);
   mt->level[level].slice[slice].map = map;

   map->mode = mode;
   map->x = x;
   map->y = y;
   map->w = w;
   map->h = h;

   return map;
}

/**
 * Release the map at (level, slice).
 */
static void
intel_miptree_release_map(struct intel_mipmap_tree *mt,
                          unsigned int level,
                          unsigned int slice)
{
   struct intel_miptree_map **map;

   map = &mt->level[level].slice[slice].map;
   free(*map);
   *map = NULL;
}

static bool
can_blit_slice(struct intel_mipmap_tree *mt,
               unsigned int level, unsigned int slice)
{
   /* See intel_miptree_blit() for details on the 32k pitch limit. */
   if (mt->surf.row_pitch >= 32768)
      return false;

   return true;
}

static bool
use_intel_miptree_map_blit(struct brw_context *brw,
                           struct intel_mipmap_tree *mt,
                           GLbitfield mode,
                           unsigned int level,
                           unsigned int slice)
{
   if (brw->has_llc &&
       /* It's probably not worth swapping to the blit ring because of
        * all the overhead involved.
        */
       !(mode & GL_MAP_WRITE_BIT) &&
       !mt->compressed &&
       (mt->surf.tiling == ISL_TILING_X ||
        /* Prior to Sandybridge, the blitter can't handle Y tiling */
        (brw->gen >= 6 && mt->surf.tiling == ISL_TILING_Y0) ||
        /* Fast copy blit on skl+ supports all tiling formats. */
        brw->gen >= 9) &&
       can_blit_slice(mt, level, slice))
      return true;

   if (mt->surf.tiling != ISL_TILING_LINEAR &&
       mt->bo->size >= brw->max_gtt_map_object_size) {
      assert(can_blit_slice(mt, level, slice));
      return true;
   }

   return false;
}

/**
 * Parameter \a out_stride has type ptrdiff_t not because the buffer stride
 * may exceed 32 bits but to diminish the likelihood of subtle bugs caused
 * by pointer arithmetic overflow.
 *
 * If you call this function and use \a out_stride, then you're doing pointer
 * arithmetic on \a out_ptr.  The type of \a out_stride doesn't prevent all
 * bugs.  The caller must still take care to avoid 32-bit overflow errors in
 * all arithmetic expressions that contain buffer offsets and pixel sizes,
 * which usually have type uint32_t or GLuint.
 */
void
intel_miptree_map(struct brw_context *brw,
                  struct intel_mipmap_tree *mt,
                  unsigned int level,
                  unsigned int slice,
                  unsigned int x,
                  unsigned int y,
                  unsigned int w,
                  unsigned int h,
                  GLbitfield mode,
                  void **out_ptr,
                  ptrdiff_t *out_stride)
{
   struct intel_miptree_map *map;

   assert(mt->surf.samples == 1);

   map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
   if (!map) {
      *out_ptr = NULL;
      *out_stride = 0;
      return;
   }

   intel_miptree_access_raw(brw, mt, level, slice,
                            map->mode & GL_MAP_WRITE_BIT);

   if (mt->format == MESA_FORMAT_S_UINT8) {
      intel_miptree_map_s8(brw, mt, map, level, slice);
   } else if (mt->etc_format != MESA_FORMAT_NONE &&
              !(mode & BRW_MAP_DIRECT_BIT)) {
      intel_miptree_map_etc(brw, mt, map, level, slice);
   } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
      intel_miptree_map_depthstencil(brw, mt, map, level, slice);
   } else if (use_intel_miptree_map_blit(brw, mt, mode, level, slice)) {
      intel_miptree_map_blit(brw, mt, map, level, slice);
#if defined(USE_SSE41)
   } else if (!(mode & GL_MAP_WRITE_BIT) &&
              !mt->compressed && cpu_has_sse4_1 &&
              (mt->surf.row_pitch % 16 == 0)) {
      intel_miptree_map_movntdqa(brw, mt, map, level, slice);
#endif
   } else {
      intel_miptree_map_gtt(brw, mt, map, level, slice);
   }

   *out_ptr = map->ptr;
   *out_stride = map->stride;

   if (map->ptr == NULL)
      intel_miptree_release_map(mt, level, slice);
}
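
/*
 * Illustrative (hypothetical) use of the map/unmap pair above, reading back
 * a 16x16 region of level 0, slice 0; `out` is a caller-provided buffer and
 * the copy loop is a placeholder:
 *
 *    void *ptr;
 *    ptrdiff_t stride;
 *
 *    intel_miptree_map(brw, mt, 0, 0, 0, 0, 16, 16,
 *                      GL_MAP_READ_BIT, &ptr, &stride);
 *    if (ptr) {
 *       for (unsigned row = 0; row < 16; row++)
 *          memcpy(out + row * 16 * mt->cpp,
 *                 (const char *) ptr + row * stride, 16 * mt->cpp);
 *       intel_miptree_unmap(brw, mt, 0, 0);
 *    }
 *
 * The key point: rows must be walked with the returned stride, which may be
 * larger than the mapped width in bytes.
 */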

void
intel_miptree_unmap(struct brw_context *brw,
                    struct intel_mipmap_tree *mt,
                    unsigned int level,
                    unsigned int slice)
{
   struct intel_miptree_map *map = mt->level[level].slice[slice].map;

   assert(mt->surf.samples == 1);

   if (!map)
      return;

   DBG("%s: mt %p (%s) level %d slice %d\n", __func__,
       mt, _mesa_get_format_name(mt->format), level, slice);

   if (mt->format == MESA_FORMAT_S_UINT8) {
      intel_miptree_unmap_s8(brw, mt, map, level, slice);
   } else if (mt->etc_format != MESA_FORMAT_NONE &&
              !(map->mode & BRW_MAP_DIRECT_BIT)) {
      intel_miptree_unmap_etc(brw, mt, map, level, slice);
   } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
      intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
   } else if (map->linear_mt) {
      intel_miptree_unmap_blit(brw, mt, map, level, slice);
#if defined(USE_SSE41)
   } else if (map->buffer && cpu_has_sse4_1) {
      intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
#endif
   } else {
      intel_miptree_unmap_gtt(mt);
   }

   intel_miptree_release_map(mt, level, slice);
}

enum isl_surf_dim
get_isl_surf_dim(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY:
      return ISL_SURF_DIM_1D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY:
   case GL_TEXTURE_RECTANGLE:
   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
   case GL_TEXTURE_EXTERNAL_OES:
      return ISL_SURF_DIM_2D;

   case GL_TEXTURE_3D:
      return ISL_SURF_DIM_3D;
   }

   unreachable("Invalid texture target");
}

enum isl_dim_layout
get_isl_dim_layout(const struct gen_device_info *devinfo,
                   enum isl_tiling tiling, GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY:
      return (devinfo->gen >= 9 && tiling == ISL_TILING_LINEAR ?
              ISL_DIM_LAYOUT_GEN9_1D : ISL_DIM_LAYOUT_GEN4_2D);

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY:
   case GL_TEXTURE_RECTANGLE:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
   case GL_TEXTURE_EXTERNAL_OES:
      return ISL_DIM_LAYOUT_GEN4_2D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return (devinfo->gen == 4 ? ISL_DIM_LAYOUT_GEN4_3D :
              ISL_DIM_LAYOUT_GEN4_2D);

   case GL_TEXTURE_3D:
      return (devinfo->gen >= 9 ?
              ISL_DIM_LAYOUT_GEN4_2D : ISL_DIM_LAYOUT_GEN4_3D);
   }

   unreachable("Invalid texture target");
}

enum isl_aux_usage
intel_miptree_get_aux_isl_usage(const struct brw_context *brw,
                                const struct intel_mipmap_tree *mt)
{
   if (mt->hiz_buf)
      return ISL_AUX_USAGE_HIZ;

   if (!mt->mcs_buf)
      return ISL_AUX_USAGE_NONE;

   return mt->aux_usage;
}