[mesa.git] / src / mesa / drivers / dri / i965 / intel_mipmap_tree.c
/*
 * Copyright 2006 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <GL/gl.h>
#include <GL/internal/dri_interface.h>

#include "intel_batchbuffer.h"
#include "intel_image.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
#include "intel_blit.h"
#include "intel_fbo.h"

#include "brw_blorp.h"
#include "brw_context.h"
#include "brw_state.h"

#include "main/enums.h"
#include "main/fbobject.h"
#include "main/formats.h"
#include "main/glformats.h"
#include "main/texcompress_etc.h"
#include "main/teximage.h"
#include "main/streaming-load-memcpy.h"
#include "x86/common_x86_asm.h"

#define FILE_DEBUG_FLAG DEBUG_MIPTREE

static void *intel_miptree_map_raw(struct brw_context *brw,
                                   struct intel_mipmap_tree *mt,
                                   GLbitfield mode);

static void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt);

static bool
intel_miptree_alloc_aux(struct brw_context *brw,
                        struct intel_mipmap_tree *mt);

static bool
is_mcs_supported(const struct brw_context *brw, mesa_format format,
                 uint32_t layout_flags)
{
   /* Prior to Gen7, all MSAA surfaces used IMS layout. */
   if (brw->gen < 7)
      return false;

   /* In Gen7, IMS layout is only used for depth and stencil buffers. */
   switch (_mesa_get_format_base_format(format)) {
   case GL_DEPTH_COMPONENT:
   case GL_STENCIL_INDEX:
   case GL_DEPTH_STENCIL:
      return false;
   default:
      /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
       *
       *   This field must be set to 0 for all SINT MSRTs when all RT channels
       *   are not written
       *
       * In practice this means that we have to disable MCS for all signed
       * integer MSAA buffers. The alternative, to disable MCS only when one
       * of the render target channels is disabled, is impractical because it
       * would require converting between CMS and UMS MSAA layouts on the fly,
       * which is expensive.
       */
      if (brw->gen == 7 && _mesa_get_format_datatype(format) == GL_INT) {
         return false;
      } else if (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) {
         /* We can't use the CMS layout because it uses an aux buffer, the MCS
          * buffer. So fall back to UMS, which is identical to CMS without the
          * MCS. */
         return false;
      } else {
         return true;
      }
   }
}

static bool
intel_tiling_supports_ccs(const struct brw_context *brw,
                          enum isl_tiling tiling)
{
   /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
    * Target(s)", beneath the "Fast Color Clear" bullet (p326):
    *
    *     - Support is limited to tiled render targets.
    *
    * Gen9 changes the restriction to Y-tile only.
    */
   if (brw->gen >= 9)
      return tiling == ISL_TILING_Y0;
   else if (brw->gen >= 7)
      return tiling != ISL_TILING_LINEAR;
   else
      return false;
}

/**
 * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
 * can be used. This doesn't (and should not) inspect any of the properties of
 * the miptree's BO.
 *
 * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 * beneath the "Fast Color Clear" bullet (p326):
 *
 *     - Support is for non-mip-mapped and non-array surface types only.
 *
 * And then later, on p327:
 *
 *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
 *       64bpp, and 128bpp.
 *
 * From the Skylake documentation, it is made clear that X-tiling is no longer
 * supported:
 *
 *     - MCS and Lossless compression is supported for TiledY/TileYs/TileYf
 *       non-MSRTs only.
 */
static bool
intel_miptree_supports_ccs(struct brw_context *brw,
                           const struct intel_mipmap_tree *mt)
{
   /* MCS support does not exist prior to Gen7 */
   if (brw->gen < 7)
      return false;

   /* This function applies only to non-multisampled render targets. */
   if (mt->surf.samples > 1)
      return false;

   /* MCS is only supported for color buffers */
   switch (_mesa_get_format_base_format(mt->format)) {
   case GL_DEPTH_COMPONENT:
   case GL_DEPTH_STENCIL:
   case GL_STENCIL_INDEX:
      return false;
   }

   if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
      return false;

   const bool mip_mapped = mt->first_level != 0 || mt->last_level != 0;
   const bool arrayed = mt->surf.logical_level0_px.array_len > 1 ||
                        mt->surf.logical_level0_px.depth > 1;

   if (arrayed) {
      /* Multisample surfaces with the CMS layout are not layered surfaces,
       * yet still have physical_depth0 > 1. Assert that we don't
       * accidentally reject a multisampled surface here. We should have
       * rejected it earlier by explicitly checking the sample count.
       */
      assert(mt->surf.samples == 1);
   }

   /* Handle the hardware restrictions...
    *
    * All GENs have the following restriction: "MCS buffer for non-MSRT is
    * supported only for RT formats 32bpp, 64bpp, and 128bpp."
    *
    * From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652: (Color Clear of
    * Non-MultiSampler Render Target Restrictions) Support is for
    * non-mip-mapped and non-array surface types only.
    *
    * From the BDW PRM Volume 7: 3D-Media-GPGPU, page 649: (Color Clear of
    * Non-MultiSampler Render Target Restriction). Mip-mapped and arrayed
    * surfaces are supported with MCS buffer layout with these alignments in
    * the RT space: Horizontal Alignment = 256 and Vertical Alignment = 128.
    *
    * From the SKL PRM Volume 7: 3D-Media-GPGPU, page 632: (Color Clear of
    * Non-MultiSampler Render Target Restriction). Mip-mapped and arrayed
    * surfaces are supported with MCS buffer layout with these alignments in
    * the RT space: Horizontal Alignment = 128 and Vertical Alignment = 64.
    */
   if (brw->gen < 8 && (mip_mapped || arrayed))
      return false;

   /* There's no point in using an MCS buffer if the surface isn't in a
    * renderable format.
    */
   if (!brw->mesa_format_supports_render[mt->format])
      return false;

   if (brw->gen >= 9) {
      mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
      const enum isl_format isl_format =
         brw_isl_format_for_mesa_format(linear_format);
      return isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format);
   } else
      return true;
}

static bool
intel_tiling_supports_hiz(const struct brw_context *brw,
                          enum isl_tiling tiling)
{
   if (brw->gen < 6)
      return false;

   return tiling == ISL_TILING_Y0;
}

static bool
intel_miptree_supports_hiz(const struct brw_context *brw,
                           const struct intel_mipmap_tree *mt)
{
   if (!brw->has_hiz)
      return false;

   switch (mt->format) {
   case MESA_FORMAT_Z_FLOAT32:
   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
   case MESA_FORMAT_Z24_UNORM_X8_UINT:
   case MESA_FORMAT_Z24_UNORM_S8_UINT:
   case MESA_FORMAT_Z_UNORM16:
      return true;
   default:
      return false;
   }
}

static bool
intel_miptree_supports_ccs_e(struct brw_context *brw,
                             const struct intel_mipmap_tree *mt)
{
   /* For now compression is only enabled for integer formats even though
    * there are also supported floating point formats. This is a heuristic
    * decision based on current public benchmarks: in no case did these
    * formats provide any improvement, and a few cases were seen to regress.
    * Hence these are left to be enabled in the future when they are known
    * to improve things.
    */
   if (_mesa_get_format_datatype(mt->format) == GL_FLOAT)
      return false;

   if (!intel_miptree_supports_ccs(brw, mt))
      return false;

   /* Fast clear can also be used to clear srgb surfaces by using the
    * equivalent linear format. This trick, however, can't be extended to be
    * used with lossless compression and therefore a check is needed to see
    * if the format really is linear.
    */
   return _mesa_get_srgb_format_linear(mt->format) == mt->format;
}

/**
 * Determine depth format corresponding to a depth+stencil format,
 * for separate stencil.
 */
mesa_format
intel_depth_format_for_depthstencil_format(mesa_format format) {
   switch (format) {
   case MESA_FORMAT_Z24_UNORM_S8_UINT:
      return MESA_FORMAT_Z24_UNORM_X8_UINT;
   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
      return MESA_FORMAT_Z_FLOAT32;
   default:
      return format;
   }
}

static bool
create_mapping_table(GLenum target, unsigned first_level, unsigned last_level,
                     unsigned depth0, struct intel_mipmap_level *table)
{
   for (unsigned level = first_level; level <= last_level; level++) {
      const unsigned d =
         target == GL_TEXTURE_3D ? minify(depth0, level) : depth0;

      table[level].slice = calloc(d, sizeof(*table[0].slice));
      if (!table[level].slice)
         goto unwind;
   }

   return true;

unwind:
   for (unsigned level = first_level; level <= last_level; level++)
      free(table[level].slice);

   return false;
}

static bool
needs_separate_stencil(const struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       mesa_format format, uint32_t layout_flags)
{
   if (layout_flags & MIPTREE_LAYOUT_FOR_BO)
      return false;

   if (_mesa_get_format_base_format(format) != GL_DEPTH_STENCIL)
      return false;

   if (brw->must_use_separate_stencil)
      return true;

   return brw->has_separate_stencil &&
          intel_miptree_supports_hiz(brw, mt);
}

/**
 * Choose the aux usage for this miptree. This function must be called fairly
 * late in the miptree create process after we have a tiling.
 */
static void
intel_miptree_choose_aux_usage(struct brw_context *brw,
                               struct intel_mipmap_tree *mt)
{
   assert(mt->aux_usage == ISL_AUX_USAGE_NONE);

   const unsigned no_flags = 0;
   if (mt->surf.samples > 1 && is_mcs_supported(brw, mt->format, no_flags)) {
      assert(mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
      mt->aux_usage = ISL_AUX_USAGE_MCS;
   } else if (intel_tiling_supports_ccs(brw, mt->surf.tiling) &&
              intel_miptree_supports_ccs(brw, mt)) {
      if (!unlikely(INTEL_DEBUG & DEBUG_NO_RBC) &&
          brw->gen >= 9 && !mt->is_scanout &&
          intel_miptree_supports_ccs_e(brw, mt)) {
         mt->aux_usage = ISL_AUX_USAGE_CCS_E;
      } else {
         mt->aux_usage = ISL_AUX_USAGE_CCS_D;
      }
   } else if (intel_tiling_supports_hiz(brw, mt->surf.tiling) &&
              intel_miptree_supports_hiz(brw, mt)) {
      mt->aux_usage = ISL_AUX_USAGE_HIZ;
   }

   /* We can do fast-clear on all auxiliary surface types that are
    * allocated through the normal texture creation paths.
    */
   if (mt->aux_usage != ISL_AUX_USAGE_NONE)
      mt->supports_fast_clear = true;
}

/**
 * Choose an appropriate uncompressed format for a requested
 * compressed format, if unsupported.
 */
mesa_format
intel_lower_compressed_format(struct brw_context *brw, mesa_format format)
{
   /* No need to lower ETC formats on these platforms,
    * they are supported natively.
    */
   if (brw->gen >= 8 || brw->is_baytrail)
      return format;

   switch (format) {
   case MESA_FORMAT_ETC1_RGB8:
      return MESA_FORMAT_R8G8B8X8_UNORM;
   case MESA_FORMAT_ETC2_RGB8:
      return MESA_FORMAT_R8G8B8X8_UNORM;
   case MESA_FORMAT_ETC2_SRGB8:
   case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
   case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
      return MESA_FORMAT_B8G8R8A8_SRGB;
   case MESA_FORMAT_ETC2_RGBA8_EAC:
   case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
      return MESA_FORMAT_R8G8B8A8_UNORM;
   case MESA_FORMAT_ETC2_R11_EAC:
      return MESA_FORMAT_R_UNORM16;
   case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
      return MESA_FORMAT_R_SNORM16;
   case MESA_FORMAT_ETC2_RG11_EAC:
      return MESA_FORMAT_R16G16_UNORM;
   case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
      return MESA_FORMAT_R16G16_SNORM;
   default:
      /* Non ETC1 / ETC2 format */
      return format;
   }
}
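
/* For illustration: on pre-Gen8 parts other than Baytrail, the texture
 * therefore lives in the lowered, uncompressed format chosen above.
 * miptree_create() records the requested format in mt->etc_format so the
 * mapping paths can decompress the ETC blocks in software at upload time.
 */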

static unsigned
get_num_phys_layers(const struct isl_surf *surf, unsigned level)
{
   /* For physical dimensions one also needs to consider the layout.
    * See isl_calc_phys_level0_extent_sa().
    */
   if (surf->dim != ISL_SURF_DIM_3D)
      return surf->phys_level0_sa.array_len;

   if (surf->dim_layout == ISL_DIM_LAYOUT_GEN4_2D)
      return minify(surf->phys_level0_sa.array_len, level);

   return minify(surf->phys_level0_sa.depth, level);
}
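
/* For illustration: a 2D array with array_len 8 keeps 8 physical layers at
 * every level, while a 3D surface of depth 8 minifies to 8, 4, 2 and 1
 * layers at levels 0..3 (whether the depth is tracked in array_len or in
 * depth depends on dim_layout, as above).
 */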

/** \brief Assert that the level and layer are valid for the miptree. */
void
intel_miptree_check_level_layer(const struct intel_mipmap_tree *mt,
                                uint32_t level,
                                uint32_t layer)
{
   (void) mt;
   (void) level;
   (void) layer;

   assert(level >= mt->first_level);
   assert(level <= mt->last_level);
   assert(layer < get_num_phys_layers(&mt->surf, level));
}

static enum isl_aux_state **
create_aux_state_map(struct intel_mipmap_tree *mt,
                     enum isl_aux_state initial)
{
   const uint32_t levels = mt->last_level + 1;

   uint32_t total_slices = 0;
   for (uint32_t level = 0; level < levels; level++)
      total_slices += get_num_phys_layers(&mt->surf, level);

   const size_t per_level_array_size = levels * sizeof(enum isl_aux_state *);

   /* We're going to allocate a single chunk of data for both the per-level
    * reference array and the arrays of aux_state. This makes cleanup
    * significantly easier.
    */
   const size_t total_size = per_level_array_size +
                             total_slices * sizeof(enum isl_aux_state);
   void *data = malloc(total_size);
   if (data == NULL)
      return NULL;

   enum isl_aux_state **per_level_arr = data;
   enum isl_aux_state *s = data + per_level_array_size;
   for (uint32_t level = 0; level < levels; level++) {
      per_level_arr[level] = s;

      const unsigned level_depth = get_num_phys_layers(&mt->surf, level);

      for (uint32_t a = 0; a < level_depth; a++)
         *(s++) = initial;
   }
   assert((void *)s == data + total_size);

   return per_level_arr;
}
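
/* For illustration: a miptree with two levels of 4 and 2 physical slices
 * gets one malloc'd chunk laid out as
 *
 *    [lvl0 ptr][lvl1 ptr][s0.0 s0.1 s0.2 s0.3][s1.0 s1.1]
 *
 * with each per-level pointer aimed at the first aux_state of its level, so
 * the single free() in free_aux_state_map() below releases everything.
 */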

static void
free_aux_state_map(enum isl_aux_state **state)
{
   free(state);
}

static bool
need_to_retile_as_linear(struct brw_context *brw, unsigned row_pitch,
                         enum isl_tiling tiling, unsigned samples)
{
   if (samples > 1)
      return false;

   if (tiling == ISL_TILING_LINEAR)
      return false;

   /* If the width is much smaller than a tile, don't bother tiling. */
   if (row_pitch < 64)
      return true;

   if (ALIGN(row_pitch, 512) >= 32768) {
      perf_debug("row pitch %u too large to blit, falling back to untiled",
                 row_pitch);
      return true;
   }

   return false;
}

static bool
need_to_retile_as_x(const struct brw_context *brw, uint64_t size,
                    enum isl_tiling tiling)
{
   /* If the BO is too large to fit in the aperture, we need to use the
    * BLT engine to support it. Prior to Sandybridge, the BLT paths can't
    * handle Y-tiling, so we need to fall back to X.
    */
   if (brw->gen < 6 && size >= brw->max_gtt_map_object_size &&
       tiling == ISL_TILING_Y0)
      return true;

   return false;
}

static struct intel_mipmap_tree *
make_surface(struct brw_context *brw, GLenum target, mesa_format format,
             unsigned first_level, unsigned last_level,
             unsigned width0, unsigned height0, unsigned depth0,
             unsigned num_samples, isl_tiling_flags_t tiling_flags,
             isl_surf_usage_flags_t isl_usage_flags, uint32_t alloc_flags,
             unsigned row_pitch, struct brw_bo *bo)
{
   struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
   if (!mt)
      return NULL;

   if (!create_mapping_table(target, first_level, last_level, depth0,
                             mt->level)) {
      free(mt);
      return NULL;
   }

   mt->refcount = 1;

   if (target == GL_TEXTURE_CUBE_MAP ||
       target == GL_TEXTURE_CUBE_MAP_ARRAY)
      isl_usage_flags |= ISL_SURF_USAGE_CUBE_BIT;

   DBG("%s: %s %s %ux %u:%u:%u %d..%d <-- %p\n",
       __func__,
       _mesa_enum_to_string(target),
       _mesa_get_format_name(format),
       num_samples, width0, height0, depth0,
       first_level, last_level, mt);

   struct isl_surf_init_info init_info = {
      .dim = get_isl_surf_dim(target),
      .format = translate_tex_format(brw, format, false),
      .width = width0,
      .height = height0,
      .depth = target == GL_TEXTURE_3D ? depth0 : 1,
      .levels = last_level - first_level + 1,
      .array_len = target == GL_TEXTURE_3D ? 1 : depth0,
      .samples = num_samples,
      .row_pitch = row_pitch,
      .usage = isl_usage_flags,
      .tiling_flags = tiling_flags,
   };

   if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
      goto fail;

   /* In case the caller doesn't specifically request Y-tiling (needed
    * unconditionally for depth), check for corner cases needing special
    * treatment.
    */
   if (tiling_flags & ~ISL_TILING_Y0_BIT) {
      if (need_to_retile_as_linear(brw, mt->surf.row_pitch,
                                   mt->surf.tiling, mt->surf.samples)) {
         init_info.tiling_flags = 1u << ISL_TILING_LINEAR;
         if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
            goto fail;
      } else if (need_to_retile_as_x(brw, mt->surf.size, mt->surf.tiling)) {
         init_info.tiling_flags = 1u << ISL_TILING_X;
         if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
            goto fail;
      }
   }

   /* In the linear case the buffer gets padded by a fixed 64 bytes and
    * therefore the size may not be a multiple of the row_pitch.
    * See isl_apply_surface_padding().
    */
   if (mt->surf.tiling != ISL_TILING_LINEAR)
      assert(mt->surf.size % mt->surf.row_pitch == 0);

   if (!bo) {
      mt->bo = brw_bo_alloc_tiled(brw->bufmgr, "isl-miptree",
                                  mt->surf.size,
                                  isl_tiling_to_i915_tiling(
                                     mt->surf.tiling),
                                  mt->surf.row_pitch, alloc_flags);
      if (!mt->bo)
         goto fail;
   } else {
      mt->bo = bo;
   }

   mt->first_level = first_level;
   mt->last_level = last_level;
   mt->target = target;
   mt->format = format;
   mt->aux_state = NULL;
   mt->cpp = isl_format_get_layout(mt->surf.format)->bpb / 8;
   mt->compressed = _mesa_is_format_compressed(format);

   return mt;

fail:
   intel_miptree_release(&mt);
   return NULL;
}

static bool
make_separate_stencil_surface(struct brw_context *brw,
                              struct intel_mipmap_tree *mt)
{
   mt->stencil_mt = make_surface(brw, mt->target, MESA_FORMAT_S_UINT8,
                                 0, mt->surf.levels - 1,
                                 mt->surf.logical_level0_px.width,
                                 mt->surf.logical_level0_px.height,
                                 mt->surf.dim == ISL_SURF_DIM_3D ?
                                    mt->surf.logical_level0_px.depth :
                                    mt->surf.logical_level0_px.array_len,
                                 mt->surf.samples, ISL_TILING_W_BIT,
                                 ISL_SURF_USAGE_STENCIL_BIT |
                                 ISL_SURF_USAGE_TEXTURE_BIT,
                                 BO_ALLOC_FOR_RENDER, 0, NULL);

   if (!mt->stencil_mt)
      return false;

   mt->stencil_mt->r8stencil_needs_update = true;

   return true;
}

static bool
force_linear_tiling(uint32_t layout_flags)
{
   /* TILING_ANY includes both the NONE and Y bits. */
   if (layout_flags & MIPTREE_LAYOUT_TILING_Y)
      return false;

   return layout_flags & MIPTREE_LAYOUT_TILING_NONE;
}

static struct intel_mipmap_tree *
miptree_create(struct brw_context *brw,
               GLenum target,
               mesa_format format,
               GLuint first_level,
               GLuint last_level,
               GLuint width0,
               GLuint height0,
               GLuint depth0,
               GLuint num_samples,
               uint32_t layout_flags)
{
   if (format == MESA_FORMAT_S_UINT8)
      return make_surface(brw, target, format, first_level, last_level,
                          width0, height0, depth0, num_samples,
                          ISL_TILING_W_BIT,
                          ISL_SURF_USAGE_STENCIL_BIT |
                          ISL_SURF_USAGE_TEXTURE_BIT,
                          BO_ALLOC_FOR_RENDER,
                          0,
                          NULL);

   const GLenum base_format = _mesa_get_format_base_format(format);
   if ((base_format == GL_DEPTH_COMPONENT ||
        base_format == GL_DEPTH_STENCIL) &&
       !force_linear_tiling(layout_flags)) {
      /* Fix up the Z miptree format for how we're splitting out separate
       * stencil. Gen7 expects there to be no stencil bits in its depth buffer.
       */
      const mesa_format depth_only_format =
         intel_depth_format_for_depthstencil_format(format);
      struct intel_mipmap_tree *mt = make_surface(
         brw, target, brw->gen >= 6 ? depth_only_format : format,
         first_level, last_level,
         width0, height0, depth0, num_samples, ISL_TILING_Y0_BIT,
         ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT,
         BO_ALLOC_FOR_RENDER, 0, NULL);

      if (needs_separate_stencil(brw, mt, format, layout_flags) &&
          !make_separate_stencil_surface(brw, mt)) {
         intel_miptree_release(&mt);
         return NULL;
      }

      if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
         intel_miptree_choose_aux_usage(brw, mt);

      return mt;
   }

   mesa_format tex_format = format;
   mesa_format etc_format = MESA_FORMAT_NONE;
   uint32_t alloc_flags = 0;

   format = intel_lower_compressed_format(brw, format);

   etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;

   assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0);
   if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD)
      alloc_flags |= BO_ALLOC_FOR_RENDER;

   isl_tiling_flags_t tiling_flags = force_linear_tiling(layout_flags) ?
      ISL_TILING_LINEAR_BIT : ISL_TILING_ANY_MASK;

   /* TODO: This used to be because there wasn't BLORP to handle Y-tiling. */
   if (brw->gen < 6)
      tiling_flags &= ~ISL_TILING_Y0_BIT;

   struct intel_mipmap_tree *mt = make_surface(
      brw, target, format,
      first_level, last_level,
      width0, height0, depth0,
      num_samples, tiling_flags,
      ISL_SURF_USAGE_RENDER_TARGET_BIT |
      ISL_SURF_USAGE_TEXTURE_BIT,
      alloc_flags, 0, NULL);
   if (!mt)
      return NULL;

   mt->etc_format = etc_format;

   if (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT) {
      mt->bo->cache_coherent = false;
      mt->is_scanout = true;
   }

   if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
      intel_miptree_choose_aux_usage(brw, mt);

   return mt;
}

struct intel_mipmap_tree *
intel_miptree_create(struct brw_context *brw,
                     GLenum target,
                     mesa_format format,
                     GLuint first_level,
                     GLuint last_level,
                     GLuint width0,
                     GLuint height0,
                     GLuint depth0,
                     GLuint num_samples,
                     uint32_t layout_flags)
{
   assert(num_samples > 0);

   struct intel_mipmap_tree *mt = miptree_create(
      brw, target, format,
      first_level, last_level,
      width0, height0, depth0, num_samples,
      layout_flags);
   if (!mt)
      return NULL;

   mt->offset = 0;

   if (!intel_miptree_alloc_aux(brw, mt)) {
      intel_miptree_release(&mt);
      return NULL;
   }

   return mt;
}

struct intel_mipmap_tree *
intel_miptree_create_for_bo(struct brw_context *brw,
                            struct brw_bo *bo,
                            mesa_format format,
                            uint32_t offset,
                            uint32_t width,
                            uint32_t height,
                            uint32_t depth,
                            int pitch,
                            uint32_t layout_flags)
{
   struct intel_mipmap_tree *mt;
   uint32_t tiling, swizzle;
   const GLenum target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
   const GLenum base_format = _mesa_get_format_base_format(format);

   if ((base_format == GL_DEPTH_COMPONENT ||
        base_format == GL_DEPTH_STENCIL)) {
      const mesa_format depth_only_format =
         intel_depth_format_for_depthstencil_format(format);
      mt = make_surface(brw, target,
                        brw->gen >= 6 ? depth_only_format : format,
                        0, 0, width, height, depth, 1, ISL_TILING_Y0_BIT,
                        ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT,
                        BO_ALLOC_FOR_RENDER, pitch, bo);

      brw_bo_reference(bo);

      if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
         intel_miptree_choose_aux_usage(brw, mt);

      return mt;
   } else if (format == MESA_FORMAT_S_UINT8) {
      mt = make_surface(brw, target, MESA_FORMAT_S_UINT8,
                        0, 0, width, height, depth, 1,
                        ISL_TILING_W_BIT,
                        ISL_SURF_USAGE_STENCIL_BIT |
                        ISL_SURF_USAGE_TEXTURE_BIT,
                        BO_ALLOC_FOR_RENDER, pitch, bo);
      if (!mt)
         return NULL;

      assert(bo->size >= mt->surf.size);

      brw_bo_reference(bo);
      return mt;
   }

   brw_bo_get_tiling(bo, &tiling, &swizzle);

   /* Nothing will be able to use this miptree with the BO if the offset isn't
    * aligned.
    */
   if (tiling != I915_TILING_NONE)
      assert(offset % 4096 == 0);

   /* miptrees can't handle negative pitch. If you need flipping of images,
    * that's outside of the scope of the mt.
    */
   assert(pitch >= 0);

   /* The BO already has a tiling format and we shouldn't confuse the lower
    * layers by making it try to find a tiling format again.
    */
   assert((layout_flags & MIPTREE_LAYOUT_TILING_ANY) == 0);
   assert((layout_flags & MIPTREE_LAYOUT_TILING_NONE) == 0);

   mt = make_surface(brw, target, format,
                     0, 0, width, height, depth, 1,
                     1lu << isl_tiling_from_i915_tiling(tiling),
                     ISL_SURF_USAGE_RENDER_TARGET_BIT |
                     ISL_SURF_USAGE_TEXTURE_BIT,
                     0, pitch, bo);
   if (!mt)
      return NULL;

   brw_bo_reference(bo);
   mt->bo = bo;
   mt->offset = offset;

   if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
      intel_miptree_choose_aux_usage(brw, mt);

   return mt;
}

static struct intel_mipmap_tree *
miptree_create_for_planar_image(struct brw_context *brw,
                                __DRIimage *image, GLenum target)
{
   struct intel_image_format *f = image->planar_format;
   struct intel_mipmap_tree *planar_mt = NULL;

   for (int i = 0; i < f->nplanes; i++) {
      const int index = f->planes[i].buffer_index;
      const uint32_t dri_format = f->planes[i].dri_format;
      const mesa_format format = driImageFormatToGLFormat(dri_format);
      const uint32_t width = image->width >> f->planes[i].width_shift;
      const uint32_t height = image->height >> f->planes[i].height_shift;

      /* Disable creation of the texture's aux buffers because the driver
       * exposes no EGL API to manage them. That is, there is no API for
       * resolving the aux buffer's content to the main buffer nor for
       * invalidating the aux buffer's content.
       */
      struct intel_mipmap_tree *mt =
         intel_miptree_create_for_bo(brw, image->bo, format,
                                     image->offsets[index],
                                     width, height, 1,
                                     image->strides[index],
                                     MIPTREE_LAYOUT_DISABLE_AUX);
      if (mt == NULL)
         return NULL;

      mt->target = target;

      if (i == 0)
         planar_mt = mt;
      else
         planar_mt->plane[i - 1] = mt;
   }

   return planar_mt;
}

struct intel_mipmap_tree *
intel_miptree_create_for_dri_image(struct brw_context *brw,
                                   __DRIimage *image, GLenum target,
                                   enum isl_colorspace colorspace,
                                   bool is_winsys_image)
{
   if (image->planar_format && image->planar_format->nplanes > 0) {
      assert(colorspace == ISL_COLORSPACE_NONE ||
             colorspace == ISL_COLORSPACE_YUV);
      return miptree_create_for_planar_image(brw, image, target);
   }

   mesa_format format = image->format;
   switch (colorspace) {
   case ISL_COLORSPACE_NONE:
      /* Keep the image format unmodified */
      break;

   case ISL_COLORSPACE_LINEAR:
      format = _mesa_get_srgb_format_linear(format);
      break;

   case ISL_COLORSPACE_SRGB:
      format = _mesa_get_linear_format_srgb(format);
      break;

   default:
      unreachable("Invalid colorspace for non-planar image");
   }

   if (!brw->ctx.TextureFormatSupported[format]) {
      /* The texture storage paths in core Mesa detect if the driver does not
       * support the user-requested format, and then searches for a
       * fallback format. The DRIimage code bypasses core Mesa, though. So we
       * do the fallbacks here for important formats.
       *
       * We must support DRM_FOURCC_XBGR8888 textures because the Android
       * framework produces HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces, which
       * the Chrome OS compositor consumes as dma_buf EGLImages.
       */
      format = _mesa_format_fallback_rgbx_to_rgba(format);
   }

   if (!brw->ctx.TextureFormatSupported[format])
      return NULL;

   /* If this image comes in from a window system, we have different
    * requirements than if it comes in via an EGL import operation. Window
    * system images can use any form of auxiliary compression we wish because
    * they get "flushed" before being handed off to the window system and we
    * have the opportunity to do resolves. Window system buffers also may be
    * used for scanout so we need to flag that appropriately.
    */
   const uint32_t mt_layout_flags =
      is_winsys_image ? MIPTREE_LAYOUT_FOR_SCANOUT : MIPTREE_LAYOUT_DISABLE_AUX;

   /* For non-winsys images, disable creation of the texture's aux buffers
    * because the driver exposes no EGL API to manage them. That is, there is
    * no API for resolving the aux buffer's content to the main buffer nor
    * for invalidating the aux buffer's content.
    */
   struct intel_mipmap_tree *mt =
      intel_miptree_create_for_bo(brw, image->bo, format,
                                  image->offset, image->width, image->height, 1,
                                  image->pitch, mt_layout_flags);
   if (mt == NULL)
      return NULL;

   mt->target = target;
   mt->level[0].level_x = image->tile_x;
   mt->level[0].level_y = image->tile_y;

   /* From "OES_EGL_image" error reporting. We report GL_INVALID_OPERATION
    * for EGL images from non-tile-aligned surfaces in gen4 hw and earlier,
    * which have trouble resolving back to the destination image due to
    * alignment issues.
    */
   if (!brw->has_surface_tile_offset) {
      uint32_t draw_x, draw_y;
      intel_miptree_get_tile_offsets(mt, 0, 0, &draw_x, &draw_y);

      if (draw_x != 0 || draw_y != 0) {
         _mesa_error(&brw->ctx, GL_INVALID_OPERATION, __func__);
         intel_miptree_release(&mt);
         return NULL;
      }
   }

   if (!intel_miptree_alloc_aux(brw, mt)) {
      intel_miptree_release(&mt);
      return NULL;
   }

   return mt;
}

/**
 * For a singlesample renderbuffer, this simply wraps the given BO with a
 * miptree.
 *
 * For a multisample renderbuffer, this wraps the window system's
 * (singlesample) BO with a singlesample miptree attached to the
 * intel_renderbuffer, then creates a multisample miptree attached to irb->mt
 * that will contain the actual rendering (which is lazily resolved to
 * irb->singlesample_mt).
 */
bool
intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
                                         struct intel_renderbuffer *irb,
                                         struct intel_mipmap_tree *singlesample_mt,
                                         uint32_t width, uint32_t height,
                                         uint32_t pitch)
{
   struct intel_mipmap_tree *multisample_mt = NULL;
   struct gl_renderbuffer *rb = &irb->Base.Base;
   mesa_format format = rb->Format;
   const unsigned num_samples = MAX2(rb->NumSamples, 1);

   /* Only the front and back buffers, which are color buffers, are allocated
    * through the image loader.
    */
   assert(_mesa_get_format_base_format(format) == GL_RGB ||
          _mesa_get_format_base_format(format) == GL_RGBA);

   assert(singlesample_mt);

   if (num_samples == 1) {
      intel_miptree_release(&irb->mt);
      irb->mt = singlesample_mt;

      assert(!irb->singlesample_mt);
   } else {
      intel_miptree_release(&irb->singlesample_mt);
      irb->singlesample_mt = singlesample_mt;

      if (!irb->mt ||
          irb->mt->surf.logical_level0_px.width != width ||
          irb->mt->surf.logical_level0_px.height != height) {
         multisample_mt = intel_miptree_create_for_renderbuffer(intel,
                                                                format,
                                                                width,
                                                                height,
                                                                num_samples);
         if (!multisample_mt)
            goto fail;

         irb->need_downsample = false;
         intel_miptree_release(&irb->mt);
         irb->mt = multisample_mt;
      }
   }
   return true;

fail:
   intel_miptree_release(&irb->mt);
   return false;
}

struct intel_mipmap_tree*
intel_miptree_create_for_renderbuffer(struct brw_context *brw,
                                      mesa_format format,
                                      uint32_t width,
                                      uint32_t height,
                                      uint32_t num_samples)
{
   struct intel_mipmap_tree *mt;
   uint32_t depth = 1;
   GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
   const uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
                                 MIPTREE_LAYOUT_TILING_ANY |
                                 MIPTREE_LAYOUT_FOR_SCANOUT;

   mt = intel_miptree_create(brw, target, format, 0, 0,
                             width, height, depth, num_samples,
                             layout_flags);
   if (!mt)
      goto fail;

   return mt;

fail:
   intel_miptree_release(&mt);
   return NULL;
}

void
intel_miptree_reference(struct intel_mipmap_tree **dst,
                        struct intel_mipmap_tree *src)
{
   if (*dst == src)
      return;

   intel_miptree_release(dst);

   if (src) {
      src->refcount++;
      DBG("%s %p refcount now %d\n", __func__, src, src->refcount);
   }

   *dst = src;
}

static void
intel_miptree_aux_buffer_free(struct intel_miptree_aux_buffer *aux_buf)
{
   if (aux_buf == NULL)
      return;

   brw_bo_unreference(aux_buf->bo);

   free(aux_buf);
}

void
intel_miptree_release(struct intel_mipmap_tree **mt)
{
   if (!*mt)
      return;

   DBG("%s %p refcount will be %d\n", __func__, *mt, (*mt)->refcount - 1);
   if (--(*mt)->refcount <= 0) {
      GLuint i;

      DBG("%s deleting %p\n", __func__, *mt);

      brw_bo_unreference((*mt)->bo);
      intel_miptree_release(&(*mt)->stencil_mt);
      intel_miptree_release(&(*mt)->r8stencil_mt);
      intel_miptree_aux_buffer_free((*mt)->hiz_buf);
      intel_miptree_aux_buffer_free((*mt)->mcs_buf);
      free_aux_state_map((*mt)->aux_state);

      intel_miptree_release(&(*mt)->plane[0]);
      intel_miptree_release(&(*mt)->plane[1]);

      for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
         free((*mt)->level[i].slice);
      }

      free(*mt);
   }
   *mt = NULL;
}

void
intel_get_image_dims(struct gl_texture_image *image,
                     int *width, int *height, int *depth)
{
   switch (image->TexObject->Target) {
   case GL_TEXTURE_1D_ARRAY:
      /* For a 1D Array texture the OpenGL API will treat the image height as
       * the number of array slices. For Intel hardware, we treat the 1D array
       * as a 2D Array with a height of 1. So, here we want to swap image
       * height and depth.
       */
      assert(image->Depth == 1);
      *width = image->Width;
      *height = 1;
      *depth = image->Height;
      break;
   case GL_TEXTURE_CUBE_MAP:
      /* For Cube maps, the mesa/main api layer gives us a depth of 1 even
       * though we really have 6 slices.
       */
      assert(image->Depth == 1);
      *width = image->Width;
      *height = image->Height;
      *depth = 6;
      break;
   default:
      *width = image->Width;
      *height = image->Height;
      *depth = image->Depth;
      break;
   }
}

/**
 * Can the image be pulled into a unified mipmap tree? This mirrors
 * the completeness test in a lot of ways.
 *
 * Not sure whether I want to pass gl_texture_image here.
 */
bool
intel_miptree_match_image(struct intel_mipmap_tree *mt,
                          struct gl_texture_image *image)
{
   struct intel_texture_image *intelImage = intel_texture_image(image);
   GLuint level = intelImage->base.Base.Level;
   int width, height, depth;

   /* glTexImage* choose the texture object based on the target passed in, and
    * objects can't change targets over their lifetimes, so this should be
    * true.
    */
   assert(image->TexObject->Target == mt->target);

   mesa_format mt_format = mt->format;
   if (mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT && mt->stencil_mt)
      mt_format = MESA_FORMAT_Z24_UNORM_S8_UINT;
   if (mt->format == MESA_FORMAT_Z_FLOAT32 && mt->stencil_mt)
      mt_format = MESA_FORMAT_Z32_FLOAT_S8X24_UINT;
   if (mt->etc_format != MESA_FORMAT_NONE)
      mt_format = mt->etc_format;

   if (image->TexFormat != mt_format)
      return false;

   intel_get_image_dims(image, &width, &height, &depth);

   if (mt->target == GL_TEXTURE_CUBE_MAP)
      depth = 6;

   if (level >= mt->surf.levels)
      return false;

   const unsigned level_depth =
      mt->surf.dim == ISL_SURF_DIM_3D ?
         minify(mt->surf.logical_level0_px.depth, level) :
         mt->surf.logical_level0_px.array_len;

   return width == minify(mt->surf.logical_level0_px.width, level) &&
          height == minify(mt->surf.logical_level0_px.height, level) &&
          depth == level_depth &&
          MAX2(image->NumSamples, 1) == mt->surf.samples;
}

void
intel_miptree_get_image_offset(const struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               GLuint *x, GLuint *y)
{
   if (level == 0 && slice == 0) {
      *x = mt->level[0].level_x;
      *y = mt->level[0].level_y;
      return;
   }

   uint32_t x_offset_sa, y_offset_sa;

   /* The miptree itself can have an offset only if it represents a single
    * slice in an imported buffer object.
    * See intel_miptree_create_for_dri_image().
    */
   assert(mt->level[0].level_x == 0);
   assert(mt->level[0].level_y == 0);

   /* The given level is relative to level zero while the miptree may
    * represent just a subset of all levels starting from 'first_level'.
    */
   assert(level >= mt->first_level);
   level -= mt->first_level;

   const unsigned z = mt->surf.dim == ISL_SURF_DIM_3D ? slice : 0;
   slice = mt->surf.dim == ISL_SURF_DIM_3D ? 0 : slice;
   isl_surf_get_image_offset_el(&mt->surf, level, slice, z,
                                &x_offset_sa, &y_offset_sa);

   *x = x_offset_sa;
   *y = y_offset_sa;
}

/**
 * This function computes the tile_w (in bytes) and tile_h (in rows) of
 * different tiling patterns. If the BO is untiled, tile_w is set to cpp
 * and tile_h is set to 1.
 */
void
intel_get_tile_dims(enum isl_tiling tiling, uint32_t cpp,
                    uint32_t *tile_w, uint32_t *tile_h)
{
   switch (tiling) {
   case ISL_TILING_X:
      *tile_w = 512;
      *tile_h = 8;
      break;
   case ISL_TILING_Y0:
      *tile_w = 128;
      *tile_h = 32;
      break;
   case ISL_TILING_LINEAR:
      *tile_w = cpp;
      *tile_h = 1;
      break;
   default:
      unreachable("not reached");
   }
}
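
/* Note that both tiled geometries above describe one 4 KB page: an X tile
 * is 512 B x 8 rows and a Y tile is 128 B x 32 rows, i.e. 4096 B each.
 */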

/**
 * This function computes masks that may be used to select the bits of the X
 * and Y coordinates that indicate the offset within a tile. If the BO is
 * untiled, the masks are set to 0.
 */
void
intel_get_tile_masks(enum isl_tiling tiling, uint32_t cpp,
                     uint32_t *mask_x, uint32_t *mask_y)
{
   uint32_t tile_w_bytes, tile_h;

   intel_get_tile_dims(tiling, cpp, &tile_w_bytes, &tile_h);

   *mask_x = tile_w_bytes / cpp - 1;
   *mask_y = tile_h - 1;
}
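
/* For illustration: with X tiling and cpp == 4, tile_w_bytes is 512, so
 * mask_x == 512 / 4 - 1 == 127 (a tile is 128 pixels wide) and
 * mask_y == 7 (a tile is 8 rows tall).
 */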

/**
 * Compute the offset (in bytes) from the start of the BO to the given x
 * and y coordinate. For tiled BOs, caller must ensure that x and y are
 * multiples of the tile size.
 */
uint32_t
intel_miptree_get_aligned_offset(const struct intel_mipmap_tree *mt,
                                 uint32_t x, uint32_t y)
{
   int cpp = mt->cpp;
   uint32_t pitch = mt->surf.row_pitch;

   switch (mt->surf.tiling) {
   default:
      unreachable("not reached");
   case ISL_TILING_LINEAR:
      return y * pitch + x * cpp;
   case ISL_TILING_X:
      assert((x % (512 / cpp)) == 0);
      assert((y % 8) == 0);
      return y * pitch + x / (512 / cpp) * 4096;
   case ISL_TILING_Y0:
      assert((x % (128 / cpp)) == 0);
      assert((y % 32) == 0);
      return y * pitch + x / (128 / cpp) * 4096;
   }
}
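
/* Worked example: for X tiling with cpp == 4 and row_pitch == 2048, the
 * aligned coordinate (x, y) == (256, 8) maps to
 * 8 * 2048 + 256 / (512 / 4) * 4096 == 16384 + 8192 == 24576 bytes.
 */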

/**
 * Rendering with tiled buffers requires that the base address of the buffer
 * be aligned to a page boundary. For renderbuffers, and sometimes with
 * textures, we may want the surface to point at a texture image level that
 * isn't at a page boundary.
 *
 * This function returns an appropriately-aligned base offset
 * according to the tiling restrictions, plus any required x/y offset
 * from there.
 */
uint32_t
intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               uint32_t *tile_x,
                               uint32_t *tile_y)
{
   uint32_t x, y;
   uint32_t mask_x, mask_y;

   intel_get_tile_masks(mt->surf.tiling, mt->cpp, &mask_x, &mask_y);
   intel_miptree_get_image_offset(mt, level, slice, &x, &y);

   *tile_x = x & mask_x;
   *tile_y = y & mask_y;

   return intel_miptree_get_aligned_offset(mt, x & ~mask_x, y & ~mask_y);
}
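
/* Worked example: with X tiling and cpp == 4 (mask_x == 127, mask_y == 7),
 * an image at (x, y) == (200, 70) yields tile_x == 72 and tile_y == 6, and
 * the page-aligned base offset is computed for (128, 64).
 */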

static void
intel_miptree_copy_slice_sw(struct brw_context *brw,
                            struct intel_mipmap_tree *src_mt,
                            unsigned src_level, unsigned src_layer,
                            struct intel_mipmap_tree *dst_mt,
                            unsigned dst_level, unsigned dst_layer,
                            unsigned width, unsigned height)
{
   void *src, *dst;
   ptrdiff_t src_stride, dst_stride;
   const unsigned cpp = (isl_format_get_layout(dst_mt->surf.format)->bpb / 8);

   intel_miptree_map(brw, src_mt,
                     src_level, src_layer,
                     0, 0,
                     width, height,
                     GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
                     &src, &src_stride);

   intel_miptree_map(brw, dst_mt,
                     dst_level, dst_layer,
                     0, 0,
                     width, height,
                     GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
                     BRW_MAP_DIRECT_BIT,
                     &dst, &dst_stride);

   DBG("sw blit %s mt %p %p/%"PRIdPTR" -> %s mt %p %p/%"PRIdPTR" (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src, src_stride,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst, dst_stride,
       width, height);

   int row_size = cpp * width;
   if (src_stride == row_size &&
       dst_stride == row_size) {
      memcpy(dst, src, row_size * height);
   } else {
      for (int i = 0; i < height; i++) {
         memcpy(dst, src, row_size);
         dst += dst_stride;
         src += src_stride;
      }
   }

   intel_miptree_unmap(brw, dst_mt, dst_level, dst_layer);
   intel_miptree_unmap(brw, src_mt, src_level, src_layer);

   /* Don't forget to copy the stencil data over, too. We could have skipped
    * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
    * shuffling the two data sources in/out of temporary storage instead of
    * the direct mapping we get this way.
    */
   if (dst_mt->stencil_mt) {
      assert(src_mt->stencil_mt);
      intel_miptree_copy_slice_sw(brw,
                                  src_mt->stencil_mt, src_level, src_layer,
                                  dst_mt->stencil_mt, dst_level, dst_layer,
                                  width, height);
   }
}

void
intel_miptree_copy_slice(struct brw_context *brw,
                         struct intel_mipmap_tree *src_mt,
                         unsigned src_level, unsigned src_layer,
                         struct intel_mipmap_tree *dst_mt,
                         unsigned dst_level, unsigned dst_layer)
{
   mesa_format format = src_mt->format;
   unsigned width = minify(src_mt->surf.phys_level0_sa.width,
                           src_level - src_mt->first_level);
   unsigned height = minify(src_mt->surf.phys_level0_sa.height,
                            src_level - src_mt->first_level);

   assert(src_layer < get_num_phys_layers(&src_mt->surf,
                                          src_level - src_mt->first_level));

   assert(src_mt->format == dst_mt->format);

   if (dst_mt->compressed) {
      unsigned int i, j;
      _mesa_get_format_block_size(dst_mt->format, &i, &j);
      height = ALIGN_NPOT(height, j) / j;
      width = ALIGN_NPOT(width, i) / i;
   }

   /* If it's a packed depth/stencil buffer with separate stencil, the blit
    * below won't apply since we can't do the depth's Y tiling or the
    * stencil's W tiling in the blitter.
    */
   if (src_mt->stencil_mt) {
      intel_miptree_copy_slice_sw(brw,
                                  src_mt, src_level, src_layer,
                                  dst_mt, dst_level, dst_layer,
                                  width, height);
      return;
   }

   uint32_t dst_x, dst_y, src_x, src_y;
   intel_miptree_get_image_offset(dst_mt, dst_level, dst_layer,
                                  &dst_x, &dst_y);
   intel_miptree_get_image_offset(src_mt, src_level, src_layer,
                                  &src_x, &src_y);

   DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src_x, src_y, src_mt->surf.row_pitch,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst_x, dst_y, dst_mt->surf.row_pitch,
       width, height);

   if (!intel_miptree_blit(brw,
                           src_mt, src_level, src_layer, 0, 0, false,
                           dst_mt, dst_level, dst_layer, 0, 0, false,
                           width, height, GL_COPY)) {
      perf_debug("miptree validate blit for %s failed\n",
                 _mesa_get_format_name(format));

      intel_miptree_copy_slice_sw(brw,
                                  src_mt, src_level, src_layer,
                                  dst_mt, dst_level, dst_layer,
                                  width, height);
   }
}

/**
 * Copies the image's current data to the given miptree, and associates that
 * miptree with the image.
 *
 * If \c invalidate is true, then the actual image data does not need to be
 * copied, but the image still needs to be associated to the new miptree (this
 * is set to true if we're about to clear the image).
 */
void
intel_miptree_copy_teximage(struct brw_context *brw,
                            struct intel_texture_image *intelImage,
                            struct intel_mipmap_tree *dst_mt,
                            bool invalidate)
{
   struct intel_mipmap_tree *src_mt = intelImage->mt;
   struct intel_texture_object *intel_obj =
      intel_texture_object(intelImage->base.Base.TexObject);
   int level = intelImage->base.Base.Level;
   const unsigned face = intelImage->base.Base.Face;
   unsigned start_layer, end_layer;

   if (intel_obj->base.Target == GL_TEXTURE_1D_ARRAY) {
      assert(face == 0);
      assert(intelImage->base.Base.Height);
      start_layer = 0;
      end_layer = intelImage->base.Base.Height - 1;
   } else if (face > 0) {
      start_layer = face;
      end_layer = face;
   } else {
      assert(intelImage->base.Base.Depth);
      start_layer = 0;
      end_layer = intelImage->base.Base.Depth - 1;
   }

   if (!invalidate) {
      for (unsigned i = start_layer; i <= end_layer; i++) {
         intel_miptree_copy_slice(brw,
                                  src_mt, level, i,
                                  dst_mt, level, i);
      }
   }

   intel_miptree_reference(&intelImage->mt, dst_mt);
   intel_obj->needs_validate = true;
}

static void
intel_miptree_init_mcs(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       int init_value)
{
   assert(mt->mcs_buf != NULL);

   /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
    *
    *     When MCS buffer is enabled and bound to MSRT, it is required that it
    *     is cleared prior to any rendering.
    *
    * Since we don't use the MCS buffer for any purpose other than rendering,
    * it makes sense to just clear it immediately upon allocation.
    *
    * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
    */
   void *map = brw_bo_map(brw, mt->mcs_buf->bo, MAP_WRITE);
   if (unlikely(map == NULL)) {
      fprintf(stderr, "Failed to map mcs buffer into GTT\n");
      brw_bo_unreference(mt->mcs_buf->bo);
      free(mt->mcs_buf);
      return;
   }
   void *data = map;
   memset(data, init_value, mt->mcs_buf->size);
   brw_bo_unmap(mt->mcs_buf->bo);
}

static struct intel_miptree_aux_buffer *
intel_alloc_aux_buffer(struct brw_context *brw,
                       const char *name,
                       const struct isl_surf *aux_surf,
                       uint32_t alloc_flags,
                       struct intel_mipmap_tree *mt)
{
   struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
   if (!buf)
      return NULL;

   buf->size = aux_surf->size;
   buf->pitch = aux_surf->row_pitch;
   buf->qpitch = isl_surf_get_array_pitch_sa_rows(aux_surf);

   /* ISL has a stricter set of alignment rules than the drm allocator.
    * Therefore one can pass the ISL dimensions in terms of bytes instead of
    * trying to recalculate based on different format block sizes.
    */
   buf->bo = brw_bo_alloc_tiled(brw->bufmgr, name, buf->size,
                                I915_TILING_Y, buf->pitch, alloc_flags);
   if (!buf->bo) {
      free(buf);
      return NULL;
   }

   buf->surf = *aux_surf;

   return buf;
}

static bool
intel_miptree_alloc_mcs(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
                        GLuint num_samples)
{
   assert(brw->gen >= 7); /* MCS only used on Gen7+ */
   assert(mt->mcs_buf == NULL);
   assert(mt->aux_usage == ISL_AUX_USAGE_MCS);

   /* Multisampled miptrees are only supported with a single miplevel. */
   assert(mt->first_level == 0);
   enum isl_aux_state **aux_state =
      create_aux_state_map(mt, ISL_AUX_STATE_CLEAR);
   if (!aux_state)
      return false;

   struct isl_surf temp_mcs_surf;

   MAYBE_UNUSED bool ok =
      isl_surf_get_mcs_surf(&brw->isl_dev, &mt->surf, &temp_mcs_surf);
   assert(ok);

   /* The buffer needs to be initialised, which requires mapping it to CPU
    * space for writing immediately. Therefore do not use the GPU-access
    * allocation flag, which can cause an unnecessary delay if the backing
    * pages happen to have just been used by the GPU.
    */
   const uint32_t alloc_flags = 0;
   mt->mcs_buf = intel_alloc_aux_buffer(brw, "mcs-miptree",
                                        &temp_mcs_surf, alloc_flags, mt);
   if (!mt->mcs_buf) {
      free(aux_state);
      return false;
   }

   mt->aux_state = aux_state;

   intel_miptree_init_mcs(brw, mt, 0xFF);

   return true;
}

bool
intel_miptree_alloc_ccs(struct brw_context *brw,
                        struct intel_mipmap_tree *mt)
{
   assert(mt->mcs_buf == NULL);
   assert(mt->aux_usage == ISL_AUX_USAGE_CCS_E ||
          mt->aux_usage == ISL_AUX_USAGE_CCS_D);

   struct isl_surf temp_ccs_surf;

   if (!isl_surf_get_ccs_surf(&brw->isl_dev, &mt->surf, &temp_ccs_surf, 0))
      return false;

   assert(temp_ccs_surf.size &&
          (temp_ccs_surf.size % temp_ccs_surf.row_pitch == 0));

   enum isl_aux_state **aux_state =
      create_aux_state_map(mt, ISL_AUX_STATE_PASS_THROUGH);
   if (!aux_state)
      return false;

   /* When CCS_E is used, we need to ensure that the CCS starts off in a valid
    * state. From the Sky Lake PRM, "MCS Buffer for Render Target(s)":
    *
    *    "If Software wants to enable Color Compression without Fast clear,
    *     Software needs to initialize MCS with zeros."
    *
    * A CCS value of 0 indicates that the corresponding block is in the
    * pass-through state, which is what we want.
    *
    * For CCS_D, on the other hand, we don't care as we're about to perform a
    * fast-clear operation. In that case, being hot in caches is more useful.
    */
   const uint32_t alloc_flags = mt->aux_usage == ISL_AUX_USAGE_CCS_E ?
                                BO_ALLOC_ZEROED : BO_ALLOC_FOR_RENDER;
   mt->mcs_buf = intel_alloc_aux_buffer(brw, "ccs-miptree",
                                        &temp_ccs_surf, alloc_flags, mt);
   if (!mt->mcs_buf) {
      free(aux_state);
      return false;
   }

   mt->aux_state = aux_state;

   return true;
}

/**
 * Helper for intel_miptree_alloc_hiz() that sets
 * \c mt->level[level].has_hiz. Return true if and only if
 * \c has_hiz was set.
 */
static bool
intel_miptree_level_enable_hiz(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               uint32_t level)
{
   assert(mt->hiz_buf);
   assert(mt->surf.size > 0);

   if (brw->gen >= 8 || brw->is_haswell) {
      uint32_t width = minify(mt->surf.phys_level0_sa.width, level);
      uint32_t height = minify(mt->surf.phys_level0_sa.height, level);

      /* Disable HiZ for LOD > 0 unless the width is 8 aligned
       * and the height is 4 aligned. This allows our HiZ support
       * to fulfill Haswell restrictions for HiZ ops. For LOD == 0,
       * we can grow the width & height to allow the HiZ op to
       * force the proper size alignments.
       */
      if (level > 0 && ((width & 7) || (height & 3))) {
         DBG("mt %p level %d: HiZ DISABLED\n", mt, level);
         return false;
      }
   }

   DBG("mt %p level %d: HiZ enabled\n", mt, level);
   mt->level[level].has_hiz = true;
   return true;
}

bool
intel_miptree_alloc_hiz(struct brw_context *brw,
                        struct intel_mipmap_tree *mt)
{
   assert(mt->hiz_buf == NULL);
   assert(mt->aux_usage == ISL_AUX_USAGE_HIZ);

   enum isl_aux_state **aux_state =
      create_aux_state_map(mt, ISL_AUX_STATE_AUX_INVALID);
   if (!aux_state)
      return false;

   struct isl_surf temp_hiz_surf;

   MAYBE_UNUSED bool ok =
      isl_surf_get_hiz_surf(&brw->isl_dev, &mt->surf, &temp_hiz_surf);
   assert(ok);

   const uint32_t alloc_flags = BO_ALLOC_FOR_RENDER;
   mt->hiz_buf = intel_alloc_aux_buffer(brw, "hiz-miptree",
                                        &temp_hiz_surf, alloc_flags, mt);

   if (!mt->hiz_buf) {
      free(aux_state);
      return false;
   }

   for (unsigned level = mt->first_level; level <= mt->last_level; ++level)
      intel_miptree_level_enable_hiz(brw, mt, level);

   mt->aux_state = aux_state;

   return true;
}

/**
 * Allocate the initial aux surface for a miptree based on mt->aux_usage
 *
 * Since MCS, HiZ, and CCS_E can compress more than just clear color, we
 * create the auxiliary surfaces up-front. CCS_D, on the other hand, can only
 * compress clear color so we wait until an actual fast-clear to allocate it.
 */
static bool
intel_miptree_alloc_aux(struct brw_context *brw,
                        struct intel_mipmap_tree *mt)
{
   switch (mt->aux_usage) {
   case ISL_AUX_USAGE_NONE:
      return true;

   case ISL_AUX_USAGE_HIZ:
      assert(!_mesa_is_format_color_format(mt->format));
      if (!intel_miptree_alloc_hiz(brw, mt))
         return false;
      return true;

   case ISL_AUX_USAGE_MCS:
      assert(_mesa_is_format_color_format(mt->format));
      assert(mt->surf.samples > 1);
      if (!intel_miptree_alloc_mcs(brw, mt, mt->surf.samples))
         return false;
      return true;

   case ISL_AUX_USAGE_CCS_D:
      /* Since CCS_D can only compress clear color, we wait until an actual
       * fast-clear to allocate it.
       */
      return true;

   case ISL_AUX_USAGE_CCS_E:
      assert(_mesa_is_format_color_format(mt->format));
      assert(mt->surf.samples == 1);
      if (!intel_miptree_alloc_ccs(brw, mt))
         return false;
      return true;
   }

   unreachable("Invalid aux usage");
}
1795
1796
1797 /**
1798 * Can the miptree sample using the hiz buffer?
1799 */
1800 bool
1801 intel_miptree_sample_with_hiz(struct brw_context *brw,
1802 struct intel_mipmap_tree *mt)
1803 {
1804 /* It's unclear how well supported sampling from the hiz buffer is on GEN8,
1805 * so keep things conservative for now and never enable it unless we're SKL+.
1806 */
1807 if (brw->gen < 9) {
1808 return false;
1809 }
1810
1811 if (!mt->hiz_buf) {
1812 return false;
1813 }
1814
1815 /* It seems the hardware won't fallback to the depth buffer if some of the
1816 * mipmap levels aren't available in the HiZ buffer. So we need all levels
1817 * of the texture to be HiZ enabled.
1818 */
1819 for (unsigned level = 0; level < mt->surf.levels; ++level) {
1820 if (!intel_miptree_level_has_hiz(mt, level))
1821 return false;
1822 }
1823
1824 /* If compressed multisampling is enabled, then we use it for the auxiliary
1825 * buffer instead.
1826 *
1827 * From the BDW PRM (Volume 2d: Command Reference: Structures
1828 * RENDER_SURFACE_STATE.AuxiliarySurfaceMode):
1829 *
1830 * "If this field is set to AUX_HIZ, Number of Multisamples must be
1831 * MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D.
1832 *
1833 * There is no such blurb for 1D textures, but there is sufficient evidence
1834 * that this is broken on SKL+.
1835 */
1836 return (mt->surf.samples == 1 &&
1837 mt->target != GL_TEXTURE_3D &&
1838 mt->target != GL_TEXTURE_1D /* gen9+ restriction */);
1839 }
1840
1841 /**
1842 * Does the given miptree level have HiZ enabled?
1843 */
1844 bool
1845 intel_miptree_level_has_hiz(const struct intel_mipmap_tree *mt, uint32_t level)
1846 {
1847 intel_miptree_check_level_layer(mt, level, 0);
1848 return mt->level[level].has_hiz;
1849 }
1850
1851 bool
1852 intel_miptree_has_color_unresolved(const struct intel_mipmap_tree *mt,
1853 unsigned start_level, unsigned num_levels,
1854 unsigned start_layer, unsigned num_layers)
1855 {
1856 assert(_mesa_is_format_color_format(mt->format));
1857
1858 if (!mt->mcs_buf)
1859 return false;
1860
1861 /* Clamp the level range to fit the miptree */
1862 assert(start_level + num_levels >= start_level);
1863 const uint32_t last_level =
1864 MIN2(mt->last_level, start_level + num_levels - 1);
1865 start_level = MAX2(mt->first_level, start_level);
1866 num_levels = last_level - start_level + 1;
1867
1868 for (uint32_t level = start_level; level <= last_level; level++) {
1869 uint32_t level_layers = get_num_phys_layers(&mt->surf, level);
1870
1871 level_layers = MIN2(num_layers, level_layers);
1872
1873 for (unsigned a = 0; a < level_layers; a++) {
1874 enum isl_aux_state aux_state =
1875 intel_miptree_get_aux_state(mt, level, start_layer + a);
1876 assert(aux_state != ISL_AUX_STATE_AUX_INVALID);
1877 if (aux_state != ISL_AUX_STATE_PASS_THROUGH)
1878 return true;
1879 }
1880 }
1881
1882 return false;
1883 }
1884
1885 static void
1886 intel_miptree_check_color_resolve(const struct brw_context *brw,
1887 const struct intel_mipmap_tree *mt,
1888 unsigned level, unsigned layer)
1889 {
1890
1891 if (!mt->mcs_buf)
1892 return;
1893
1894 /* Fast color clear is supported for mipmapped surfaces only on Gen8+. */
1895 assert(brw->gen >= 8 ||
1896 (level == 0 && mt->first_level == 0 && mt->last_level == 0));
1897
1898 /* Compression of arrayed msaa surfaces is supported. */
1899 if (mt->surf.samples > 1)
1900 return;
1901
1902 /* Fast color clear is supported for non-msaa arrays only on Gen8+. */
1903 assert(brw->gen >= 8 ||
1904 (layer == 0 &&
1905 mt->surf.logical_level0_px.depth == 1 &&
1906 mt->surf.logical_level0_px.array_len == 1));
1907
1908 (void)level;
1909 (void)layer;
1910 }
1911
1912 static enum blorp_fast_clear_op
1913 get_ccs_d_resolve_op(enum isl_aux_state aux_state,
1914 bool ccs_supported, bool fast_clear_supported)
1915 {
1916 assert(ccs_supported == fast_clear_supported);
1917
1918 switch (aux_state) {
1919 case ISL_AUX_STATE_CLEAR:
1920 case ISL_AUX_STATE_COMPRESSED_CLEAR:
1921 if (!ccs_supported)
1922 return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
1923 else
1924 return BLORP_FAST_CLEAR_OP_NONE;
1925
1926 case ISL_AUX_STATE_PASS_THROUGH:
1927 return BLORP_FAST_CLEAR_OP_NONE;
1928
1929 case ISL_AUX_STATE_RESOLVED:
1930 case ISL_AUX_STATE_AUX_INVALID:
1931 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
1932 break;
1933 }
1934
1935 unreachable("Invalid aux state for CCS_D");
1936 }
1937
1938 static enum blorp_fast_clear_op
1939 get_ccs_e_resolve_op(enum isl_aux_state aux_state,
1940 bool ccs_supported, bool fast_clear_supported)
1941 {
1942 switch (aux_state) {
1943 case ISL_AUX_STATE_CLEAR:
1944 case ISL_AUX_STATE_COMPRESSED_CLEAR:
1945 if (!ccs_supported)
1946 return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
1947 else if (!fast_clear_supported)
1948 return BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
1949 else
1950 return BLORP_FAST_CLEAR_OP_NONE;
1951
1952 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
1953 if (!ccs_supported)
1954 return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
1955 else
1956 return BLORP_FAST_CLEAR_OP_NONE;
1957
1958 case ISL_AUX_STATE_PASS_THROUGH:
1959 return BLORP_FAST_CLEAR_OP_NONE;
1960
1961 case ISL_AUX_STATE_RESOLVED:
1962 case ISL_AUX_STATE_AUX_INVALID:
1963 break;
1964 }
1965
1966 unreachable("Invalid aux state for CCS_E");
1967 }
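
/* Summary of the mapping implemented by get_ccs_e_resolve_op() above:
 *
 *    aux state             ccs supported  fast clear   resolve op
 *    CLEAR / COMP_CLEAR    no             -            RESOLVE_FULL
 *    CLEAR / COMP_CLEAR    yes            no           RESOLVE_PARTIAL
 *    CLEAR / COMP_CLEAR    yes            yes          NONE
 *    COMPRESSED_NO_CLEAR   no             -            RESOLVE_FULL
 *    COMPRESSED_NO_CLEAR   yes            -            NONE
 *    PASS_THROUGH          -              -            NONE
 */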
1968
1969 static void
1970 intel_miptree_prepare_ccs_access(struct brw_context *brw,
1971 struct intel_mipmap_tree *mt,
1972 uint32_t level, uint32_t layer,
1973 bool aux_supported,
1974 bool fast_clear_supported)
1975 {
1976 enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, layer);
1977
1978 enum blorp_fast_clear_op resolve_op;
1979 if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
1980 resolve_op = get_ccs_e_resolve_op(aux_state, aux_supported,
1981 fast_clear_supported);
1982 } else {
1983 assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D);
1984 resolve_op = get_ccs_d_resolve_op(aux_state, aux_supported,
1985 fast_clear_supported);
1986 }
1987
1988 if (resolve_op != BLORP_FAST_CLEAR_OP_NONE) {
1989 intel_miptree_check_color_resolve(brw, mt, level, layer);
1990 brw_blorp_resolve_color(brw, mt, level, layer, resolve_op);
1991
1992 switch (resolve_op) {
1993 case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
1994 /* The CCS full resolve operation destroys the CCS and sets it to the
1995 * pass-through state. (You can also think of this as being both a
1996 * resolve and an ambiguate in one operation.)
1997 */
1998 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
1999 ISL_AUX_STATE_PASS_THROUGH);
2000 break;
2001
2002 case BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL:
2003 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2004 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2005 break;
2006
2007 default:
2008 unreachable("Invalid resolve op");
2009 }
2010 }
2011 }
2012
2013 static void
2014 intel_miptree_finish_ccs_write(struct brw_context *brw,
2015 struct intel_mipmap_tree *mt,
2016 uint32_t level, uint32_t layer,
2017 bool written_with_ccs)
2018 {
2019 enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, layer);
2020
2021 if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
2022 switch (aux_state) {
2023 case ISL_AUX_STATE_CLEAR:
2024 assert(written_with_ccs);
2025 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2026 ISL_AUX_STATE_COMPRESSED_CLEAR);
2027 break;
2028
2029 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2030 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2031 assert(written_with_ccs);
2032 break; /* Nothing to do */
2033
2034 case ISL_AUX_STATE_PASS_THROUGH:
2035 if (written_with_ccs) {
2036 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2037 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2038 } else {
2039 /* Nothing to do */
2040 }
2041 break;
2042
2043 case ISL_AUX_STATE_RESOLVED:
2044 case ISL_AUX_STATE_AUX_INVALID:
2045 unreachable("Invalid aux state for CCS_E");
2046 }
2047 } else {
2048 assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D);
2049 /* CCS_D is a bit simpler */
2050 switch (aux_state) {
2051 case ISL_AUX_STATE_CLEAR:
2052 assert(written_with_ccs);
2053 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2054 ISL_AUX_STATE_COMPRESSED_CLEAR);
2055 break;
2056
2057 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2058 assert(written_with_ccs);
2059 break; /* Nothing to do */
2060
2061 case ISL_AUX_STATE_PASS_THROUGH:
2062 /* Nothing to do */
2063 break;
2064
2065 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2066 case ISL_AUX_STATE_RESOLVED:
2067 case ISL_AUX_STATE_AUX_INVALID:
2068 unreachable("Invalid aux state for CCS_D");
2069 }
2070 }
2071 }
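
/* In short, finishing a compressed write moves the slice "down" the state
 * lattice: CLEAR becomes COMPRESSED_CLEAR and, for CCS_E only, PASS_THROUGH
 * becomes COMPRESSED_NO_CLEAR. A non-compressed write is only legal from
 * PASS_THROUGH, which it leaves unchanged.
 */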
2072
2073 static void
2074 intel_miptree_finish_mcs_write(struct brw_context *brw,
2075 struct intel_mipmap_tree *mt,
2076 uint32_t level, uint32_t layer,
2077 bool written_with_aux)
2078 {
2079 switch (intel_miptree_get_aux_state(mt, level, layer)) {
2080 case ISL_AUX_STATE_CLEAR:
2081 assert(written_with_aux);
2082 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2083 ISL_AUX_STATE_COMPRESSED_CLEAR);
2084 break;
2085
2086 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2087 assert(written_with_aux);
2088 break; /* Nothing to do */
2089
2090 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2091 case ISL_AUX_STATE_RESOLVED:
2092 case ISL_AUX_STATE_PASS_THROUGH:
2093 case ISL_AUX_STATE_AUX_INVALID:
2094 unreachable("Invalid aux state for MCS");
2095 }
2096 }
2097
2098 static void
2099 intel_miptree_prepare_hiz_access(struct brw_context *brw,
2100 struct intel_mipmap_tree *mt,
2101 uint32_t level, uint32_t layer,
2102 bool hiz_supported, bool fast_clear_supported)
2103 {
2104 enum blorp_hiz_op hiz_op = BLORP_HIZ_OP_NONE;
2105 switch (intel_miptree_get_aux_state(mt, level, layer)) {
2106 case ISL_AUX_STATE_CLEAR:
2107 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2108 if (!hiz_supported || !fast_clear_supported)
2109 hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
2110 break;
2111
2112 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2113 if (!hiz_supported)
2114 hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
2115 break;
2116
2117 case ISL_AUX_STATE_PASS_THROUGH:
2118 case ISL_AUX_STATE_RESOLVED:
2119 break;
2120
2121 case ISL_AUX_STATE_AUX_INVALID:
2122 if (hiz_supported)
2123 hiz_op = BLORP_HIZ_OP_HIZ_RESOLVE;
2124 break;
2125 }
2126
2127 if (hiz_op != BLORP_HIZ_OP_NONE) {
2128 intel_hiz_exec(brw, mt, level, layer, 1, hiz_op);
2129
2130 switch (hiz_op) {
2131 case BLORP_HIZ_OP_DEPTH_RESOLVE:
2132 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2133 ISL_AUX_STATE_RESOLVED);
2134 break;
2135
2136 case BLORP_HIZ_OP_HIZ_RESOLVE:
2137 /* The HiZ resolve operation is actually an ambiguate */
2138 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2139 ISL_AUX_STATE_PASS_THROUGH);
2140 break;
2141
2142 default:
2143 unreachable("Invalid HiZ op");
2144 }
2145 }
2146 }
2147
2148 static void
2149 intel_miptree_finish_hiz_write(struct brw_context *brw,
2150 struct intel_mipmap_tree *mt,
2151 uint32_t level, uint32_t layer,
2152 bool written_with_hiz)
2153 {
2154 switch (intel_miptree_get_aux_state(mt, level, layer)) {
2155 case ISL_AUX_STATE_CLEAR:
2156 assert(written_with_hiz);
2157 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2158 ISL_AUX_STATE_COMPRESSED_CLEAR);
2159 break;
2160
2161 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2162 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2163 assert(written_with_hiz);
2164 break; /* Nothing to do */
2165
2166 case ISL_AUX_STATE_RESOLVED:
2167 if (written_with_hiz) {
2168 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2169 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2170 } else {
2171 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2172 ISL_AUX_STATE_AUX_INVALID);
2173 }
2174 break;
2175
2176 case ISL_AUX_STATE_PASS_THROUGH:
2177 if (written_with_hiz) {
2178 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2179 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2180 }
2181 break;
2182
2183 case ISL_AUX_STATE_AUX_INVALID:
2184 assert(!written_with_hiz);
2185 break;
2186 }
2187 }
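
/* Taken together, the prepare/finish pair implements this per-slice state
 * machine for HiZ (a summary of the code above):
 *
 *    CLEAR          --hiz write-->      COMPRESSED_CLEAR
 *    RESOLVED       --hiz write-->      COMPRESSED_NO_CLEAR
 *    RESOLVED       --non-hiz write-->  AUX_INVALID
 *    PASS_THROUGH   --hiz write-->      COMPRESSED_NO_CLEAR
 *    AUX_INVALID    --hiz resolve-->    PASS_THROUGH (an ambiguate)
 */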
2188
2189 static inline uint32_t
2190 miptree_level_range_length(const struct intel_mipmap_tree *mt,
2191 uint32_t start_level, uint32_t num_levels)
2192 {
2193 assert(start_level >= mt->first_level);
2194 assert(start_level <= mt->last_level);
2195
2196 if (num_levels == INTEL_REMAINING_LEVELS)
2197 num_levels = mt->last_level - start_level + 1;
2198 /* Check for overflow */
2199 assert(start_level + num_levels >= start_level);
2200 assert(start_level + num_levels <= mt->last_level + 1);
2201
2202 return num_levels;
2203 }
2204
2205 static inline uint32_t
2206 miptree_layer_range_length(const struct intel_mipmap_tree *mt, uint32_t level,
2207 uint32_t start_layer, uint32_t num_layers)
2208 {
2209 assert(level <= mt->last_level);
2210 const uint32_t total_num_layers = get_num_phys_layers(&mt->surf, level);
2211
2212 assert(start_layer < total_num_layers);
2213 if (num_layers == INTEL_REMAINING_LAYERS)
2214 num_layers = total_num_layers - start_layer;
2215 /* Check for overflow */
2216 assert(start_layer + num_layers >= start_layer);
2217 assert(start_layer + num_layers <= total_num_layers);
2218
2219 return num_layers;
2220 }
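
/* Worked example (illustrative): for a miptree with first_level == 0 and
 * last_level == 3, miptree_level_range_length(mt, 1, INTEL_REMAINING_LEVELS)
 * yields 3 (levels 1..3). Likewise, for a level with 6 physical layers,
 * miptree_layer_range_length(mt, level, 2, INTEL_REMAINING_LAYERS) yields 4
 * (layers 2..5).
 */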
2221
2222 void
2223 intel_miptree_prepare_access(struct brw_context *brw,
2224 struct intel_mipmap_tree *mt,
2225 uint32_t start_level, uint32_t num_levels,
2226 uint32_t start_layer, uint32_t num_layers,
2227 bool aux_supported, bool fast_clear_supported)
2228 {
2229 num_levels = miptree_level_range_length(mt, start_level, num_levels);
2230
2231 if (_mesa_is_format_color_format(mt->format)) {
2232 if (!mt->mcs_buf)
2233 return;
2234
2235 if (mt->surf.samples > 1) {
2236 /* Nothing to do for MSAA */
2237 assert(aux_supported && fast_clear_supported);
2238 } else {
2239 for (uint32_t l = 0; l < num_levels; l++) {
2240 const uint32_t level = start_level + l;
2241 const uint32_t level_layers =
2242 miptree_layer_range_length(mt, level, start_layer, num_layers);
2243 for (uint32_t a = 0; a < level_layers; a++) {
2244 intel_miptree_prepare_ccs_access(brw, mt, level,
2245 start_layer + a, aux_supported,
2246 fast_clear_supported);
2247 }
2248 }
2249 }
2250 } else if (mt->format == MESA_FORMAT_S_UINT8) {
2251 /* Nothing to do for stencil */
2252 } else {
2253 if (!mt->hiz_buf)
2254 return;
2255
2256 for (uint32_t l = 0; l < num_levels; l++) {
2257 const uint32_t level = start_level + l;
2258 if (!intel_miptree_level_has_hiz(mt, level))
2259 continue;
2260
2261 const uint32_t level_layers =
2262 miptree_layer_range_length(mt, level, start_layer, num_layers);
2263 for (uint32_t a = 0; a < level_layers; a++) {
2264 intel_miptree_prepare_hiz_access(brw, mt, level, start_layer + a,
2265 aux_supported,
2266 fast_clear_supported);
2267 }
2268 }
2269 }
2270 }
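
/* Usage sketch (illustrative): a caller that cannot handle any form of
 * compression resolves everything away before touching the memory:
 *
 *    intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
 *                                 0, INTEL_REMAINING_LAYERS,
 *                                 false, false);
 *
 * This is exactly what intel_miptree_prepare_image() below does for image
 * load/store, since the data port doesn't understand compression.
 */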
2271
2272 void
2273 intel_miptree_finish_write(struct brw_context *brw,
2274 struct intel_mipmap_tree *mt, uint32_t level,
2275 uint32_t start_layer, uint32_t num_layers,
2276 bool written_with_aux)
2277 {
2278 num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers);
2279
2280 if (_mesa_is_format_color_format(mt->format)) {
2281 if (!mt->mcs_buf)
2282 return;
2283
2284 if (mt->surf.samples > 1) {
2285 for (uint32_t a = 0; a < num_layers; a++) {
2286 intel_miptree_finish_mcs_write(brw, mt, level, start_layer + a,
2287 written_with_aux);
2288 }
2289 } else {
2290 for (uint32_t a = 0; a < num_layers; a++) {
2291 intel_miptree_finish_ccs_write(brw, mt, level, start_layer + a,
2292 written_with_aux);
2293 }
2294 }
2295 } else if (mt->format == MESA_FORMAT_S_UINT8) {
2296 /* Nothing to do for stencil */
2297 } else {
2298 if (!intel_miptree_level_has_hiz(mt, level))
2299 return;
2300
2301 for (uint32_t a = 0; a < num_layers; a++) {
2302 intel_miptree_finish_hiz_write(brw, mt, level, start_layer + a,
2303 written_with_aux);
2304 }
2305 }
2306 }
2307
2308 enum isl_aux_state
2309 intel_miptree_get_aux_state(const struct intel_mipmap_tree *mt,
2310 uint32_t level, uint32_t layer)
2311 {
2312 intel_miptree_check_level_layer(mt, level, layer);
2313
2314 if (_mesa_is_format_color_format(mt->format)) {
2315 assert(mt->mcs_buf != NULL);
2316 assert(mt->surf.samples == 1 ||
2317 mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
2318 } else if (mt->format == MESA_FORMAT_S_UINT8) {
2319 unreachable("Cannot get aux state for stencil");
2320 } else {
2321 assert(intel_miptree_level_has_hiz(mt, level));
2322 }
2323
2324 return mt->aux_state[level][layer];
2325 }
2326
2327 void
2328 intel_miptree_set_aux_state(struct brw_context *brw,
2329 struct intel_mipmap_tree *mt, uint32_t level,
2330 uint32_t start_layer, uint32_t num_layers,
2331 enum isl_aux_state aux_state)
2332 {
2333 num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers);
2334
2335 if (_mesa_is_format_color_format(mt->format)) {
2336 assert(mt->mcs_buf != NULL);
2337 assert(mt->surf.samples == 1 ||
2338 mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
2339 } else if (mt->format == MESA_FORMAT_S_UINT8) {
2340 unreachable("Cannot get aux state for stencil");
2341 } else {
2342 assert(intel_miptree_level_has_hiz(mt, level));
2343 }
2344
2345 for (unsigned a = 0; a < num_layers; a++)
2346 mt->aux_state[level][start_layer + a] = aux_state;
2347 }
2348
2349 /* On Gen9, color buffers may be compressed by the hardware (lossless
2350 * compression). There are, however, format restrictions, and care needs to
2351 * be taken that the sampler engine is capable of re-interpreting a buffer
2352 * with a format different from the one it was originally written with.
2353 *
2354 * For example, SRGB formats are not compressible and the sampler engine isn't
2355 * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying
2356 * color buffer needs to be resolved so that the sampling surface can be
2357 * sampled as non-compressed (i.e., without the auxiliary MCS buffer being
2358 * set).
2359 */
2360 static bool
2361 can_texture_with_ccs(struct brw_context *brw,
2362 struct intel_mipmap_tree *mt,
2363 mesa_format view_format)
2364 {
2365 if (mt->aux_usage != ISL_AUX_USAGE_CCS_E)
2366 return false;
2367
2368 enum isl_format isl_mt_format = brw_isl_format_for_mesa_format(mt->format);
2369 enum isl_format isl_view_format = brw_isl_format_for_mesa_format(view_format);
2370
2371 if (!isl_formats_are_ccs_e_compatible(&brw->screen->devinfo,
2372 isl_mt_format, isl_view_format)) {
2373 perf_debug("Incompatible sampling format (%s) for rbc (%s)\n",
2374 _mesa_get_format_name(view_format),
2375 _mesa_get_format_name(mt->format));
2376 return false;
2377 }
2378
2379 return true;
2380 }
2381
2382 static void
2383 intel_miptree_prepare_texture_slices(struct brw_context *brw,
2384 struct intel_mipmap_tree *mt,
2385 mesa_format view_format,
2386 uint32_t start_level, uint32_t num_levels,
2387 uint32_t start_layer, uint32_t num_layers,
2388 bool *aux_supported_out)
2389 {
2390 bool aux_supported, clear_supported;
2391 if (_mesa_is_format_color_format(mt->format)) {
2392 if (mt->surf.samples > 1) {
2393 aux_supported = clear_supported = true;
2394 } else {
2395 aux_supported = can_texture_with_ccs(brw, mt, view_format);
2396
2397 /* Clear color is specified as ints or floats and the conversion is
2398 * done by the sampler. If we have a texture view, we would have to
2399 * perform the clear color conversion manually. Just disable clear
2400 * color.
2401 */
2402 clear_supported = aux_supported && (mt->format == view_format);
2403 }
2404 } else if (mt->format == MESA_FORMAT_S_UINT8) {
2405 aux_supported = clear_supported = false;
2406 } else {
2407 aux_supported = clear_supported = intel_miptree_sample_with_hiz(brw, mt);
2408 }
2409
2410 intel_miptree_prepare_access(brw, mt, start_level, num_levels,
2411 start_layer, num_layers,
2412 aux_supported, clear_supported);
2413 if (aux_supported_out)
2414 *aux_supported_out = aux_supported;
2415 }
2416
2417 void
2418 intel_miptree_prepare_texture(struct brw_context *brw,
2419 struct intel_mipmap_tree *mt,
2420 mesa_format view_format,
2421 bool *aux_supported_out)
2422 {
2423 intel_miptree_prepare_texture_slices(brw, mt, view_format,
2424 0, INTEL_REMAINING_LEVELS,
2425 0, INTEL_REMAINING_LAYERS,
2426 aux_supported_out);
2427 }
2428
2429 void
2430 intel_miptree_prepare_image(struct brw_context *brw,
2431 struct intel_mipmap_tree *mt)
2432 {
2433 /* The data port doesn't understand any compression */
2434 intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
2435 0, INTEL_REMAINING_LAYERS, false, false);
2436 }
2437
2438 void
2439 intel_miptree_prepare_fb_fetch(struct brw_context *brw,
2440 struct intel_mipmap_tree *mt, uint32_t level,
2441 uint32_t start_layer, uint32_t num_layers)
2442 {
2443 intel_miptree_prepare_texture_slices(brw, mt, mt->format, level, 1,
2444 start_layer, num_layers, NULL);
2445 }
2446
2447 void
2448 intel_miptree_prepare_render(struct brw_context *brw,
2449 struct intel_mipmap_tree *mt, uint32_t level,
2450 uint32_t start_layer, uint32_t layer_count,
2451 bool srgb_enabled)
2452 {
2453 /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of
2454 * the single-sampled color renderbuffers because the CCS buffer isn't
2455 * supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is
2456 * enabled because otherwise the surface state will be programmed with
2457 * the linear equivalent format anyway.
2458 */
2459 if (brw->gen == 9 && srgb_enabled && mt->surf.samples == 1 &&
2460 _mesa_get_srgb_format_linear(mt->format) != mt->format) {
2461
2462 /* Lossless compression is not supported for SRGB formats, it
2463 * should be impossible to get here with such surfaces.
2464 */
2465 assert(mt->aux_usage != ISL_AUX_USAGE_CCS_E);
2466 intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
2467 false, false);
2468 }
2469 }
2470
2471 void
2472 intel_miptree_finish_render(struct brw_context *brw,
2473 struct intel_mipmap_tree *mt, uint32_t level,
2474 uint32_t start_layer, uint32_t layer_count)
2475 {
2476 assert(_mesa_is_format_color_format(mt->format));
2477 intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
2478 mt->mcs_buf != NULL);
2479 }
2480
2481 void
2482 intel_miptree_prepare_depth(struct brw_context *brw,
2483 struct intel_mipmap_tree *mt, uint32_t level,
2484 uint32_t start_layer, uint32_t layer_count)
2485 {
2486 intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
2487 mt->hiz_buf != NULL, mt->hiz_buf != NULL);
2488 }
2489
2490 void
2491 intel_miptree_finish_depth(struct brw_context *brw,
2492 struct intel_mipmap_tree *mt, uint32_t level,
2493 uint32_t start_layer, uint32_t layer_count,
2494 bool depth_written)
2495 {
2496 if (depth_written) {
2497 intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
2498 mt->hiz_buf != NULL);
2499 }
2500 }
2501
2502 /**
2503 * Make it possible to share the BO backing the given miptree with another
2504 * process or another miptree.
2505 *
2506 * Fast color clears are unsafe with shared buffers, so we need to resolve and
2507 * then discard the MCS buffer, if present. We also set the no_ccs flag to
2508 * ensure that no MCS buffer gets allocated in the future.
2509 *
2510 * HiZ is similarly unsafe with shared buffers.
2511 */
2512 void
2513 intel_miptree_make_shareable(struct brw_context *brw,
2514 struct intel_mipmap_tree *mt)
2515 {
2516 /* MCS buffers are also used for multisample buffers, but we can't resolve
2517 * away a multisample MCS buffer because it's an integral part of how the
2518 * pixel data is stored. Fortunately this code path should never be
2519 * reached for multisample buffers.
2520 */
2521 assert(mt->surf.msaa_layout == ISL_MSAA_LAYOUT_NONE ||
2522 mt->surf.samples == 1);
2523
2524 intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
2525 0, INTEL_REMAINING_LAYERS, false, false);
2526
2527 if (mt->mcs_buf) {
2528 brw_bo_unreference(mt->mcs_buf->bo);
2529 free(mt->mcs_buf);
2530 mt->mcs_buf = NULL;
2531
2532 /* Any pending MCS/CCS operations are no longer needed. Trying to
2533 * execute any will likely crash due to the missing aux buffer. So let's
2534 * delete all pending ops.
2535 */
2536 free(mt->aux_state);
2537 mt->aux_state = NULL;
2538 }
2539
2540 if (mt->hiz_buf) {
2541 intel_miptree_aux_buffer_free(mt->hiz_buf);
2542 mt->hiz_buf = NULL;
2543
2544 for (uint32_t l = mt->first_level; l <= mt->last_level; ++l) {
2545 mt->level[l].has_hiz = false;
2546 }
2547
2548 /* Any pending HiZ operations are no longer needed. Trying to execute
2549 * any will likely crash due to the missing aux buffer. So let's delete
2550 * all pending ops.
2551 */
2552 free(mt->aux_state);
2553 mt->aux_state = NULL;
2554 }
2555
2556 mt->aux_usage = ISL_AUX_USAGE_NONE;
2557 }
2558
2559
2560 /**
2561 * \brief Get pointer offset into stencil buffer.
2562 *
2563 * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
2564 * must decode the tile's layout in software.
2565 *
2566 * See
2567 * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
2568 * Format.
2569 * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
2570 *
2571 * Even though the returned offset is always positive, the return type is
2572 * signed due to
2573 * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
2574 * mesa: Fix return type of _mesa_get_format_bytes() (#37351)
2575 */
2576 static intptr_t
2577 intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
2578 {
2579 uint32_t tile_size = 4096;
2580 uint32_t tile_width = 64;
2581 uint32_t tile_height = 64;
2582 uint32_t row_size = 64 * stride / 2; /* Two rows are interleaved. */
2583
2584 uint32_t tile_x = x / tile_width;
2585 uint32_t tile_y = y / tile_height;
2586
2587 /* The byte's address relative to the tile's base address. */
2588 uint32_t byte_x = x % tile_width;
2589 uint32_t byte_y = y % tile_height;
2590
2591 uintptr_t u = tile_y * row_size
2592 + tile_x * tile_size
2593 + 512 * (byte_x / 8)
2594 + 64 * (byte_y / 8)
2595 + 32 * ((byte_y / 4) % 2)
2596 + 16 * ((byte_x / 4) % 2)
2597 + 8 * ((byte_y / 2) % 2)
2598 + 4 * ((byte_x / 2) % 2)
2599 + 2 * (byte_y % 2)
2600 + 1 * (byte_x % 2);
2601
2602 if (swizzled) {
2603 /* adjust for bit6 swizzling */
2604 if (((byte_x / 8) % 2) == 1) {
2605 if (((byte_y / 8) % 2) == 0) {
2606 u += 64;
2607 } else {
2608 u -= 64;
2609 }
2610 }
2611 }
2612
2613 return u;
2614 }
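
/* Worked example: with stride == 128 and no bit-6 swizzling, pixel (17, 5)
 * sits in tile (0, 0) and the only non-zero terms are:
 *
 *    intel_offset_S8(128, 17, 5, false)
 *       == 512 * (17 / 8) + 32 * ((5 / 4) % 2) + 2 * (5 % 2) + 1 * (17 % 2)
 *       == 1024 + 32 + 2 + 1
 *       == 1059
 */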
2615
2616 void
2617 intel_miptree_updownsample(struct brw_context *brw,
2618 struct intel_mipmap_tree *src,
2619 struct intel_mipmap_tree *dst)
2620 {
2621 unsigned src_w = src->surf.logical_level0_px.width;
2622 unsigned src_h = src->surf.logical_level0_px.height;
2623 unsigned dst_w = dst->surf.logical_level0_px.width;
2624 unsigned dst_h = dst->surf.logical_level0_px.height;
2625
2626 brw_blorp_blit_miptrees(brw,
2627 src, 0 /* level */, 0 /* layer */,
2628 src->format, SWIZZLE_XYZW,
2629 dst, 0 /* level */, 0 /* layer */, dst->format,
2630 0, 0, src_w, src_h,
2631 0, 0, dst_w, dst_h,
2632 GL_NEAREST, false, false /*mirror x, y*/,
2633 false, false);
2634
2635 if (src->stencil_mt) {
2636 src_w = src->stencil_mt->surf.logical_level0_px.width;
2637 src_h = src->stencil_mt->surf.logical_level0_px.height;
2638 dst_w = dst->stencil_mt->surf.logical_level0_px.width;
2639 dst_h = dst->stencil_mt->surf.logical_level0_px.height;
2640
2641 brw_blorp_blit_miptrees(brw,
2642 src->stencil_mt, 0 /* level */, 0 /* layer */,
2643 src->stencil_mt->format, SWIZZLE_XYZW,
2644 dst->stencil_mt, 0 /* level */, 0 /* layer */,
2645 dst->stencil_mt->format,
2646 0, 0, src_w, src_h,
2647 0, 0, dst_w, dst_h,
2648 GL_NEAREST, false, false /*mirror x, y*/,
2649 false, false /* decode/encode srgb */);
2650 }
2651 }
2652
2653 void
2654 intel_update_r8stencil(struct brw_context *brw,
2655 struct intel_mipmap_tree *mt)
2656 {
2657 assert(brw->gen >= 7);
2658 struct intel_mipmap_tree *src =
2659 mt->format == MESA_FORMAT_S_UINT8 ? mt : mt->stencil_mt;
2660 if (!src || brw->gen >= 8 || !src->r8stencil_needs_update)
2661 return;
2662
2663 assert(src->surf.size > 0);
2664
2665 if (!mt->r8stencil_mt) {
2666 assert(brw->gen > 6); /* Handle MIPTREE_LAYOUT_GEN6_HIZ_STENCIL */
2667 mt->r8stencil_mt = make_surface(
2668 brw,
2669 src->target,
2670 MESA_FORMAT_R_UINT8,
2671 src->first_level, src->last_level,
2672 src->surf.logical_level0_px.width,
2673 src->surf.logical_level0_px.height,
2674 src->surf.dim == ISL_SURF_DIM_3D ?
2675 src->surf.logical_level0_px.depth :
2676 src->surf.logical_level0_px.array_len,
2677 src->surf.samples,
2678 ISL_TILING_Y0_BIT,
2679 ISL_SURF_USAGE_TEXTURE_BIT,
2680 BO_ALLOC_FOR_RENDER, 0, NULL);
2681 assert(mt->r8stencil_mt);
2682 }
2683
2684 struct intel_mipmap_tree *dst = mt->r8stencil_mt;
2685
2686 for (int level = src->first_level; level <= src->last_level; level++) {
2687 const unsigned depth = src->surf.dim == ISL_SURF_DIM_3D ?
2688 minify(src->surf.phys_level0_sa.depth, level) :
2689 src->surf.phys_level0_sa.array_len;
2690
2691 for (unsigned layer = 0; layer < depth; layer++) {
2692 brw_blorp_copy_miptrees(brw,
2693 src, level, layer,
2694 dst, level, layer,
2695 0, 0, 0, 0,
2696 minify(src->surf.logical_level0_px.width,
2697 level),
2698 minify(src->surf.logical_level0_px.height,
2699 level));
2700 }
2701 }
2702
2703 brw_render_cache_set_check_flush(brw, dst->bo);
2704 src->r8stencil_needs_update = false;
2705 }
2706
2707 static void *
2708 intel_miptree_map_raw(struct brw_context *brw,
2709 struct intel_mipmap_tree *mt,
2710 GLbitfield mode)
2711 {
2712 struct brw_bo *bo = mt->bo;
2713
2714 if (brw_batch_references(&brw->batch, bo))
2715 intel_batchbuffer_flush(brw);
2716
2717 return brw_bo_map(brw, bo, mode);
2718 }
2719
2720 static void
2721 intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
2722 {
2723 brw_bo_unmap(mt->bo);
2724 }
2725
2726 static void
2727 intel_miptree_map_gtt(struct brw_context *brw,
2728 struct intel_mipmap_tree *mt,
2729 struct intel_miptree_map *map,
2730 unsigned int level, unsigned int slice)
2731 {
2732 unsigned int bw, bh;
2733 void *base;
2734 unsigned int image_x, image_y;
2735 intptr_t x = map->x;
2736 intptr_t y = map->y;
2737
2738 /* For compressed formats, the stride is the number of bytes per
2739 * row of blocks. intel_miptree_get_image_offset() already does
2740 * the divide.
2741 */
2742 _mesa_get_format_block_size(mt->format, &bw, &bh);
2743 assert(y % bh == 0);
2744 assert(x % bw == 0);
2745 y /= bh;
2746 x /= bw;
2747
2748 base = intel_miptree_map_raw(brw, mt, map->mode);
2749
2750 if (base == NULL)
2751 map->ptr = NULL;
2752 else {
2753 base += mt->offset;
2754
2755 /* Note that in the case of cube maps, the caller must have passed the
2756 * slice number referencing the face.
2757 */
2758 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2759 x += image_x;
2760 y += image_y;
2761
2762 map->stride = mt->surf.row_pitch;
2763 map->ptr = base + y * map->stride + x * mt->cpp;
2764 }
2765
2766 DBG("%s: %d,%d %dx%d from mt %p (%s) "
2767 "%"PRIiPTR",%"PRIiPTR" = %p/%d\n", __func__,
2768 map->x, map->y, map->w, map->h,
2769 mt, _mesa_get_format_name(mt->format),
2770 x, y, map->ptr, map->stride);
2771 }
2772
2773 static void
2774 intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
2775 {
2776 intel_miptree_unmap_raw(mt);
2777 }
2778
2779 static void
2780 intel_miptree_map_blit(struct brw_context *brw,
2781 struct intel_mipmap_tree *mt,
2782 struct intel_miptree_map *map,
2783 unsigned int level, unsigned int slice)
2784 {
2785 map->linear_mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
2786 /* first_level */ 0,
2787 /* last_level */ 0,
2788 map->w, map->h, 1,
2789 /* samples */ 1,
2790 MIPTREE_LAYOUT_TILING_NONE);
2791
2792 if (!map->linear_mt) {
2793 fprintf(stderr, "Failed to allocate blit temporary\n");
2794 goto fail;
2795 }
2796 map->stride = map->linear_mt->surf.row_pitch;
2797
2798 /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no
2799 * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
2800 * invalidate is set, since we'll be writing the whole rectangle from our
2801 * temporary buffer back out.
2802 */
2803 if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
2804 if (!intel_miptree_copy(brw,
2805 mt, level, slice, map->x, map->y,
2806 map->linear_mt, 0, 0, 0, 0,
2807 map->w, map->h)) {
2808 fprintf(stderr, "Failed to blit\n");
2809 goto fail;
2810 }
2811 }
2812
2813 map->ptr = intel_miptree_map_raw(brw, map->linear_mt, map->mode);
2814
2815 DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
2816 map->x, map->y, map->w, map->h,
2817 mt, _mesa_get_format_name(mt->format),
2818 level, slice, map->ptr, map->stride);
2819
2820 return;
2821
2822 fail:
2823 intel_miptree_release(&map->linear_mt);
2824 map->ptr = NULL;
2825 map->stride = 0;
2826 }
2827
2828 static void
2829 intel_miptree_unmap_blit(struct brw_context *brw,
2830 struct intel_mipmap_tree *mt,
2831 struct intel_miptree_map *map,
2832 unsigned int level,
2833 unsigned int slice)
2834 {
2835 struct gl_context *ctx = &brw->ctx;
2836
2837 intel_miptree_unmap_raw(map->linear_mt);
2838
2839 if (map->mode & GL_MAP_WRITE_BIT) {
2840 bool ok = intel_miptree_copy(brw,
2841 map->linear_mt, 0, 0, 0, 0,
2842 mt, level, slice, map->x, map->y,
2843 map->w, map->h);
2844 WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
2845 }
2846
2847 intel_miptree_release(&map->linear_mt);
2848 }
2849
2850 /**
2851 * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
2852 */
2853 #if defined(USE_SSE41)
2854 static void
2855 intel_miptree_map_movntdqa(struct brw_context *brw,
2856 struct intel_mipmap_tree *mt,
2857 struct intel_miptree_map *map,
2858 unsigned int level, unsigned int slice)
2859 {
2860 assert(map->mode & GL_MAP_READ_BIT);
2861 assert(!(map->mode & GL_MAP_WRITE_BIT));
2862
2863 DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
2864 map->x, map->y, map->w, map->h,
2865 mt, _mesa_get_format_name(mt->format),
2866 level, slice, map->ptr, map->stride);
2867
2868 /* Map the original image */
2869 uint32_t image_x;
2870 uint32_t image_y;
2871 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2872 image_x += map->x;
2873 image_y += map->y;
2874
2875 void *src = intel_miptree_map_raw(brw, mt, map->mode);
2876 if (!src)
2877 return;
2878
2879 src += mt->offset;
2880
2881 src += image_y * mt->surf.row_pitch;
2882 src += image_x * mt->cpp;
2883
2884 /* Due to the pixel offsets for the particular image being mapped, our
2885 * src pointer may not be 16-byte aligned. However, if the pitch is
2886 * divisible by 16, then the amount by which it's misaligned will remain
2887 * consistent from row to row.
2888 */
2889 assert((mt->surf.row_pitch % 16) == 0);
2890 const int misalignment = ((uintptr_t) src) & 15;
2891
2892 /* Create an untiled temporary buffer for the mapping. */
2893 const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w);
2894
2895 map->stride = ALIGN(misalignment + width_bytes, 16);
2896
2897 map->buffer = _mesa_align_malloc(map->stride * map->h, 16);
2898 /* Offset the destination so it has the same misalignment as src. */
2899 map->ptr = map->buffer + misalignment;
2900
2901 assert((((uintptr_t) map->ptr) & 15) == misalignment);
2902
2903 for (uint32_t y = 0; y < map->h; y++) {
2904 void *dst_ptr = map->ptr + y * map->stride;
2905 void *src_ptr = src + y * mt->surf.row_pitch;
2906
2907 _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
2908 }
2909
2910 intel_miptree_unmap_raw(mt);
2911 }
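
/* Example of the alignment bookkeeping above (a sketch with made-up
 * numbers): if src lands at an address with (uintptr_t)src % 16 == 12 and
 * width_bytes == 100, then misalignment == 12, map->stride ==
 * ALIGN(112, 16) == 112, and map->ptr == map->buffer + 12, so every row of
 * the temporary keeps the same 16-byte phase as the corresponding src row.
 */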
2912
2913 static void
2914 intel_miptree_unmap_movntdqa(struct brw_context *brw,
2915 struct intel_mipmap_tree *mt,
2916 struct intel_miptree_map *map,
2917 unsigned int level,
2918 unsigned int slice)
2919 {
2920 _mesa_align_free(map->buffer);
2921 map->buffer = NULL;
2922 map->ptr = NULL;
2923 }
2924 #endif
2925
2926 static void
2927 intel_miptree_map_s8(struct brw_context *brw,
2928 struct intel_mipmap_tree *mt,
2929 struct intel_miptree_map *map,
2930 unsigned int level, unsigned int slice)
2931 {
2932 map->stride = map->w;
2933 map->buffer = map->ptr = malloc(map->stride * map->h);
2934 if (!map->buffer)
2935 return;
2936
2937 /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no
2938 * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
2939 * invalidate is set, since we'll be writing the whole rectangle from our
2940 * temporary buffer back out.
2941 */
2942 if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
2943 uint8_t *untiled_s8_map = map->ptr;
2944 uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_READ_BIT);
2945 unsigned int image_x, image_y;
2946
2947 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2948
2949 for (uint32_t y = 0; y < map->h; y++) {
2950 for (uint32_t x = 0; x < map->w; x++) {
2951 ptrdiff_t offset = intel_offset_S8(mt->surf.row_pitch,
2952 x + image_x + map->x,
2953 y + image_y + map->y,
2954 brw->has_swizzling);
2955 untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
2956 }
2957 }
2958
2959 intel_miptree_unmap_raw(mt);
2960
2961 DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__,
2962 map->x, map->y, map->w, map->h,
2963 mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
2964 } else {
2965 DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
2966 map->x, map->y, map->w, map->h,
2967 mt, map->ptr, map->stride);
2968 }
2969 }
2970
2971 static void
2972 intel_miptree_unmap_s8(struct brw_context *brw,
2973 struct intel_mipmap_tree *mt,
2974 struct intel_miptree_map *map,
2975 unsigned int level,
2976 unsigned int slice)
2977 {
2978 if (map->mode & GL_MAP_WRITE_BIT) {
2979 unsigned int image_x, image_y;
2980 uint8_t *untiled_s8_map = map->ptr;
2981 uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT);
2982
2983 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2984
2985 for (uint32_t y = 0; y < map->h; y++) {
2986 for (uint32_t x = 0; x < map->w; x++) {
2987 ptrdiff_t offset = intel_offset_S8(mt->surf.row_pitch,
2988 image_x + x + map->x,
2989 image_y + y + map->y,
2990 brw->has_swizzling);
2991 tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
2992 }
2993 }
2994
2995 intel_miptree_unmap_raw(mt);
2996 }
2997
2998 free(map->buffer);
2999 }
3000
3001 static void
3002 intel_miptree_map_etc(struct brw_context *brw,
3003 struct intel_mipmap_tree *mt,
3004 struct intel_miptree_map *map,
3005 unsigned int level,
3006 unsigned int slice)
3007 {
3008 assert(mt->etc_format != MESA_FORMAT_NONE);
3009 if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
3010 assert(mt->format == MESA_FORMAT_R8G8B8X8_UNORM);
3011 }
3012
3013 assert(map->mode & GL_MAP_WRITE_BIT);
3014 assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
3015
3016 map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
3017 map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
3018 map->w, map->h, 1));
3019 map->ptr = map->buffer;
3020 }
3021
3022 static void
3023 intel_miptree_unmap_etc(struct brw_context *brw,
3024 struct intel_mipmap_tree *mt,
3025 struct intel_miptree_map *map,
3026 unsigned int level,
3027 unsigned int slice)
3028 {
3029 uint32_t image_x;
3030 uint32_t image_y;
3031 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3032
3033 image_x += map->x;
3034 image_y += map->y;
3035
3036 uint8_t *dst = intel_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT)
3037 + image_y * mt->surf.row_pitch
3038 + image_x * mt->cpp;
3039
3040 if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
3041 _mesa_etc1_unpack_rgba8888(dst, mt->surf.row_pitch,
3042 map->ptr, map->stride,
3043 map->w, map->h);
3044 else
3045 _mesa_unpack_etc2_format(dst, mt->surf.row_pitch,
3046 map->ptr, map->stride,
3047 map->w, map->h, mt->etc_format);
3048
3049 intel_miptree_unmap_raw(mt);
3050 free(map->buffer);
3051 }
3052
3053 /**
3054 * Mapping function for packed depth/stencil miptrees backed by real separate
3055 * miptrees for depth and stencil.
3056 *
3057 * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
3058 * separate from the depth buffer. Yet at the GL API level, we have to expose
3059 * packed depth/stencil textures and FBO attachments, and Mesa core expects to
3060 * be able to map that memory for texture storage and glReadPixels-type
3061 * operations. We give Mesa core that access by mallocing a temporary and
3062 * copying the data between the actual backing store and the temporary.
3063 */
3064 static void
3065 intel_miptree_map_depthstencil(struct brw_context *brw,
3066 struct intel_mipmap_tree *mt,
3067 struct intel_miptree_map *map,
3068 unsigned int level, unsigned int slice)
3069 {
3070 struct intel_mipmap_tree *z_mt = mt;
3071 struct intel_mipmap_tree *s_mt = mt->stencil_mt;
3072 bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
3073 int packed_bpp = map_z32f_x24s8 ? 8 : 4;
3074
3075 map->stride = map->w * packed_bpp;
3076 map->buffer = map->ptr = malloc(map->stride * map->h);
3077 if (!map->buffer)
3078 return;
3079
3080 /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no
3081 * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
3082 * invalidate is set, since we'll be writing the whole rectangle from our
3083 * temporary buffer back out.
3084 */
3085 if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
3086 uint32_t *packed_map = map->ptr;
3087 uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_READ_BIT);
3088 uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_READ_BIT);
3089 unsigned int s_image_x, s_image_y;
3090 unsigned int z_image_x, z_image_y;
3091
3092 intel_miptree_get_image_offset(s_mt, level, slice,
3093 &s_image_x, &s_image_y);
3094 intel_miptree_get_image_offset(z_mt, level, slice,
3095 &z_image_x, &z_image_y);
3096
3097 for (uint32_t y = 0; y < map->h; y++) {
3098 for (uint32_t x = 0; x < map->w; x++) {
3099 int map_x = map->x + x, map_y = map->y + y;
3100 ptrdiff_t s_offset = intel_offset_S8(s_mt->surf.row_pitch,
3101 map_x + s_image_x,
3102 map_y + s_image_y,
3103 brw->has_swizzling);
3104 ptrdiff_t z_offset = ((map_y + z_image_y) *
3105 (z_mt->surf.row_pitch / 4) +
3106 (map_x + z_image_x));
3107 uint8_t s = s_map[s_offset];
3108 uint32_t z = z_map[z_offset];
3109
3110 if (map_z32f_x24s8) {
3111 packed_map[(y * map->w + x) * 2 + 0] = z;
3112 packed_map[(y * map->w + x) * 2 + 1] = s;
3113 } else {
3114 packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
3115 }
3116 }
3117 }
3118
3119 intel_miptree_unmap_raw(s_mt);
3120 intel_miptree_unmap_raw(z_mt);
3121
3122 DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
3123 __func__,
3124 map->x, map->y, map->w, map->h,
3125 z_mt, map->x + z_image_x, map->y + z_image_y,
3126 s_mt, map->x + s_image_x, map->y + s_image_y,
3127 map->ptr, map->stride);
3128 } else {
3129 DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
3130 map->x, map->y, map->w, map->h,
3131 mt, map->ptr, map->stride);
3132 }
3133 }
3134
3135 static void
3136 intel_miptree_unmap_depthstencil(struct brw_context *brw,
3137 struct intel_mipmap_tree *mt,
3138 struct intel_miptree_map *map,
3139 unsigned int level,
3140 unsigned int slice)
3141 {
3142 struct intel_mipmap_tree *z_mt = mt;
3143 struct intel_mipmap_tree *s_mt = mt->stencil_mt;
3144 bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
3145
3146 if (map->mode & GL_MAP_WRITE_BIT) {
3147 uint32_t *packed_map = map->ptr;
3148 uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_WRITE_BIT);
3149 uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_WRITE_BIT);
3150 unsigned int s_image_x, s_image_y;
3151 unsigned int z_image_x, z_image_y;
3152
3153 intel_miptree_get_image_offset(s_mt, level, slice,
3154 &s_image_x, &s_image_y);
3155 intel_miptree_get_image_offset(z_mt, level, slice,
3156 &z_image_x, &z_image_y);
3157
3158 for (uint32_t y = 0; y < map->h; y++) {
3159 for (uint32_t x = 0; x < map->w; x++) {
3160 ptrdiff_t s_offset = intel_offset_S8(s_mt->surf.row_pitch,
3161 x + s_image_x + map->x,
3162 y + s_image_y + map->y,
3163 brw->has_swizzling);
3164 ptrdiff_t z_offset = ((y + z_image_y + map->y) *
3165 (z_mt->surf.row_pitch / 4) +
3166 (x + z_image_x + map->x));
3167
3168 if (map_z32f_x24s8) {
3169 z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
3170 s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
3171 } else {
3172 uint32_t packed = packed_map[y * map->w + x];
3173 s_map[s_offset] = packed >> 24;
3174 z_map[z_offset] = packed;
3175 }
3176 }
3177 }
3178
3179 intel_miptree_unmap_raw(s_mt);
3180 intel_miptree_unmap_raw(z_mt);
3181
3182 DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
3183 __func__,
3184 map->x, map->y, map->w, map->h,
3185 z_mt, _mesa_get_format_name(z_mt->format),
3186 map->x + z_image_x, map->y + z_image_y,
3187 s_mt, map->x + s_image_x, map->y + s_image_y,
3188 map->ptr, map->stride);
3189 }
3190
3191 free(map->buffer);
3192 }
3193
3194 /**
3195 * Create and attach a map to the miptree at (level, slice). Return the
3196 * attached map.
3197 */
3198 static struct intel_miptree_map*
3199 intel_miptree_attach_map(struct intel_mipmap_tree *mt,
3200 unsigned int level,
3201 unsigned int slice,
3202 unsigned int x,
3203 unsigned int y,
3204 unsigned int w,
3205 unsigned int h,
3206 GLbitfield mode)
3207 {
3208 struct intel_miptree_map *map = calloc(1, sizeof(*map));
3209
3210 if (!map)
3211 return NULL;
3212
3213 assert(mt->level[level].slice[slice].map == NULL);
3214 mt->level[level].slice[slice].map = map;
3215
3216 map->mode = mode;
3217 map->x = x;
3218 map->y = y;
3219 map->w = w;
3220 map->h = h;
3221
3222 return map;
3223 }
3224
3225 /**
3226 * Release the map at (level, slice).
3227 */
3228 static void
3229 intel_miptree_release_map(struct intel_mipmap_tree *mt,
3230 unsigned int level,
3231 unsigned int slice)
3232 {
3233 struct intel_miptree_map **map;
3234
3235 map = &mt->level[level].slice[slice].map;
3236 free(*map);
3237 *map = NULL;
3238 }
3239
3240 static bool
3241 can_blit_slice(struct intel_mipmap_tree *mt,
3242 unsigned int level, unsigned int slice)
3243 {
3244 /* See intel_miptree_blit() for details on the 32k pitch limit. */
3245 if (mt->surf.row_pitch >= 32768)
3246 return false;
3247
3248 return true;
3249 }
3250
3251 static bool
3252 use_intel_miptree_map_blit(struct brw_context *brw,
3253 struct intel_mipmap_tree *mt,
3254 GLbitfield mode,
3255 unsigned int level,
3256 unsigned int slice)
3257 {
3258 if (brw->has_llc &&
3259 /* It's probably not worth swapping to the blit ring because of
3260 * all the overhead involved.
3261 */
3262 !(mode & GL_MAP_WRITE_BIT) &&
3263 !mt->compressed &&
3264 (mt->surf.tiling == ISL_TILING_X ||
3265 /* Prior to Sandybridge, the blitter can't handle Y tiling */
3266 (brw->gen >= 6 && mt->surf.tiling == ISL_TILING_Y0) ||
3267 /* Fast copy blit on skl+ supports all tiling formats. */
3268 brw->gen >= 9) &&
3269 can_blit_slice(mt, level, slice))
3270 return true;
3271
3272 if (mt->surf.tiling != ISL_TILING_LINEAR &&
3273 mt->bo->size >= brw->max_gtt_map_object_size) {
3274 assert(can_blit_slice(mt, level, slice));
3275 return true;
3276 }
3277
3278 return false;
3279 }
3280
3281 /**
3282 * Parameter \a out_stride has type ptrdiff_t not because the buffer stride may
3283 * exceed 32 bits, but to diminish the likelihood of subtle bugs caused by
3284 * pointer-arithmetic overflow.
3285 *
3286 * If you call this function and use \a out_stride, then you're doing pointer
3287 * arithmetic on \a out_ptr. The type of \a out_stride doesn't prevent all
3288 * bugs. The caller must still take care to avoid 32-bit overflow errors in
3289 * all arithmetic expressions that contain buffer offsets and pixel sizes,
3290 * which usually have type uint32_t or GLuint.
3291 */
3292 void
3293 intel_miptree_map(struct brw_context *brw,
3294 struct intel_mipmap_tree *mt,
3295 unsigned int level,
3296 unsigned int slice,
3297 unsigned int x,
3298 unsigned int y,
3299 unsigned int w,
3300 unsigned int h,
3301 GLbitfield mode,
3302 void **out_ptr,
3303 ptrdiff_t *out_stride)
3304 {
3305 struct intel_miptree_map *map;
3306
3307 assert(mt->surf.samples == 1);
3308
3309 map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
3310 if (!map) {
3311 *out_ptr = NULL;
3312 *out_stride = 0;
3313 return;
3314 }
3315
3316 intel_miptree_access_raw(brw, mt, level, slice,
3317 map->mode & GL_MAP_WRITE_BIT);
3318
3319 if (mt->format == MESA_FORMAT_S_UINT8) {
3320 intel_miptree_map_s8(brw, mt, map, level, slice);
3321 } else if (mt->etc_format != MESA_FORMAT_NONE &&
3322 !(mode & BRW_MAP_DIRECT_BIT)) {
3323 intel_miptree_map_etc(brw, mt, map, level, slice);
3324 } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
3325 intel_miptree_map_depthstencil(brw, mt, map, level, slice);
3326 } else if (use_intel_miptree_map_blit(brw, mt, mode, level, slice)) {
3327 intel_miptree_map_blit(brw, mt, map, level, slice);
3328 #if defined(USE_SSE41)
3329 } else if (!(mode & GL_MAP_WRITE_BIT) &&
3330 !mt->compressed && cpu_has_sse4_1 &&
3331 (mt->surf.row_pitch % 16 == 0)) {
3332 intel_miptree_map_movntdqa(brw, mt, map, level, slice);
3333 #endif
3334 } else {
3335 intel_miptree_map_gtt(brw, mt, map, level, slice);
3336 }
3337
3338 *out_ptr = map->ptr;
3339 *out_stride = map->stride;
3340
3341 if (map->ptr == NULL)
3342 intel_miptree_release_map(mt, level, slice);
3343 }
3344
3345 void
3346 intel_miptree_unmap(struct brw_context *brw,
3347 struct intel_mipmap_tree *mt,
3348 unsigned int level,
3349 unsigned int slice)
3350 {
3351 struct intel_miptree_map *map = mt->level[level].slice[slice].map;
3352
3353 assert(mt->surf.samples == 1);
3354
3355 if (!map)
3356 return;
3357
3358 DBG("%s: mt %p (%s) level %d slice %d\n", __func__,
3359 mt, _mesa_get_format_name(mt->format), level, slice);
3360
3361 if (mt->format == MESA_FORMAT_S_UINT8) {
3362 intel_miptree_unmap_s8(brw, mt, map, level, slice);
3363 } else if (mt->etc_format != MESA_FORMAT_NONE &&
3364 !(map->mode & BRW_MAP_DIRECT_BIT)) {
3365 intel_miptree_unmap_etc(brw, mt, map, level, slice);
3366 } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
3367 intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
3368 } else if (map->linear_mt) {
3369 intel_miptree_unmap_blit(brw, mt, map, level, slice);
3370 #if defined(USE_SSE41)
3371 } else if (map->buffer && cpu_has_sse4_1) {
3372 intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
3373 #endif
3374 } else {
3375 intel_miptree_unmap_gtt(mt);
3376 }
3377
3378 intel_miptree_release_map(mt, level, slice);
3379 }
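
/* Usage sketch (illustrative; dst and dst_stride are hypothetical): a
 * read-only CPU access to the top-left 16x16 block of level 0, slice 0:
 *
 *    void *ptr;
 *    ptrdiff_t stride;
 *    intel_miptree_map(brw, mt, 0, 0, 0, 0, 16, 16, GL_MAP_READ_BIT,
 *                      &ptr, &stride);
 *    if (ptr) {
 *       for (unsigned row = 0; row < 16; row++)
 *          memcpy(dst + row * dst_stride, (char *)ptr + row * stride,
 *                 16 * mt->cpp);
 *       intel_miptree_unmap(brw, mt, 0, 0);
 *    }
 */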
3380
3381 enum isl_surf_dim
3382 get_isl_surf_dim(GLenum target)
3383 {
3384 switch (target) {
3385 case GL_TEXTURE_1D:
3386 case GL_TEXTURE_1D_ARRAY:
3387 return ISL_SURF_DIM_1D;
3388
3389 case GL_TEXTURE_2D:
3390 case GL_TEXTURE_2D_ARRAY:
3391 case GL_TEXTURE_RECTANGLE:
3392 case GL_TEXTURE_CUBE_MAP:
3393 case GL_TEXTURE_CUBE_MAP_ARRAY:
3394 case GL_TEXTURE_2D_MULTISAMPLE:
3395 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
3396 case GL_TEXTURE_EXTERNAL_OES:
3397 return ISL_SURF_DIM_2D;
3398
3399 case GL_TEXTURE_3D:
3400 return ISL_SURF_DIM_3D;
3401 }
3402
3403 unreachable("Invalid texture target");
3404 }
3405
3406 enum isl_dim_layout
3407 get_isl_dim_layout(const struct gen_device_info *devinfo,
3408 enum isl_tiling tiling, GLenum target)
3409 {
3410 switch (target) {
3411 case GL_TEXTURE_1D:
3412 case GL_TEXTURE_1D_ARRAY:
3413 return (devinfo->gen >= 9 && tiling == ISL_TILING_LINEAR ?
3414 ISL_DIM_LAYOUT_GEN9_1D : ISL_DIM_LAYOUT_GEN4_2D);
3415
3416 case GL_TEXTURE_2D:
3417 case GL_TEXTURE_2D_ARRAY:
3418 case GL_TEXTURE_RECTANGLE:
3419 case GL_TEXTURE_2D_MULTISAMPLE:
3420 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
3421 case GL_TEXTURE_EXTERNAL_OES:
3422 return ISL_DIM_LAYOUT_GEN4_2D;
3423
3424 case GL_TEXTURE_CUBE_MAP:
3425 case GL_TEXTURE_CUBE_MAP_ARRAY:
3426 return (devinfo->gen == 4 ? ISL_DIM_LAYOUT_GEN4_3D :
3427 ISL_DIM_LAYOUT_GEN4_2D);
3428
3429 case GL_TEXTURE_3D:
3430 return (devinfo->gen >= 9 ?
3431 ISL_DIM_LAYOUT_GEN4_2D : ISL_DIM_LAYOUT_GEN4_3D);
3432 }
3433
3434 unreachable("Invalid texture target");
3435 }
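
/* For example, on Gen9 a linear 1-D texture gets ISL_DIM_LAYOUT_GEN9_1D
 * while a tiled one still gets ISL_DIM_LAYOUT_GEN4_2D, and 3-D textures
 * switch from the GEN4_3D layout to an array-of-2D-slices layout (GEN4_2D)
 * on Gen9, both per the checks above.
 */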
3436
3437 enum isl_aux_usage
3438 intel_miptree_get_aux_isl_usage(const struct brw_context *brw,
3439 const struct intel_mipmap_tree *mt)
3440 {
3441 if (mt->hiz_buf)
3442 return ISL_AUX_USAGE_HIZ;
3443
3444 if (!mt->mcs_buf)
3445 return ISL_AUX_USAGE_NONE;
3446
3447 return mt->aux_usage;
3448 }