/*
 * Copyright 2006 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <GL/gl.h>
#include <GL/internal/dri_interface.h>

#include "intel_batchbuffer.h"
#include "intel_image.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
#include "intel_blit.h"
#include "intel_fbo.h"

#include "brw_blorp.h"
#include "brw_context.h"
#include "brw_state.h"

#include "main/enums.h"
#include "main/fbobject.h"
#include "main/formats.h"
#include "main/glformats.h"
#include "main/texcompress_etc.h"
#include "main/teximage.h"
#include "main/streaming-load-memcpy.h"
#include "x86/common_x86_asm.h"

#define FILE_DEBUG_FLAG DEBUG_MIPTREE

static void *intel_miptree_map_raw(struct brw_context *brw,
                                   struct intel_mipmap_tree *mt,
                                   GLbitfield mode);

static void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt);

static bool
intel_miptree_alloc_aux(struct brw_context *brw,
                        struct intel_mipmap_tree *mt);

static bool
intel_miptree_supports_mcs(struct brw_context *brw,
                           const struct intel_mipmap_tree *mt)
{
   /* MCS compression only applies to multisampled miptrees */
   if (mt->surf.samples <= 1)
      return false;

   /* Prior to Gen7, all MSAA surfaces used IMS layout. */
   if (brw->gen < 7)
      return false;

   /* In Gen7, IMS layout is only used for depth and stencil buffers. */
   switch (_mesa_get_format_base_format(mt->format)) {
   case GL_DEPTH_COMPONENT:
   case GL_STENCIL_INDEX:
   case GL_DEPTH_STENCIL:
      return false;
   default:
      /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
       *
       *   This field must be set to 0 for all SINT MSRTs when all RT channels
       *   are not written
       *
       * In practice this means that we have to disable MCS for all signed
       * integer MSAA buffers. The alternative, to disable MCS only when one
       * of the render target channels is disabled, is impractical because it
       * would require converting between CMS and UMS MSAA layouts on the fly,
       * which is expensive.
       */
      if (brw->gen == 7 && _mesa_get_format_datatype(mt->format) == GL_INT) {
         return false;
      } else {
         return true;
      }
   }
}

static bool
intel_tiling_supports_ccs(const struct brw_context *brw,
                          enum isl_tiling tiling)
{
   /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
    * Target(s)", beneath the "Fast Color Clear" bullet (p326):
    *
    *     - Support is limited to tiled render targets.
    *
    * Gen9 changes the restriction to Y-tile only.
    */
   if (brw->gen >= 9)
      return tiling == ISL_TILING_Y0;
   else if (brw->gen >= 7)
      return tiling != ISL_TILING_LINEAR;
   else
      return false;
}

/**
 * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
 * can be used. This doesn't (and should not) inspect any of the properties of
 * the miptree's BO.
 *
 * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 * beneath the "Fast Color Clear" bullet (p326):
 *
 *     - Support is for non-mip-mapped and non-array surface types only.
 *
 * And then later, on p327:
 *
 *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
 *       64bpp, and 128bpp.
 *
 * From the Skylake documentation, it is made clear that X-tiling is no longer
 * supported:
 *
 *     - MCS and Lossless compression is supported for TiledY/TileYs/TileYf
 *       non-MSRTs only.
 */
static bool
intel_miptree_supports_ccs(struct brw_context *brw,
                           const struct intel_mipmap_tree *mt)
{
   /* MCS support does not exist prior to Gen7 */
   if (brw->gen < 7)
      return false;

   /* This function applies only to non-multisampled render targets. */
   if (mt->surf.samples > 1)
      return false;

   /* MCS is only supported for color buffers */
   switch (_mesa_get_format_base_format(mt->format)) {
   case GL_DEPTH_COMPONENT:
   case GL_DEPTH_STENCIL:
   case GL_STENCIL_INDEX:
      return false;
   }

   if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
      return false;

   const bool mip_mapped = mt->first_level != 0 || mt->last_level != 0;
   const bool arrayed = mt->surf.logical_level0_px.array_len > 1 ||
                        mt->surf.logical_level0_px.depth > 1;

   if (arrayed) {
      /* Multisample surfaces with the CMS layout are not layered surfaces,
       * yet still have physical_depth0 > 1. Assert that we don't
       * accidentally reject a multisampled surface here. We should have
       * rejected it earlier by explicitly checking the sample count.
       */
      assert(mt->surf.samples == 1);
   }

   /* Handle the hardware restrictions...
    *
    * All GENs have the following restriction: "MCS buffer for non-MSRT is
    * supported only for RT formats 32bpp, 64bpp, and 128bpp."
    *
    * From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652: (Color Clear of
    * Non-MultiSampler Render Target Restrictions) Support is for
    * non-mip-mapped and non-array surface types only.
    *
    * From the BDW PRM Volume 7: 3D-Media-GPGPU, page 649: (Color Clear of
    * Non-MultiSampler Render Target Restriction). Mip-mapped and arrayed
    * surfaces are supported with MCS buffer layout with these alignments in
    * the RT space: Horizontal Alignment = 256 and Vertical Alignment = 128.
    *
    * From the SKL PRM Volume 7: 3D-Media-GPGPU, page 632: (Color Clear of
    * Non-MultiSampler Render Target Restriction). Mip-mapped and arrayed
    * surfaces are supported with MCS buffer layout with these alignments in
    * the RT space: Horizontal Alignment = 128 and Vertical Alignment = 64.
    */
   if (brw->gen < 8 && (mip_mapped || arrayed))
      return false;

   /* There's no point in using an MCS buffer if the surface isn't in a
    * renderable format.
    */
   if (!brw->mesa_format_supports_render[mt->format])
      return false;

   return true;
}

static bool
intel_tiling_supports_hiz(const struct brw_context *brw,
                          enum isl_tiling tiling)
{
   if (brw->gen < 6)
      return false;

   return tiling == ISL_TILING_Y0;
}

static bool
intel_miptree_supports_hiz(const struct brw_context *brw,
                           const struct intel_mipmap_tree *mt)
{
   if (!brw->has_hiz)
      return false;

   switch (mt->format) {
   case MESA_FORMAT_Z_FLOAT32:
   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
   case MESA_FORMAT_Z24_UNORM_X8_UINT:
   case MESA_FORMAT_Z24_UNORM_S8_UINT:
   case MESA_FORMAT_Z_UNORM16:
      return true;
   default:
      return false;
   }
}

static bool
intel_miptree_supports_ccs_e(struct brw_context *brw,
                             const struct intel_mipmap_tree *mt)
{
   if (brw->gen < 9)
      return false;

   /* For now, compression is only enabled for integer formats even though
    * supported floating point formats also exist. This is a heuristic
    * decision based on current public benchmarks: in none of the cases did
    * these formats provide any improvement, and a few cases were seen to
    * regress. Hence they are left to be enabled in the future, once they
    * are known to improve things.
    */
   if (_mesa_get_format_datatype(mt->format) == GL_FLOAT)
      return false;

   if (!intel_miptree_supports_ccs(brw, mt))
      return false;

   /* Many window system buffers are sRGB even if they are never rendered as
    * sRGB. For those, we want CCS_E for when sRGBEncode is false. When the
    * surface is used as sRGB, we fall back to CCS_D.
    */
   mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
   enum isl_format isl_format = brw_isl_format_for_mesa_format(linear_format);
   return isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format);
}

/**
 * Determine depth format corresponding to a depth+stencil format,
 * for separate stencil.
 */
mesa_format
intel_depth_format_for_depthstencil_format(mesa_format format)
{
   switch (format) {
   case MESA_FORMAT_Z24_UNORM_S8_UINT:
      return MESA_FORMAT_Z24_UNORM_X8_UINT;
   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
      return MESA_FORMAT_Z_FLOAT32;
   default:
      return format;
   }
}

static bool
create_mapping_table(GLenum target, unsigned first_level, unsigned last_level,
                     unsigned depth0, struct intel_mipmap_level *table)
{
   for (unsigned level = first_level; level <= last_level; level++) {
      const unsigned d =
         target == GL_TEXTURE_3D ? minify(depth0, level) : depth0;

      table[level].slice = calloc(d, sizeof(*table[0].slice));
      if (!table[level].slice)
         goto unwind;
   }

   return true;

unwind:
   for (unsigned level = first_level; level <= last_level; level++)
      free(table[level].slice);

   return false;
}

static bool
needs_separate_stencil(const struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       mesa_format format)
{
   if (_mesa_get_format_base_format(format) != GL_DEPTH_STENCIL)
      return false;

   if (brw->must_use_separate_stencil)
      return true;

   return brw->has_separate_stencil &&
          intel_miptree_supports_hiz(brw, mt);
}

/**
 * Choose the aux usage for this miptree. This function must be called fairly
 * late in the miptree create process after we have a tiling.
 */
static void
intel_miptree_choose_aux_usage(struct brw_context *brw,
                               struct intel_mipmap_tree *mt)
{
   assert(mt->aux_usage == ISL_AUX_USAGE_NONE);

   if (intel_miptree_supports_mcs(brw, mt)) {
      assert(mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
      mt->aux_usage = ISL_AUX_USAGE_MCS;
   } else if (intel_tiling_supports_ccs(brw, mt->surf.tiling) &&
              intel_miptree_supports_ccs(brw, mt)) {
      if (!unlikely(INTEL_DEBUG & DEBUG_NO_RBC) &&
          intel_miptree_supports_ccs_e(brw, mt)) {
         mt->aux_usage = ISL_AUX_USAGE_CCS_E;
      } else {
         mt->aux_usage = ISL_AUX_USAGE_CCS_D;
      }
   } else if (intel_tiling_supports_hiz(brw, mt->surf.tiling) &&
              intel_miptree_supports_hiz(brw, mt)) {
      mt->aux_usage = ISL_AUX_USAGE_HIZ;
   }

   /* We can do fast-clear on all auxiliary surface types that are
    * allocated through the normal texture creation paths.
    */
   if (mt->aux_usage != ISL_AUX_USAGE_NONE)
      mt->supports_fast_clear = true;
}


/**
 * Choose an appropriate uncompressed format for a requested
 * compressed format, if unsupported.
 */
mesa_format
intel_lower_compressed_format(struct brw_context *brw, mesa_format format)
{
   /* No need to lower ETC formats on these platforms,
    * they are supported natively.
    */
   if (brw->gen >= 8 || brw->is_baytrail)
      return format;

   switch (format) {
   case MESA_FORMAT_ETC1_RGB8:
      return MESA_FORMAT_R8G8B8X8_UNORM;
   case MESA_FORMAT_ETC2_RGB8:
      return MESA_FORMAT_R8G8B8X8_UNORM;
   case MESA_FORMAT_ETC2_SRGB8:
   case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
   case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
      return MESA_FORMAT_B8G8R8A8_SRGB;
   case MESA_FORMAT_ETC2_RGBA8_EAC:
   case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
      return MESA_FORMAT_R8G8B8A8_UNORM;
   case MESA_FORMAT_ETC2_R11_EAC:
      return MESA_FORMAT_R_UNORM16;
   case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
      return MESA_FORMAT_R_SNORM16;
   case MESA_FORMAT_ETC2_RG11_EAC:
      return MESA_FORMAT_R16G16_UNORM;
   case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
      return MESA_FORMAT_R16G16_SNORM;
   default:
      /* Non ETC1 / ETC2 format */
      return format;
   }
}

unsigned
brw_get_num_logical_layers(const struct intel_mipmap_tree *mt, unsigned level)
{
   if (mt->surf.dim == ISL_SURF_DIM_3D)
      return minify(mt->surf.logical_level0_px.depth, level);
   else
      return mt->surf.logical_level0_px.array_len;
}

static unsigned
get_num_phys_layers(const struct isl_surf *surf, unsigned level)
{
   /* For physical dimensions, one also needs to consider the layout.
    * See isl_calc_phys_level0_extent_sa().
    */
   if (surf->dim != ISL_SURF_DIM_3D)
      return surf->phys_level0_sa.array_len;

   if (surf->dim_layout == ISL_DIM_LAYOUT_GEN4_2D)
      return minify(surf->phys_level0_sa.array_len, level);

   return minify(surf->phys_level0_sa.depth, level);
}

/** \brief Assert that the level and layer are valid for the miptree. */
void
intel_miptree_check_level_layer(const struct intel_mipmap_tree *mt,
                                uint32_t level,
                                uint32_t layer)
{
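   /* The (void) casts keep release builds, where assert() expands to
    * nothing, from warning about unused parameters.
    */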
   (void) mt;
   (void) level;
   (void) layer;

   assert(level >= mt->first_level);
   assert(level <= mt->last_level);
   assert(layer < get_num_phys_layers(&mt->surf, level));
}

static enum isl_aux_state **
create_aux_state_map(struct intel_mipmap_tree *mt,
                     enum isl_aux_state initial)
{
   const uint32_t levels = mt->last_level + 1;

   uint32_t total_slices = 0;
   for (uint32_t level = 0; level < levels; level++)
      total_slices += brw_get_num_logical_layers(mt, level);

   const size_t per_level_array_size = levels * sizeof(enum isl_aux_state *);

   /* We're going to allocate a single chunk of data for both the per-level
    * reference array and the arrays of aux_state. This makes cleanup
    * significantly easier.
    */
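   /* For example, with two levels of three layers each, the chunk is laid
    * out as:
    *
    *    [ptr L0][ptr L1][L0 S0][L0 S1][L0 S2][L1 S0][L1 S1][L1 S2]
    *
    * where each per-level pointer points at that level's first aux_state
    * entry.
    */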
   const size_t total_size = per_level_array_size +
                             total_slices * sizeof(enum isl_aux_state);
   void *data = malloc(total_size);
   if (data == NULL)
      return NULL;

   enum isl_aux_state **per_level_arr = data;
   enum isl_aux_state *s = data + per_level_array_size;
   for (uint32_t level = 0; level < levels; level++) {
      per_level_arr[level] = s;
      const unsigned level_layers = brw_get_num_logical_layers(mt, level);
      for (uint32_t a = 0; a < level_layers; a++)
         *(s++) = initial;
   }
   assert((void *)s == data + total_size);

   return per_level_arr;
}

static void
free_aux_state_map(enum isl_aux_state **state)
{
   free(state);
}

static bool
need_to_retile_as_linear(struct brw_context *brw, unsigned row_pitch,
                         enum isl_tiling tiling, unsigned samples)
{
   if (samples > 1)
      return false;

   if (tiling == ISL_TILING_LINEAR)
      return false;

   /* If the width is much smaller than a tile, don't bother tiling. */
   if (row_pitch < 64)
      return true;
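
   /* The BLT engine cannot handle pitches of 32k bytes or more. Tiling a
    * surface that wide would leave us unable to blit to or from it, so
    * fall back to a linear layout instead.
    */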
   if (ALIGN(row_pitch, 512) >= 32768) {
      perf_debug("row pitch %u too large to blit, falling back to untiled",
                 row_pitch);
      return true;
   }

   return false;
}

static bool
need_to_retile_as_x(const struct brw_context *brw, uint64_t size,
                    enum isl_tiling tiling)
{
   /* If the BO is too large to fit in the aperture, we need to use the
    * BLT engine to support it. Prior to Sandybridge, the BLT paths can't
    * handle Y-tiling, so we need to fall back to X.
    */
   if (brw->gen < 6 && size >= brw->max_gtt_map_object_size &&
       tiling == ISL_TILING_Y0)
      return true;

   return false;
}

static struct intel_mipmap_tree *
make_surface(struct brw_context *brw, GLenum target, mesa_format format,
             unsigned first_level, unsigned last_level,
             unsigned width0, unsigned height0, unsigned depth0,
             unsigned num_samples, isl_tiling_flags_t tiling_flags,
             isl_surf_usage_flags_t isl_usage_flags, uint32_t alloc_flags,
             unsigned row_pitch, struct brw_bo *bo)
{
   struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
   if (!mt)
      return NULL;

   if (!create_mapping_table(target, first_level, last_level, depth0,
                             mt->level)) {
      free(mt);
      return NULL;
   }

   mt->refcount = 1;

   if (target == GL_TEXTURE_CUBE_MAP ||
       target == GL_TEXTURE_CUBE_MAP_ARRAY)
      isl_usage_flags |= ISL_SURF_USAGE_CUBE_BIT;

   DBG("%s: %s %s %ux %u:%u:%u %d..%d <-- %p\n",
       __func__,
       _mesa_enum_to_string(target),
       _mesa_get_format_name(format),
       num_samples, width0, height0, depth0,
       first_level, last_level, mt);

   struct isl_surf_init_info init_info = {
      .dim = get_isl_surf_dim(target),
      .format = translate_tex_format(brw, format, false),
      .width = width0,
      .height = height0,
      .depth = target == GL_TEXTURE_3D ? depth0 : 1,
      .levels = last_level - first_level + 1,
      .array_len = target == GL_TEXTURE_3D ? 1 : depth0,
      .samples = num_samples,
      .row_pitch = row_pitch,
      .usage = isl_usage_flags,
      .tiling_flags = tiling_flags,
   };

   if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
      goto fail;

   /* In case the caller doesn't specifically request Y-tiling (needed
    * unconditionally for depth), check for corner cases needing special
    * treatment.
    */
   if (tiling_flags & ~ISL_TILING_Y0_BIT) {
      if (need_to_retile_as_linear(brw, mt->surf.row_pitch,
                                   mt->surf.tiling, mt->surf.samples)) {
         init_info.tiling_flags = 1u << ISL_TILING_LINEAR;
         if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
            goto fail;
      } else if (need_to_retile_as_x(brw, mt->surf.size, mt->surf.tiling)) {
         init_info.tiling_flags = 1u << ISL_TILING_X;
         if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
            goto fail;
      }
   }

   /* In the linear case, the buffer gets padded by a fixed 64 bytes, so
    * the size may not be a multiple of row_pitch.
    * See isl_apply_surface_padding().
    */
   if (mt->surf.tiling != ISL_TILING_LINEAR)
      assert(mt->surf.size % mt->surf.row_pitch == 0);

   if (!bo) {
      mt->bo = brw_bo_alloc_tiled(brw->bufmgr, "isl-miptree",
                                  mt->surf.size,
                                  isl_tiling_to_i915_tiling(
                                     mt->surf.tiling),
                                  mt->surf.row_pitch, alloc_flags);
      if (!mt->bo)
         goto fail;
   } else {
      mt->bo = bo;
   }

   mt->first_level = first_level;
   mt->last_level = last_level;
   mt->target = target;
   mt->format = format;
   mt->aux_state = NULL;
   mt->cpp = isl_format_get_layout(mt->surf.format)->bpb / 8;
   mt->compressed = _mesa_is_format_compressed(format);

   return mt;

fail:
   intel_miptree_release(&mt);
   return NULL;
}

static bool
make_separate_stencil_surface(struct brw_context *brw,
                              struct intel_mipmap_tree *mt)
{
   mt->stencil_mt = make_surface(brw, mt->target, MESA_FORMAT_S_UINT8,
                                 0, mt->surf.levels - 1,
                                 mt->surf.logical_level0_px.width,
                                 mt->surf.logical_level0_px.height,
                                 mt->surf.dim == ISL_SURF_DIM_3D ?
                                    mt->surf.logical_level0_px.depth :
                                    mt->surf.logical_level0_px.array_len,
                                 mt->surf.samples, ISL_TILING_W_BIT,
                                 ISL_SURF_USAGE_STENCIL_BIT |
                                 ISL_SURF_USAGE_TEXTURE_BIT,
                                 BO_ALLOC_FOR_RENDER, 0, NULL);

   if (!mt->stencil_mt)
      return false;

   mt->stencil_mt->r8stencil_needs_update = true;

   return true;
}

static bool
force_linear_tiling(uint32_t layout_flags)
{
   /* MIPTREE_LAYOUT_TILING_ANY includes both the NONE and Y bits, so a
    * caller asking for ANY never forces linear: the Y bit is checked first.
    */
   if (layout_flags & MIPTREE_LAYOUT_TILING_Y)
      return false;

   return layout_flags & MIPTREE_LAYOUT_TILING_NONE;
}

static struct intel_mipmap_tree *
miptree_create(struct brw_context *brw,
               GLenum target,
               mesa_format format,
               GLuint first_level,
               GLuint last_level,
               GLuint width0,
               GLuint height0,
               GLuint depth0,
               GLuint num_samples,
               uint32_t layout_flags)
{
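   /* Stencil buffers are always W-tiled; that is the only layout the
    * hardware supports for separate stencil.
    */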
   if (format == MESA_FORMAT_S_UINT8)
      return make_surface(brw, target, format, first_level, last_level,
                          width0, height0, depth0, num_samples,
                          ISL_TILING_W_BIT,
                          ISL_SURF_USAGE_STENCIL_BIT |
                          ISL_SURF_USAGE_TEXTURE_BIT,
                          BO_ALLOC_FOR_RENDER,
                          0,
                          NULL);

   const GLenum base_format = _mesa_get_format_base_format(format);
   if ((base_format == GL_DEPTH_COMPONENT ||
        base_format == GL_DEPTH_STENCIL) &&
       !force_linear_tiling(layout_flags)) {
      /* Fix up the Z miptree format for how we're splitting out separate
       * stencil. Gen7 expects there to be no stencil bits in its depth buffer.
       */
      const mesa_format depth_only_format =
         intel_depth_format_for_depthstencil_format(format);
      struct intel_mipmap_tree *mt = make_surface(
         brw, target, brw->gen >= 6 ? depth_only_format : format,
         first_level, last_level,
         width0, height0, depth0, num_samples, ISL_TILING_Y0_BIT,
         ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT,
         BO_ALLOC_FOR_RENDER, 0, NULL);
      if (!mt)
         return NULL;

      if (needs_separate_stencil(brw, mt, format) &&
          !make_separate_stencil_surface(brw, mt)) {
         intel_miptree_release(&mt);
         return NULL;
      }

      if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
         intel_miptree_choose_aux_usage(brw, mt);

      return mt;
   }

   mesa_format tex_format = format;
   mesa_format etc_format = MESA_FORMAT_NONE;
   uint32_t alloc_flags = 0;
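
   /* ETC formats get lowered to an uncompressed equivalent on hardware
    * without native support (pre-Gen8, non-Baytrail). etc_format remembers
    * the original format so texture uploads can be transcoded in software.
    */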
   format = intel_lower_compressed_format(brw, format);

   etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;

   if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD)
      alloc_flags |= BO_ALLOC_FOR_RENDER;

   isl_tiling_flags_t tiling_flags = force_linear_tiling(layout_flags) ?
      ISL_TILING_LINEAR_BIT : ISL_TILING_ANY_MASK;

   /* TODO: This used to be because there wasn't BLORP to handle Y-tiling. */
   if (brw->gen < 6)
      tiling_flags &= ~ISL_TILING_Y0_BIT;

   struct intel_mipmap_tree *mt = make_surface(
      brw, target, format,
      first_level, last_level,
      width0, height0, depth0,
      num_samples, tiling_flags,
      ISL_SURF_USAGE_RENDER_TARGET_BIT |
      ISL_SURF_USAGE_TEXTURE_BIT,
      alloc_flags, 0, NULL);
   if (!mt)
      return NULL;

   mt->etc_format = etc_format;

   if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
      intel_miptree_choose_aux_usage(brw, mt);

   return mt;
}

struct intel_mipmap_tree *
intel_miptree_create(struct brw_context *brw,
                     GLenum target,
                     mesa_format format,
                     GLuint first_level,
                     GLuint last_level,
                     GLuint width0,
                     GLuint height0,
                     GLuint depth0,
                     GLuint num_samples,
                     uint32_t layout_flags)
{
   assert(num_samples > 0);

   struct intel_mipmap_tree *mt = miptree_create(
      brw, target, format,
      first_level, last_level,
      width0, height0, depth0, num_samples,
      layout_flags);
   if (!mt)
      return NULL;

   mt->offset = 0;

   if (!intel_miptree_alloc_aux(brw, mt)) {
      intel_miptree_release(&mt);
      return NULL;
   }

   return mt;
}

struct intel_mipmap_tree *
intel_miptree_create_for_bo(struct brw_context *brw,
                            struct brw_bo *bo,
                            mesa_format format,
                            uint32_t offset,
                            uint32_t width,
                            uint32_t height,
                            uint32_t depth,
                            int pitch,
                            uint32_t layout_flags)
{
   struct intel_mipmap_tree *mt;
   uint32_t tiling, swizzle;
   const GLenum target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
   const GLenum base_format = _mesa_get_format_base_format(format);

   if ((base_format == GL_DEPTH_COMPONENT ||
        base_format == GL_DEPTH_STENCIL)) {
      const mesa_format depth_only_format =
         intel_depth_format_for_depthstencil_format(format);
      mt = make_surface(brw, target,
                        brw->gen >= 6 ? depth_only_format : format,
                        0, 0, width, height, depth, 1, ISL_TILING_Y0_BIT,
                        ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT,
                        BO_ALLOC_FOR_RENDER, pitch, bo);
      if (!mt)
         return NULL;

      brw_bo_reference(bo);

      if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
         intel_miptree_choose_aux_usage(brw, mt);

      return mt;
   } else if (format == MESA_FORMAT_S_UINT8) {
      mt = make_surface(brw, target, MESA_FORMAT_S_UINT8,
                        0, 0, width, height, depth, 1,
                        ISL_TILING_W_BIT,
                        ISL_SURF_USAGE_STENCIL_BIT |
                        ISL_SURF_USAGE_TEXTURE_BIT,
                        BO_ALLOC_FOR_RENDER, pitch, bo);
      if (!mt)
         return NULL;

      assert(bo->size >= mt->surf.size);

      brw_bo_reference(bo);
      return mt;
   }

   brw_bo_get_tiling(bo, &tiling, &swizzle);

   /* Nothing will be able to use this miptree with the BO if the offset isn't
    * aligned.
    */
   if (tiling != I915_TILING_NONE)
      assert(offset % 4096 == 0);

   /* Miptrees can't handle negative pitch. If you need flipping of images,
    * that's outside of the scope of the mt.
    */
   assert(pitch >= 0);

   /* The BO already has a tiling format and we shouldn't confuse the lower
    * layers by making it try to find a tiling format again.
    */
   assert((layout_flags & MIPTREE_LAYOUT_TILING_ANY) == 0);
   assert((layout_flags & MIPTREE_LAYOUT_TILING_NONE) == 0);

   mt = make_surface(brw, target, format,
                     0, 0, width, height, depth, 1,
                     1lu << isl_tiling_from_i915_tiling(tiling),
                     ISL_SURF_USAGE_RENDER_TARGET_BIT |
                     ISL_SURF_USAGE_TEXTURE_BIT,
                     0, pitch, bo);
   if (!mt)
      return NULL;

   brw_bo_reference(bo);
   mt->bo = bo;
   mt->offset = offset;

   if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
      intel_miptree_choose_aux_usage(brw, mt);

   return mt;
}

static struct intel_mipmap_tree *
miptree_create_for_planar_image(struct brw_context *brw,
                                __DRIimage *image, GLenum target)
{
   const struct intel_image_format *f = image->planar_format;
   struct intel_mipmap_tree *planar_mt = NULL;

   for (int i = 0; i < f->nplanes; i++) {
      const int index = f->planes[i].buffer_index;
      const uint32_t dri_format = f->planes[i].dri_format;
      const mesa_format format = driImageFormatToGLFormat(dri_format);
      const uint32_t width = image->width >> f->planes[i].width_shift;
      const uint32_t height = image->height >> f->planes[i].height_shift;

      /* Disable creation of the texture's aux buffers because the driver
       * exposes no EGL API to manage them. That is, there is no API for
       * resolving the aux buffer's content to the main buffer nor for
       * invalidating the aux buffer's content.
       */
      struct intel_mipmap_tree *mt =
         intel_miptree_create_for_bo(brw, image->bo, format,
                                     image->offsets[index],
                                     width, height, 1,
                                     image->strides[index],
                                     MIPTREE_LAYOUT_DISABLE_AUX);
      if (mt == NULL)
         return NULL;

      mt->target = target;

      if (i == 0)
         planar_mt = mt;
      else
         planar_mt->plane[i - 1] = mt;
   }

   return planar_mt;
}

struct intel_mipmap_tree *
intel_miptree_create_for_dri_image(struct brw_context *brw,
                                   __DRIimage *image, GLenum target,
                                   enum isl_colorspace colorspace,
                                   bool is_winsys_image)
{
   if (image->planar_format && image->planar_format->nplanes > 0) {
      assert(colorspace == ISL_COLORSPACE_NONE ||
             colorspace == ISL_COLORSPACE_YUV);
      return miptree_create_for_planar_image(brw, image, target);
   }

   mesa_format format = image->format;
   switch (colorspace) {
   case ISL_COLORSPACE_NONE:
      /* Keep the image format unmodified */
      break;

   case ISL_COLORSPACE_LINEAR:
      format = _mesa_get_srgb_format_linear(format);
      break;

   case ISL_COLORSPACE_SRGB:
      format = _mesa_get_linear_format_srgb(format);
      break;

   default:
      unreachable("Invalid colorspace for non-planar image");
   }

   if (!brw->ctx.TextureFormatSupported[format]) {
      /* The texture storage paths in core Mesa detect if the driver does not
       * support the user-requested format, and then searches for a
       * fallback format. The DRIimage code bypasses core Mesa, though. So we
       * do the fallbacks here for important formats.
       *
       * We must support DRM_FOURCC_XBGR8888 textures because the Android
       * framework produces HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces, which
       * the Chrome OS compositor consumes as dma_buf EGLImages.
       */
      format = _mesa_format_fallback_rgbx_to_rgba(format);
   }

   if (!brw->ctx.TextureFormatSupported[format])
      return NULL;

   /* If this image comes in from a window system, we have different
    * requirements than if it comes in via an EGL import operation. Window
    * system images can use any form of auxiliary compression we wish because
    * they get "flushed" before being handed off to the window system and we
    * have the opportunity to do resolves. Window system buffers also may be
    * used for scanout so we need to flag that appropriately.
    *
    * For EGL imports, on the other hand, disable creation of the texture's
    * aux buffers because the driver exposes no EGL API to manage them. That
    * is, there is no API for resolving the aux buffer's content to the main
    * buffer nor for invalidating the aux buffer's content.
    */
   const uint32_t mt_layout_flags =
      is_winsys_image ? 0 : MIPTREE_LAYOUT_DISABLE_AUX;

   struct intel_mipmap_tree *mt =
      intel_miptree_create_for_bo(brw, image->bo, format,
                                  image->offset, image->width, image->height, 1,
                                  image->pitch, mt_layout_flags);
   if (mt == NULL)
      return NULL;

   mt->target = target;
   mt->level[0].level_x = image->tile_x;
   mt->level[0].level_y = image->tile_y;

   /* Per "OES_EGL_image" error reporting, we report GL_INVALID_OPERATION
    * for EGL images created from non-tile-aligned surfaces on gen4 hardware
    * and earlier, which has trouble resolving back to the destination image
    * due to alignment issues.
    */
   if (!brw->has_surface_tile_offset) {
      uint32_t draw_x, draw_y;
      intel_miptree_get_tile_offsets(mt, 0, 0, &draw_x, &draw_y);

      if (draw_x != 0 || draw_y != 0) {
         _mesa_error(&brw->ctx, GL_INVALID_OPERATION, __func__);
         intel_miptree_release(&mt);
         return NULL;
      }
   }

   /* If this is a window-system image, then we can no longer assume it's
    * cache-coherent because it may suddenly get scanned out which destroys
    * coherency.
    */
   if (is_winsys_image)
      image->bo->cache_coherent = false;

   if (!intel_miptree_alloc_aux(brw, mt)) {
      intel_miptree_release(&mt);
      return NULL;
   }

   return mt;
}

/**
 * For a singlesample renderbuffer, this simply wraps the given BO with a
 * miptree.
 *
 * For a multisample renderbuffer, this wraps the window system's
 * (singlesample) BO with a singlesample miptree attached to the
 * intel_renderbuffer, then creates a multisample miptree attached to irb->mt
 * that will contain the actual rendering (which is lazily resolved to
 * irb->singlesample_mt).
 */
bool
intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
                                         struct intel_renderbuffer *irb,
                                         struct intel_mipmap_tree *singlesample_mt,
                                         uint32_t width, uint32_t height,
                                         uint32_t pitch)
{
   struct intel_mipmap_tree *multisample_mt = NULL;
   struct gl_renderbuffer *rb = &irb->Base.Base;
   mesa_format format = rb->Format;
   const unsigned num_samples = MAX2(rb->NumSamples, 1);

   /* Only the front and back buffers, which are color buffers, are allocated
    * through the image loader.
    */
   assert(_mesa_get_format_base_format(format) == GL_RGB ||
          _mesa_get_format_base_format(format) == GL_RGBA);

   assert(singlesample_mt);

   if (num_samples == 1) {
      intel_miptree_release(&irb->mt);
      irb->mt = singlesample_mt;

      assert(!irb->singlesample_mt);
   } else {
      intel_miptree_release(&irb->singlesample_mt);
      irb->singlesample_mt = singlesample_mt;

      if (!irb->mt ||
          irb->mt->surf.logical_level0_px.width != width ||
          irb->mt->surf.logical_level0_px.height != height) {
         multisample_mt = intel_miptree_create_for_renderbuffer(intel,
                                                                format,
                                                                width,
                                                                height,
                                                                num_samples);
         if (!multisample_mt)
            goto fail;

         irb->need_downsample = false;
         intel_miptree_release(&irb->mt);
         irb->mt = multisample_mt;
      }
   }
   return true;

fail:
   intel_miptree_release(&irb->mt);
   return false;
}

struct intel_mipmap_tree*
intel_miptree_create_for_renderbuffer(struct brw_context *brw,
                                      mesa_format format,
                                      uint32_t width,
                                      uint32_t height,
                                      uint32_t num_samples)
{
   struct intel_mipmap_tree *mt;
   uint32_t depth = 1;
   GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
   const uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
                                 MIPTREE_LAYOUT_TILING_ANY;

   mt = intel_miptree_create(brw, target, format, 0, 0,
                             width, height, depth, num_samples,
                             layout_flags);
   if (!mt)
      goto fail;

   return mt;

fail:
   intel_miptree_release(&mt);
   return NULL;
}

void
intel_miptree_reference(struct intel_mipmap_tree **dst,
                        struct intel_mipmap_tree *src)
{
   if (*dst == src)
      return;

   intel_miptree_release(dst);

   if (src) {
      src->refcount++;
      DBG("%s %p refcount now %d\n", __func__, src, src->refcount);
   }

   *dst = src;
}

static void
intel_miptree_aux_buffer_free(struct intel_miptree_aux_buffer *aux_buf)
{
   if (aux_buf == NULL)
      return;

   brw_bo_unreference(aux_buf->bo);

   free(aux_buf);
}

void
intel_miptree_release(struct intel_mipmap_tree **mt)
{
   if (!*mt)
      return;

   DBG("%s %p refcount will be %d\n", __func__, *mt, (*mt)->refcount - 1);
   if (--(*mt)->refcount <= 0) {
      GLuint i;

      DBG("%s deleting %p\n", __func__, *mt);

      brw_bo_unreference((*mt)->bo);
      intel_miptree_release(&(*mt)->stencil_mt);
      intel_miptree_release(&(*mt)->r8stencil_mt);
      intel_miptree_aux_buffer_free((*mt)->hiz_buf);
      intel_miptree_aux_buffer_free((*mt)->mcs_buf);
      free_aux_state_map((*mt)->aux_state);

      intel_miptree_release(&(*mt)->plane[0]);
      intel_miptree_release(&(*mt)->plane[1]);

      for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
         free((*mt)->level[i].slice);
      }

      free(*mt);
   }
   *mt = NULL;
}


void
intel_get_image_dims(struct gl_texture_image *image,
                     int *width, int *height, int *depth)
{
   switch (image->TexObject->Target) {
   case GL_TEXTURE_1D_ARRAY:
      /* For a 1D Array texture the OpenGL API will treat the image height as
       * the number of array slices. For Intel hardware, we treat the 1D array
       * as a 2D Array with a height of 1. So, here we want to swap image
       * height and depth.
       */
      assert(image->Depth == 1);
      *width = image->Width;
      *height = 1;
      *depth = image->Height;
      break;
   case GL_TEXTURE_CUBE_MAP:
      /* For Cube maps, the mesa/main api layer gives us a depth of 1 even
       * though we really have 6 slices.
       */
      assert(image->Depth == 1);
      *width = image->Width;
      *height = image->Height;
      *depth = 6;
      break;
   default:
      *width = image->Width;
      *height = image->Height;
      *depth = image->Depth;
      break;
   }
}

/**
 * Can the image be pulled into a unified mipmap tree? This mirrors
 * the completeness test in a lot of ways.
 *
 * Not sure whether I want to pass gl_texture_image here.
 */
bool
intel_miptree_match_image(struct intel_mipmap_tree *mt,
                          struct gl_texture_image *image)
{
   struct intel_texture_image *intelImage = intel_texture_image(image);
   GLuint level = intelImage->base.Base.Level;
   int width, height, depth;

   /* glTexImage* choose the texture object based on the target passed in, and
    * objects can't change targets over their lifetimes, so this should be
    * true.
    */
   assert(image->TexObject->Target == mt->target);

   mesa_format mt_format = mt->format;
   if (mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT && mt->stencil_mt)
      mt_format = MESA_FORMAT_Z24_UNORM_S8_UINT;
   if (mt->format == MESA_FORMAT_Z_FLOAT32 && mt->stencil_mt)
      mt_format = MESA_FORMAT_Z32_FLOAT_S8X24_UINT;
   if (mt->etc_format != MESA_FORMAT_NONE)
      mt_format = mt->etc_format;

   if (image->TexFormat != mt_format)
      return false;

   intel_get_image_dims(image, &width, &height, &depth);

   if (mt->target == GL_TEXTURE_CUBE_MAP)
      depth = 6;

   if (level >= mt->surf.levels)
      return false;

   const unsigned level_depth =
      mt->surf.dim == ISL_SURF_DIM_3D ?
         minify(mt->surf.logical_level0_px.depth, level) :
         mt->surf.logical_level0_px.array_len;

   return width == minify(mt->surf.logical_level0_px.width, level) &&
          height == minify(mt->surf.logical_level0_px.height, level) &&
          depth == level_depth &&
          MAX2(image->NumSamples, 1) == mt->surf.samples;
}

void
intel_miptree_get_image_offset(const struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               GLuint *x, GLuint *y)
{
   if (level == 0 && slice == 0) {
      *x = mt->level[0].level_x;
      *y = mt->level[0].level_y;
      return;
   }

   uint32_t x_offset_sa, y_offset_sa;

   /* The miptree itself can have an offset only if it represents a single
    * slice in an imported buffer object.
    * See intel_miptree_create_for_dri_image().
    */
   assert(mt->level[0].level_x == 0);
   assert(mt->level[0].level_y == 0);

   /* The given level is relative to level zero, while the miptree may
    * represent just a subset of all levels starting from 'first_level'.
    */
   assert(level >= mt->first_level);
   level -= mt->first_level;
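
   /* For a 3D surface the slice selects a depth offset (z) within the level;
    * for 1D/2D arrays it selects the array layer instead, so only one of the
    * two is ever non-zero.
    */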
   const unsigned z = mt->surf.dim == ISL_SURF_DIM_3D ? slice : 0;
   slice = mt->surf.dim == ISL_SURF_DIM_3D ? 0 : slice;
   isl_surf_get_image_offset_el(&mt->surf, level, slice, z,
                                &x_offset_sa, &y_offset_sa);

   *x = x_offset_sa;
   *y = y_offset_sa;
}


/**
 * This function computes the tile_w (in bytes) and tile_h (in rows) of
 * different tiling patterns. If the BO is untiled, tile_w is set to cpp
 * and tile_h is set to 1.
 */
void
intel_get_tile_dims(enum isl_tiling tiling, uint32_t cpp,
                    uint32_t *tile_w, uint32_t *tile_h)
{
   switch (tiling) {
   case ISL_TILING_X:
      *tile_w = 512;
      *tile_h = 8;
      break;
   case ISL_TILING_Y0:
      *tile_w = 128;
      *tile_h = 32;
      break;
   case ISL_TILING_LINEAR:
      *tile_w = cpp;
      *tile_h = 1;
      break;
   default:
      unreachable("not reached");
   }
}


/**
 * This function computes masks that may be used to select the bits of the X
 * and Y coordinates that indicate the offset within a tile. If the BO is
 * untiled, the masks are set to 0.
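 *
 * For example, an X tile is 512 bytes wide and 8 rows high, so with cpp == 4
 * this gives mask_x = 512 / 4 - 1 = 127 and mask_y = 8 - 1 = 7.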
 */
void
intel_get_tile_masks(enum isl_tiling tiling, uint32_t cpp,
                     uint32_t *mask_x, uint32_t *mask_y)
{
   uint32_t tile_w_bytes, tile_h;

   intel_get_tile_dims(tiling, cpp, &tile_w_bytes, &tile_h);

   *mask_x = tile_w_bytes / cpp - 1;
   *mask_y = tile_h - 1;
}

/**
 * Compute the offset (in bytes) from the start of the BO to the given x
 * and y coordinate. For tiled BOs, caller must ensure that x and y are
 * multiples of the tile size.
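 *
 * For example, for an X-tiled surface with cpp == 4 and row_pitch == 2048,
 * the offset of (x=256, y=16) is 16 * 2048 + 256 / (512 / 4) * 4096
 * = 32768 + 8192 = 40960 bytes.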
 */
uint32_t
intel_miptree_get_aligned_offset(const struct intel_mipmap_tree *mt,
                                 uint32_t x, uint32_t y)
{
   int cpp = mt->cpp;
   uint32_t pitch = mt->surf.row_pitch;

   switch (mt->surf.tiling) {
   default:
      unreachable("not reached");
   case ISL_TILING_LINEAR:
      return y * pitch + x * cpp;
   case ISL_TILING_X:
      assert((x % (512 / cpp)) == 0);
      assert((y % 8) == 0);
      return y * pitch + x / (512 / cpp) * 4096;
   case ISL_TILING_Y0:
      assert((x % (128 / cpp)) == 0);
      assert((y % 32) == 0);
      return y * pitch + x / (128 / cpp) * 4096;
   }
}

/**
 * Rendering with tiled buffers requires that the base address of the buffer
 * be aligned to a page boundary. For renderbuffers, and sometimes with
 * textures, we may want the surface to point at a texture image level that
 * isn't at a page boundary.
 *
 * This function returns an appropriately-aligned base offset
 * according to the tiling restrictions, plus any required x/y offset
 * from there.
 */
uint32_t
intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               uint32_t *tile_x,
                               uint32_t *tile_y)
{
   uint32_t x, y;
   uint32_t mask_x, mask_y;

   intel_get_tile_masks(mt->surf.tiling, mt->cpp, &mask_x, &mask_y);
   intel_miptree_get_image_offset(mt, level, slice, &x, &y);

   *tile_x = x & mask_x;
   *tile_y = y & mask_y;

   return intel_miptree_get_aligned_offset(mt, x & ~mask_x, y & ~mask_y);
}

static void
intel_miptree_copy_slice_sw(struct brw_context *brw,
                            struct intel_mipmap_tree *src_mt,
                            unsigned src_level, unsigned src_layer,
                            struct intel_mipmap_tree *dst_mt,
                            unsigned dst_level, unsigned dst_layer,
                            unsigned width, unsigned height)
{
   void *src, *dst;
   ptrdiff_t src_stride, dst_stride;
   const unsigned cpp = (isl_format_get_layout(dst_mt->surf.format)->bpb / 8);

   intel_miptree_map(brw, src_mt,
                     src_level, src_layer,
                     0, 0,
                     width, height,
                     GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
                     &src, &src_stride);

   intel_miptree_map(brw, dst_mt,
                     dst_level, dst_layer,
                     0, 0,
                     width, height,
                     GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
                     BRW_MAP_DIRECT_BIT,
                     &dst, &dst_stride);

   DBG("sw blit %s mt %p %p/%"PRIdPTR" -> %s mt %p %p/%"PRIdPTR" (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src, src_stride,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst, dst_stride,
       width, height);
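
   /* If both mappings are tightly packed (stride == bytes per row), the
    * whole slice can be copied with a single memcpy; otherwise copy one row
    * at a time, honouring each mapping's stride.
    */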
   int row_size = cpp * width;
   if (src_stride == row_size &&
       dst_stride == row_size) {
      memcpy(dst, src, row_size * height);
   } else {
      for (int i = 0; i < height; i++) {
         memcpy(dst, src, row_size);
         dst += dst_stride;
         src += src_stride;
      }
   }

   intel_miptree_unmap(brw, dst_mt, dst_level, dst_layer);
   intel_miptree_unmap(brw, src_mt, src_level, src_layer);

   /* Don't forget to copy the stencil data over, too. We could have skipped
    * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
    * shuffling the two data sources in/out of temporary storage instead of
    * the direct mapping we get this way.
    */
   if (dst_mt->stencil_mt) {
      assert(src_mt->stencil_mt);
      intel_miptree_copy_slice_sw(brw,
                                  src_mt->stencil_mt, src_level, src_layer,
                                  dst_mt->stencil_mt, dst_level, dst_layer,
                                  width, height);
   }
}

void
intel_miptree_copy_slice(struct brw_context *brw,
                         struct intel_mipmap_tree *src_mt,
                         unsigned src_level, unsigned src_layer,
                         struct intel_mipmap_tree *dst_mt,
                         unsigned dst_level, unsigned dst_layer)
{
   mesa_format format = src_mt->format;
   unsigned width = minify(src_mt->surf.phys_level0_sa.width,
                           src_level - src_mt->first_level);
   unsigned height = minify(src_mt->surf.phys_level0_sa.height,
                            src_level - src_mt->first_level);

   assert(src_layer < get_num_phys_layers(&src_mt->surf,
                                          src_level - src_mt->first_level));

   assert(src_mt->format == dst_mt->format);

   if (dst_mt->compressed) {
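      /* Convert the copy dimensions from pixels to compression blocks. For
       * example, a 13x7 ETC2 level (4x4 blocks) is copied as 4x2 blocks.
       */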
      unsigned int i, j;
      _mesa_get_format_block_size(dst_mt->format, &i, &j);
      height = ALIGN_NPOT(height, j) / j;
      width = ALIGN_NPOT(width, i) / i;
   }

   /* If it's a packed depth/stencil buffer with separate stencil, the blit
    * below won't apply since we can't do the depth's Y tiling or the
    * stencil's W tiling in the blitter.
    */
   if (src_mt->stencil_mt) {
      intel_miptree_copy_slice_sw(brw,
                                  src_mt, src_level, src_layer,
                                  dst_mt, dst_level, dst_layer,
                                  width, height);
      return;
   }

   uint32_t dst_x, dst_y, src_x, src_y;
   intel_miptree_get_image_offset(dst_mt, dst_level, dst_layer,
                                  &dst_x, &dst_y);
   intel_miptree_get_image_offset(src_mt, src_level, src_layer,
                                  &src_x, &src_y);

   DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src_x, src_y, src_mt->surf.row_pitch,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst_x, dst_y, dst_mt->surf.row_pitch,
       width, height);

   if (!intel_miptree_blit(brw,
                           src_mt, src_level, src_layer, 0, 0, false,
                           dst_mt, dst_level, dst_layer, 0, 0, false,
                           width, height, GL_COPY)) {
      perf_debug("miptree validate blit for %s failed\n",
                 _mesa_get_format_name(format));

      intel_miptree_copy_slice_sw(brw,
                                  src_mt, src_level, src_layer,
                                  dst_mt, dst_level, dst_layer,
                                  width, height);
   }
}

/**
 * Copies the image's current data to the given miptree, and associates that
 * miptree with the image.
 *
 * If \c invalidate is true, then the actual image data does not need to be
 * copied, but the image still needs to be associated to the new miptree (this
 * is set to true if we're about to clear the image).
 */
void
intel_miptree_copy_teximage(struct brw_context *brw,
                            struct intel_texture_image *intelImage,
                            struct intel_mipmap_tree *dst_mt,
                            bool invalidate)
{
   struct intel_mipmap_tree *src_mt = intelImage->mt;
   struct intel_texture_object *intel_obj =
      intel_texture_object(intelImage->base.Base.TexObject);
   int level = intelImage->base.Base.Level;
   const unsigned face = intelImage->base.Base.Face;
   unsigned start_layer, end_layer;

   if (intel_obj->base.Target == GL_TEXTURE_1D_ARRAY) {
      assert(face == 0);
      assert(intelImage->base.Base.Height);
      start_layer = 0;
      end_layer = intelImage->base.Base.Height - 1;
   } else if (face > 0) {
      start_layer = face;
      end_layer = face;
   } else {
      assert(intelImage->base.Base.Depth);
      start_layer = 0;
      end_layer = intelImage->base.Base.Depth - 1;
   }

   if (!invalidate) {
      for (unsigned i = start_layer; i <= end_layer; i++) {
         intel_miptree_copy_slice(brw,
                                  src_mt, level, i,
                                  dst_mt, level, i);
      }
   }

   intel_miptree_reference(&intelImage->mt, dst_mt);
   intel_obj->needs_validate = true;
}

static void
intel_miptree_init_mcs(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       int init_value)
{
   assert(mt->mcs_buf != NULL);

   /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
    *
    *     When MCS buffer is enabled and bound to MSRT, it is required that it
    *     is cleared prior to any rendering.
    *
    * Since we don't use the MCS buffer for any purpose other than rendering,
    * it makes sense to just clear it immediately upon allocation.
    *
    * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
    */
   void *map = brw_bo_map(brw, mt->mcs_buf->bo, MAP_WRITE);
   if (unlikely(map == NULL)) {
      fprintf(stderr, "Failed to map mcs buffer into GTT\n");
      brw_bo_unreference(mt->mcs_buf->bo);
      free(mt->mcs_buf);
      mt->mcs_buf = NULL;
      return;
   }
   memset(map, init_value, mt->mcs_buf->size);
   brw_bo_unmap(mt->mcs_buf->bo);
}

static struct intel_miptree_aux_buffer *
intel_alloc_aux_buffer(struct brw_context *brw,
                       const char *name,
                       const struct isl_surf *aux_surf,
                       uint32_t alloc_flags,
                       struct intel_mipmap_tree *mt)
{
   struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
   if (!buf)
      return NULL;

   buf->size = aux_surf->size;
   buf->pitch = aux_surf->row_pitch;
   buf->qpitch = isl_surf_get_array_pitch_sa_rows(aux_surf);

   /* ISL has a stricter set of alignment rules than the drm allocator.
    * Therefore one can pass the ISL dimensions in terms of bytes instead of
    * trying to recalculate based on different format block sizes.
    */
   buf->bo = brw_bo_alloc_tiled(brw->bufmgr, name, buf->size,
                                I915_TILING_Y, buf->pitch, alloc_flags);
   if (!buf->bo) {
      free(buf);
      return NULL;
   }

   buf->surf = *aux_surf;

   return buf;
}

static bool
intel_miptree_alloc_mcs(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
                        GLuint num_samples)
{
   assert(brw->gen >= 7); /* MCS only used on Gen7+ */
   assert(mt->mcs_buf == NULL);
   assert(mt->aux_usage == ISL_AUX_USAGE_MCS);

   /* Multisampled miptrees are only supported with a single miplevel. */
   assert(mt->first_level == 0);
   enum isl_aux_state **aux_state =
      create_aux_state_map(mt, ISL_AUX_STATE_CLEAR);
   if (!aux_state)
      return false;

   struct isl_surf temp_mcs_surf;

   MAYBE_UNUSED bool ok =
      isl_surf_get_mcs_surf(&brw->isl_dev, &mt->surf, &temp_mcs_surf);
   assert(ok);

   /* The buffer needs to be initialised, which requires mapping it to CPU
    * space for writing immediately. Therefore do not use the GPU access
    * flag, which can cause an unnecessary delay if the backing pages happen
    * to still be in use by the GPU.
    */
   const uint32_t alloc_flags = 0;
   mt->mcs_buf = intel_alloc_aux_buffer(brw, "mcs-miptree",
                                        &temp_mcs_surf, alloc_flags, mt);
   if (!mt->mcs_buf) {
      free(aux_state);
      return false;
   }

   mt->aux_state = aux_state;

   intel_miptree_init_mcs(brw, mt, 0xFF);

   return true;
}

bool
intel_miptree_alloc_ccs(struct brw_context *brw,
                        struct intel_mipmap_tree *mt)
{
   assert(mt->mcs_buf == NULL);
   assert(mt->aux_usage == ISL_AUX_USAGE_CCS_E ||
          mt->aux_usage == ISL_AUX_USAGE_CCS_D);

   struct isl_surf temp_ccs_surf;

   if (!isl_surf_get_ccs_surf(&brw->isl_dev, &mt->surf, &temp_ccs_surf, 0))
      return false;

   assert(temp_ccs_surf.size &&
          (temp_ccs_surf.size % temp_ccs_surf.row_pitch == 0));

   enum isl_aux_state **aux_state =
      create_aux_state_map(mt, ISL_AUX_STATE_PASS_THROUGH);
   if (!aux_state)
      return false;

   /* When CCS_E is used, we need to ensure that the CCS starts off in a valid
    * state. From the Sky Lake PRM, "MCS Buffer for Render Target(s)":
    *
    *    "If Software wants to enable Color Compression without Fast clear,
    *     Software needs to initialize MCS with zeros."
    *
    * A CCS value of 0 indicates that the corresponding block is in the
    * pass-through state, which is what we want.
    *
    * For CCS_D, on the other hand, we don't care as we're about to perform a
    * fast-clear operation. In that case, being hot in caches is more useful.
    */
   const uint32_t alloc_flags = mt->aux_usage == ISL_AUX_USAGE_CCS_E ?
      BO_ALLOC_ZEROED : BO_ALLOC_FOR_RENDER;
   mt->mcs_buf = intel_alloc_aux_buffer(brw, "ccs-miptree",
                                        &temp_ccs_surf, alloc_flags, mt);
   if (!mt->mcs_buf) {
      free(aux_state);
      return false;
   }

   mt->aux_state = aux_state;

   return true;
}

/**
 * Helper for intel_miptree_alloc_hiz() that sets
 * \c mt->level[level].has_hiz. Return true if and only if
 * \c has_hiz was set.
 */
static bool
intel_miptree_level_enable_hiz(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               uint32_t level)
{
   assert(mt->hiz_buf);
   assert(mt->surf.size > 0);

   if (brw->gen >= 8 || brw->is_haswell) {
      uint32_t width = minify(mt->surf.phys_level0_sa.width, level);
      uint32_t height = minify(mt->surf.phys_level0_sa.height, level);

      /* Disable HiZ for LOD > 0 unless the width is 8-aligned
       * and the height is 4-aligned. This allows our HiZ support
       * to fulfill Haswell restrictions for HiZ ops. For LOD == 0,
       * we can grow the width & height to allow the HiZ op to
       * force the proper size alignments.
       */
      if (level > 0 && ((width & 7) || (height & 3))) {
         DBG("mt %p level %d: HiZ DISABLED\n", mt, level);
         return false;
      }
   }

   DBG("mt %p level %d: HiZ enabled\n", mt, level);
   mt->level[level].has_hiz = true;
   return true;
}

bool
intel_miptree_alloc_hiz(struct brw_context *brw,
                        struct intel_mipmap_tree *mt)
{
   assert(mt->hiz_buf == NULL);
   assert(mt->aux_usage == ISL_AUX_USAGE_HIZ);

   enum isl_aux_state **aux_state =
      create_aux_state_map(mt, ISL_AUX_STATE_AUX_INVALID);
   if (!aux_state)
      return false;

   struct isl_surf temp_hiz_surf;

   MAYBE_UNUSED bool ok =
      isl_surf_get_hiz_surf(&brw->isl_dev, &mt->surf, &temp_hiz_surf);
   assert(ok);

   const uint32_t alloc_flags = BO_ALLOC_FOR_RENDER;
   mt->hiz_buf = intel_alloc_aux_buffer(brw, "hiz-miptree",
                                        &temp_hiz_surf, alloc_flags, mt);

   if (!mt->hiz_buf) {
      free(aux_state);
      return false;
   }

   for (unsigned level = mt->first_level; level <= mt->last_level; ++level)
      intel_miptree_level_enable_hiz(brw, mt, level);

   mt->aux_state = aux_state;

   return true;
}


/**
 * Allocate the initial aux surface for a miptree based on mt->aux_usage
 *
 * Since MCS, HiZ, and CCS_E can compress more than just clear color, we
 * create the auxiliary surfaces up-front. CCS_D, on the other hand, can only
 * compress clear color so we wait until an actual fast-clear to allocate it.
 */
static bool
intel_miptree_alloc_aux(struct brw_context *brw,
                        struct intel_mipmap_tree *mt)
{
   switch (mt->aux_usage) {
   case ISL_AUX_USAGE_NONE:
      return true;

   case ISL_AUX_USAGE_HIZ:
      assert(!_mesa_is_format_color_format(mt->format));
      if (!intel_miptree_alloc_hiz(brw, mt))
         return false;
      return true;

   case ISL_AUX_USAGE_MCS:
      assert(_mesa_is_format_color_format(mt->format));
      assert(mt->surf.samples > 1);
      if (!intel_miptree_alloc_mcs(brw, mt, mt->surf.samples))
         return false;
      return true;

   case ISL_AUX_USAGE_CCS_D:
      /* CCS_D can only compress clear color, so we wait until an actual
       * fast-clear to allocate it.
       */
      return true;

   case ISL_AUX_USAGE_CCS_E:
      assert(_mesa_is_format_color_format(mt->format));
      assert(mt->surf.samples == 1);
      if (!intel_miptree_alloc_ccs(brw, mt))
         return false;
      return true;
   }

   unreachable("Invalid aux usage");
}
1795
1796
1797 /**
1798 * Can the miptree sample using the hiz buffer?
1799 */
1800 bool
1801 intel_miptree_sample_with_hiz(struct brw_context *brw,
1802 struct intel_mipmap_tree *mt)
1803 {
1804 /* It's unclear how well supported sampling from the hiz buffer is on GEN8,
1805 * so keep things conservative for now and never enable it unless we're SKL+.
1806 */
1807 if (brw->gen < 9) {
1808 return false;
1809 }
1810
1811 if (!mt->hiz_buf) {
1812 return false;
1813 }
1814
1815 /* It seems the hardware won't fallback to the depth buffer if some of the
1816 * mipmap levels aren't available in the HiZ buffer. So we need all levels
1817 * of the texture to be HiZ enabled.
1818 */
1819 for (unsigned level = 0; level < mt->surf.levels; ++level) {
1820 if (!intel_miptree_level_has_hiz(mt, level))
1821 return false;
1822 }
1823
1824 /* If compressed multisampling is enabled, then we use it for the auxiliary
1825 * buffer instead.
1826 *
1827 * From the BDW PRM (Volume 2d: Command Reference: Structures
1828 * RENDER_SURFACE_STATE.AuxiliarySurfaceMode):
1829 *
1830 * "If this field is set to AUX_HIZ, Number of Multisamples must be
1831 * MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
1832 *
1833 * There is no such blurb for 1D textures, but there is sufficient evidence
1834 * that this is broken on SKL+.
1835 */
1836 return (mt->surf.samples == 1 &&
1837 mt->target != GL_TEXTURE_3D &&
1838 mt->target != GL_TEXTURE_1D /* gen9+ restriction */);
1839 }
1840
1841 /**
1842 * Does the miptree slice have hiz enabled?
1843 */
1844 bool
1845 intel_miptree_level_has_hiz(const struct intel_mipmap_tree *mt, uint32_t level)
1846 {
1847 intel_miptree_check_level_layer(mt, level, 0);
1848 return mt->level[level].has_hiz;
1849 }
1850
1851 static inline uint32_t
1852 miptree_level_range_length(const struct intel_mipmap_tree *mt,
1853 uint32_t start_level, uint32_t num_levels)
1854 {
1855 assert(start_level >= mt->first_level);
1856 assert(start_level <= mt->last_level);
1857
1858 if (num_levels == INTEL_REMAINING_LEVELS)
1859 num_levels = mt->last_level - start_level + 1;
1860 /* Check for overflow */
1861 assert(start_level + num_levels >= start_level);
1862 assert(start_level + num_levels <= mt->last_level + 1);
1863
1864 return num_levels;
1865 }
1866
1867 static inline uint32_t
1868 miptree_layer_range_length(const struct intel_mipmap_tree *mt, uint32_t level,
1869 uint32_t start_layer, uint32_t num_layers)
1870 {
1871 assert(level <= mt->last_level);
1872
1873 const uint32_t total_num_layers = brw_get_num_logical_layers(mt, level);
1874 assert(start_layer < total_num_layers);
1875 if (num_layers == INTEL_REMAINING_LAYERS)
1876 num_layers = total_num_layers - start_layer;
1877 /* Check for overflow */
1878 assert(start_layer + num_layers >= start_layer);
1879 assert(start_layer + num_layers <= total_num_layers);
1880
1881 return num_layers;
1882 }
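/* An illustrative example of the INTEL_REMAINING_* convention: for a miptree
 * with first_level == 0 and last_level == 4,
 * miptree_level_range_length(mt, 2, INTEL_REMAINING_LEVELS) returns 3,
 * i.e. levels 2, 3 and 4. miptree_layer_range_length() behaves the same way
 * for the layers within a single level, so callers can pass
 * INTEL_REMAINING_LAYERS and let the helper clamp the range for them.
 */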
1883
1884 bool
1885 intel_miptree_has_color_unresolved(const struct intel_mipmap_tree *mt,
1886 unsigned start_level, unsigned num_levels,
1887 unsigned start_layer, unsigned num_layers)
1888 {
1889 assert(_mesa_is_format_color_format(mt->format));
1890
1891 if (!mt->mcs_buf)
1892 return false;
1893
1894 /* Clamp the level range to fit the miptree */
1895 num_levels = miptree_level_range_length(mt, start_level, num_levels);
1896
1897 for (uint32_t l = 0; l < num_levels; l++) {
1898 const uint32_t level = start_level + l;
1899 const uint32_t level_layers =
1900 miptree_layer_range_length(mt, level, start_layer, num_layers);
1901 for (unsigned a = 0; a < level_layers; a++) {
1902 enum isl_aux_state aux_state =
1903 intel_miptree_get_aux_state(mt, level, start_layer + a);
1904 assert(aux_state != ISL_AUX_STATE_AUX_INVALID);
1905 if (aux_state != ISL_AUX_STATE_PASS_THROUGH)
1906 return true;
1907 }
1908 }
1909
1910 return false;
1911 }
1912
1913 static void
1914 intel_miptree_check_color_resolve(const struct brw_context *brw,
1915 const struct intel_mipmap_tree *mt,
1916 unsigned level, unsigned layer)
1917 {
1918
1919 if (!mt->mcs_buf)
1920 return;
1921
1922 /* Fast color clear is supported for mipmapped surfaces only on Gen8+. */
1923 assert(brw->gen >= 8 ||
1924 (level == 0 && mt->first_level == 0 && mt->last_level == 0));
1925
1926 /* Compression of arrayed msaa surfaces is supported. */
1927 if (mt->surf.samples > 1)
1928 return;
1929
1930 /* Fast color clear is supported for non-msaa arrays only on Gen8+. */
1931 assert(brw->gen >= 8 ||
1932 (layer == 0 &&
1933 mt->surf.logical_level0_px.depth == 1 &&
1934 mt->surf.logical_level0_px.array_len == 1));
1935
1936 (void)level;
1937 (void)layer;
1938 }
1939
1940 static enum blorp_fast_clear_op
1941 get_ccs_d_resolve_op(enum isl_aux_state aux_state,
1942 enum isl_aux_usage aux_usage,
1943 bool fast_clear_supported)
1944 {
1945 assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_CCS_D);
1946
1947 const bool ccs_supported = aux_usage == ISL_AUX_USAGE_CCS_D;
1948
1949 assert(ccs_supported == fast_clear_supported);
1950
1951 switch (aux_state) {
1952 case ISL_AUX_STATE_CLEAR:
1953 case ISL_AUX_STATE_PARTIAL_CLEAR:
1954 if (!ccs_supported)
1955 return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
1956 else
1957 return BLORP_FAST_CLEAR_OP_NONE;
1958
1959 case ISL_AUX_STATE_PASS_THROUGH:
1960 return BLORP_FAST_CLEAR_OP_NONE;
1961
1962 case ISL_AUX_STATE_RESOLVED:
1963 case ISL_AUX_STATE_AUX_INVALID:
1964 case ISL_AUX_STATE_COMPRESSED_CLEAR:
1965 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
1966 break;
1967 }
1968
1969 unreachable("Invalid aux state for CCS_D");
1970 }
1971
1972 static enum blorp_fast_clear_op
1973 get_ccs_e_resolve_op(enum isl_aux_state aux_state,
1974 enum isl_aux_usage aux_usage,
1975 bool fast_clear_supported)
1976 {
1977 /* CCS_E surfaces can be accessed as CCS_D if we're careful. */
1978 assert(aux_usage == ISL_AUX_USAGE_NONE ||
1979 aux_usage == ISL_AUX_USAGE_CCS_D ||
1980 aux_usage == ISL_AUX_USAGE_CCS_E);
1981
1982 if (aux_usage == ISL_AUX_USAGE_CCS_D)
1983 assert(fast_clear_supported);
1984
1985 switch (aux_state) {
1986 case ISL_AUX_STATE_CLEAR:
1987 case ISL_AUX_STATE_PARTIAL_CLEAR:
1988 if (fast_clear_supported)
1989 return BLORP_FAST_CLEAR_OP_NONE;
1990 else if (aux_usage == ISL_AUX_USAGE_CCS_E)
1991 return BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
1992 else
1993 return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
1994
1995 case ISL_AUX_STATE_COMPRESSED_CLEAR:
1996 if (aux_usage != ISL_AUX_USAGE_CCS_E)
1997 return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
1998 else if (!fast_clear_supported)
1999 return BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
2000 else
2001 return BLORP_FAST_CLEAR_OP_NONE;
2002
2003 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2004 if (aux_usage != ISL_AUX_USAGE_CCS_E)
2005 return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
2006 else
2007 return BLORP_FAST_CLEAR_OP_NONE;
2008
2009 case ISL_AUX_STATE_PASS_THROUGH:
2010 return BLORP_FAST_CLEAR_OP_NONE;
2011
2012 case ISL_AUX_STATE_RESOLVED:
2013 case ISL_AUX_STATE_AUX_INVALID:
2014 break;
2015 }
2016
2017 unreachable("Invalid aux state for CCS_E");
2018 }
2019
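/* A short summary of the two CCS resolve helpers above: they return
 * BLORP_FAST_CLEAR_OP_NONE whenever the requested access can consume the
 * data as-is, a partial resolve when CCS_E access cannot handle the
 * fast-clear color (it rewrites clear blocks as ordinary compressed blocks),
 * and a full resolve whenever the access cannot understand CCS compression
 * at all.
 */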
2020 static void
2021 intel_miptree_prepare_ccs_access(struct brw_context *brw,
2022 struct intel_mipmap_tree *mt,
2023 uint32_t level, uint32_t layer,
2024 enum isl_aux_usage aux_usage,
2025 bool fast_clear_supported)
2026 {
2027 enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, layer);
2028
2029 enum blorp_fast_clear_op resolve_op;
2030 if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
2031 resolve_op = get_ccs_e_resolve_op(aux_state, aux_usage,
2032 fast_clear_supported);
2033 } else {
2034 assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D);
2035 resolve_op = get_ccs_d_resolve_op(aux_state, aux_usage,
2036 fast_clear_supported);
2037 }
2038
2039 if (resolve_op != BLORP_FAST_CLEAR_OP_NONE) {
2040 intel_miptree_check_color_resolve(brw, mt, level, layer);
2041 brw_blorp_resolve_color(brw, mt, level, layer, resolve_op);
2042
2043 switch (resolve_op) {
2044 case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
2045 /* The CCS full resolve operation destroys the CCS and sets it to the
2046 * pass-through state. (You can also think of this as being both a
2047 * resolve and an ambiguate in one operation.)
2048 */
2049 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2050 ISL_AUX_STATE_PASS_THROUGH);
2051 break;
2052
2053 case BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL:
2054 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2055 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2056 break;
2057
2058 default:
2059 unreachable("Invalid resolve op");
2060 }
2061 }
2062 }
2063
2064 static void
2065 intel_miptree_finish_ccs_write(struct brw_context *brw,
2066 struct intel_mipmap_tree *mt,
2067 uint32_t level, uint32_t layer,
2068 enum isl_aux_usage aux_usage)
2069 {
2070 assert(aux_usage == ISL_AUX_USAGE_NONE ||
2071 aux_usage == ISL_AUX_USAGE_CCS_D ||
2072 aux_usage == ISL_AUX_USAGE_CCS_E);
2073
2074 enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, layer);
2075
2076 if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
2077 switch (aux_state) {
2078 case ISL_AUX_STATE_CLEAR:
2079 case ISL_AUX_STATE_PARTIAL_CLEAR:
2080 assert(aux_usage == ISL_AUX_USAGE_CCS_E ||
2081 aux_usage == ISL_AUX_USAGE_CCS_D);
2082
2083 if (aux_usage == ISL_AUX_USAGE_CCS_E) {
2084 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2085 ISL_AUX_STATE_COMPRESSED_CLEAR);
2086 } else if (aux_state != ISL_AUX_STATE_PARTIAL_CLEAR) {
2087 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2088 ISL_AUX_STATE_PARTIAL_CLEAR);
2089 }
2090 break;
2091
2092 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2093 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2094 assert(aux_usage == ISL_AUX_USAGE_CCS_E);
2095 break; /* Nothing to do */
2096
2097 case ISL_AUX_STATE_PASS_THROUGH:
2098 if (aux_usage == ISL_AUX_USAGE_CCS_E) {
2099 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2100 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2101 } else {
2102 /* Nothing to do */
2103 }
2104 break;
2105
2106 case ISL_AUX_STATE_RESOLVED:
2107 case ISL_AUX_STATE_AUX_INVALID:
2108 unreachable("Invalid aux state for CCS_E");
2109 }
2110 } else {
2111 assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D);
2112 /* CCS_D is a bit simpler */
2113 switch (aux_state) {
2114 case ISL_AUX_STATE_CLEAR:
2115 assert(aux_usage == ISL_AUX_USAGE_CCS_D);
2116 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2117 ISL_AUX_STATE_PARTIAL_CLEAR);
2118 break;
2119
2120 case ISL_AUX_STATE_PARTIAL_CLEAR:
2121 assert(aux_usage == ISL_AUX_USAGE_CCS_D);
2122 break; /* Nothing to do */
2123
2124 case ISL_AUX_STATE_PASS_THROUGH:
2125 /* Nothing to do */
2126 break;
2127
2128 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2129 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2130 case ISL_AUX_STATE_RESOLVED:
2131 case ISL_AUX_STATE_AUX_INVALID:
2132 unreachable("Invalid aux state for CCS_D");
2133 }
2134 }
2135 }
2136
2137 static void
2138 intel_miptree_prepare_mcs_access(struct brw_context *brw,
2139 struct intel_mipmap_tree *mt,
2140 uint32_t layer,
2141 enum isl_aux_usage aux_usage,
2142 bool fast_clear_supported)
2143 {
2144 assert(aux_usage == ISL_AUX_USAGE_MCS);
2145
2146 switch (intel_miptree_get_aux_state(mt, 0, layer)) {
2147 case ISL_AUX_STATE_CLEAR:
2148 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2149 if (!fast_clear_supported) {
2150 brw_blorp_mcs_partial_resolve(brw, mt, layer, 1);
2151 intel_miptree_set_aux_state(brw, mt, 0, layer, 1,
2152 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2153 }
2154 break;
2155
2156 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2157 break; /* Nothing to do */
2158
2159 case ISL_AUX_STATE_RESOLVED:
2160 case ISL_AUX_STATE_PASS_THROUGH:
2161 case ISL_AUX_STATE_AUX_INVALID:
2162 case ISL_AUX_STATE_PARTIAL_CLEAR:
2163 unreachable("Invalid aux state for MCS");
2164 }
2165 }
2166
2167 static void
2168 intel_miptree_finish_mcs_write(struct brw_context *brw,
2169 struct intel_mipmap_tree *mt,
2170 uint32_t layer,
2171 enum isl_aux_usage aux_usage)
2172 {
2173 assert(aux_usage == ISL_AUX_USAGE_MCS);
2174
2175 switch (intel_miptree_get_aux_state(mt, 0, layer)) {
2176 case ISL_AUX_STATE_CLEAR:
2177 intel_miptree_set_aux_state(brw, mt, 0, layer, 1,
2178 ISL_AUX_STATE_COMPRESSED_CLEAR);
2179 break;
2180
2181 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2182 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2183 break; /* Nothing to do */
2184
2185 case ISL_AUX_STATE_RESOLVED:
2186 case ISL_AUX_STATE_PASS_THROUGH:
2187 case ISL_AUX_STATE_AUX_INVALID:
2188 case ISL_AUX_STATE_PARTIAL_CLEAR:
2189 unreachable("Invalid aux state for MCS");
2190 }
2191 }
2192
2193 static void
2194 intel_miptree_prepare_hiz_access(struct brw_context *brw,
2195 struct intel_mipmap_tree *mt,
2196 uint32_t level, uint32_t layer,
2197 enum isl_aux_usage aux_usage,
2198 bool fast_clear_supported)
2199 {
2200 assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_HIZ);
2201
2202 enum blorp_hiz_op hiz_op = BLORP_HIZ_OP_NONE;
2203 switch (intel_miptree_get_aux_state(mt, level, layer)) {
2204 case ISL_AUX_STATE_CLEAR:
2205 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2206 if (aux_usage != ISL_AUX_USAGE_HIZ || !fast_clear_supported)
2207 hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
2208 break;
2209
2210 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2211 if (aux_usage != ISL_AUX_USAGE_HIZ)
2212 hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
2213 break;
2214
2215 case ISL_AUX_STATE_PASS_THROUGH:
2216 case ISL_AUX_STATE_RESOLVED:
2217 break;
2218
2219 case ISL_AUX_STATE_AUX_INVALID:
2220 if (aux_usage == ISL_AUX_USAGE_HIZ)
2221 hiz_op = BLORP_HIZ_OP_HIZ_RESOLVE;
2222 break;
2223
2224 case ISL_AUX_STATE_PARTIAL_CLEAR:
2225 unreachable("Invalid HiZ state");
2226 }
2227
2228 if (hiz_op != BLORP_HIZ_OP_NONE) {
2229 intel_hiz_exec(brw, mt, level, layer, 1, hiz_op);
2230
2231 switch (hiz_op) {
2232 case BLORP_HIZ_OP_DEPTH_RESOLVE:
2233 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2234 ISL_AUX_STATE_RESOLVED);
2235 break;
2236
2237 case BLORP_HIZ_OP_HIZ_RESOLVE:
2238 /* The HiZ resolve operation is actually an ambiguate */
2239 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2240 ISL_AUX_STATE_PASS_THROUGH);
2241 break;
2242
2243 default:
2244 unreachable("Invalid HiZ op");
2245 }
2246 }
2247 }
2248
2249 static void
2250 intel_miptree_finish_hiz_write(struct brw_context *brw,
2251 struct intel_mipmap_tree *mt,
2252 uint32_t level, uint32_t layer,
2253 enum isl_aux_usage aux_usage)
2254 {
2255 assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_HIZ);
2256
2257 switch (intel_miptree_get_aux_state(mt, level, layer)) {
2258 case ISL_AUX_STATE_CLEAR:
2259 assert(aux_usage == ISL_AUX_USAGE_HIZ);
2260 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2261 ISL_AUX_STATE_COMPRESSED_CLEAR);
2262 break;
2263
2264 case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2265 case ISL_AUX_STATE_COMPRESSED_CLEAR:
2266 assert(aux_usage == ISL_AUX_USAGE_HIZ);
2267 break; /* Nothing to do */
2268
2269 case ISL_AUX_STATE_RESOLVED:
2270 if (aux_usage == ISL_AUX_USAGE_HIZ) {
2271 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2272 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2273 } else {
2274 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2275 ISL_AUX_STATE_AUX_INVALID);
2276 }
2277 break;
2278
2279 case ISL_AUX_STATE_PASS_THROUGH:
2280 if (aux_usage == ISL_AUX_USAGE_HIZ) {
2281 intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2282 ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2283 }
2284 break;
2285
2286 case ISL_AUX_STATE_AUX_INVALID:
2287 assert(aux_usage != ISL_AUX_USAGE_HIZ);
2288 break;
2289
2290 case ISL_AUX_STATE_PARTIAL_CLEAR:
2291 unreachable("Invalid HiZ state");
2292 }
2293 }
2294
2295 void
2296 intel_miptree_prepare_access(struct brw_context *brw,
2297 struct intel_mipmap_tree *mt,
2298 uint32_t start_level, uint32_t num_levels,
2299 uint32_t start_layer, uint32_t num_layers,
2300 enum isl_aux_usage aux_usage,
2301 bool fast_clear_supported)
2302 {
2303 num_levels = miptree_level_range_length(mt, start_level, num_levels);
2304
2305 switch (mt->aux_usage) {
2306 case ISL_AUX_USAGE_NONE:
2307 /* Nothing to do */
2308 break;
2309
2310 case ISL_AUX_USAGE_MCS:
2311 assert(mt->mcs_buf);
2312 assert(start_level == 0 && num_levels == 1);
2313 const uint32_t level_layers =
2314 miptree_layer_range_length(mt, 0, start_layer, num_layers);
2315 for (uint32_t a = 0; a < level_layers; a++) {
2316 intel_miptree_prepare_mcs_access(brw, mt, start_layer + a,
2317 aux_usage, fast_clear_supported);
2318 }
2319 break;
2320
2321 case ISL_AUX_USAGE_CCS_D:
2322 case ISL_AUX_USAGE_CCS_E:
2323 if (!mt->mcs_buf)
2324 return;
2325
2326 for (uint32_t l = 0; l < num_levels; l++) {
2327 const uint32_t level = start_level + l;
2328 const uint32_t level_layers =
2329 miptree_layer_range_length(mt, level, start_layer, num_layers);
2330 for (uint32_t a = 0; a < level_layers; a++) {
2331 intel_miptree_prepare_ccs_access(brw, mt, level,
2332 start_layer + a,
2333 aux_usage, fast_clear_supported);
2334 }
2335 }
2336 break;
2337
2338 case ISL_AUX_USAGE_HIZ:
2339 assert(mt->hiz_buf);
2340 for (uint32_t l = 0; l < num_levels; l++) {
2341 const uint32_t level = start_level + l;
2342 if (!intel_miptree_level_has_hiz(mt, level))
2343 continue;
2344
2345 const uint32_t level_layers =
2346 miptree_layer_range_length(mt, level, start_layer, num_layers);
2347 for (uint32_t a = 0; a < level_layers; a++) {
2348 intel_miptree_prepare_hiz_access(brw, mt, level, start_layer + a,
2349 aux_usage, fast_clear_supported);
2350 }
2351 }
2352 break;
2353
2354 default:
2355 unreachable("Invalid aux usage");
2356 }
2357 }
2358
2359 void
2360 intel_miptree_finish_write(struct brw_context *brw,
2361 struct intel_mipmap_tree *mt, uint32_t level,
2362 uint32_t start_layer, uint32_t num_layers,
2363 enum isl_aux_usage aux_usage)
2364 {
2365 num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers);
2366
2367 switch (mt->aux_usage) {
2368 case ISL_AUX_USAGE_NONE:
2369 /* Nothing to do */
2370 break;
2371
2372 case ISL_AUX_USAGE_MCS:
2373 assert(mt->mcs_buf);
2374 for (uint32_t a = 0; a < num_layers; a++) {
2375 intel_miptree_finish_mcs_write(brw, mt, start_layer + a,
2376 aux_usage);
2377 }
2378 break;
2379
2380 case ISL_AUX_USAGE_CCS_D:
2381 case ISL_AUX_USAGE_CCS_E:
2382 if (!mt->mcs_buf)
2383 return;
2384
2385 for (uint32_t a = 0; a < num_layers; a++) {
2386 intel_miptree_finish_ccs_write(brw, mt, level, start_layer + a,
2387 aux_usage);
2388 }
2389 break;
2390
2391 case ISL_AUX_USAGE_HIZ:
2392 if (!intel_miptree_level_has_hiz(mt, level))
2393 return;
2394
2395 for (uint32_t a = 0; a < num_layers; a++) {
2396 intel_miptree_finish_hiz_write(brw, mt, level, start_layer + a,
2397 aux_usage);
2398 }
2399 break;
2400
2401 default:
2402 unreachable("Invalid aux usage");
2403 }
2404 }
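/* Typical usage of the prepare/finish pair (an illustrative sketch only;
 * in-tree callers normally go through wrappers such as
 * intel_miptree_prepare_render()/intel_miptree_finish_render() below, and
 * render_one_layer() here is a made-up placeholder):
 *
 *    intel_miptree_prepare_access(brw, mt, level, 1, layer, 1,
 *                                 ISL_AUX_USAGE_CCS_E, true);
 *    render_one_layer(brw, mt, level, layer);
 *    intel_miptree_finish_write(brw, mt, level, layer, 1,
 *                               ISL_AUX_USAGE_CCS_E);
 *
 * prepare_access() performs whatever resolves are needed to make the surface
 * legal for the requested aux usage; finish_write() then updates the tracked
 * aux state to account for the writes that just landed.
 */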
2405
2406 enum isl_aux_state
2407 intel_miptree_get_aux_state(const struct intel_mipmap_tree *mt,
2408 uint32_t level, uint32_t layer)
2409 {
2410 intel_miptree_check_level_layer(mt, level, layer);
2411
2412 if (_mesa_is_format_color_format(mt->format)) {
2413 assert(mt->mcs_buf != NULL);
2414 assert(mt->surf.samples == 1 ||
2415 mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
2416 } else if (mt->format == MESA_FORMAT_S_UINT8) {
2417 unreachable("Cannot get aux state for stencil");
2418 } else {
2419 assert(intel_miptree_level_has_hiz(mt, level));
2420 }
2421
2422 return mt->aux_state[level][layer];
2423 }
2424
2425 void
2426 intel_miptree_set_aux_state(struct brw_context *brw,
2427 struct intel_mipmap_tree *mt, uint32_t level,
2428 uint32_t start_layer, uint32_t num_layers,
2429 enum isl_aux_state aux_state)
2430 {
2431 num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers);
2432
2433 if (_mesa_is_format_color_format(mt->format)) {
2434 assert(mt->mcs_buf != NULL);
2435 assert(mt->surf.samples == 1 ||
2436 mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
2437 } else if (mt->format == MESA_FORMAT_S_UINT8) {
2438 unreachable("Cannot set aux state for stencil");
2439 } else {
2440 assert(intel_miptree_level_has_hiz(mt, level));
2441 }
2442
2443 for (unsigned a = 0; a < num_layers; a++)
2444 mt->aux_state[level][start_layer + a] = aux_state;
2445 }
2446
2447 /* On Gen9, color buffers may be compressed by the hardware (lossless
2448 * compression). There are, however, format restrictions, and care must be
2449 * taken that the sampler engine is capable of re-interpreting a buffer with
2450 * a format different from the one it was originally written with.
2451 *
2452 * For example, SRGB formats are not compressible and the sampler engine isn't
2453 * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying
2454 * color buffer needs to be resolved so that the sampling surface can be
2455 * sampled as non-compressed (i.e., without the auxiliary MCS buffer being
2456 * set).
2457 */
2458 static bool
2459 can_texture_with_ccs(struct brw_context *brw,
2460 struct intel_mipmap_tree *mt,
2461 enum isl_format view_format)
2462 {
2463 if (mt->aux_usage != ISL_AUX_USAGE_CCS_E)
2464 return false;
2465
2466 if (!isl_formats_are_ccs_e_compatible(&brw->screen->devinfo,
2467 mt->surf.format, view_format)) {
2468 perf_debug("Incompatible sampling format (%s) for rbc (%s)\n",
2469 isl_format_get_layout(view_format)->name,
2470 _mesa_get_format_name(mt->format));
2471 return false;
2472 }
2473
2474 return true;
2475 }
2476
2477 enum isl_aux_usage
2478 intel_miptree_texture_aux_usage(struct brw_context *brw,
2479 struct intel_mipmap_tree *mt,
2480 enum isl_format view_format)
2481 {
2482 switch (mt->aux_usage) {
2483 case ISL_AUX_USAGE_HIZ:
2484 if (intel_miptree_sample_with_hiz(brw, mt))
2485 return ISL_AUX_USAGE_HIZ;
2486 break;
2487
2488 case ISL_AUX_USAGE_MCS:
2489 return ISL_AUX_USAGE_MCS;
2490
2491 case ISL_AUX_USAGE_CCS_D:
2492 case ISL_AUX_USAGE_CCS_E:
2493 if (mt->mcs_buf && can_texture_with_ccs(brw, mt, view_format))
2494 return ISL_AUX_USAGE_CCS_E;
2495 break;
2496
2497 default:
2498 break;
2499 }
2500
2501 return ISL_AUX_USAGE_NONE;
2502 }
2503
2504 static bool
2505 isl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b)
2506 {
2507 /* On gen8 and earlier, the hardware was only capable of handling 0/1 clear
2508 * values so sRGB curve application was a no-op for all fast-clearable
2509 * formats.
2510 *
2511 * On gen9+, the hardware supports arbitrary clear values. For sRGB clear
2512 * values, the hardware interprets the floats, not as what would be
2513 * returned from the sampler (or written by the shader), but as being
2514 * between format conversion and sRGB curve application. This means that
2515 * we can switch between sRGB and UNORM without having to whack the clear
2516 * color.
2517 */
2518 return isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b);
2519 }
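/* For example, B8G8R8A8_UNORM and B8G8R8A8_UNORM_SRGB resolve to the same
 * linear format, so a clear color programmed for one remains valid for the
 * other, while a switch to a format with a different channel layout would
 * not be fast-clear compatible.
 */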
2520
2521 static void
2522 intel_miptree_prepare_texture_slices(struct brw_context *brw,
2523 struct intel_mipmap_tree *mt,
2524 enum isl_format view_format,
2525 uint32_t start_level, uint32_t num_levels,
2526 uint32_t start_layer, uint32_t num_layers,
2527 bool *aux_supported_out)
2528 {
2529 enum isl_aux_usage aux_usage =
2530 intel_miptree_texture_aux_usage(brw, mt, view_format);
2531 bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE;
2532
2533 /* Clear color is specified as ints or floats and the conversion is done by
2534 * the sampler. If we have a texture view, we would have to perform the
2535 * clear color conversion manually. Just disable clear color.
2536 */
2537 if (!isl_formats_are_fast_clear_compatible(mt->surf.format, view_format))
2538 clear_supported = false;
2539
2540 intel_miptree_prepare_access(brw, mt, start_level, num_levels,
2541 start_layer, num_layers,
2542 aux_usage, clear_supported);
2543 if (aux_supported_out)
2544 *aux_supported_out = aux_usage != ISL_AUX_USAGE_NONE;
2545 }
2546
2547 void
2548 intel_miptree_prepare_texture(struct brw_context *brw,
2549 struct intel_mipmap_tree *mt,
2550 enum isl_format view_format,
2551 bool *aux_supported_out)
2552 {
2553 intel_miptree_prepare_texture_slices(brw, mt, view_format,
2554 0, INTEL_REMAINING_LEVELS,
2555 0, INTEL_REMAINING_LAYERS,
2556 aux_supported_out);
2557 }
2558
2559 void
2560 intel_miptree_prepare_image(struct brw_context *brw,
2561 struct intel_mipmap_tree *mt)
2562 {
2563 /* The data port doesn't understand any compression */
2564 intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
2565 0, INTEL_REMAINING_LAYERS,
2566 ISL_AUX_USAGE_NONE, false);
2567 }
2568
2569 void
2570 intel_miptree_prepare_fb_fetch(struct brw_context *brw,
2571 struct intel_mipmap_tree *mt, uint32_t level,
2572 uint32_t start_layer, uint32_t num_layers)
2573 {
2574 intel_miptree_prepare_texture_slices(brw, mt, mt->surf.format, level, 1,
2575 start_layer, num_layers, NULL);
2576 }
2577
2578 enum isl_aux_usage
2579 intel_miptree_render_aux_usage(struct brw_context *brw,
2580 struct intel_mipmap_tree *mt,
2581 bool srgb_enabled, bool blend_enabled)
2582 {
2583 switch (mt->aux_usage) {
2584 case ISL_AUX_USAGE_MCS:
2585 assert(mt->mcs_buf);
2586 return ISL_AUX_USAGE_MCS;
2587
2588 case ISL_AUX_USAGE_CCS_D:
2589 return mt->mcs_buf ? ISL_AUX_USAGE_CCS_D : ISL_AUX_USAGE_NONE;
2590
2591 case ISL_AUX_USAGE_CCS_E: {
2592 mesa_format mesa_format =
2593 srgb_enabled ? mt->format : _mesa_get_srgb_format_linear(mt->format);
2594 enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);
2595
2596 /* If the format supports CCS_E, then we can just use it */
2597 if (isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format))
2598 return ISL_AUX_USAGE_CCS_E;
2599
2600 /* Otherwise, we have to fall back to CCS_D */
2601
2602 /* gen9 hardware technically supports non-0/1 clear colors with sRGB
2603 * formats. However, there are issues with blending where it doesn't
2604 * properly apply the sRGB curve to the clear color when blending.
2605 */
2606 if (blend_enabled && isl_format_is_srgb(isl_format) &&
2607 !isl_color_value_is_zero_one(mt->fast_clear_color, isl_format))
2608 return ISL_AUX_USAGE_NONE;
2609
2610 return ISL_AUX_USAGE_CCS_D;
2611 }
2612
2613 default:
2614 return ISL_AUX_USAGE_NONE;
2615 }
2616 }
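/* In other words, the render aux usage for a CCS_E miptree degrades
 * gracefully: CCS_E when the (possibly sRGB-adjusted) render format supports
 * it, CCS_D when it does not, and no aux at all for the gen9
 * sRGB-blend-with-non-0/1-clear-color case described above.
 */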
2617
2618 void
2619 intel_miptree_prepare_render(struct brw_context *brw,
2620 struct intel_mipmap_tree *mt, uint32_t level,
2621 uint32_t start_layer, uint32_t layer_count,
2622 bool srgb_enabled, bool blend_enabled)
2623 {
2624 enum isl_aux_usage aux_usage =
2625 intel_miptree_render_aux_usage(brw, mt, srgb_enabled, blend_enabled);
2626 intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
2627 aux_usage, aux_usage != ISL_AUX_USAGE_NONE);
2628 }
2629
2630 void
2631 intel_miptree_finish_render(struct brw_context *brw,
2632 struct intel_mipmap_tree *mt, uint32_t level,
2633 uint32_t start_layer, uint32_t layer_count,
2634 bool srgb_enabled, bool blend_enabled)
2635 {
2636 assert(_mesa_is_format_color_format(mt->format));
2637
2638 enum isl_aux_usage aux_usage =
2639 intel_miptree_render_aux_usage(brw, mt, srgb_enabled, blend_enabled);
2640 intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
2641 aux_usage);
2642 }
2643
2644 void
2645 intel_miptree_prepare_depth(struct brw_context *brw,
2646 struct intel_mipmap_tree *mt, uint32_t level,
2647 uint32_t start_layer, uint32_t layer_count)
2648 {
2649 intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
2650 mt->aux_usage, mt->hiz_buf != NULL);
2651 }
2652
2653 void
2654 intel_miptree_finish_depth(struct brw_context *brw,
2655 struct intel_mipmap_tree *mt, uint32_t level,
2656 uint32_t start_layer, uint32_t layer_count,
2657 bool depth_written)
2658 {
2659 if (depth_written) {
2660 intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
2661 mt->hiz_buf ? ISL_AUX_USAGE_HIZ : ISL_AUX_USAGE_NONE);
2662 }
2663 }
2664
2665 /**
2666 * Make it possible to share the BO backing the given miptree with another
2667 * process or another miptree.
2668 *
2669 * Fast color clears are unsafe with shared buffers, so we need to resolve and
2670 * then discard the MCS buffer, if present. We also set the no_ccs flag to
2671 * ensure that no MCS buffer gets allocated in the future.
2672 *
2673 * HiZ is similarly unsafe with shared buffers.
2674 */
2675 void
2676 intel_miptree_make_shareable(struct brw_context *brw,
2677 struct intel_mipmap_tree *mt)
2678 {
2679 /* MCS buffers are also used for multisample buffers, but we can't resolve
2680 * away a multisample MCS buffer because it's an integral part of how the
2681 * pixel data is stored. Fortunately this code path should never be
2682 * reached for multisample buffers.
2683 */
2684 assert(mt->surf.msaa_layout == ISL_MSAA_LAYOUT_NONE ||
2685 mt->surf.samples == 1);
2686
2687 intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
2688 0, INTEL_REMAINING_LAYERS,
2689 ISL_AUX_USAGE_NONE, false);
2690
2691 if (mt->mcs_buf) {
2692 brw_bo_unreference(mt->mcs_buf->bo);
2693 free(mt->mcs_buf);
2694 mt->mcs_buf = NULL;
2695
2696 /* Any pending MCS/CCS operations are no longer needed. Trying to
2697 * execute any will likely crash due to the missing aux buffer. So let's
2698 * delete all pending ops.
2699 */
2700 free(mt->aux_state);
2701 mt->aux_state = NULL;
2702 }
2703
2704 if (mt->hiz_buf) {
2705 intel_miptree_aux_buffer_free(mt->hiz_buf);
2706 mt->hiz_buf = NULL;
2707
2708 for (uint32_t l = mt->first_level; l <= mt->last_level; ++l) {
2709 mt->level[l].has_hiz = false;
2710 }
2711
2712 /* Any pending HiZ operations are no longer needed. Trying to execute
2713 * any will likely crash due to the missing aux buffer. So let's delete
2714 * all pending ops.
2715 */
2716 free(mt->aux_state);
2717 mt->aux_state = NULL;
2718 }
2719
2720 mt->aux_usage = ISL_AUX_USAGE_NONE;
2721 }
2722
2723
2724 /**
2725 * \brief Get pointer offset into stencil buffer.
2726 *
2727 * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
2728 * must decode the tile's layout in software.
2729 *
2730 * See
2731 * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
2732 * Format.
2733 * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
2734 *
2735 * Even though the returned offset is always positive, the return type is
2736 * signed due to
2737 * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
2738 * mesa: Fix return type of _mesa_get_format_bytes() (#37351)
2739 */
2740 static intptr_t
2741 intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
2742 {
2743 uint32_t tile_size = 4096;
2744 uint32_t tile_width = 64;
2745 uint32_t tile_height = 64;
2746 uint32_t row_size = 64 * stride / 2; /* Two rows are interleaved. */
2747
2748 uint32_t tile_x = x / tile_width;
2749 uint32_t tile_y = y / tile_height;
2750
2751 /* The byte's address relative to the tile's base address. */
2752 uint32_t byte_x = x % tile_width;
2753 uint32_t byte_y = y % tile_height;
2754
2755 uintptr_t u = tile_y * row_size
2756 + tile_x * tile_size
2757 + 512 * (byte_x / 8)
2758 + 64 * (byte_y / 8)
2759 + 32 * ((byte_y / 4) % 2)
2760 + 16 * ((byte_x / 4) % 2)
2761 + 8 * ((byte_y / 2) % 2)
2762 + 4 * ((byte_x / 2) % 2)
2763 + 2 * (byte_y % 2)
2764 + 1 * (byte_x % 2);
2765
2766 if (swizzled) {
2767 /* adjust for bit6 swizzling */
2768 if (((byte_x / 8) % 2) == 1) {
2769 if (((byte_y / 8) % 2) == 0) {
2770 u += 64;
2771 } else {
2772 u -= 64;
2773 }
2774 }
2775 }
2776
2777 return u;
2778 }
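/* Worked example (unswizzled, stride = 512): for (x, y) = (70, 3) we get
 * tile_x = 1, tile_y = 0, byte_x = 6 and byte_y = 3, so
 * u = 1 * 4096 + 16 + 8 + 4 + 2 = 4126. Note how the low bits of byte_x and
 * byte_y interleave; that interleaving is exactly the W-major tile layout
 * described in the PRM sections cited above.
 */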
2779
2780 void
2781 intel_miptree_updownsample(struct brw_context *brw,
2782 struct intel_mipmap_tree *src,
2783 struct intel_mipmap_tree *dst)
2784 {
2785 unsigned src_w = src->surf.logical_level0_px.width;
2786 unsigned src_h = src->surf.logical_level0_px.height;
2787 unsigned dst_w = dst->surf.logical_level0_px.width;
2788 unsigned dst_h = dst->surf.logical_level0_px.height;
2789
2790 brw_blorp_blit_miptrees(brw,
2791 src, 0 /* level */, 0 /* layer */,
2792 src->format, SWIZZLE_XYZW,
2793 dst, 0 /* level */, 0 /* layer */, dst->format,
2794 0, 0, src_w, src_h,
2795 0, 0, dst_w, dst_h,
2796 GL_NEAREST, false, false /*mirror x, y*/,
2797 false, false);
2798
2799 if (src->stencil_mt) {
2800 src_w = src->stencil_mt->surf.logical_level0_px.width;
2801 src_h = src->stencil_mt->surf.logical_level0_px.height;
2802 dst_w = dst->stencil_mt->surf.logical_level0_px.width;
2803 dst_h = dst->stencil_mt->surf.logical_level0_px.height;
2804
2805 brw_blorp_blit_miptrees(brw,
2806 src->stencil_mt, 0 /* level */, 0 /* layer */,
2807 src->stencil_mt->format, SWIZZLE_XYZW,
2808 dst->stencil_mt, 0 /* level */, 0 /* layer */,
2809 dst->stencil_mt->format,
2810 0, 0, src_w, src_h,
2811 0, 0, dst_w, dst_h,
2812 GL_NEAREST, false, false /*mirror x, y*/,
2813 false, false /* decode/encode srgb */);
2814 }
2815 }
2816
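/* Prior to gen8 the sampler cannot read the W-tiled stencil buffer directly,
 * so stencil texturing goes through this Y-tiled R8_UINT shadow copy, which
 * is refreshed here whenever the real stencil data may have changed (hence
 * the gen >= 8 early-out below).
 */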
2817 void
2818 intel_update_r8stencil(struct brw_context *brw,
2819 struct intel_mipmap_tree *mt)
2820 {
2821 assert(brw->gen >= 7);
2822 struct intel_mipmap_tree *src =
2823 mt->format == MESA_FORMAT_S_UINT8 ? mt : mt->stencil_mt;
2824 if (!src || brw->gen >= 8 || !src->r8stencil_needs_update)
2825 return;
2826
2827 assert(src->surf.size > 0);
2828
2829 if (!mt->r8stencil_mt) {
2830 assert(brw->gen > 6); /* Handle MIPTREE_LAYOUT_GEN6_HIZ_STENCIL */
2831 mt->r8stencil_mt = make_surface(
2832 brw,
2833 src->target,
2834 MESA_FORMAT_R_UINT8,
2835 src->first_level, src->last_level,
2836 src->surf.logical_level0_px.width,
2837 src->surf.logical_level0_px.height,
2838 src->surf.dim == ISL_SURF_DIM_3D ?
2839 src->surf.logical_level0_px.depth :
2840 src->surf.logical_level0_px.array_len,
2841 src->surf.samples,
2842 ISL_TILING_Y0_BIT,
2843 ISL_SURF_USAGE_TEXTURE_BIT,
2844 BO_ALLOC_FOR_RENDER, 0, NULL);
2845 assert(mt->r8stencil_mt);
2846 }
2847
2848 struct intel_mipmap_tree *dst = mt->r8stencil_mt;
2849
2850 for (int level = src->first_level; level <= src->last_level; level++) {
2851 const unsigned depth = src->surf.dim == ISL_SURF_DIM_3D ?
2852 minify(src->surf.phys_level0_sa.depth, level) :
2853 src->surf.phys_level0_sa.array_len;
2854
2855 for (unsigned layer = 0; layer < depth; layer++) {
2856 brw_blorp_copy_miptrees(brw,
2857 src, level, layer,
2858 dst, level, layer,
2859 0, 0, 0, 0,
2860 minify(src->surf.logical_level0_px.width,
2861 level),
2862 minify(src->surf.logical_level0_px.height,
2863 level));
2864 }
2865 }
2866
2867 brw_render_cache_set_check_flush(brw, dst->bo);
2868 src->r8stencil_needs_update = false;
2869 }
2870
2871 static void *
2872 intel_miptree_map_raw(struct brw_context *brw,
2873 struct intel_mipmap_tree *mt,
2874 GLbitfield mode)
2875 {
2876 struct brw_bo *bo = mt->bo;
2877
2878 if (brw_batch_references(&brw->batch, bo))
2879 intel_batchbuffer_flush(brw);
2880
2881 return brw_bo_map(brw, bo, mode);
2882 }
2883
2884 static void
2885 intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
2886 {
2887 brw_bo_unmap(mt->bo);
2888 }
2889
2890 static void
2891 intel_miptree_map_gtt(struct brw_context *brw,
2892 struct intel_mipmap_tree *mt,
2893 struct intel_miptree_map *map,
2894 unsigned int level, unsigned int slice)
2895 {
2896 unsigned int bw, bh;
2897 void *base;
2898 unsigned int image_x, image_y;
2899 intptr_t x = map->x;
2900 intptr_t y = map->y;
2901
2902 /* For compressed formats, the stride is the number of bytes per
2903 * row of blocks. intel_miptree_get_image_offset() already does
2904 * the divide.
2905 */
2906 _mesa_get_format_block_size(mt->format, &bw, &bh);
2907 assert(y % bh == 0);
2908 assert(x % bw == 0);
2909 y /= bh;
2910 x /= bw;
2911
2912 base = intel_miptree_map_raw(brw, mt, map->mode);
2913
2914 if (base == NULL)
2915 map->ptr = NULL;
2916 else {
2917 base += mt->offset;
2918
2919 /* Note that in the case of cube maps, the caller must have passed the
2920 * slice number referencing the face.
2921 */
2922 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2923 x += image_x;
2924 y += image_y;
2925
2926 map->stride = mt->surf.row_pitch;
2927 map->ptr = base + y * map->stride + x * mt->cpp;
2928 }
2929
2930 DBG("%s: %d,%d %dx%d from mt %p (%s) "
2931 "%"PRIiPTR",%"PRIiPTR" = %p/%d\n", __func__,
2932 map->x, map->y, map->w, map->h,
2933 mt, _mesa_get_format_name(mt->format),
2934 x, y, map->ptr, map->stride);
2935 }
2936
2937 static void
2938 intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
2939 {
2940 intel_miptree_unmap_raw(mt);
2941 }
2942
2943 static void
2944 intel_miptree_map_blit(struct brw_context *brw,
2945 struct intel_mipmap_tree *mt,
2946 struct intel_miptree_map *map,
2947 unsigned int level, unsigned int slice)
2948 {
2949 map->linear_mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
2950 /* first_level */ 0,
2951 /* last_level */ 0,
2952 map->w, map->h, 1,
2953 /* samples */ 1,
2954 MIPTREE_LAYOUT_TILING_NONE);
2955
2956 if (!map->linear_mt) {
2957 fprintf(stderr, "Failed to allocate blit temporary\n");
2958 goto fail;
2959 }
2960 map->stride = map->linear_mt->surf.row_pitch;
2961
2962 /* At least one of READ_BIT and WRITE_BIT is set. READ_BIT implies no
2963 * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
2964 * invalidate is set, since we'll be writing the whole rectangle from our
2965 * temporary buffer back out.
2966 */
2967 if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
2968 if (!intel_miptree_copy(brw,
2969 mt, level, slice, map->x, map->y,
2970 map->linear_mt, 0, 0, 0, 0,
2971 map->w, map->h)) {
2972 fprintf(stderr, "Failed to blit\n");
2973 goto fail;
2974 }
2975 }
2976
2977 map->ptr = intel_miptree_map_raw(brw, map->linear_mt, map->mode);
2978
2979 DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
2980 map->x, map->y, map->w, map->h,
2981 mt, _mesa_get_format_name(mt->format),
2982 level, slice, map->ptr, map->stride);
2983
2984 return;
2985
2986 fail:
2987 intel_miptree_release(&map->linear_mt);
2988 map->ptr = NULL;
2989 map->stride = 0;
2990 }
2991
2992 static void
2993 intel_miptree_unmap_blit(struct brw_context *brw,
2994 struct intel_mipmap_tree *mt,
2995 struct intel_miptree_map *map,
2996 unsigned int level,
2997 unsigned int slice)
2998 {
2999 struct gl_context *ctx = &brw->ctx;
3000
3001 intel_miptree_unmap_raw(map->linear_mt);
3002
3003 if (map->mode & GL_MAP_WRITE_BIT) {
3004 bool ok = intel_miptree_copy(brw,
3005 map->linear_mt, 0, 0, 0, 0,
3006 mt, level, slice, map->x, map->y,
3007 map->w, map->h);
3008 WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
3009 }
3010
3011 intel_miptree_release(&map->linear_mt);
3012 }
3013
3014 /**
3015 * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
3016 */
3017 #if defined(USE_SSE41)
3018 static void
3019 intel_miptree_map_movntdqa(struct brw_context *brw,
3020 struct intel_mipmap_tree *mt,
3021 struct intel_miptree_map *map,
3022 unsigned int level, unsigned int slice)
3023 {
3024 assert(map->mode & GL_MAP_READ_BIT);
3025 assert(!(map->mode & GL_MAP_WRITE_BIT));
3026
3027 DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
3028 map->x, map->y, map->w, map->h,
3029 mt, _mesa_get_format_name(mt->format),
3030 level, slice, map->ptr, map->stride);
3031
3032 /* Map the original image */
3033 uint32_t image_x;
3034 uint32_t image_y;
3035 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3036 image_x += map->x;
3037 image_y += map->y;
3038
3039 void *src = intel_miptree_map_raw(brw, mt, map->mode);
3040 if (!src)
3041 return;
3042
3043 src += mt->offset;
3044
3045 src += image_y * mt->surf.row_pitch;
3046 src += image_x * mt->cpp;
3047
3048 /* Due to the pixel offsets for the particular image being mapped, our
3049 * src pointer may not be 16-byte aligned. However, if the pitch is
3050 * divisible by 16, then the amount by which it's misaligned will remain
3051 * consistent from row to row.
3052 */
3053 assert((mt->surf.row_pitch % 16) == 0);
3054 const int misalignment = ((uintptr_t) src) & 15;
3055
3056 /* Create an untiled temporary buffer for the mapping. */
3057 const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w);
3058
3059 map->stride = ALIGN(misalignment + width_bytes, 16);
3060
3061 map->buffer = _mesa_align_malloc(map->stride * map->h, 16);
3062 /* Offset the destination so it has the same misalignment as src. */
3063 map->ptr = map->buffer + misalignment;
3064
3065 assert((((uintptr_t) map->ptr) & 15) == misalignment);
3066
3067 for (uint32_t y = 0; y < map->h; y++) {
3068 void *dst_ptr = map->ptr + y * map->stride;
3069 void *src_ptr = src + y * mt->surf.row_pitch;
3070
3071 _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
3072 }
3073
3074 intel_miptree_unmap_raw(mt);
3075 }
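/* An arithmetic sketch of the alignment handling above: with a row pitch
 * that is a multiple of 16 and a src pointer whose low four bits are 8,
 * misalignment == 8. For 100 bytes of pixel data per row, map->stride
 * becomes ALIGN(8 + 100, 16) == 112 and map->ptr = map->buffer + 8, so every
 * destination row has the same 16-byte phase as its source row, keeping the
 * misalignment consistent from row to row as the comment above requires.
 */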
3076
3077 static void
3078 intel_miptree_unmap_movntdqa(struct brw_context *brw,
3079 struct intel_mipmap_tree *mt,
3080 struct intel_miptree_map *map,
3081 unsigned int level,
3082 unsigned int slice)
3083 {
3084 _mesa_align_free(map->buffer);
3085 map->buffer = NULL;
3086 map->ptr = NULL;
3087 }
3088 #endif
3089
3090 static void
3091 intel_miptree_map_s8(struct brw_context *brw,
3092 struct intel_mipmap_tree *mt,
3093 struct intel_miptree_map *map,
3094 unsigned int level, unsigned int slice)
3095 {
3096 map->stride = map->w;
3097 map->buffer = map->ptr = malloc(map->stride * map->h);
3098 if (!map->buffer)
3099 return;
3100
3101 /* At least one of READ_BIT and WRITE_BIT is set. READ_BIT implies no
3102 * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
3103 * invalidate is set, since we'll be writing the whole rectangle from our
3104 * temporary buffer back out.
3105 */
3106 if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
3107 uint8_t *untiled_s8_map = map->ptr;
3108 uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_READ_BIT);
3109 unsigned int image_x, image_y;
3110
3111 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3112
3113 for (uint32_t y = 0; y < map->h; y++) {
3114 for (uint32_t x = 0; x < map->w; x++) {
3115 ptrdiff_t offset = intel_offset_S8(mt->surf.row_pitch,
3116 x + image_x + map->x,
3117 y + image_y + map->y,
3118 brw->has_swizzling);
3119 untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
3120 }
3121 }
3122
3123 intel_miptree_unmap_raw(mt);
3124
3125 DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__,
3126 map->x, map->y, map->w, map->h,
3127 mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
3128 } else {
3129 DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
3130 map->x, map->y, map->w, map->h,
3131 mt, map->ptr, map->stride);
3132 }
3133 }
3134
3135 static void
3136 intel_miptree_unmap_s8(struct brw_context *brw,
3137 struct intel_mipmap_tree *mt,
3138 struct intel_miptree_map *map,
3139 unsigned int level,
3140 unsigned int slice)
3141 {
3142 if (map->mode & GL_MAP_WRITE_BIT) {
3143 unsigned int image_x, image_y;
3144 uint8_t *untiled_s8_map = map->ptr;
3145 uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT);
3146
3147 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3148
3149 for (uint32_t y = 0; y < map->h; y++) {
3150 for (uint32_t x = 0; x < map->w; x++) {
3151 ptrdiff_t offset = intel_offset_S8(mt->surf.row_pitch,
3152 image_x + x + map->x,
3153 image_y + y + map->y,
3154 brw->has_swizzling);
3155 tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
3156 }
3157 }
3158
3159 intel_miptree_unmap_raw(mt);
3160 }
3161
3162 free(map->buffer);
3163 }
3164
3165 static void
3166 intel_miptree_map_etc(struct brw_context *brw,
3167 struct intel_mipmap_tree *mt,
3168 struct intel_miptree_map *map,
3169 unsigned int level,
3170 unsigned int slice)
3171 {
3172 assert(mt->etc_format != MESA_FORMAT_NONE);
3173 if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
3174 assert(mt->format == MESA_FORMAT_R8G8B8X8_UNORM);
3175 }
3176
3177 assert(map->mode & GL_MAP_WRITE_BIT);
3178 assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
3179
3180 map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
3181 map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
3182 map->w, map->h, 1));
3183 map->ptr = map->buffer;
3184 }
3185
3186 static void
3187 intel_miptree_unmap_etc(struct brw_context *brw,
3188 struct intel_mipmap_tree *mt,
3189 struct intel_miptree_map *map,
3190 unsigned int level,
3191 unsigned int slice)
3192 {
3193 uint32_t image_x;
3194 uint32_t image_y;
3195 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3196
3197 image_x += map->x;
3198 image_y += map->y;
3199
3200 uint8_t *dst = intel_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT)
3201 + image_y * mt->surf.row_pitch
3202 + image_x * mt->cpp;
3203
3204 if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
3205 _mesa_etc1_unpack_rgba8888(dst, mt->surf.row_pitch,
3206 map->ptr, map->stride,
3207 map->w, map->h);
3208 else
3209 _mesa_unpack_etc2_format(dst, mt->surf.row_pitch,
3210 map->ptr, map->stride,
3211 map->w, map->h, mt->etc_format);
3212
3213 intel_miptree_unmap_raw(mt);
3214 free(map->buffer);
3215 }
3216
3217 /**
3218 * Mapping function for packed depth/stencil miptrees backed by real separate
3219 * miptrees for depth and stencil.
3220 *
3221 * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
3222 * separate from the depth buffer. Yet at the GL API level, we have to expose
3223 * packed depth/stencil textures and FBO attachments, and Mesa core expects to
3224 * be able to map that memory for texture storage and glReadPixels-type
3225 * operations. We give Mesa core that access by mallocing a temporary and
3226 * copying the data between the actual backing store and the temporary.
3227 */
3228 static void
3229 intel_miptree_map_depthstencil(struct brw_context *brw,
3230 struct intel_mipmap_tree *mt,
3231 struct intel_miptree_map *map,
3232 unsigned int level, unsigned int slice)
3233 {
3234 struct intel_mipmap_tree *z_mt = mt;
3235 struct intel_mipmap_tree *s_mt = mt->stencil_mt;
3236 bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
3237 int packed_bpp = map_z32f_x24s8 ? 8 : 4;
3238
3239 map->stride = map->w * packed_bpp;
3240 map->buffer = map->ptr = malloc(map->stride * map->h);
3241 if (!map->buffer)
3242 return;
3243
3244 /* At least one of READ_BIT and WRITE_BIT is set. READ_BIT implies no
3245 * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
3246 * invalidate is set, since we'll be writing the whole rectangle from our
3247 * temporary buffer back out.
3248 */
3249 if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
3250 uint32_t *packed_map = map->ptr;
3251 uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_READ_BIT);
3252 uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_READ_BIT);
3253 unsigned int s_image_x, s_image_y;
3254 unsigned int z_image_x, z_image_y;
3255
3256 intel_miptree_get_image_offset(s_mt, level, slice,
3257 &s_image_x, &s_image_y);
3258 intel_miptree_get_image_offset(z_mt, level, slice,
3259 &z_image_x, &z_image_y);
3260
3261 for (uint32_t y = 0; y < map->h; y++) {
3262 for (uint32_t x = 0; x < map->w; x++) {
3263 int map_x = map->x + x, map_y = map->y + y;
3264 ptrdiff_t s_offset = intel_offset_S8(s_mt->surf.row_pitch,
3265 map_x + s_image_x,
3266 map_y + s_image_y,
3267 brw->has_swizzling);
3268 ptrdiff_t z_offset = ((map_y + z_image_y) *
3269 (z_mt->surf.row_pitch / 4) +
3270 (map_x + z_image_x));
3271 uint8_t s = s_map[s_offset];
3272 uint32_t z = z_map[z_offset];
3273
3274 if (map_z32f_x24s8) {
3275 packed_map[(y * map->w + x) * 2 + 0] = z;
3276 packed_map[(y * map->w + x) * 2 + 1] = s;
3277 } else {
3278 packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
3279 }
3280 }
3281 }
3282
3283 intel_miptree_unmap_raw(s_mt);
3284 intel_miptree_unmap_raw(z_mt);
3285
3286 DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
3287 __func__,
3288 map->x, map->y, map->w, map->h,
3289 z_mt, map->x + z_image_x, map->y + z_image_y,
3290 s_mt, map->x + s_image_x, map->y + s_image_y,
3291 map->ptr, map->stride);
3292 } else {
3293 DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
3294 map->x, map->y, map->w, map->h,
3295 mt, map->ptr, map->stride);
3296 }
3297 }
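/* The packed layouts produced above: for Z24S8 each texel is a single dword,
 * (s << 24) | (z & 0x00ffffff); for Z32F_X24S8 each texel is two dwords, the
 * first holding the raw float depth bits and the second the stencil byte.
 */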
3298
3299 static void
3300 intel_miptree_unmap_depthstencil(struct brw_context *brw,
3301 struct intel_mipmap_tree *mt,
3302 struct intel_miptree_map *map,
3303 unsigned int level,
3304 unsigned int slice)
3305 {
3306 struct intel_mipmap_tree *z_mt = mt;
3307 struct intel_mipmap_tree *s_mt = mt->stencil_mt;
3308 bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
3309
3310 if (map->mode & GL_MAP_WRITE_BIT) {
3311 uint32_t *packed_map = map->ptr;
3312 uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_WRITE_BIT);
3313 uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_WRITE_BIT);
3314 unsigned int s_image_x, s_image_y;
3315 unsigned int z_image_x, z_image_y;
3316
3317 intel_miptree_get_image_offset(s_mt, level, slice,
3318 &s_image_x, &s_image_y);
3319 intel_miptree_get_image_offset(z_mt, level, slice,
3320 &z_image_x, &z_image_y);
3321
3322 for (uint32_t y = 0; y < map->h; y++) {
3323 for (uint32_t x = 0; x < map->w; x++) {
3324 ptrdiff_t s_offset = intel_offset_S8(s_mt->surf.row_pitch,
3325 x + s_image_x + map->x,
3326 y + s_image_y + map->y,
3327 brw->has_swizzling);
3328 ptrdiff_t z_offset = ((y + z_image_y + map->y) *
3329 (z_mt->surf.row_pitch / 4) +
3330 (x + z_image_x + map->x));
3331
3332 if (map_z32f_x24s8) {
3333 z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
3334 s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
3335 } else {
3336 uint32_t packed = packed_map[y * map->w + x];
3337 s_map[s_offset] = packed >> 24;
3338 z_map[z_offset] = packed;
3339 }
3340 }
3341 }
3342
3343 intel_miptree_unmap_raw(s_mt);
3344 intel_miptree_unmap_raw(z_mt);
3345
3346 DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
3347 __func__,
3348 map->x, map->y, map->w, map->h,
3349 z_mt, _mesa_get_format_name(z_mt->format),
3350 map->x + z_image_x, map->y + z_image_y,
3351 s_mt, map->x + s_image_x, map->y + s_image_y,
3352 map->ptr, map->stride);
3353 }
3354
3355 free(map->buffer);
3356 }
3357
3358 /**
3359 * Create and attach a map to the miptree at (level, slice). Return the
3360 * attached map.
3361 */
3362 static struct intel_miptree_map*
3363 intel_miptree_attach_map(struct intel_mipmap_tree *mt,
3364 unsigned int level,
3365 unsigned int slice,
3366 unsigned int x,
3367 unsigned int y,
3368 unsigned int w,
3369 unsigned int h,
3370 GLbitfield mode)
3371 {
3372 struct intel_miptree_map *map = calloc(1, sizeof(*map));
3373
3374 if (!map)
3375 return NULL;
3376
3377 assert(mt->level[level].slice[slice].map == NULL);
3378 mt->level[level].slice[slice].map = map;
3379
3380 map->mode = mode;
3381 map->x = x;
3382 map->y = y;
3383 map->w = w;
3384 map->h = h;
3385
3386 return map;
3387 }
3388
3389 /**
3390 * Release the map at (level, slice).
3391 */
3392 static void
3393 intel_miptree_release_map(struct intel_mipmap_tree *mt,
3394 unsigned int level,
3395 unsigned int slice)
3396 {
3397 struct intel_miptree_map **map;
3398
3399 map = &mt->level[level].slice[slice].map;
3400 free(*map);
3401 *map = NULL;
3402 }
3403
3404 static bool
3405 can_blit_slice(struct intel_mipmap_tree *mt,
3406 unsigned int level, unsigned int slice)
3407 {
3408 /* See intel_miptree_blit() for details on the 32k pitch limit. */
3409 if (mt->surf.row_pitch >= 32768)
3410 return false;
3411
3412 return true;
3413 }
3414
3415 static bool
3416 use_intel_miptree_map_blit(struct brw_context *brw,
3417 struct intel_mipmap_tree *mt,
3418 GLbitfield mode,
3419 unsigned int level,
3420 unsigned int slice)
3421 {
3422 if (brw->has_llc &&
3423 /* It's probably not worth swapping to the blit ring because of
3424 * all the overhead involved.
3425 */
3426 !(mode & GL_MAP_WRITE_BIT) &&
3427 !mt->compressed &&
3428 (mt->surf.tiling == ISL_TILING_X ||
3429 /* Prior to Sandybridge, the blitter can't handle Y tiling */
3430 (brw->gen >= 6 && mt->surf.tiling == ISL_TILING_Y0) ||
3431 /* Fast copy blit on skl+ supports all tiling formats. */
3432 brw->gen >= 9) &&
3433 can_blit_slice(mt, level, slice))
3434 return true;
3435
3436 if (mt->surf.tiling != ISL_TILING_LINEAR &&
3437 mt->bo->size >= brw->max_gtt_map_object_size) {
3438 assert(can_blit_slice(mt, level, slice));
3439 return true;
3440 }
3441
3442 return false;
3443 }
3444
3445 /**
3446 * Parameter \a out_stride has type ptrdiff_t not because the buffer stride may
3447 * exceed 32 bits but to diminish the likelihood of subtle bugs caused by
3448 * pointer arithmetic overflow.
3449 *
3450 * If you call this function and use \a out_stride, then you're doing pointer
3451 * arithmetic on \a out_ptr. The type of \a out_stride doesn't prevent all
3452 * bugs. The caller must still take care to avoid 32-bit overflow errors in
3453 * all arithmetic expressions that contain buffer offsets and pixel sizes,
3454 * which usually have type uint32_t or GLuint.
3455 */
3456 void
3457 intel_miptree_map(struct brw_context *brw,
3458 struct intel_mipmap_tree *mt,
3459 unsigned int level,
3460 unsigned int slice,
3461 unsigned int x,
3462 unsigned int y,
3463 unsigned int w,
3464 unsigned int h,
3465 GLbitfield mode,
3466 void **out_ptr,
3467 ptrdiff_t *out_stride)
3468 {
3469 struct intel_miptree_map *map;
3470
3471 assert(mt->surf.samples == 1);
3472
3473 map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
3474 if (!map) {
3475 *out_ptr = NULL;
3476 *out_stride = 0;
3477 return;
3478 }
3479
3480 intel_miptree_access_raw(brw, mt, level, slice,
3481 map->mode & GL_MAP_WRITE_BIT);
3482
3483 if (mt->format == MESA_FORMAT_S_UINT8) {
3484 intel_miptree_map_s8(brw, mt, map, level, slice);
3485 } else if (mt->etc_format != MESA_FORMAT_NONE &&
3486 !(mode & BRW_MAP_DIRECT_BIT)) {
3487 intel_miptree_map_etc(brw, mt, map, level, slice);
3488 } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
3489 intel_miptree_map_depthstencil(brw, mt, map, level, slice);
3490 } else if (use_intel_miptree_map_blit(brw, mt, mode, level, slice)) {
3491 intel_miptree_map_blit(brw, mt, map, level, slice);
3492 #if defined(USE_SSE41)
3493 } else if (!(mode & GL_MAP_WRITE_BIT) &&
3494 !mt->compressed && cpu_has_sse4_1 &&
3495 (mt->surf.row_pitch % 16 == 0)) {
3496 intel_miptree_map_movntdqa(brw, mt, map, level, slice);
3497 #endif
3498 } else {
3499 intel_miptree_map_gtt(brw, mt, map, level, slice);
3500 }
3501
3502 *out_ptr = map->ptr;
3503 *out_stride = map->stride;
3504
3505 if (map->ptr == NULL)
3506 intel_miptree_release_map(mt, level, slice);
3507 }
3508
3509 void
3510 intel_miptree_unmap(struct brw_context *brw,
3511 struct intel_mipmap_tree *mt,
3512 unsigned int level,
3513 unsigned int slice)
3514 {
3515 struct intel_miptree_map *map = mt->level[level].slice[slice].map;
3516
3517 assert(mt->surf.samples == 1);
3518
3519 if (!map)
3520 return;
3521
3522 DBG("%s: mt %p (%s) level %d slice %d\n", __func__,
3523 mt, _mesa_get_format_name(mt->format), level, slice);
3524
3525 if (mt->format == MESA_FORMAT_S_UINT8) {
3526 intel_miptree_unmap_s8(brw, mt, map, level, slice);
3527 } else if (mt->etc_format != MESA_FORMAT_NONE &&
3528 !(map->mode & BRW_MAP_DIRECT_BIT)) {
3529 intel_miptree_unmap_etc(brw, mt, map, level, slice);
3530 } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
3531 intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
3532 } else if (map->linear_mt) {
3533 intel_miptree_unmap_blit(brw, mt, map, level, slice);
3534 #if defined(USE_SSE41)
3535 } else if (map->buffer && cpu_has_sse4_1) {
3536 intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
3537 #endif
3538 } else {
3539 intel_miptree_unmap_gtt(mt);
3540 }
3541
3542 intel_miptree_release_map(mt, level, slice);
3543 }
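/* Typical map/unmap usage (an illustrative sketch; error handling is elided
 * and w, h, src, src_pitch and cpp are placeholders for real texstore
 * parameters):
 *
 *    void *ptr;
 *    ptrdiff_t stride;
 *    intel_miptree_map(brw, mt, level, slice, x, y, w, h,
 *                      GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,
 *                      &ptr, &stride);
 *    if (ptr) {
 *       for (unsigned row = 0; row < h; row++)
 *          memcpy((char *)ptr + row * stride, src + row * src_pitch,
 *                 w * cpp);
 *       intel_miptree_unmap(brw, mt, level, slice);
 *    }
 *
 * Note that the returned stride is a ptrdiff_t precisely so that the
 * row-offset arithmetic above happens in pointer-sized integers.
 */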
3544
3545 enum isl_surf_dim
3546 get_isl_surf_dim(GLenum target)
3547 {
3548 switch (target) {
3549 case GL_TEXTURE_1D:
3550 case GL_TEXTURE_1D_ARRAY:
3551 return ISL_SURF_DIM_1D;
3552
3553 case GL_TEXTURE_2D:
3554 case GL_TEXTURE_2D_ARRAY:
3555 case GL_TEXTURE_RECTANGLE:
3556 case GL_TEXTURE_CUBE_MAP:
3557 case GL_TEXTURE_CUBE_MAP_ARRAY:
3558 case GL_TEXTURE_2D_MULTISAMPLE:
3559 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
3560 case GL_TEXTURE_EXTERNAL_OES:
3561 return ISL_SURF_DIM_2D;
3562
3563 case GL_TEXTURE_3D:
3564 return ISL_SURF_DIM_3D;
3565 }
3566
3567 unreachable("Invalid texture target");
3568 }
3569
3570 enum isl_dim_layout
3571 get_isl_dim_layout(const struct gen_device_info *devinfo,
3572 enum isl_tiling tiling, GLenum target)
3573 {
3574 switch (target) {
3575 case GL_TEXTURE_1D:
3576 case GL_TEXTURE_1D_ARRAY:
3577 return (devinfo->gen >= 9 && tiling == ISL_TILING_LINEAR ?
3578 ISL_DIM_LAYOUT_GEN9_1D : ISL_DIM_LAYOUT_GEN4_2D);
3579
3580 case GL_TEXTURE_2D:
3581 case GL_TEXTURE_2D_ARRAY:
3582 case GL_TEXTURE_RECTANGLE:
3583 case GL_TEXTURE_2D_MULTISAMPLE:
3584 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
3585 case GL_TEXTURE_EXTERNAL_OES:
3586 return ISL_DIM_LAYOUT_GEN4_2D;
3587
3588 case GL_TEXTURE_CUBE_MAP:
3589 case GL_TEXTURE_CUBE_MAP_ARRAY:
3590 return (devinfo->gen == 4 ? ISL_DIM_LAYOUT_GEN4_3D :
3591 ISL_DIM_LAYOUT_GEN4_2D);
3592
3593 case GL_TEXTURE_3D:
3594 return (devinfo->gen >= 9 ?
3595 ISL_DIM_LAYOUT_GEN4_2D : ISL_DIM_LAYOUT_GEN4_3D);
3596 }
3597
3598 unreachable("Invalid texture target");
3599 }
3600
3601 enum isl_aux_usage
3602 intel_miptree_get_aux_isl_usage(const struct brw_context *brw,
3603 const struct intel_mipmap_tree *mt)
3604 {
3605 if (mt->hiz_buf)
3606 return ISL_AUX_USAGE_HIZ;
3607
3608 if (!mt->mcs_buf)
3609 return ISL_AUX_USAGE_NONE;
3610
3611 return mt->aux_usage;
3612 }