i915: Fork the shared code from i965.
mesa.git: src/mesa/drivers/dri/i915/intel_mipmap_tree.c
1 /**************************************************************************
2 *
3 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <GL/gl.h>
29 #include <GL/internal/dri_interface.h>
30
31 #include "intel_batchbuffer.h"
32 #include "intel_chipset.h"
33 #include "intel_context.h"
34 #include "intel_mipmap_tree.h"
35 #include "intel_regions.h"
36 #include "intel_resolve_map.h"
37 #include "intel_tex_layout.h"
38 #include "intel_tex.h"
39 #include "intel_blit.h"
40
41 #ifndef I915
42 #include "brw_blorp.h"
43 #endif
44
45 #include "main/enums.h"
46 #include "main/formats.h"
47 #include "main/glformats.h"
48 #include "main/texcompress_etc.h"
49 #include "main/teximage.h"
50
51 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
52
53 static GLenum
54 target_to_target(GLenum target)
55 {
56 switch (target) {
57 case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
58 case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
59 case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
60 case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
61 case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
62 case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
63 return GL_TEXTURE_CUBE_MAP_ARB;
64 default:
65 return target;
66 }
67 }
68
69
70 /**
71 * Determine which MSAA layout should be used by the MSAA surface being
72 * created, based on the chip generation and the surface type.
73 */
74 static enum intel_msaa_layout
75 compute_msaa_layout(struct intel_context *intel, gl_format format, GLenum target)
76 {
77 /* Prior to Gen7, all MSAA surfaces used IMS layout. */
78 if (intel->gen < 7)
79 return INTEL_MSAA_LAYOUT_IMS;
80
81 /* In Gen7, IMS layout is only used for depth and stencil buffers. */
82 switch (_mesa_get_format_base_format(format)) {
83 case GL_DEPTH_COMPONENT:
84 case GL_STENCIL_INDEX:
85 case GL_DEPTH_STENCIL:
86 return INTEL_MSAA_LAYOUT_IMS;
87 default:
88 /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
89 *
90 * This field must be set to 0 for all SINT MSRTs when all RT channels
91 * are not written
92 *
93 * In practice this means that we have to disable MCS for all signed
94 * integer MSAA buffers. The alternative, to disable MCS only when one
95 * of the render target channels is disabled, is impractical because it
96 * would require converting between CMS and UMS MSAA layouts on the fly,
97 * which is expensive.
98 */
99 if (_mesa_get_format_datatype(format) == GL_INT) {
100 /* TODO: is this workaround needed for future chipsets? */
101 assert(intel->gen == 7);
102 return INTEL_MSAA_LAYOUT_UMS;
103 } else {
104 /* For now, if we're going to be texturing from this surface,
105 * force UMS, so that the shader doesn't have to do different things
106 * based on whether there's a multisample control surface that needs to be sampled first.
107 * We can't just blindly read the MCS surface in all cases because:
108 *
109 * From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
110 *
111 * If this field is disabled and the sampling engine <ld_mcs> message
112 * is issued on this surface, the MCS surface may be accessed. Software
113 * must ensure that the surface is defined to avoid GTT errors.
114 */
115 if (target == GL_TEXTURE_2D_MULTISAMPLE ||
116 target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
117 return INTEL_MSAA_LAYOUT_UMS;
118 } else {
119 return INTEL_MSAA_LAYOUT_CMS;
120 }
121 }
122 }
123 }
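
/* A hedged summary of the decision tree above (illustrative only, not a
 * table from the PRM):
 *
 *    gen < 7                              -> IMS
 *    gen 7, depth/stencil format          -> IMS
 *    gen 7, signed-integer color          -> UMS
 *    gen 7, color, multisample texture    -> UMS
 *    gen 7, color, renderbuffer-only      -> CMS
 */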
124
125
126 /**
127 * For single-sampled render targets ("non-MSRT"), the MCS buffer is a
128 * scaled-down bitfield representation of the color buffer which is capable of
129 * recording when blocks of the color buffer are equal to the clear value.
130 * This function returns the block size that will be used by the MCS buffer
131 * corresponding to a certain color miptree.
132 *
133 * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
134 * beneath the "Fast Color Clear" bullet (p327):
135 *
136 * The following table describes the RT alignment
137 *
138 *                  Pixels  Lines
139 *    TiledY RT CL
140 *        bpp
141 *         32          8      4
142 *         64          4      4
143 *        128          2      4
144 *    TiledX RT CL
145 *        bpp
146 *         32         16      2
147 *         64          8      2
148 *        128          4      2
149 *
150 * This alignment has the following uses:
151 *
152 * - For figuring out the size of the MCS buffer. Each 4k tile in the MCS
153 * buffer contains 128 blocks horizontally and 256 blocks vertically.
154 *
155 * - For figuring out alignment restrictions for a fast clear operation. Fast
156 * clear operations must always clear aligned multiples of 16 blocks
157 * horizontally and 32 blocks vertically.
158 *
159 * - For scaling down the coordinates sent through the render pipeline during
160 * a fast clear. X coordinates must be scaled down by 8 times the block
161 * width, and Y coordinates by 16 times the block height.
162 *
163 * - For scaling down the coordinates sent through the render pipeline during
164 * a "Render Target Resolve" operation. X coordinates must be scaled down
165 * by half the block width, and Y coordinates by half the block height.
166 */
167 void
168 intel_get_non_msrt_mcs_alignment(struct intel_context *intel,
169 struct intel_mipmap_tree *mt,
170 unsigned *width_px, unsigned *height)
171 {
172 switch (mt->region->tiling) {
173 default:
174 assert(!"Non-MSRT MCS requires X or Y tiling");
175 /* In release builds, fall through */
176 case I915_TILING_Y:
177 *width_px = 32 / mt->cpp;
178 *height = 4;
179 break;
180 case I915_TILING_X:
181 *width_px = 64 / mt->cpp;
182 *height = 2;
183 }
184 }
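
/* A minimal sketch (not part of the original driver) of how the alignment
 * above feeds the fast-clear rules listed in the comment: clear rectangles
 * must cover whole multiples of 16x32 blocks, and the rectangle sent down
 * the pipeline is scaled down by 8x the block width and 16x the block
 * height. All names here are hypothetical.
 */
static inline void
fast_clear_rect_sketch(unsigned block_w_px, unsigned block_h,
                       unsigned *x0, unsigned *y0,
                       unsigned *x1, unsigned *y1)
{
   unsigned x_align = block_w_px * 16, y_align = block_h * 32;

   /* Round the rectangle out to the required block alignment. */
   *x0 -= *x0 % x_align;
   *y0 -= *y0 % y_align;
   *x1 = ALIGN(*x1, x_align);
   *y1 = ALIGN(*y1, y_align);

   /* Scale down the coordinates actually sent through the pipeline. */
   *x0 /= block_w_px * 8;
   *x1 /= block_w_px * 8;
   *y0 /= block_h * 16;
   *y1 /= block_h * 16;
}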
185
186
187 /**
188 * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
189 * can be used.
190 *
191 * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
192 * beneath the "Fast Color Clear" bullet (p326):
193 *
194 * - Support is limited to tiled render targets.
195 * - Support is for non-mip-mapped and non-array surface types only.
196 *
197 * And then later, on p327:
198 *
199 * - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
200 * 64bpp, and 128bpp.
201 */
202 bool
203 intel_is_non_msrt_mcs_buffer_supported(struct intel_context *intel,
204 struct intel_mipmap_tree *mt)
205 {
206 #ifdef I915
207 /* MCS is not supported on the i915 (pre-Gen4) driver */
208 return false;
209 #else
210 struct brw_context *brw = brw_context(&intel->ctx);
211
212 /* MCS support does not exist prior to Gen7 */
213 if (intel->gen < 7)
214 return false;
215
216 /* MCS is only supported for color buffers */
217 switch (_mesa_get_format_base_format(mt->format)) {
218 case GL_DEPTH_COMPONENT:
219 case GL_DEPTH_STENCIL:
220 case GL_STENCIL_INDEX:
221 return false;
222 }
223
224 if (mt->region->tiling != I915_TILING_X &&
225 mt->region->tiling != I915_TILING_Y)
226 return false;
227 if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
228 return false;
229 if (mt->first_level != 0 || mt->last_level != 0)
230 return false;
231 if (mt->physical_depth0 != 1)
232 return false;
233
234 /* There's no point in using an MCS buffer if the surface isn't in a
235 * renderable format.
236 */
237 if (!brw->format_supported_as_render_target[mt->format])
238 return false;
239
240 return true;
241 #endif
242 }
243
244
245 /**
246 * @param for_bo Indicates that the caller is
247 * intel_miptree_create_for_bo(). If true, then do not create
248 * \c stencil_mt.
249 */
250 struct intel_mipmap_tree *
251 intel_miptree_create_layout(struct intel_context *intel,
252 GLenum target,
253 gl_format format,
254 GLuint first_level,
255 GLuint last_level,
256 GLuint width0,
257 GLuint height0,
258 GLuint depth0,
259 bool for_bo,
260 GLuint num_samples)
261 {
262 struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
263
264 DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__,
265 _mesa_lookup_enum_by_nr(target),
266 _mesa_get_format_name(format),
267 first_level, last_level, mt);
268
269 mt->target = target_to_target(target);
270 mt->format = format;
271 mt->first_level = first_level;
272 mt->last_level = last_level;
273 mt->logical_width0 = width0;
274 mt->logical_height0 = height0;
275 mt->logical_depth0 = depth0;
276 #ifndef I915
277 mt->mcs_state = INTEL_MCS_STATE_NONE;
278 #endif
279
280 /* The cpp is bytes per (1, blockheight)-sized block for compressed
281 * textures. This is why you'll see divides by blockheight all over
282 */
283 unsigned bw, bh;
284 _mesa_get_format_block_size(format, &bw, &bh);
285 assert(_mesa_get_format_bytes(mt->format) % bw == 0);
286 mt->cpp = _mesa_get_format_bytes(mt->format) / bw;
287
288 mt->num_samples = num_samples;
289 mt->compressed = _mesa_is_format_compressed(format);
290 mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE;
291 mt->refcount = 1;
292
293 if (num_samples > 1) {
294 /* Adjust width/height/depth for MSAA */
295 mt->msaa_layout = compute_msaa_layout(intel, format, mt->target);
296 if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
297 /* In the Sandy Bridge PRM, volume 4, part 1, page 31, it says:
298 *
299 * "Any of the other messages (sample*, LOD, load4) used with a
300 * (4x) multisampled surface will in-effect sample a surface with
301 * double the height and width as that indicated in the surface
302 * state. Each pixel position on the original-sized surface is
303 * replaced with a 2x2 of samples with the following arrangement:
304 *
305 * sample 0 sample 2
306 * sample 1 sample 3"
307 *
308 * Thus, when sampling from a multisampled texture, it behaves as
309 * though the layout in memory for (x,y,sample) is:
310 *
311 * (0,0,0) (0,0,2) (1,0,0) (1,0,2)
312 * (0,0,1) (0,0,3) (1,0,1) (1,0,3)
313 *
314 * (0,1,0) (0,1,2) (1,1,0) (1,1,2)
315 * (0,1,1) (0,1,3) (1,1,1) (1,1,3)
316 *
317 * However, the actual layout of multisampled data in memory is:
318 *
319 * (0,0,0) (1,0,0) (0,0,1) (1,0,1)
320 * (0,1,0) (1,1,0) (0,1,1) (1,1,1)
321 *
322 * (0,0,2) (1,0,2) (0,0,3) (1,0,3)
323 * (0,1,2) (1,1,2) (0,1,3) (1,1,3)
324 *
325 * This pattern repeats for each 2x2 pixel block.
326 *
327 * As a result, when calculating the size of our 4-sample buffer for
328 * an odd width or height, we have to align before scaling up because
329 * sample 3 is in that bottom right 2x2 block.
330 */
331 switch (num_samples) {
332 case 4:
333 width0 = ALIGN(width0, 2) * 2;
334 height0 = ALIGN(height0, 2) * 2;
335 break;
336 case 8:
337 width0 = ALIGN(width0, 2) * 4;
338 height0 = ALIGN(height0, 2) * 2;
339 break;
340 default:
341 /* num_samples should already have been quantized to 0, 1, 4, or
342 * 8.
343 */
344 assert(false);
345 }
346 } else {
347 /* Non-interleaved */
348 depth0 *= num_samples;
349 }
350 }
351
352 /* array_spacing_lod0 is only used for non-IMS MSAA surfaces. TODO: can we
353 * use it elsewhere?
354 */
355 switch (mt->msaa_layout) {
356 case INTEL_MSAA_LAYOUT_NONE:
357 case INTEL_MSAA_LAYOUT_IMS:
358 mt->array_spacing_lod0 = false;
359 break;
360 case INTEL_MSAA_LAYOUT_UMS:
361 case INTEL_MSAA_LAYOUT_CMS:
362 mt->array_spacing_lod0 = true;
363 break;
364 }
365
366 if (target == GL_TEXTURE_CUBE_MAP) {
367 assert(depth0 == 1);
368 depth0 = 6;
369 }
370
371 mt->physical_width0 = width0;
372 mt->physical_height0 = height0;
373 mt->physical_depth0 = depth0;
374
375 if (!for_bo &&
376 _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
377 (intel->must_use_separate_stencil ||
378 (intel->has_separate_stencil &&
379 intel->vtbl.is_hiz_depth_format(intel, format)))) {
380 mt->stencil_mt = intel_miptree_create(intel,
381 mt->target,
382 MESA_FORMAT_S8,
383 mt->first_level,
384 mt->last_level,
385 mt->logical_width0,
386 mt->logical_height0,
387 mt->logical_depth0,
388 true,
389 num_samples,
390 INTEL_MIPTREE_TILING_ANY);
391 if (!mt->stencil_mt) {
392 intel_miptree_release(&mt);
393 return NULL;
394 }
395
396 /* Fix up the Z miptree format for how we're splitting out separate
397 * stencil. Gen7 expects there to be no stencil bits in its depth buffer.
398 */
399 if (mt->format == MESA_FORMAT_S8_Z24) {
400 mt->format = MESA_FORMAT_X8_Z24;
401 } else if (mt->format == MESA_FORMAT_Z32_FLOAT_X24S8) {
402 mt->format = MESA_FORMAT_Z32_FLOAT;
403 mt->cpp = 4;
404 } else {
405 _mesa_problem(NULL, "Unknown format %s in separate stencil mt\n",
406 _mesa_get_format_name(mt->format));
407 }
408 }
409
410 intel_get_texture_alignment_unit(intel, mt->format,
411 &mt->align_w, &mt->align_h);
412
413 #ifdef I915
415 if (intel->is_945)
416 i945_miptree_layout(mt);
417 else
418 i915_miptree_layout(mt);
419 #else
420 brw_miptree_layout(intel, mt);
421 #endif
422
423 return mt;
424 }
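
/* A minimal sketch (not part of the driver) of the 4x IMS
 * (x, y, sample) -> physical (x', y') mapping implied by the layout table
 * in the comment above: sample bit 0 selects the column and sample bit 1
 * selects the row within each 2x2 block.
 */
static inline void
ims_4x_encode_sketch(uint32_t x, uint32_t y, uint32_t s,
                     uint32_t *px, uint32_t *py)
{
   *px = ((x & ~1u) << 1) | ((s & 1u) << 1) | (x & 1u);
   *py = ((y & ~1u) << 1) | (s & 2u) | (y & 1u);
}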
425
426 /**
427 * \brief Helper function for intel_miptree_create().
428 */
429 static uint32_t
430 intel_miptree_choose_tiling(struct intel_context *intel,
431 gl_format format,
432 uint32_t width0,
433 uint32_t num_samples,
434 enum intel_miptree_tiling_mode requested,
435 struct intel_mipmap_tree *mt)
436 {
437
438 if (format == MESA_FORMAT_S8) {
439 /* The stencil buffer is W tiled. However, we request from the kernel a
440 * non-tiled buffer because the GTT is incapable of W fencing.
441 */
442 return I915_TILING_NONE;
443 }
444
445 /* Some usages may want only one type of tiling, like depth miptrees (Y
446 * tiled), or temporary BOs for uploading data once (linear).
447 */
448 switch (requested) {
449 case INTEL_MIPTREE_TILING_ANY:
450 break;
451 case INTEL_MIPTREE_TILING_Y:
452 return I915_TILING_Y;
453 case INTEL_MIPTREE_TILING_NONE:
454 return I915_TILING_NONE;
455 }
456
457 if (num_samples > 1) {
458 /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
459 * Surface"):
460 *
461 * [DevSNB+]: For multi-sample render targets, this field must be
462 * 1. MSRTs can only be tiled.
463 *
464 * Our usual reason for preferring X tiling (fast blits using the
465 * blitting engine) doesn't apply to MSAA, since we'll generally be
466 * downsampling or upsampling when blitting between the MSAA buffer
467 * and another buffer, and the blitting engine doesn't support that.
468 * So use Y tiling, since it makes better use of the cache.
469 */
470 return I915_TILING_Y;
471 }
472
473 GLenum base_format = _mesa_get_format_base_format(format);
474 if (intel->gen >= 4 &&
475 (base_format == GL_DEPTH_COMPONENT ||
476 base_format == GL_DEPTH_STENCIL_EXT))
477 return I915_TILING_Y;
478
479 int minimum_pitch = mt->total_width * mt->cpp;
480
481 /* If the width is much smaller than a tile, don't bother tiling. */
482 if (minimum_pitch < 64)
483 return I915_TILING_NONE;
484
485 if (ALIGN(minimum_pitch, 512) >= 32768) {
486 perf_debug("%dx%d miptree too large to blit, falling back to untiled",
487 mt->total_width, mt->total_height);
488 return I915_TILING_NONE;
489 }
490
491 /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */
492 if (intel->gen < 6)
493 return I915_TILING_X;
494
495 return I915_TILING_Y | I915_TILING_X;
496 }
497
498 struct intel_mipmap_tree *
499 intel_miptree_create(struct intel_context *intel,
500 GLenum target,
501 gl_format format,
502 GLuint first_level,
503 GLuint last_level,
504 GLuint width0,
505 GLuint height0,
506 GLuint depth0,
507 bool expect_accelerated_upload,
508 GLuint num_samples,
509 enum intel_miptree_tiling_mode requested_tiling)
510 {
511 struct intel_mipmap_tree *mt;
512 gl_format tex_format = format;
513 gl_format etc_format = MESA_FORMAT_NONE;
514 GLuint total_width, total_height;
515
516 if (!intel->is_baytrail) {
517 switch (format) {
518 case MESA_FORMAT_ETC1_RGB8:
519 format = MESA_FORMAT_RGBX8888_REV;
520 break;
521 case MESA_FORMAT_ETC2_RGB8:
522 format = MESA_FORMAT_RGBX8888_REV;
523 break;
524 case MESA_FORMAT_ETC2_SRGB8:
525 case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
526 case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
527 format = MESA_FORMAT_SARGB8;
528 break;
529 case MESA_FORMAT_ETC2_RGBA8_EAC:
530 case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
531 format = MESA_FORMAT_RGBA8888_REV;
532 break;
533 case MESA_FORMAT_ETC2_R11_EAC:
534 format = MESA_FORMAT_R16;
535 break;
536 case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
537 format = MESA_FORMAT_SIGNED_R16;
538 break;
539 case MESA_FORMAT_ETC2_RG11_EAC:
540 format = MESA_FORMAT_GR1616;
541 break;
542 case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
543 format = MESA_FORMAT_SIGNED_GR1616;
544 break;
545 default:
546 /* Non ETC1 / ETC2 format */
547 break;
548 }
549 }
550
551 etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
552
553 mt = intel_miptree_create_layout(intel, target, format,
554 first_level, last_level, width0,
555 height0, depth0,
556 false, num_samples);
557 /*
558 * total_width == 0 || total_height == 0 indicates the null texture
559 */
560 if (!mt || !mt->total_width || !mt->total_height) {
561 intel_miptree_release(&mt);
562 return NULL;
563 }
564
565 total_width = mt->total_width;
566 total_height = mt->total_height;
567
568 if (format == MESA_FORMAT_S8) {
569 /* Align to size of W tile, 64x64. */
570 total_width = ALIGN(total_width, 64);
571 total_height = ALIGN(total_height, 64);
572 }
573
574 uint32_t tiling = intel_miptree_choose_tiling(intel, format, width0,
575 num_samples, requested_tiling,
576 mt);
577 bool y_or_x = tiling == (I915_TILING_Y | I915_TILING_X);
578
579 mt->etc_format = etc_format;
580 mt->region = intel_region_alloc(intel->intelScreen,
581 y_or_x ? I915_TILING_Y : tiling,
582 mt->cpp,
583 total_width,
584 total_height,
585 expect_accelerated_upload);
586
587 /* If the region is too large to fit in the aperture, we need to use the
588 * BLT engine to support it. The BLT paths can't currently handle Y-tiling,
589 * so we need to fall back to X.
590 */
591 if (y_or_x && mt->region->bo->size >= intel->max_gtt_map_object_size) {
592 perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n",
593 mt->total_width, mt->total_height);
594 intel_region_release(&mt->region);
595
596 mt->region = intel_region_alloc(intel->intelScreen,
597 I915_TILING_X,
598 mt->cpp,
599 total_width,
600 total_height,
601 expect_accelerated_upload);
602 }
603
604 mt->offset = 0;
605
606 if (!mt->region) {
607 intel_miptree_release(&mt);
608 return NULL;
609 }
610
611 #ifndef I915
612 /* If this miptree is capable of supporting fast color clears, set
613 * mcs_state appropriately to ensure that fast clears will occur.
614 * Allocation of the MCS miptree will be deferred until the first fast
615 * clear actually occurs.
616 */
617 if (intel_is_non_msrt_mcs_buffer_supported(intel, mt))
618 mt->mcs_state = INTEL_MCS_STATE_RESOLVED;
619 #endif
620
621 return mt;
622 }
623
624 struct intel_mipmap_tree *
625 intel_miptree_create_for_bo(struct intel_context *intel,
626 drm_intel_bo *bo,
627 gl_format format,
628 uint32_t offset,
629 uint32_t width,
630 uint32_t height,
631 int pitch,
632 uint32_t tiling)
633 {
634 struct intel_mipmap_tree *mt;
635
636 struct intel_region *region = calloc(1, sizeof(*region));
637 if (!region)
638 return NULL;
639
640 /* Nothing will be able to use this miptree with the BO if the offset isn't
641 * aligned.
642 */
643 if (tiling != I915_TILING_NONE)
644 assert(offset % 4096 == 0);
645
646 /* miptrees can't handle negative pitch. If you need flipping of images,
647 * that's outside of the scope of the mt.
648 */
649 assert(pitch >= 0);
650
651 mt = intel_miptree_create_layout(intel, GL_TEXTURE_2D, format,
652 0, 0,
653 width, height, 1,
654 true, 0 /* num_samples */);
655 if (!mt)
656 return mt;
657
658 region->cpp = mt->cpp;
659 region->width = width;
660 region->height = height;
661 region->pitch = pitch;
662 region->refcount = 1;
663 drm_intel_bo_reference(bo);
664 region->bo = bo;
665 region->tiling = tiling;
666
667 mt->region = region;
668 mt->offset = offset;
669
670 return mt;
671 }
672
673
674 /**
675 * For a singlesample DRI2 buffer, this simply wraps the given region with a miptree.
676 *
677 * For a multisample DRI2 buffer, this wraps the given region with
678 * a singlesample miptree, then creates a multisample miptree into which the
679 * singlesample miptree is embedded as a child.
680 */
681 struct intel_mipmap_tree*
682 intel_miptree_create_for_dri2_buffer(struct intel_context *intel,
683 unsigned dri_attachment,
684 gl_format format,
685 uint32_t num_samples,
686 struct intel_region *region)
687 {
688 struct intel_mipmap_tree *singlesample_mt = NULL;
689 struct intel_mipmap_tree *multisample_mt = NULL;
690
691 /* Only the front and back buffers, which are color buffers, are shared
692 * through DRI2.
693 */
694 assert(dri_attachment == __DRI_BUFFER_BACK_LEFT ||
695 dri_attachment == __DRI_BUFFER_FRONT_LEFT ||
696 dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT);
697 assert(_mesa_get_format_base_format(format) == GL_RGB ||
698 _mesa_get_format_base_format(format) == GL_RGBA);
699
700 singlesample_mt = intel_miptree_create_for_bo(intel,
701 region->bo,
702 format,
703 0,
704 region->width,
705 region->height,
706 region->pitch,
707 region->tiling);
708 if (!singlesample_mt)
709 return NULL;
710 singlesample_mt->region->name = region->name;
711
712 #ifndef I915
713 /* If this miptree is capable of supporting fast color clears, set
714 * mcs_state appropriately to ensure that fast clears will occur.
715 * Allocation of the MCS miptree will be deferred until the first fast
716 * clear actually occurs.
717 */
718 if (intel_is_non_msrt_mcs_buffer_supported(intel, singlesample_mt))
719 singlesample_mt->mcs_state = INTEL_MCS_STATE_RESOLVED;
720 #endif
721
722 if (num_samples == 0)
723 return singlesample_mt;
724
725 multisample_mt = intel_miptree_create_for_renderbuffer(intel,
726 format,
727 region->width,
728 region->height,
729 num_samples);
730 if (!multisample_mt) {
731 intel_miptree_release(&singlesample_mt);
732 return NULL;
733 }
734
735 multisample_mt->singlesample_mt = singlesample_mt;
736 multisample_mt->need_downsample = false;
737
738 if (intel->is_front_buffer_rendering &&
739 (dri_attachment == __DRI_BUFFER_FRONT_LEFT ||
740 dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT)) {
741 intel_miptree_upsample(intel, multisample_mt);
742 }
743
744 return multisample_mt;
745 }
746
747 struct intel_mipmap_tree*
748 intel_miptree_create_for_renderbuffer(struct intel_context *intel,
749 gl_format format,
750 uint32_t width,
751 uint32_t height,
752 uint32_t num_samples)
753 {
754 struct intel_mipmap_tree *mt;
755 uint32_t depth = 1;
756 bool ok;
757
758 mt = intel_miptree_create(intel, GL_TEXTURE_2D, format, 0, 0,
759 width, height, depth, true, num_samples,
760 INTEL_MIPTREE_TILING_ANY);
761 if (!mt)
762 goto fail;
763
764 if (intel->vtbl.is_hiz_depth_format(intel, format)) {
765 ok = intel_miptree_alloc_hiz(intel, mt);
766 if (!ok)
767 goto fail;
768 }
769
770 if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
771 ok = intel_miptree_alloc_mcs(intel, mt, num_samples);
772 if (!ok)
773 goto fail;
774 }
775
776 return mt;
777
778 fail:
779 intel_miptree_release(&mt);
780 return NULL;
781 }
782
783 void
784 intel_miptree_reference(struct intel_mipmap_tree **dst,
785 struct intel_mipmap_tree *src)
786 {
787 if (*dst == src)
788 return;
789
790 intel_miptree_release(dst);
791
792 if (src) {
793 src->refcount++;
794 DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount);
795 }
796
797 *dst = src;
798 }
799
800
801 void
802 intel_miptree_release(struct intel_mipmap_tree **mt)
803 {
804 if (!*mt)
805 return;
806
807 DBG("%s %p refcount will be %d\n", __FUNCTION__, *mt, (*mt)->refcount - 1);
808 if (--(*mt)->refcount <= 0) {
809 GLuint i;
810
811 DBG("%s deleting %p\n", __FUNCTION__, *mt);
812
813 intel_region_release(&((*mt)->region));
814 intel_miptree_release(&(*mt)->stencil_mt);
815 intel_miptree_release(&(*mt)->hiz_mt);
816 #ifndef I915
817 intel_miptree_release(&(*mt)->mcs_mt);
818 #endif
819 intel_miptree_release(&(*mt)->singlesample_mt);
820 intel_resolve_map_clear(&(*mt)->hiz_map);
821
822 for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
823 free((*mt)->level[i].slice);
824 }
825
826 free(*mt);
827 }
828 *mt = NULL;
829 }
830
831 void
832 intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
833 int *width, int *height, int *depth)
834 {
835 switch (image->TexObject->Target) {
836 case GL_TEXTURE_1D_ARRAY:
837 *width = image->Width;
838 *height = 1;
839 *depth = image->Height;
840 break;
841 default:
842 *width = image->Width;
843 *height = image->Height;
844 *depth = image->Depth;
845 break;
846 }
847 }
848
849 /**
850 * Can the image be pulled into a unified mipmap tree? This mirrors
851 * the completeness test in a lot of ways.
852 *
853 * Not sure whether I want to pass gl_texture_image here.
854 */
855 bool
856 intel_miptree_match_image(struct intel_mipmap_tree *mt,
857 struct gl_texture_image *image)
858 {
859 struct intel_texture_image *intelImage = intel_texture_image(image);
860 GLuint level = intelImage->base.Base.Level;
861 int width, height, depth;
862
863 /* glTexImage* choose the texture object based on the target passed in, and
864 * objects can't change targets over their lifetimes, so this should be
865 * true.
866 */
867 assert(target_to_target(image->TexObject->Target) == mt->target);
868
869 gl_format mt_format = mt->format;
870 if (mt->format == MESA_FORMAT_X8_Z24 && mt->stencil_mt)
871 mt_format = MESA_FORMAT_S8_Z24;
872 if (mt->format == MESA_FORMAT_Z32_FLOAT && mt->stencil_mt)
873 mt_format = MESA_FORMAT_Z32_FLOAT_X24S8;
874 if (mt->etc_format != MESA_FORMAT_NONE)
875 mt_format = mt->etc_format;
876
877 if (image->TexFormat != mt_format)
878 return false;
879
880 intel_miptree_get_dimensions_for_image(image, &width, &height, &depth);
881
882 if (mt->target == GL_TEXTURE_CUBE_MAP)
883 depth = 6;
884
885 /* Test image dimensions against the base level image adjusted for
886 * minification. This will also catch images not present in the
887 * tree, changed targets, etc.
888 */
889 if (mt->target == GL_TEXTURE_2D_MULTISAMPLE ||
890 mt->target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
891 /* nonzero level here is always bogus */
892 assert(level == 0);
893
894 if (width != mt->logical_width0 ||
895 height != mt->logical_height0 ||
896 depth != mt->logical_depth0) {
897 return false;
898 }
899 }
900 else {
901 /* all normal textures, renderbuffers, etc */
902 if (width != mt->level[level].width ||
903 height != mt->level[level].height ||
904 depth != mt->level[level].depth) {
905 return false;
906 }
907 }
908
909 if (image->NumSamples != mt->num_samples)
910 return false;
911
912 return true;
913 }
914
915
916 void
917 intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
918 GLuint level,
919 GLuint x, GLuint y,
920 GLuint w, GLuint h, GLuint d)
921 {
922 mt->level[level].width = w;
923 mt->level[level].height = h;
924 mt->level[level].depth = d;
925 mt->level[level].level_x = x;
926 mt->level[level].level_y = y;
927
928 DBG("%s level %d size: %d,%d,%d offset %d,%d\n", __FUNCTION__,
929 level, w, h, d, x, y);
930
931 assert(mt->level[level].slice == NULL);
932
933 mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice));
934 mt->level[level].slice[0].x_offset = mt->level[level].level_x;
935 mt->level[level].slice[0].y_offset = mt->level[level].level_y;
936 }
937
938
939 void
940 intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
941 GLuint level, GLuint img,
942 GLuint x, GLuint y)
943 {
944 if (img == 0 && level == 0)
945 assert(x == 0 && y == 0);
946
947 assert(img < mt->level[level].depth);
948
949 mt->level[level].slice[img].x_offset = mt->level[level].level_x + x;
950 mt->level[level].slice[img].y_offset = mt->level[level].level_y + y;
951
952 DBG("%s level %d img %d pos %d,%d\n",
953 __FUNCTION__, level, img,
954 mt->level[level].slice[img].x_offset,
955 mt->level[level].slice[img].y_offset);
956 }
957
958 void
959 intel_miptree_get_image_offset(struct intel_mipmap_tree *mt,
960 GLuint level, GLuint slice,
961 GLuint *x, GLuint *y)
962 {
963 assert(slice < mt->level[level].depth);
964
965 *x = mt->level[level].slice[slice].x_offset;
966 *y = mt->level[level].slice[slice].y_offset;
967 }
968
969 /**
970 * Rendering with tiled buffers requires that the base address of the buffer
971 * be aligned to a page boundary. For renderbuffers, and sometimes with
972 * textures, we may want the surface to point at a texture image level that
973 * isn't at a page boundary.
974 *
975 * This function returns an appropriately-aligned base offset
976 * according to the tiling restrictions, plus any required x/y offset
977 * from there.
978 */
979 uint32_t
980 intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt,
981 GLuint level, GLuint slice,
982 uint32_t *tile_x,
983 uint32_t *tile_y)
984 {
985 struct intel_region *region = mt->region;
986 uint32_t x, y;
987 uint32_t mask_x, mask_y;
988
989 intel_region_get_tile_masks(region, &mask_x, &mask_y, false);
990 intel_miptree_get_image_offset(mt, level, slice, &x, &y);
991
992 *tile_x = x & mask_x;
993 *tile_y = y & mask_y;
994
995 return intel_region_get_aligned_offset(region, x & ~mask_x, y & ~mask_y,
996 false);
997 }
998
999 static void
1000 intel_miptree_copy_slice_sw(struct intel_context *intel,
1001 struct intel_mipmap_tree *dst_mt,
1002 struct intel_mipmap_tree *src_mt,
1003 int level,
1004 int slice,
1005 int width,
1006 int height)
1007 {
1008 void *src, *dst;
1009 int src_stride, dst_stride;
1010 int cpp = dst_mt->cpp;
1011
1012 intel_miptree_map(intel, src_mt,
1013 level, slice,
1014 0, 0,
1015 width, height,
1016 GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
1017 &src, &src_stride);
1018
1019 intel_miptree_map(intel, dst_mt,
1020 level, slice,
1021 0, 0,
1022 width, height,
1023 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
1024 BRW_MAP_DIRECT_BIT,
1025 &dst, &dst_stride);
1026
1027 DBG("sw blit %s mt %p %p/%d -> %s mt %p %p/%d (%dx%d)\n",
1028 _mesa_get_format_name(src_mt->format),
1029 src_mt, src, src_stride,
1030 _mesa_get_format_name(dst_mt->format),
1031 dst_mt, dst, dst_stride,
1032 width, height);
1033
1034 int row_size = cpp * width;
1035 if (src_stride == row_size &&
1036 dst_stride == row_size) {
1037 memcpy(dst, src, row_size * height);
1038 } else {
1039 for (int i = 0; i < height; i++) {
1040 memcpy(dst, src, row_size);
1041 dst += dst_stride;
1042 src += src_stride;
1043 }
1044 }
1045
1046 intel_miptree_unmap(intel, dst_mt, level, slice);
1047 intel_miptree_unmap(intel, src_mt, level, slice);
1048
1049 /* Don't forget to copy the stencil data over, too. We could have skipped
1050 * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
1051 * shuffling the two data sources in/out of temporary storage instead of
1052 * the direct mapping we get this way.
1053 */
1054 if (dst_mt->stencil_mt) {
1055 assert(src_mt->stencil_mt);
1056 intel_miptree_copy_slice_sw(intel, dst_mt->stencil_mt, src_mt->stencil_mt,
1057 level, slice, width, height);
1058 }
1059 }
1060
1061 static void
1062 intel_miptree_copy_slice(struct intel_context *intel,
1063 struct intel_mipmap_tree *dst_mt,
1064 struct intel_mipmap_tree *src_mt,
1065 int level,
1066 int face,
1067 int depth)
1069 {
1070 gl_format format = src_mt->format;
1071 uint32_t width = src_mt->level[level].width;
1072 uint32_t height = src_mt->level[level].height;
1073 int slice;
1074
1075 if (face > 0)
1076 slice = face;
1077 else
1078 slice = depth;
1079
1080 assert(depth < src_mt->level[level].depth);
1081 assert(src_mt->format == dst_mt->format);
1082
1083 if (dst_mt->compressed) {
1084 height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h;
1085 width = ALIGN(width, dst_mt->align_w);
1086 }
1087
1088 /* If it's a packed depth/stencil buffer with separate stencil, the blit
1089 * below won't apply since we can't do the depth's Y tiling or the
1090 * stencil's W tiling in the blitter.
1091 */
1092 if (src_mt->stencil_mt) {
1093 intel_miptree_copy_slice_sw(intel,
1094 dst_mt, src_mt,
1095 level, slice,
1096 width, height);
1097 return;
1098 }
1099
1100 uint32_t dst_x, dst_y, src_x, src_y;
1101 intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y);
1102 intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y);
1103
1104 DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
1105 _mesa_get_format_name(src_mt->format),
1106 src_mt, src_x, src_y, src_mt->region->pitch,
1107 _mesa_get_format_name(dst_mt->format),
1108 dst_mt, dst_x, dst_y, dst_mt->region->pitch,
1109 width, height);
1110
1111 if (!intel_miptree_blit(intel,
1112 src_mt, level, slice, 0, 0, false,
1113 dst_mt, level, slice, 0, 0, false,
1114 width, height, GL_COPY)) {
1115 perf_debug("miptree validate blit for %s failed\n",
1116 _mesa_get_format_name(format));
1117
1118 intel_miptree_copy_slice_sw(intel, dst_mt, src_mt, level, slice,
1119 width, height);
1120 }
1121 }
1122
1123 /**
1124 * Copies the image's current data to the given miptree, and associates that
1125 * miptree with the image.
1126 *
1127 * If \c invalidate is true, then the actual image data does not need to be
1128 * copied, but the image still needs to be associated to the new miptree (this
1129 * is set to true if we're about to clear the image).
1130 */
1131 void
1132 intel_miptree_copy_teximage(struct intel_context *intel,
1133 struct intel_texture_image *intelImage,
1134 struct intel_mipmap_tree *dst_mt,
1135 bool invalidate)
1136 {
1137 struct intel_mipmap_tree *src_mt = intelImage->mt;
1138 struct intel_texture_object *intel_obj =
1139 intel_texture_object(intelImage->base.Base.TexObject);
1140 int level = intelImage->base.Base.Level;
1141 int face = intelImage->base.Base.Face;
1142 GLuint depth = intelImage->base.Base.Depth;
1143
1144 if (!invalidate) {
1145 for (int slice = 0; slice < depth; slice++) {
1146 intel_miptree_copy_slice(intel, dst_mt, src_mt, level, face, slice);
1147 }
1148 }
1149
1150 intel_miptree_reference(&intelImage->mt, dst_mt);
1151 intel_obj->needs_validate = true;
1152 }
1153
1154 bool
1155 intel_miptree_alloc_mcs(struct intel_context *intel,
1156 struct intel_mipmap_tree *mt,
1157 GLuint num_samples)
1158 {
1159 assert(intel->gen >= 7); /* MCS only used on Gen7+ */
1160 #ifdef I915
1161 return false;
1162 #else
1163 assert(mt->mcs_mt == NULL);
1164
1165 /* Choose the correct format for the MCS buffer. All that really matters
1166 * is that we allocate the right buffer size, since we'll always be
1167 * accessing this miptree using MCS-specific hardware mechanisms, which
1168 * infer the correct format based on num_samples.
1169 */
1170 gl_format format;
1171 switch (num_samples) {
1172 case 4:
1173 /* 8 bits/pixel are required for MCS data when using 4x MSAA (2 bits for
1174 * each sample).
1175 */
1176 format = MESA_FORMAT_R8;
1177 break;
1178 case 8:
1179 /* 32 bits/pixel are required for MCS data when using 8x MSAA (3 bits
1180 * for each sample, plus 8 padding bits).
1181 */
1182 format = MESA_FORMAT_R_UINT32;
1183 break;
1184 default:
1185 assert(!"Unrecognized sample count in intel_miptree_alloc_mcs");
1186 return false;
1187 }
1188
1189 /* From the Ivy Bridge PRM, Vol4 Part1 p76, "MCS Base Address":
1190 *
1191 * "The MCS surface must be stored as Tile Y."
1192 */
1193 mt->mcs_state = INTEL_MCS_STATE_MSAA;
1194 mt->mcs_mt = intel_miptree_create(intel,
1195 mt->target,
1196 format,
1197 mt->first_level,
1198 mt->last_level,
1199 mt->logical_width0,
1200 mt->logical_height0,
1201 mt->logical_depth0,
1202 true,
1203 0 /* num_samples */,
1204 INTEL_MIPTREE_TILING_Y);
1205
1206 /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
1207 *
1208 * When MCS buffer is enabled and bound to MSRT, it is required that it
1209 * is cleared prior to any rendering.
1210 *
1211 * Since we don't use the MCS buffer for any purpose other than rendering,
1212 * it makes sense to just clear it immediately upon allocation.
1213 *
1214 * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
1215 */
1216 void *data = intel_miptree_map_raw(intel, mt->mcs_mt);
1217 memset(data, 0xff, mt->mcs_mt->region->bo->size);
1218 intel_miptree_unmap_raw(intel, mt->mcs_mt);
1219
1220 return mt->mcs_mt;
1221 #endif
1222 }
1223
1224
1225 bool
1226 intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel,
1227 struct intel_mipmap_tree *mt)
1228 {
1229 #ifdef I915
1230 assert(!"MCS not supported on i915");
1231 return false;
1232 #else
1233 assert(mt->mcs_mt == NULL);
1234
1235 /* The format of the MCS buffer is opaque to the driver; all that matters
1236 * is that we get its size and pitch right. We'll pretend that the format
1237 * is R32. Since an MCS tile covers 128 blocks horizontally, and a Y-tiled
1238 * R32 buffer is 32 pixels across, we'll need to scale the width down by
1239 * the block width and then a further factor of 4. Since an MCS tile
1240 * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high,
1241 * we'll need to scale the height down by the block height and then a
1242 * further factor of 8.
1243 */
1244 const gl_format format = MESA_FORMAT_R_UINT32;
1245 unsigned block_width_px;
1246 unsigned block_height;
1247 intel_get_non_msrt_mcs_alignment(intel, mt, &block_width_px, &block_height);
1248 unsigned width_divisor = block_width_px * 4;
1249 unsigned height_divisor = block_height * 8;
1250 unsigned mcs_width =
1251 ALIGN(mt->logical_width0, width_divisor) / width_divisor;
1252 unsigned mcs_height =
1253 ALIGN(mt->logical_height0, height_divisor) / height_divisor;
1254 assert(mt->logical_depth0 == 1);
1255 mt->mcs_mt = intel_miptree_create(intel,
1256 mt->target,
1257 format,
1258 mt->first_level,
1259 mt->last_level,
1260 mcs_width,
1261 mcs_height,
1262 mt->logical_depth0,
1263 true,
1264 0 /* num_samples */,
1265 INTEL_MIPTREE_TILING_Y);
1266
1267 return mt->mcs_mt;
1268 #endif
1269 }
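
/* Worked example of the divisors above (illustrative, values assumed): for
 * a 1024x768 RGBA8 render target, cpp = 4, so a Y-tiled MCS block is
 * 8x4 pixels (32/cpp x 4). Then:
 *
 *    width_divisor  = 8 * 4 = 32    mcs_width  = ALIGN(1024, 32) / 32 = 32
 *    height_divisor = 4 * 8 = 32    mcs_height = ALIGN(768, 32) / 32  = 24
 *
 * i.e. a 32x24 R32 miptree tracks the clear state of the whole surface.
 */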
1270
1271
1272 /**
1273 * Helper for intel_miptree_alloc_hiz() that sets
1274 * \c mt->level[level].slice[layer].has_hiz. Return true if and only if
1275 * \c has_hiz was set.
1276 */
1277 static bool
1278 intel_miptree_slice_enable_hiz(struct intel_context *intel,
1279 struct intel_mipmap_tree *mt,
1280 uint32_t level,
1281 uint32_t layer)
1282 {
1283 assert(mt->hiz_mt);
1284
1285 if (intel->is_haswell) {
1286 /* Disable HiZ for some slices to work around a hardware bug.
1287 *
1288 * Haswell hardware fails to respect
1289 * 3DSTATE_DEPTH_BUFFER.Depth_Coordinate_Offset_X/Y during HiZ
1290 * ambiguate operations. The failure is inconsistent and affected by
1291 * other GPU contexts. Running a heavy GPU workload in a separate
1292 * process causes the failure rate to drop to nearly 0.
1293 *
1294 * To work around the bug, we enable HiZ only when we can guarantee that
1295 * the Depth Coordinate Offset fields will be set to 0. The function
1296 * brw_get_depthstencil_tile_masks() is used to calculate the fields,
1297 * and the function is sometimes called in such a way that the presence
1298 * of an attached stencil buffer changes the function's return value.
1299 *
1300 * The largest tile size considered by brw_get_depthstencil_tile_masks()
1301 * is that of the stencil buffer. Therefore, if this hiz slice's
1302 * corresponding depth slice has an offset that is aligned to the
1303 * stencil buffer tile size, 64x64 pixels, then
1304 * 3DSTATE_DEPTH_BUFFER.Depth_Coordinate_Offset_X/Y is set to 0.
1305 */
1306 uint32_t depth_x_offset = mt->level[level].slice[layer].x_offset;
1307 uint32_t depth_y_offset = mt->level[level].slice[layer].y_offset;
1308 if ((depth_x_offset & 63) || (depth_y_offset & 63)) {
1309 return false;
1310 }
1311 }
1312
1313 mt->level[level].slice[layer].has_hiz = true;
1314 return true;
1315 }
1316
1317
1318
1319 bool
1320 intel_miptree_alloc_hiz(struct intel_context *intel,
1321 struct intel_mipmap_tree *mt)
1322 {
1323 assert(mt->hiz_mt == NULL);
1324 mt->hiz_mt = intel_miptree_create(intel,
1325 mt->target,
1326 mt->format,
1327 mt->first_level,
1328 mt->last_level,
1329 mt->logical_width0,
1330 mt->logical_height0,
1331 mt->logical_depth0,
1332 true,
1333 mt->num_samples,
1334 INTEL_MIPTREE_TILING_ANY);
1335
1336 if (!mt->hiz_mt)
1337 return false;
1338
1339 /* Mark that all slices need a HiZ resolve. */
1340 struct intel_resolve_map *head = &mt->hiz_map;
1341 for (int level = mt->first_level; level <= mt->last_level; ++level) {
1342 for (int layer = 0; layer < mt->level[level].depth; ++layer) {
1343 if (!intel_miptree_slice_enable_hiz(intel, mt, level, layer))
1344 continue;
1345
1346 head->next = malloc(sizeof(*head->next));
1347 head->next->prev = head;
1348 head->next->next = NULL;
1349 head = head->next;
1350
1351 head->level = level;
1352 head->layer = layer;
1353 head->need = GEN6_HIZ_OP_HIZ_RESOLVE;
1354 }
1355 }
1356
1357 return true;
1358 }
1359
1360 /**
1361 * Does the miptree slice have hiz enabled?
1362 */
1363 bool
1364 intel_miptree_slice_has_hiz(struct intel_mipmap_tree *mt,
1365 uint32_t level,
1366 uint32_t layer)
1367 {
1368 intel_miptree_check_level_layer(mt, level, layer);
1369 return mt->level[level].slice[layer].has_hiz;
1370 }
1371
1372 void
1373 intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,
1374 uint32_t level,
1375 uint32_t layer)
1376 {
1377 if (!intel_miptree_slice_has_hiz(mt, level, layer))
1378 return;
1379
1380 intel_resolve_map_set(&mt->hiz_map,
1381 level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
1382 }
1383
1384
1385 void
1386 intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt,
1387 uint32_t level,
1388 uint32_t layer)
1389 {
1390 if (!intel_miptree_slice_has_hiz(mt, level, layer))
1391 return;
1392
1393 intel_resolve_map_set(&mt->hiz_map,
1394 level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
1395 }
1396
1397 static bool
1398 intel_miptree_slice_resolve(struct intel_context *intel,
1399 struct intel_mipmap_tree *mt,
1400 uint32_t level,
1401 uint32_t layer,
1402 enum gen6_hiz_op need)
1403 {
1404 intel_miptree_check_level_layer(mt, level, layer);
1405
1406 struct intel_resolve_map *item =
1407 intel_resolve_map_get(&mt->hiz_map, level, layer);
1408
1409 if (!item || item->need != need)
1410 return false;
1411
1412 intel_hiz_exec(intel, mt, level, layer, need);
1413 intel_resolve_map_remove(item);
1414 return true;
1415 }
1416
1417 bool
1418 intel_miptree_slice_resolve_hiz(struct intel_context *intel,
1419 struct intel_mipmap_tree *mt,
1420 uint32_t level,
1421 uint32_t layer)
1422 {
1423 return intel_miptree_slice_resolve(intel, mt, level, layer,
1424 GEN6_HIZ_OP_HIZ_RESOLVE);
1425 }
1426
1427 bool
1428 intel_miptree_slice_resolve_depth(struct intel_context *intel,
1429 struct intel_mipmap_tree *mt,
1430 uint32_t level,
1431 uint32_t layer)
1432 {
1433 return intel_miptree_slice_resolve(intel, mt, level, layer,
1434 GEN6_HIZ_OP_DEPTH_RESOLVE);
1435 }
1436
1437 static bool
1438 intel_miptree_all_slices_resolve(struct intel_context *intel,
1439 struct intel_mipmap_tree *mt,
1440 enum gen6_hiz_op need)
1441 {
1442 bool did_resolve = false;
1443 struct intel_resolve_map *i, *next;
1444
1445 for (i = mt->hiz_map.next; i; i = next) {
1446 next = i->next;
1447 if (i->need != need)
1448 continue;
1449
1450 intel_hiz_exec(intel, mt, i->level, i->layer, need);
1451 intel_resolve_map_remove(i);
1452 did_resolve = true;
1453 }
1454
1455 return did_resolve;
1456 }
1457
1458 bool
1459 intel_miptree_all_slices_resolve_hiz(struct intel_context *intel,
1460 struct intel_mipmap_tree *mt)
1461 {
1462 return intel_miptree_all_slices_resolve(intel, mt,
1463 GEN6_HIZ_OP_HIZ_RESOLVE);
1464 }
1465
1466 bool
1467 intel_miptree_all_slices_resolve_depth(struct intel_context *intel,
1468 struct intel_mipmap_tree *mt)
1469 {
1470 return intel_miptree_all_slices_resolve(intel, mt,
1471 GEN6_HIZ_OP_DEPTH_RESOLVE);
1472 }
1473
1474
1475 void
1476 intel_miptree_resolve_color(struct intel_context *intel,
1477 struct intel_mipmap_tree *mt)
1478 {
1479 #ifdef I915
1480 /* Fast color clear is not supported on the i915 (pre-Gen4) driver */
1481 #else
1482 switch (mt->mcs_state) {
1483 case INTEL_MCS_STATE_NONE:
1484 case INTEL_MCS_STATE_MSAA:
1485 case INTEL_MCS_STATE_RESOLVED:
1486 /* No resolve needed */
1487 break;
1488 case INTEL_MCS_STATE_UNRESOLVED:
1489 case INTEL_MCS_STATE_CLEAR:
1490 brw_blorp_resolve_color(intel, mt);
1491 break;
1492 }
1493 #endif
1494 }
1495
1496
1497 /**
1498 * Make it possible to share the region backing the given miptree with another
1499 * process or another miptree.
1500 *
1501 * Fast color clears are unsafe with shared buffers, so we need to resolve and
1502 * then discard the MCS buffer, if present. We also set the mcs_state to
1503 * INTEL_MCS_STATE_NONE to ensure that no MCS buffer gets allocated in the
1504 * future.
1505 */
1506 void
1507 intel_miptree_make_shareable(struct intel_context *intel,
1508 struct intel_mipmap_tree *mt)
1509 {
1510 #ifdef I915
1511 /* Nothing needs to be done for I915 */
1512 (void) intel;
1513 (void) mt;
1514 #else
1515 /* MCS buffers are also used for multisample buffers, but we can't resolve
1516 * away a multisample MCS buffer because it's an integral part of how the
1517 * pixel data is stored. Fortunately this code path should never be
1518 * reached for multisample buffers.
1519 */
1520 assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE);
1521
1522 if (mt->mcs_mt) {
1523 intel_miptree_resolve_color(intel, mt);
1524 intel_miptree_release(&mt->mcs_mt);
1525 mt->mcs_state = INTEL_MCS_STATE_NONE;
1526 }
1527 #endif
1528 }
1529
1530
1531 /**
1532 * \brief Get pointer offset into stencil buffer.
1533 *
1534 * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
1535 * must decode the tile's layout in software.
1536 *
1537 * See
1538 * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
1539 * Format.
1540 * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
1541 *
1542 * Even though the returned offset is always positive, the return type is
1543 * signed due to
1544 * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
1545 * mesa: Fix return type of _mesa_get_format_bytes() (#37351)
1546 */
1547 static intptr_t
1548 intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
1549 {
1550 uint32_t tile_size = 4096;
1551 uint32_t tile_width = 64;
1552 uint32_t tile_height = 64;
1553 uint32_t row_size = 64 * stride;
1554
1555 uint32_t tile_x = x / tile_width;
1556 uint32_t tile_y = y / tile_height;
1557
1558 /* The byte's address relative to the tile's base address. */
1559 uint32_t byte_x = x % tile_width;
1560 uint32_t byte_y = y % tile_height;
1561
1562 uintptr_t u = tile_y * row_size
1563 + tile_x * tile_size
1564 + 512 * (byte_x / 8)
1565 + 64 * (byte_y / 8)
1566 + 32 * ((byte_y / 4) % 2)
1567 + 16 * ((byte_x / 4) % 2)
1568 + 8 * ((byte_y / 2) % 2)
1569 + 4 * ((byte_x / 2) % 2)
1570 + 2 * (byte_y % 2)
1571 + 1 * (byte_x % 2);
1572
1573 if (swizzled) {
1574 /* adjust for bit6 swizzling */
1575 if (((byte_x / 8) % 2) == 1) {
1576 if (((byte_y / 8) % 2) == 0) {
1577 u += 64;
1578 } else {
1579 u -= 64;
1580 }
1581 }
1582 }
1583
1584 return u;
1585 }
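
/* A few sample values of the interleave above (swizzled = false, x and y
 * within the first tile), showing how the low x and y bits alternate in
 * the byte address:
 *
 *    (0,0) -> 0    (1,0) -> 1    (0,1) -> 2    (2,0) -> 4    (0,2) -> 8
 *    (4,0) -> 16   (0,4) -> 32   (0,8) -> 64   (8,0) -> 512
 */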
1586
1587 static void
1588 intel_miptree_updownsample(struct intel_context *intel,
1589 struct intel_mipmap_tree *src,
1590 struct intel_mipmap_tree *dst,
1591 unsigned width,
1592 unsigned height)
1593 {
1594 #ifndef I915
1595 int src_x0 = 0;
1596 int src_y0 = 0;
1597 int dst_x0 = 0;
1598 int dst_y0 = 0;
1599
1600 brw_blorp_blit_miptrees(intel,
1601 src, 0 /* level */, 0 /* layer */,
1602 dst, 0 /* level */, 0 /* layer */,
1603 src_x0, src_y0,
1604 width, height,
1605 dst_x0, dst_y0,
1606 width, height,
1607 false, false /*mirror x, y*/);
1608
1609 if (src->stencil_mt) {
1610 brw_blorp_blit_miptrees(intel,
1611 src->stencil_mt, 0 /* level */, 0 /* layer */,
1612 dst->stencil_mt, 0 /* level */, 0 /* layer */,
1613 src_x0, src_y0,
1614 width, height,
1615 dst_x0, dst_y0,
1616 width, height,
1617 false, false /*mirror x, y*/);
1618 }
1619 #endif /* I915 */
1620 }
1621
1622 static void
1623 assert_is_flat(struct intel_mipmap_tree *mt)
1624 {
1625 assert(mt->target == GL_TEXTURE_2D);
1626 assert(mt->first_level == 0);
1627 assert(mt->last_level == 0);
1628 }
1629
1630 /**
1631 * \brief Downsample from mt to mt->singlesample_mt.
1632 *
1633 * If the miptree needs no downsample, then skip.
1634 */
1635 void
1636 intel_miptree_downsample(struct intel_context *intel,
1637 struct intel_mipmap_tree *mt)
1638 {
1639 /* Only flat, renderbuffer-like miptrees are supported. */
1640 assert_is_flat(mt);
1641
1642 if (!mt->need_downsample)
1643 return;
1644 intel_miptree_updownsample(intel,
1645 mt, mt->singlesample_mt,
1646 mt->logical_width0,
1647 mt->logical_height0);
1648 mt->need_downsample = false;
1649 }
1650
1651 /**
1652 * \brief Upsample from mt->singlesample_mt to mt.
1653 *
1654 * The upsample is done unconditionally.
1655 */
1656 void
1657 intel_miptree_upsample(struct intel_context *intel,
1658 struct intel_mipmap_tree *mt)
1659 {
1660 /* Only flat, renderbuffer-like miptrees are supported. */
1661 assert_is_flat(mt);
1662 assert(!mt->need_downsample);
1663
1664 intel_miptree_updownsample(intel,
1665 mt->singlesample_mt, mt,
1666 mt->logical_width0,
1667 mt->logical_height0);
1668 }
1669
1670 void *
1671 intel_miptree_map_raw(struct intel_context *intel, struct intel_mipmap_tree *mt)
1672 {
1673 /* CPU accesses to color buffers don't understand fast color clears, so
1674 * resolve any pending fast color clears before we map.
1675 */
1676 intel_miptree_resolve_color(intel, mt);
1677
1678 drm_intel_bo *bo = mt->region->bo;
1679
1680 if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
1681 if (drm_intel_bo_busy(bo)) {
1682 perf_debug("Mapping a busy BO, causing a stall on the GPU.\n");
1683 }
1684 }
1685
1686 intel_flush(&intel->ctx);
1687
1688 if (mt->region->tiling != I915_TILING_NONE)
1689 drm_intel_gem_bo_map_gtt(bo);
1690 else
1691 drm_intel_bo_map(bo, true);
1692
1693 return bo->virtual;
1694 }
1695
1696 void
1697 intel_miptree_unmap_raw(struct intel_context *intel,
1698 struct intel_mipmap_tree *mt)
1699 {
1700 drm_intel_bo_unmap(mt->region->bo);
1701 }
1702
1703 static void
1704 intel_miptree_map_gtt(struct intel_context *intel,
1705 struct intel_mipmap_tree *mt,
1706 struct intel_miptree_map *map,
1707 unsigned int level, unsigned int slice)
1708 {
1709 unsigned int bw, bh;
1710 void *base;
1711 unsigned int image_x, image_y;
1712 int x = map->x;
1713 int y = map->y;
1714
1715 /* For compressed formats, the stride is the number of bytes per
1716 * row of blocks. intel_miptree_get_image_offset() already does
1717 * the divide.
1718 */
1719 _mesa_get_format_block_size(mt->format, &bw, &bh);
1720 assert(y % bh == 0);
1721 y /= bh;
1722
1723 base = intel_miptree_map_raw(intel, mt) + mt->offset;
1724
1725 if (base == NULL)
1726 map->ptr = NULL;
1727 else {
1728 /* Note that in the case of cube maps, the caller must have passed the
1729 * slice number referencing the face.
1730 */
1731 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1732 x += image_x;
1733 y += image_y;
1734
1735 map->stride = mt->region->pitch;
1736 map->ptr = base + y * map->stride + x * mt->cpp;
1737 }
1738
1739 DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1740 map->x, map->y, map->w, map->h,
1741 mt, _mesa_get_format_name(mt->format),
1742 x, y, map->ptr, map->stride);
1743 }
1744
1745 static void
1746 intel_miptree_unmap_gtt(struct intel_context *intel,
1747 struct intel_mipmap_tree *mt,
1748 struct intel_miptree_map *map,
1749 unsigned int level,
1750 unsigned int slice)
1751 {
1752 intel_miptree_unmap_raw(intel, mt);
1753 }
1754
1755 static void
1756 intel_miptree_map_blit(struct intel_context *intel,
1757 struct intel_mipmap_tree *mt,
1758 struct intel_miptree_map *map,
1759 unsigned int level, unsigned int slice)
1760 {
1761 map->mt = intel_miptree_create(intel, GL_TEXTURE_2D, mt->format,
1762 0, 0,
1763 map->w, map->h, 1,
1764 false, 0,
1765 INTEL_MIPTREE_TILING_NONE);
1766 if (!map->mt) {
1767 fprintf(stderr, "Failed to allocate blit temporary\n");
1768 goto fail;
1769 }
1770 map->stride = map->mt->region->pitch;
1771
1772 if (!intel_miptree_blit(intel,
1773 mt, level, slice,
1774 map->x, map->y, false,
1775 map->mt, 0, 0,
1776 0, 0, false,
1777 map->w, map->h, GL_COPY)) {
1778 fprintf(stderr, "Failed to blit\n");
1779 goto fail;
1780 }
1781
1782 intel_batchbuffer_flush(intel);
1783 map->ptr = intel_miptree_map_raw(intel, map->mt);
1784
1785 DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1786 map->x, map->y, map->w, map->h,
1787 mt, _mesa_get_format_name(mt->format),
1788 level, slice, map->ptr, map->stride);
1789
1790 return;
1791
1792 fail:
1793 intel_miptree_release(&map->mt);
1794 map->ptr = NULL;
1795 map->stride = 0;
1796 }
1797
1798 static void
1799 intel_miptree_unmap_blit(struct intel_context *intel,
1800 struct intel_mipmap_tree *mt,
1801 struct intel_miptree_map *map,
1802 unsigned int level,
1803 unsigned int slice)
1804 {
1805 struct gl_context *ctx = &intel->ctx;
1806
1807 intel_miptree_unmap_raw(intel, map->mt);
1808
1809 if (map->mode & GL_MAP_WRITE_BIT) {
1810 bool ok = intel_miptree_blit(intel,
1811 map->mt, 0, 0,
1812 0, 0, false,
1813 mt, level, slice,
1814 map->x, map->y, false,
1815 map->w, map->h, GL_COPY);
1816 WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
1817 }
1818
1819 intel_miptree_release(&map->mt);
1820 }
1821
1822 static void
1823 intel_miptree_map_s8(struct intel_context *intel,
1824 struct intel_mipmap_tree *mt,
1825 struct intel_miptree_map *map,
1826 unsigned int level, unsigned int slice)
1827 {
1828 map->stride = map->w;
1829 map->buffer = map->ptr = malloc(map->stride * map->h);
1830 if (!map->buffer)
1831 return;
1832
1833 /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no
1834 * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
1835 * invalidate is set, since we'll be writing the whole rectangle from our
1836 * temporary buffer back out.
1837 */
1838 if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
1839 uint8_t *untiled_s8_map = map->ptr;
1840 uint8_t *tiled_s8_map = intel_miptree_map_raw(intel, mt);
1841 unsigned int image_x, image_y;
1842
1843 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1844
1845 for (uint32_t y = 0; y < map->h; y++) {
1846 for (uint32_t x = 0; x < map->w; x++) {
1847 ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
1848 x + image_x + map->x,
1849 y + image_y + map->y,
1850 intel->has_swizzling);
1851 untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
1852 }
1853 }
1854
1855 intel_miptree_unmap_raw(intel, mt);
1856
1857 DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __FUNCTION__,
1858 map->x, map->y, map->w, map->h,
1859 mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
1860 } else {
1861 DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
1862 map->x, map->y, map->w, map->h,
1863 mt, map->ptr, map->stride);
1864 }
1865 }
1866
1867 static void
1868 intel_miptree_unmap_s8(struct intel_context *intel,
1869 struct intel_mipmap_tree *mt,
1870 struct intel_miptree_map *map,
1871 unsigned int level,
1872 unsigned int slice)
1873 {
1874 if (map->mode & GL_MAP_WRITE_BIT) {
1875 unsigned int image_x, image_y;
1876 uint8_t *untiled_s8_map = map->ptr;
1877 uint8_t *tiled_s8_map = intel_miptree_map_raw(intel, mt);
1878
1879 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1880
1881 for (uint32_t y = 0; y < map->h; y++) {
1882 for (uint32_t x = 0; x < map->w; x++) {
1883 ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
1884 x + map->x,
1885 y + map->y,
1886 intel->has_swizzling);
1887 tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
1888 }
1889 }
1890
1891 intel_miptree_unmap_raw(intel, mt);
1892 }
1893
1894 free(map->buffer);
1895 }
1896
1897 static void
1898 intel_miptree_map_etc(struct intel_context *intel,
1899 struct intel_mipmap_tree *mt,
1900 struct intel_miptree_map *map,
1901 unsigned int level,
1902 unsigned int slice)
1903 {
1904 assert(mt->etc_format != MESA_FORMAT_NONE);
1905 if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
1906 assert(mt->format == MESA_FORMAT_RGBX8888_REV);
1907 }
1908
1909 assert(map->mode & GL_MAP_WRITE_BIT);
1910 assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
1911
1912 map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
1913 map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
1914 map->w, map->h, 1));
1915 map->ptr = map->buffer;
1916 }
1917
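/**
 * Decompress the ETC data the caller wrote into the temporary into the
 * miptree's uncompressed backing store, then free the temporary.
 */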
1918 static void
1919 intel_miptree_unmap_etc(struct intel_context *intel,
1920 struct intel_mipmap_tree *mt,
1921 struct intel_miptree_map *map,
1922 unsigned int level,
1923 unsigned int slice)
1924 {
1925 uint32_t image_x;
1926 uint32_t image_y;
1927 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1928
1929 image_x += map->x;
1930 image_y += map->y;
1931
1932 uint8_t *dst = intel_miptree_map_raw(intel, mt)
1933 + image_y * mt->region->pitch
1934 + image_x * mt->region->cpp;
1935
1936 if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
1937 _mesa_etc1_unpack_rgba8888(dst, mt->region->pitch,
1938 map->ptr, map->stride,
1939 map->w, map->h);
1940 else
1941 _mesa_unpack_etc2_format(dst, mt->region->pitch,
1942 map->ptr, map->stride,
1943 map->w, map->h, mt->etc_format);
1944
1945 intel_miptree_unmap_raw(intel, mt);
1946 free(map->buffer);
1947 }
1948
1949 /**
1950 * Mapping function for packed depth/stencil miptrees backed by real separate
1951 * miptrees for depth and stencil.
1952 *
1953 * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
1954 * separate from the depth buffer. Yet at the GL API level, we have to expose
1955 * packed depth/stencil textures and FBO attachments, and Mesa core expects to
1956 * be able to map that memory for texture storage and glReadPixels-type
1957 * operations. We give Mesa core that access by mallocing a temporary and
1958 * copying the data between the actual backing store and the temporary.
1959 */
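/* Layout of the packed temporary, as implemented below (illustrative
 * sketch; the Z24 case matches the S8_Z24 packing with depth in the low
 * 24 bits, and the Z32F case is the two-dword Z32_FLOAT_X24S8 layout):
 *
 *    Z24/S8,  4 bytes per pixel:  dword = (s << 24) | (z & 0x00ffffff)
 *    Z32F/S8, 8 bytes per pixel:  dword 0 = z (float bits),
 *                                 dword 1 = s (in the low byte)
 */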
1960 static void
1961 intel_miptree_map_depthstencil(struct intel_context *intel,
1962 struct intel_mipmap_tree *mt,
1963 struct intel_miptree_map *map,
1964 unsigned int level, unsigned int slice)
1965 {
1966 struct intel_mipmap_tree *z_mt = mt;
1967 struct intel_mipmap_tree *s_mt = mt->stencil_mt;
1968 bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT;
1969 int packed_bpp = map_z32f_x24s8 ? 8 : 4;
1970
1971 map->stride = map->w * packed_bpp;
1972 map->buffer = map->ptr = malloc(map->stride * map->h);
1973 if (!map->buffer)
1974 return;
1975
1976 /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no
1977 * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
1978 * invalidate is set, since we'll be writing the whole rectangle from our
1979 * temporary buffer back out.
1980 */
1981 if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
1982 uint32_t *packed_map = map->ptr;
1983 uint8_t *s_map = intel_miptree_map_raw(intel, s_mt);
1984 uint32_t *z_map = intel_miptree_map_raw(intel, z_mt);
1985 unsigned int s_image_x, s_image_y;
1986 unsigned int z_image_x, z_image_y;
1987
1988 intel_miptree_get_image_offset(s_mt, level, slice,
1989 &s_image_x, &s_image_y);
1990 intel_miptree_get_image_offset(z_mt, level, slice,
1991 &z_image_x, &z_image_y);
1992
1993 for (uint32_t y = 0; y < map->h; y++) {
1994 for (uint32_t x = 0; x < map->w; x++) {
1995 int map_x = map->x + x, map_y = map->y + y;
1996 ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
1997 map_x + s_image_x,
1998 map_y + s_image_y,
1999 intel->has_swizzling);
2000 ptrdiff_t z_offset = ((map_y + z_image_y) *
2001 (z_mt->region->pitch / 4) +
2002 (map_x + z_image_x));
2003 uint8_t s = s_map[s_offset];
2004 uint32_t z = z_map[z_offset];
2005
2006 if (map_z32f_x24s8) {
2007 packed_map[(y * map->w + x) * 2 + 0] = z;
2008 packed_map[(y * map->w + x) * 2 + 1] = s;
2009 } else {
2010 packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
2011 }
2012 }
2013 }
2014
2015 intel_miptree_unmap_raw(intel, s_mt);
2016 intel_miptree_unmap_raw(intel, z_mt);
2017
2018 DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
2019 __FUNCTION__,
2020 map->x, map->y, map->w, map->h,
2021 z_mt, map->x + z_image_x, map->y + z_image_y,
2022 s_mt, map->x + s_image_x, map->y + s_image_y,
2023 map->ptr, map->stride);
2024 } else {
2025 DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
2026 map->x, map->y, map->w, map->h,
2027 mt, map->ptr, map->stride);
2028 }
2029 }
2030
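/**
 * Scatter a mapped packed depth/stencil temporary back into the separate
 * depth and stencil miptrees (when the map was writable) and free it.
 */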
2031 static void
2032 intel_miptree_unmap_depthstencil(struct intel_context *intel,
2033 struct intel_mipmap_tree *mt,
2034 struct intel_miptree_map *map,
2035 unsigned int level,
2036 unsigned int slice)
2037 {
2038 struct intel_mipmap_tree *z_mt = mt;
2039 struct intel_mipmap_tree *s_mt = mt->stencil_mt;
2040 bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT;
2041
2042 if (map->mode & GL_MAP_WRITE_BIT) {
2043 uint32_t *packed_map = map->ptr;
2044 uint8_t *s_map = intel_miptree_map_raw(intel, s_mt);
2045 uint32_t *z_map = intel_miptree_map_raw(intel, z_mt);
2046 unsigned int s_image_x, s_image_y;
2047 unsigned int z_image_x, z_image_y;
2048
2049 intel_miptree_get_image_offset(s_mt, level, slice,
2050 &s_image_x, &s_image_y);
2051 intel_miptree_get_image_offset(z_mt, level, slice,
2052 &z_image_x, &z_image_y);
2053
2054 for (uint32_t y = 0; y < map->h; y++) {
2055 for (uint32_t x = 0; x < map->w; x++) {
2056 ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
2057 x + s_image_x + map->x,
2058 y + s_image_y + map->y,
2059 intel->has_swizzling);
 2060           ptrdiff_t z_offset = ((y + z_image_y + map->y) *
 2061                                 (z_mt->region->pitch / 4) +
 2062                                 (x + z_image_x + map->x));
2063
2064 if (map_z32f_x24s8) {
2065 z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
2066 s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
2067 } else {
2068 uint32_t packed = packed_map[y * map->w + x];
2069 s_map[s_offset] = packed >> 24;
2070 z_map[z_offset] = packed;
2071 }
2072 }
2073 }
2074
2075 intel_miptree_unmap_raw(intel, s_mt);
2076 intel_miptree_unmap_raw(intel, z_mt);
2077
2078 DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
2079 __FUNCTION__,
2080 map->x, map->y, map->w, map->h,
2081 z_mt, _mesa_get_format_name(z_mt->format),
2082 map->x + z_image_x, map->y + z_image_y,
2083 s_mt, map->x + s_image_x, map->y + s_image_y,
2084 map->ptr, map->stride);
2085 }
2086
2087 free(map->buffer);
2088 }
2089
2090 /**
2091 * Create and attach a map to the miptree at (level, slice). Return the
2092 * attached map.
2093 */
2094 static struct intel_miptree_map*
2095 intel_miptree_attach_map(struct intel_mipmap_tree *mt,
2096 unsigned int level,
2097 unsigned int slice,
2098 unsigned int x,
2099 unsigned int y,
2100 unsigned int w,
2101 unsigned int h,
2102 GLbitfield mode)
2103 {
2104 struct intel_miptree_map *map = calloc(1, sizeof(*map));
2105
2106 if (!map)
2107 return NULL;
2108
2109 assert(mt->level[level].slice[slice].map == NULL);
2110 mt->level[level].slice[slice].map = map;
2111
2112 map->mode = mode;
2113 map->x = x;
2114 map->y = y;
2115 map->w = w;
2116 map->h = h;
2117
2118 return map;
2119 }
2120
2121 /**
2122 * Release the map at (level, slice).
2123 */
2124 static void
2125 intel_miptree_release_map(struct intel_mipmap_tree *mt,
2126 unsigned int level,
2127 unsigned int slice)
2128 {
2129 struct intel_miptree_map **map;
2130
2131 map = &mt->level[level].slice[slice].map;
2132 free(*map);
2133 *map = NULL;
2134 }
2135
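/**
 * Map a single-sampled miptree slice, picking the cheapest strategy that
 * is correct for the format and tiling: the S8, ETC, and depth/stencil
 * wrappers above for formats that need CPU repacking; a blit to a linear
 * temporary for read-only maps of tiled surfaces on LLC systems and for
 * objects too large to map through the GTT; and a direct GTT map
 * otherwise.
 */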
2136 static void
2137 intel_miptree_map_singlesample(struct intel_context *intel,
2138 struct intel_mipmap_tree *mt,
2139 unsigned int level,
2140 unsigned int slice,
2141 unsigned int x,
2142 unsigned int y,
2143 unsigned int w,
2144 unsigned int h,
2145 GLbitfield mode,
2146 void **out_ptr,
2147 int *out_stride)
2148 {
2149 struct intel_miptree_map *map;
2150
2151 assert(mt->num_samples <= 1);
2152
2153 map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
 2154    if (!map) {
2155 *out_ptr = NULL;
2156 *out_stride = 0;
2157 return;
2158 }
2159
2160 intel_miptree_slice_resolve_depth(intel, mt, level, slice);
2161 if (map->mode & GL_MAP_WRITE_BIT) {
2162 intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice);
2163 }
2164
2165 if (mt->format == MESA_FORMAT_S8) {
2166 intel_miptree_map_s8(intel, mt, map, level, slice);
2167 } else if (mt->etc_format != MESA_FORMAT_NONE &&
2168 !(mode & BRW_MAP_DIRECT_BIT)) {
2169 intel_miptree_map_etc(intel, mt, map, level, slice);
2170 } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
2171 intel_miptree_map_depthstencil(intel, mt, map, level, slice);
2172 }
2173 /* See intel_miptree_blit() for details on the 32k pitch limit. */
2174 else if (intel->has_llc &&
2175 !(mode & GL_MAP_WRITE_BIT) &&
2176 !mt->compressed &&
2177 (mt->region->tiling == I915_TILING_X ||
2178 (intel->gen >= 6 && mt->region->tiling == I915_TILING_Y)) &&
2179 mt->region->pitch < 32768) {
2180 intel_miptree_map_blit(intel, mt, map, level, slice);
2181 } else if (mt->region->tiling != I915_TILING_NONE &&
2182 mt->region->bo->size >= intel->max_gtt_map_object_size) {
2183 assert(mt->region->pitch < 32768);
2184 intel_miptree_map_blit(intel, mt, map, level, slice);
2185 } else {
2186 intel_miptree_map_gtt(intel, mt, map, level, slice);
2187 }
2188
2189 *out_ptr = map->ptr;
2190 *out_stride = map->stride;
2191
2192 if (map->ptr == NULL)
2193 intel_miptree_release_map(mt, level, slice);
2194 }
2195
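/**
 * Tear down a single-sampled map, dispatching to the unmap path matching
 * the strategy chosen at map time (a set map->mt indicates the blit
 * path's linear temporary).
 */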
2196 static void
2197 intel_miptree_unmap_singlesample(struct intel_context *intel,
2198 struct intel_mipmap_tree *mt,
2199 unsigned int level,
2200 unsigned int slice)
2201 {
2202 struct intel_miptree_map *map = mt->level[level].slice[slice].map;
2203
2204 assert(mt->num_samples <= 1);
2205
2206 if (!map)
2207 return;
2208
2209 DBG("%s: mt %p (%s) level %d slice %d\n", __FUNCTION__,
2210 mt, _mesa_get_format_name(mt->format), level, slice);
2211
2212 if (mt->format == MESA_FORMAT_S8) {
2213 intel_miptree_unmap_s8(intel, mt, map, level, slice);
2214 } else if (mt->etc_format != MESA_FORMAT_NONE &&
2215 !(map->mode & BRW_MAP_DIRECT_BIT)) {
2216 intel_miptree_unmap_etc(intel, mt, map, level, slice);
2217 } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
2218 intel_miptree_unmap_depthstencil(intel, mt, map, level, slice);
2219 } else if (map->mt) {
2220 intel_miptree_unmap_blit(intel, mt, map, level, slice);
2221 } else {
2222 intel_miptree_unmap_gtt(intel, mt, map, level, slice);
2223 }
2224
2225 intel_miptree_release_map(mt, level, slice);
2226 }
2227
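/**
 * Map a multisampled miptree by downsampling into a single-sampled
 * temporary and mapping that instead.  Only flat, renderbuffer-like
 * trees (GL_TEXTURE_2D with a single level) are supported.
 */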
2228 static void
2229 intel_miptree_map_multisample(struct intel_context *intel,
2230 struct intel_mipmap_tree *mt,
2231 unsigned int level,
2232 unsigned int slice,
2233 unsigned int x,
2234 unsigned int y,
2235 unsigned int w,
2236 unsigned int h,
2237 GLbitfield mode,
2238 void **out_ptr,
2239 int *out_stride)
2240 {
2241 struct intel_miptree_map *map;
2242
2243 assert(mt->num_samples > 1);
2244
2245 /* Only flat, renderbuffer-like miptrees are supported. */
2246 if (mt->target != GL_TEXTURE_2D ||
2247 mt->first_level != 0 ||
2248 mt->last_level != 0) {
2249 _mesa_problem(&intel->ctx, "attempt to map a multisample miptree for "
 2250                     "which (target, first_level, last_level) != "
2251 "(GL_TEXTURE_2D, 0, 0)");
2252 goto fail;
2253 }
2254
2255 map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
2256 if (!map)
2257 goto fail;
2258
2259 if (!mt->singlesample_mt) {
2260 mt->singlesample_mt =
2261 intel_miptree_create_for_renderbuffer(intel,
2262 mt->format,
2263 mt->logical_width0,
2264 mt->logical_height0,
2265 0 /*num_samples*/);
2266 if (!mt->singlesample_mt)
2267 goto fail;
2268
2269 map->singlesample_mt_is_tmp = true;
2270 mt->need_downsample = true;
2271 }
2272
2273 intel_miptree_downsample(intel, mt);
2274 intel_miptree_map_singlesample(intel, mt->singlesample_mt,
2275 level, slice,
2276 x, y, w, h,
2277 mode,
2278 out_ptr, out_stride);
2279 return;
2280
2281 fail:
2282 intel_miptree_release_map(mt, level, slice);
2283 *out_ptr = NULL;
2284 *out_stride = 0;
2285 }
2286
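/**
 * Unmap the single-sampled temporary and, if it was written, upsample
 * its contents back into the real multisampled miptree.
 */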
2287 static void
2288 intel_miptree_unmap_multisample(struct intel_context *intel,
2289 struct intel_mipmap_tree *mt,
2290 unsigned int level,
2291 unsigned int slice)
2292 {
2293 struct intel_miptree_map *map = mt->level[level].slice[slice].map;
2294
2295 assert(mt->num_samples > 1);
2296
2297 if (!map)
2298 return;
2299
2300 intel_miptree_unmap_singlesample(intel, mt->singlesample_mt, level, slice);
2301
2302 mt->need_downsample = false;
2303 if (map->mode & GL_MAP_WRITE_BIT)
2304 intel_miptree_upsample(intel, mt);
2305
2306 if (map->singlesample_mt_is_tmp)
2307 intel_miptree_release(&mt->singlesample_mt);
2308
2309 intel_miptree_release_map(mt, level, slice);
2310 }
2311
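/**
 * Map a window of a miptree slice for CPU access.  On success, *out_ptr
 * points at the upper-left texel of the window and *out_stride is the
 * distance in bytes between rows; on failure, *out_ptr is NULL.
 *
 * A minimal usage sketch (illustrative only; "intel" and "mt" are assumed
 * to be a valid context and miptree):
 *
 *    void *ptr;
 *    int stride;
 *    intel_miptree_map(intel, mt, level, slice, x, y, w, h,
 *                      GL_MAP_READ_BIT, &ptr, &stride);
 *    if (ptr) {
 *       // ... read w x h texels, rows "stride" bytes apart ...
 *       intel_miptree_unmap(intel, mt, level, slice);
 *    }
 */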
2312 void
2313 intel_miptree_map(struct intel_context *intel,
2314 struct intel_mipmap_tree *mt,
2315 unsigned int level,
2316 unsigned int slice,
2317 unsigned int x,
2318 unsigned int y,
2319 unsigned int w,
2320 unsigned int h,
2321 GLbitfield mode,
2322 void **out_ptr,
2323 int *out_stride)
2324 {
2325 if (mt->num_samples <= 1)
2326 intel_miptree_map_singlesample(intel, mt,
2327 level, slice,
2328 x, y, w, h,
2329 mode,
2330 out_ptr, out_stride);
2331 else
2332 intel_miptree_map_multisample(intel, mt,
2333 level, slice,
2334 x, y, w, h,
2335 mode,
2336 out_ptr, out_stride);
2337 }
2338
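/**
 * Unmap the (level, slice) mapping established by intel_miptree_map(),
 * writing back any data the caller was allowed to modify.
 */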
2339 void
2340 intel_miptree_unmap(struct intel_context *intel,
2341 struct intel_mipmap_tree *mt,
2342 unsigned int level,
2343 unsigned int slice)
2344 {
2345 if (mt->num_samples <= 1)
2346 intel_miptree_unmap_singlesample(intel, mt, level, slice);
2347 else
2348 intel_miptree_unmap_multisample(intel, mt, level, slice);
2349 }