i965: add intel_miptree_set_all_slices_need_depth_resolve
[mesa.git] / src/mesa/drivers/dri/i965/intel_mipmap_tree.c
/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include <GL/gl.h>
#include <GL/internal/dri_interface.h>

#include "intel_batchbuffer.h"
#include "intel_chipset.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_resolve_map.h"
#include "intel_tex.h"
#include "intel_blit.h"

#include "brw_blorp.h"
#include "brw_context.h"

#include "main/enums.h"
#include "main/formats.h"
#include "main/glformats.h"
#include "main/texcompress_etc.h"
#include "main/teximage.h"

#define FILE_DEBUG_FLAG DEBUG_MIPTREE

static GLenum
target_to_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
      return GL_TEXTURE_CUBE_MAP_ARB;
   default:
      return target;
   }
}


/**
 * Determine which MSAA layout should be used by the MSAA surface being
 * created, based on the chip generation and the surface type.
 */
static enum intel_msaa_layout
compute_msaa_layout(struct brw_context *brw, gl_format format, GLenum target)
{
   /* Prior to Gen7, all MSAA surfaces used IMS layout. */
   if (brw->gen < 7)
      return INTEL_MSAA_LAYOUT_IMS;

   /* In Gen7, IMS layout is only used for depth and stencil buffers. */
   switch (_mesa_get_format_base_format(format)) {
   case GL_DEPTH_COMPONENT:
   case GL_STENCIL_INDEX:
   case GL_DEPTH_STENCIL:
      return INTEL_MSAA_LAYOUT_IMS;
   default:
      /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
       *
       *   This field must be set to 0 for all SINT MSRTs when all RT channels
       *   are not written
       *
       * In practice this means that we have to disable MCS for all signed
       * integer MSAA buffers.  The alternative, to disable MCS only when one
       * of the render target channels is disabled, is impractical because it
       * would require converting between CMS and UMS MSAA layouts on the fly,
       * which is expensive.
       */
      if (_mesa_get_format_datatype(format) == GL_INT) {
         /* TODO: is this workaround needed for future chipsets? */
         assert(brw->gen == 7);
         return INTEL_MSAA_LAYOUT_UMS;
      } else {
         /* For now, if we're going to be texturing from this surface,
          * force UMS, so that the shader doesn't have to do different
          * things based on whether there's a multisample control surface
          * that needs to be sampled first.
          * We can't just blindly read the MCS surface in all cases because:
          *
          * From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
          *
          *   If this field is disabled and the sampling engine <ld_mcs>
          *   message is issued on this surface, the MCS surface may be
          *   accessed. Software must ensure that the surface is defined
          *   to avoid GTT errors.
          */
         if (target == GL_TEXTURE_2D_MULTISAMPLE ||
             target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
            return INTEL_MSAA_LAYOUT_UMS;
         } else {
            return INTEL_MSAA_LAYOUT_CMS;
         }
      }
   }
}
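
/* To summarize the decision above: IMS for pre-Gen7 chips and for
 * depth/stencil surfaces, UMS for signed-integer color and for surfaces
 * sampled as multisample textures, CMS for everything else.
 */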


/**
 * For single-sampled render targets ("non-MSRT"), the MCS buffer is a
 * scaled-down bitfield representation of the color buffer which is capable of
 * recording when blocks of the color buffer are equal to the clear value.
 * This function returns the block size that will be used by the MCS buffer
 * corresponding to a certain color miptree.
 *
 * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 * beneath the "Fast Color Clear" bullet (p327):
 *
 *     The following table describes the RT alignment
 *
 *                       Pixels  Lines
 *         TiledY RT CL
 *             bpp
 *              32          8      4
 *              64          4      4
 *             128          2      4
 *         TiledX RT CL
 *             bpp
 *              32         16      2
 *              64          8      2
 *             128          4      2
 *
 * This alignment has the following uses:
 *
 * - For figuring out the size of the MCS buffer.  Each 4k tile in the MCS
 *   buffer contains 128 blocks horizontally and 256 blocks vertically.
 *
 * - For figuring out alignment restrictions for a fast clear operation.  Fast
 *   clear operations must always clear aligned multiples of 16 blocks
 *   horizontally and 32 blocks vertically.
 *
 * - For scaling down the coordinates sent through the render pipeline during
 *   a fast clear.  X coordinates must be scaled down by 8 times the block
 *   width, and Y coordinates by 16 times the block height.
 *
 * - For scaling down the coordinates sent through the render pipeline during
 *   a "Render Target Resolve" operation.  X coordinates must be scaled down
 *   by half the block width, and Y coordinates by half the block height.
 */
void
intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 unsigned *width_px, unsigned *height)
{
   switch (mt->region->tiling) {
   default:
      assert(!"Non-MSRT MCS requires X or Y tiling");
      /* In release builds, fall through */
   case I915_TILING_Y:
      *width_px = 32 / mt->cpp;
      *height = 4;
      break;
   case I915_TILING_X:
      *width_px = 64 / mt->cpp;
      *height = 2;
   }
}
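
/* Worked example: for a 32 bpp (cpp = 4) render target, Y tiling gives
 * *width_px = 32 / 4 = 8 and *height = 4, matching the 8x4 row of the
 * PRM table quoted above; X tiling gives 64 / 4 = 16 and 2.
 */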


/**
 * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
 * can be used.
 *
 * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 * beneath the "Fast Color Clear" bullet (p326):
 *
 *     - Support is limited to tiled render targets.
 *     - Support is for non-mip-mapped and non-array surface types only.
 *
 * And then later, on p327:
 *
 *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
 *       64bpp, and 128bpp.
 */
bool
intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
                                       struct intel_mipmap_tree *mt)
{
   /* MCS support does not exist prior to Gen7 */
   if (brw->gen < 7)
      return false;

   /* MCS is only supported for color buffers */
   switch (_mesa_get_format_base_format(mt->format)) {
   case GL_DEPTH_COMPONENT:
   case GL_DEPTH_STENCIL:
   case GL_STENCIL_INDEX:
      return false;
   }

   if (mt->region->tiling != I915_TILING_X &&
       mt->region->tiling != I915_TILING_Y)
      return false;
   if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
      return false;
   if (mt->first_level != 0 || mt->last_level != 0)
      return false;
   if (mt->physical_depth0 != 1)
      return false;

   /* There's no point in using an MCS buffer if the surface isn't in a
    * renderable format.
    */
   if (!brw->format_supported_as_render_target[mt->format])
      return false;

   return true;
}


/**
 * @param for_bo Indicates that the caller is
 *        intel_miptree_create_for_bo(). If true, then do not create
 *        \c stencil_mt.
 */
struct intel_mipmap_tree *
intel_miptree_create_layout(struct brw_context *brw,
                            GLenum target,
                            gl_format format,
                            GLuint first_level,
                            GLuint last_level,
                            GLuint width0,
                            GLuint height0,
                            GLuint depth0,
                            bool for_bo,
                            GLuint num_samples)
{
   struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
   if (!mt)
      return NULL;

   DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__,
       _mesa_lookup_enum_by_nr(target),
       _mesa_get_format_name(format),
       first_level, last_level, mt);

   mt->target = target_to_target(target);
   mt->format = format;
   mt->first_level = first_level;
   mt->last_level = last_level;
   mt->logical_width0 = width0;
   mt->logical_height0 = height0;
   mt->logical_depth0 = depth0;
   mt->mcs_state = INTEL_MCS_STATE_NONE;

   /* The cpp is bytes per (1, blockheight)-sized block for compressed
    * textures.  This is why you'll see divides by blockheight all over.
    */
   unsigned bw, bh;
   _mesa_get_format_block_size(format, &bw, &bh);
   assert(_mesa_get_format_bytes(mt->format) % bw == 0);
   mt->cpp = _mesa_get_format_bytes(mt->format) / bw;

   mt->num_samples = num_samples;
   mt->compressed = _mesa_is_format_compressed(format);
   mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE;
   mt->refcount = 1;

   if (num_samples > 1) {
      /* Adjust width/height/depth for MSAA */
      mt->msaa_layout = compute_msaa_layout(brw, format, mt->target);
      if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
         /* In the Sandy Bridge PRM, volume 4, part 1, page 31, it says:
          *
          *     "Any of the other messages (sample*, LOD, load4) used with a
          *      (4x) multisampled surface will in-effect sample a surface with
          *      double the height and width as that indicated in the surface
          *      state. Each pixel position on the original-sized surface is
          *      replaced with a 2x2 of samples with the following arrangement:
          *
          *         sample 0 sample 2
          *         sample 1 sample 3"
          *
          * Thus, when sampling from a multisampled texture, it behaves as
          * though the layout in memory for (x,y,sample) is:
          *
          *      (0,0,0) (0,0,2)   (1,0,0) (1,0,2)
          *      (0,0,1) (0,0,3)   (1,0,1) (1,0,3)
          *
          *      (0,1,0) (0,1,2)   (1,1,0) (1,1,2)
          *      (0,1,1) (0,1,3)   (1,1,1) (1,1,3)
          *
          * However, the actual layout of multisampled data in memory is:
          *
          *      (0,0,0) (1,0,0)   (0,0,1) (1,0,1)
          *      (0,1,0) (1,1,0)   (0,1,1) (1,1,1)
          *
          *      (0,0,2) (1,0,2)   (0,0,3) (1,0,3)
          *      (0,1,2) (1,1,2)   (0,1,3) (1,1,3)
          *
          * This pattern repeats for each 2x2 pixel block.
          *
          * As a result, when calculating the size of our 4-sample buffer for
          * an odd width or height, we have to align before scaling up because
          * sample 3 is in that bottom right 2x2 block.
          */
         switch (num_samples) {
         case 4:
            width0 = ALIGN(width0, 2) * 2;
            height0 = ALIGN(height0, 2) * 2;
            break;
         case 8:
            width0 = ALIGN(width0, 2) * 4;
            height0 = ALIGN(height0, 2) * 2;
            break;
         default:
            /* num_samples should already have been quantized to 0, 1, 4, or
             * 8.
             */
            assert(false);
         }
      } else {
         /* Non-interleaved */
         depth0 *= num_samples;
      }
   }
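
   /* Worked example for the IMS case: a 5x3 4x surface becomes
    * ALIGN(5, 2) * 2 = 12 wide and ALIGN(3, 2) * 2 = 8 tall, so that
    * sample 3 of the bottom-right pixel's 2x2 block stays inside the
    * allocation.
    */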

   /* array_spacing_lod0 is only used for non-IMS MSAA surfaces.  TODO: can we
    * use it elsewhere?
    */
   switch (mt->msaa_layout) {
   case INTEL_MSAA_LAYOUT_NONE:
   case INTEL_MSAA_LAYOUT_IMS:
      mt->array_spacing_lod0 = false;
      break;
   case INTEL_MSAA_LAYOUT_UMS:
   case INTEL_MSAA_LAYOUT_CMS:
      mt->array_spacing_lod0 = true;
      break;
   }

   if (target == GL_TEXTURE_CUBE_MAP) {
      assert(depth0 == 1);
      depth0 = 6;
   }

   mt->physical_width0 = width0;
   mt->physical_height0 = height0;
   mt->physical_depth0 = depth0;

   if (!for_bo &&
       _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
       (brw->must_use_separate_stencil ||
        (brw->has_separate_stencil && brw_is_hiz_depth_format(brw, format)))) {
      mt->stencil_mt = intel_miptree_create(brw,
                                            mt->target,
                                            MESA_FORMAT_S8,
                                            mt->first_level,
                                            mt->last_level,
                                            mt->logical_width0,
                                            mt->logical_height0,
                                            mt->logical_depth0,
                                            true,
                                            num_samples,
                                            INTEL_MIPTREE_TILING_ANY);
      if (!mt->stencil_mt) {
         intel_miptree_release(&mt);
         return NULL;
      }

      /* Fix up the Z miptree format for how we're splitting out separate
       * stencil.  Gen7 expects there to be no stencil bits in its depth
       * buffer.
       */
      if (mt->format == MESA_FORMAT_S8_Z24) {
         mt->format = MESA_FORMAT_X8_Z24;
      } else if (mt->format == MESA_FORMAT_Z32_FLOAT_X24S8) {
         mt->format = MESA_FORMAT_Z32_FLOAT;
         mt->cpp = 4;
      } else {
         _mesa_problem(NULL, "Unknown format %s in separate stencil mt\n",
                       _mesa_get_format_name(mt->format));
      }
   }

   brw_miptree_layout(brw, mt);

   return mt;
}

/**
 * \brief Helper function for intel_miptree_create().
 */
static uint32_t
intel_miptree_choose_tiling(struct brw_context *brw,
                            gl_format format,
                            uint32_t width0,
                            uint32_t num_samples,
                            enum intel_miptree_tiling_mode requested,
                            struct intel_mipmap_tree *mt)
{
   if (format == MESA_FORMAT_S8) {
      /* The stencil buffer is W tiled. However, we request from the kernel a
       * non-tiled buffer because the GTT is incapable of W fencing.
       */
      return I915_TILING_NONE;
   }

   /* Some usages may want only one type of tiling, like depth miptrees (Y
    * tiled), or temporary BOs for uploading data once (linear).
    */
   switch (requested) {
   case INTEL_MIPTREE_TILING_ANY:
      break;
   case INTEL_MIPTREE_TILING_Y:
      return I915_TILING_Y;
   case INTEL_MIPTREE_TILING_NONE:
      return I915_TILING_NONE;
   }

   if (num_samples > 1) {
      /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
       * Surface"):
       *
       *   [DevSNB+]: For multi-sample render targets, this field must be
       *   1. MSRTs can only be tiled.
       *
       * Our usual reason for preferring X tiling (fast blits using the
       * blitting engine) doesn't apply to MSAA, since we'll generally be
       * downsampling or upsampling when blitting between the MSAA buffer
       * and another buffer, and the blitting engine doesn't support that.
       * So use Y tiling, since it makes better use of the cache.
       */
      return I915_TILING_Y;
   }

   GLenum base_format = _mesa_get_format_base_format(format);
   if (base_format == GL_DEPTH_COMPONENT ||
       base_format == GL_DEPTH_STENCIL_EXT)
      return I915_TILING_Y;

   int minimum_pitch = mt->total_width * mt->cpp;

   /* If the width is much smaller than a tile, don't bother tiling. */
   if (minimum_pitch < 64)
      return I915_TILING_NONE;

   if (ALIGN(minimum_pitch, 512) >= 32768) {
      perf_debug("%dx%d miptree too large to blit, falling back to untiled",
                 mt->total_width, mt->total_height);
      return I915_TILING_NONE;
   }

   /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */
   if (brw->gen < 6)
      return I915_TILING_X;

   return I915_TILING_Y | I915_TILING_X;
}

struct intel_mipmap_tree *
intel_miptree_create(struct brw_context *brw,
                     GLenum target,
                     gl_format format,
                     GLuint first_level,
                     GLuint last_level,
                     GLuint width0,
                     GLuint height0,
                     GLuint depth0,
                     bool expect_accelerated_upload,
                     GLuint num_samples,
                     enum intel_miptree_tiling_mode requested_tiling)
{
   struct intel_mipmap_tree *mt;
   gl_format tex_format = format;
   gl_format etc_format = MESA_FORMAT_NONE;
   GLuint total_width, total_height;

   if (!brw->is_baytrail) {
      switch (format) {
      case MESA_FORMAT_ETC1_RGB8:
         format = MESA_FORMAT_RGBX8888_REV;
         break;
      case MESA_FORMAT_ETC2_RGB8:
         format = MESA_FORMAT_RGBX8888_REV;
         break;
      case MESA_FORMAT_ETC2_SRGB8:
      case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
      case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
         format = MESA_FORMAT_SARGB8;
         break;
      case MESA_FORMAT_ETC2_RGBA8_EAC:
      case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
         format = MESA_FORMAT_RGBA8888_REV;
         break;
      case MESA_FORMAT_ETC2_R11_EAC:
         format = MESA_FORMAT_R16;
         break;
      case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
         format = MESA_FORMAT_SIGNED_R16;
         break;
      case MESA_FORMAT_ETC2_RG11_EAC:
         format = MESA_FORMAT_GR1616;
         break;
      case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
         format = MESA_FORMAT_SIGNED_GR1616;
         break;
      default:
         /* Non ETC1 / ETC2 format */
         break;
      }
   }

   etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;

   mt = intel_miptree_create_layout(brw, target, format,
                                    first_level, last_level, width0,
                                    height0, depth0,
                                    false, num_samples);
   /*
    * pitch == 0 || height == 0 indicates the null texture
    */
   if (!mt || !mt->total_width || !mt->total_height) {
      intel_miptree_release(&mt);
      return NULL;
   }

   total_width = mt->total_width;
   total_height = mt->total_height;

   if (format == MESA_FORMAT_S8) {
      /* Align to size of W tile, 64x64. */
      total_width = ALIGN(total_width, 64);
      total_height = ALIGN(total_height, 64);
   }

   uint32_t tiling = intel_miptree_choose_tiling(brw, format, width0,
                                                 num_samples, requested_tiling,
                                                 mt);
   bool y_or_x = tiling == (I915_TILING_Y | I915_TILING_X);

   mt->etc_format = etc_format;
   mt->region = intel_region_alloc(brw->intelScreen,
                                   y_or_x ? I915_TILING_Y : tiling,
                                   mt->cpp,
                                   total_width,
                                   total_height,
                                   expect_accelerated_upload);

   /* If the region is too large to fit in the aperture, we need to use the
    * BLT engine to support it.  The BLT paths can't currently handle
    * Y-tiling, so we need to fall back to X.
    */
   if (y_or_x && mt->region->bo->size >= brw->max_gtt_map_object_size) {
      perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n",
                 mt->total_width, mt->total_height);
      intel_region_release(&mt->region);

      mt->region = intel_region_alloc(brw->intelScreen,
                                      I915_TILING_X,
                                      mt->cpp,
                                      total_width,
                                      total_height,
                                      expect_accelerated_upload);
   }

   mt->offset = 0;

   if (!mt->region) {
      intel_miptree_release(&mt);
      return NULL;
   }

   /* If this miptree is capable of supporting fast color clears, set
    * mcs_state appropriately to ensure that fast clears will occur.
    * Allocation of the MCS miptree will be deferred until the first fast
    * clear actually occurs.
    */
   if (intel_is_non_msrt_mcs_buffer_supported(brw, mt))
      mt->mcs_state = INTEL_MCS_STATE_RESOLVED;

   return mt;
}

struct intel_mipmap_tree *
intel_miptree_create_for_bo(struct brw_context *brw,
                            drm_intel_bo *bo,
                            gl_format format,
                            uint32_t offset,
                            uint32_t width,
                            uint32_t height,
                            int pitch,
                            uint32_t tiling)
{
   struct intel_mipmap_tree *mt;

   struct intel_region *region = calloc(1, sizeof(*region));
   if (!region)
      return NULL;

   /* Nothing will be able to use this miptree with the BO if the offset isn't
    * aligned.
    */
   if (tiling != I915_TILING_NONE)
      assert(offset % 4096 == 0);

   /* miptrees can't handle negative pitch.  If you need flipping of images,
    * that's outside of the scope of the mt.
    */
   assert(pitch >= 0);

   mt = intel_miptree_create_layout(brw, GL_TEXTURE_2D, format,
                                    0, 0,
                                    width, height, 1,
                                    true, 0 /* num_samples */);
   if (!mt)
      return mt;

   region->cpp = mt->cpp;
   region->width = width;
   region->height = height;
   region->pitch = pitch;
   region->refcount = 1;
   drm_intel_bo_reference(bo);
   region->bo = bo;
   region->tiling = tiling;

   mt->region = region;
   mt->offset = offset;

   return mt;
}


/**
 * For a singlesample DRI2 buffer, this simply wraps the given region with a
 * miptree.
 *
 * For a multisample DRI2 buffer, this wraps the given region with
 * a singlesample miptree, then creates a multisample miptree into which the
 * singlesample miptree is embedded as a child.
 */
struct intel_mipmap_tree*
intel_miptree_create_for_dri2_buffer(struct brw_context *brw,
                                     unsigned dri_attachment,
                                     gl_format format,
                                     uint32_t num_samples,
                                     struct intel_region *region)
{
   struct intel_mipmap_tree *singlesample_mt = NULL;
   struct intel_mipmap_tree *multisample_mt = NULL;

   /* Only the front and back buffers, which are color buffers, are shared
    * through DRI2.
    */
   assert(dri_attachment == __DRI_BUFFER_BACK_LEFT ||
          dri_attachment == __DRI_BUFFER_FRONT_LEFT ||
          dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT);
   assert(_mesa_get_format_base_format(format) == GL_RGB ||
          _mesa_get_format_base_format(format) == GL_RGBA);

   singlesample_mt = intel_miptree_create_for_bo(brw,
                                                 region->bo,
                                                 format,
                                                 0,
                                                 region->width,
                                                 region->height,
                                                 region->pitch,
                                                 region->tiling);
   if (!singlesample_mt)
      return NULL;
   singlesample_mt->region->name = region->name;

   /* If this miptree is capable of supporting fast color clears, set
    * mcs_state appropriately to ensure that fast clears will occur.
    * Allocation of the MCS miptree will be deferred until the first fast
    * clear actually occurs.
    */
   if (intel_is_non_msrt_mcs_buffer_supported(brw, singlesample_mt))
      singlesample_mt->mcs_state = INTEL_MCS_STATE_RESOLVED;

   if (num_samples == 0)
      return singlesample_mt;

   multisample_mt = intel_miptree_create_for_renderbuffer(brw,
                                                          format,
                                                          region->width,
                                                          region->height,
                                                          num_samples);
   if (!multisample_mt) {
      intel_miptree_release(&singlesample_mt);
      return NULL;
   }

   multisample_mt->singlesample_mt = singlesample_mt;
   multisample_mt->need_downsample = false;

   if (brw->is_front_buffer_rendering &&
       (dri_attachment == __DRI_BUFFER_FRONT_LEFT ||
        dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT)) {
      intel_miptree_upsample(brw, multisample_mt);
   }

   return multisample_mt;
}

struct intel_mipmap_tree*
intel_miptree_create_for_renderbuffer(struct brw_context *brw,
                                      gl_format format,
                                      uint32_t width,
                                      uint32_t height,
                                      uint32_t num_samples)
{
   struct intel_mipmap_tree *mt;
   uint32_t depth = 1;
   bool ok;

   mt = intel_miptree_create(brw, GL_TEXTURE_2D, format, 0, 0,
                             width, height, depth, true, num_samples,
                             INTEL_MIPTREE_TILING_ANY);
   if (!mt)
      goto fail;

   if (brw_is_hiz_depth_format(brw, format)) {
      ok = intel_miptree_alloc_hiz(brw, mt);
      if (!ok)
         goto fail;
   }

   if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
      ok = intel_miptree_alloc_mcs(brw, mt, num_samples);
      if (!ok)
         goto fail;
   }

   return mt;

fail:
   intel_miptree_release(&mt);
   return NULL;
}

void
intel_miptree_reference(struct intel_mipmap_tree **dst,
                        struct intel_mipmap_tree *src)
{
   if (*dst == src)
      return;

   intel_miptree_release(dst);

   if (src) {
      src->refcount++;
      DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount);
   }

   *dst = src;
}


void
intel_miptree_release(struct intel_mipmap_tree **mt)
{
   if (!*mt)
      return;

   DBG("%s %p refcount will be %d\n", __FUNCTION__, *mt, (*mt)->refcount - 1);
   if (--(*mt)->refcount <= 0) {
      GLuint i;

      DBG("%s deleting %p\n", __FUNCTION__, *mt);

      intel_region_release(&((*mt)->region));
      intel_miptree_release(&(*mt)->stencil_mt);
      intel_miptree_release(&(*mt)->hiz_mt);
      intel_miptree_release(&(*mt)->mcs_mt);
      intel_miptree_release(&(*mt)->singlesample_mt);
      intel_resolve_map_clear(&(*mt)->hiz_map);

      for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
         free((*mt)->level[i].slice);
      }

      free(*mt);
   }
   *mt = NULL;
}

void
intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
                                       int *width, int *height, int *depth)
{
   switch (image->TexObject->Target) {
   case GL_TEXTURE_1D_ARRAY:
      *width = image->Width;
      *height = 1;
      *depth = image->Height;
      break;
   default:
      *width = image->Width;
      *height = image->Height;
      *depth = image->Depth;
      break;
   }
}

/**
 * Can the image be pulled into a unified mipmap tree?  This mirrors
 * the completeness test in a lot of ways.
 *
 * Not sure whether I want to pass gl_texture_image here.
 */
bool
intel_miptree_match_image(struct intel_mipmap_tree *mt,
                          struct gl_texture_image *image)
{
   struct intel_texture_image *intelImage = intel_texture_image(image);
   GLuint level = intelImage->base.Base.Level;
   int width, height, depth;

   /* glTexImage* choose the texture object based on the target passed in, and
    * objects can't change targets over their lifetimes, so this should be
    * true.
    */
   assert(target_to_target(image->TexObject->Target) == mt->target);

   gl_format mt_format = mt->format;
   if (mt->format == MESA_FORMAT_X8_Z24 && mt->stencil_mt)
      mt_format = MESA_FORMAT_S8_Z24;
   if (mt->format == MESA_FORMAT_Z32_FLOAT && mt->stencil_mt)
      mt_format = MESA_FORMAT_Z32_FLOAT_X24S8;
   if (mt->etc_format != MESA_FORMAT_NONE)
      mt_format = mt->etc_format;

   if (image->TexFormat != mt_format)
      return false;

   intel_miptree_get_dimensions_for_image(image, &width, &height, &depth);

   if (mt->target == GL_TEXTURE_CUBE_MAP)
      depth = 6;

   /* Test image dimensions against the base level image adjusted for
    * minification.  This will also catch images not present in the
    * tree, changed targets, etc.
    */
   if (mt->target == GL_TEXTURE_2D_MULTISAMPLE ||
       mt->target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
      /* nonzero level here is always bogus */
      assert(level == 0);

      if (width != mt->logical_width0 ||
          height != mt->logical_height0 ||
          depth != mt->logical_depth0) {
         return false;
      }
   }
   else {
      /* all normal textures, renderbuffers, etc */
      if (width != mt->level[level].width ||
          height != mt->level[level].height ||
          depth != mt->level[level].depth) {
         return false;
      }
   }

   if (image->NumSamples != mt->num_samples)
      return false;

   return true;
}


void
intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
                             GLuint level,
                             GLuint x, GLuint y,
                             GLuint w, GLuint h, GLuint d)
{
   mt->level[level].width = w;
   mt->level[level].height = h;
   mt->level[level].depth = d;
   mt->level[level].level_x = x;
   mt->level[level].level_y = y;

   DBG("%s level %d size: %d,%d,%d offset %d,%d\n", __FUNCTION__,
       level, w, h, d, x, y);

   assert(mt->level[level].slice == NULL);

   mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice));
   mt->level[level].slice[0].x_offset = mt->level[level].level_x;
   mt->level[level].slice[0].y_offset = mt->level[level].level_y;
}


void
intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
                               GLuint level, GLuint img,
                               GLuint x, GLuint y)
{
   if (img == 0 && level == 0)
      assert(x == 0 && y == 0);

   assert(img < mt->level[level].depth);

   mt->level[level].slice[img].x_offset = mt->level[level].level_x + x;
   mt->level[level].slice[img].y_offset = mt->level[level].level_y + y;

   DBG("%s level %d img %d pos %d,%d\n",
       __FUNCTION__, level, img,
       mt->level[level].slice[img].x_offset,
       mt->level[level].slice[img].y_offset);
}

void
intel_miptree_get_image_offset(struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               GLuint *x, GLuint *y)
{
   assert(slice < mt->level[level].depth);

   *x = mt->level[level].slice[slice].x_offset;
   *y = mt->level[level].slice[slice].y_offset;
}

/**
 * Rendering with tiled buffers requires that the base address of the buffer
 * be aligned to a page boundary.  For renderbuffers, and sometimes with
 * textures, we may want the surface to point at a texture image level that
 * isn't at a page boundary.
 *
 * This function returns an appropriately-aligned base offset
 * according to the tiling restrictions, plus any required x/y offset
 * from there.
 */
uint32_t
intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               uint32_t *tile_x,
                               uint32_t *tile_y)
{
   struct intel_region *region = mt->region;
   uint32_t x, y;
   uint32_t mask_x, mask_y;

   intel_region_get_tile_masks(region, &mask_x, &mask_y, false);
   intel_miptree_get_image_offset(mt, level, slice, &x, &y);

   *tile_x = x & mask_x;
   *tile_y = y & mask_y;

   return intel_region_get_aligned_offset(region, x & ~mask_x, y & ~mask_y,
                                          false);
}
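
/* Illustrative numbers (the mask values come from
 * intel_region_get_tile_masks() and are assumed here): if a Y-tiled
 * 32 bpp region gives mask_x = mask_y = 31, an image at (100, 70) yields
 * *tile_x = 4 and *tile_y = 6, with the base offset computed from the
 * tile-aligned coordinate (96, 64).
 */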

static void
intel_miptree_copy_slice_sw(struct brw_context *brw,
                            struct intel_mipmap_tree *dst_mt,
                            struct intel_mipmap_tree *src_mt,
                            int level,
                            int slice,
                            int width,
                            int height)
{
   void *src, *dst;
   int src_stride, dst_stride;
   int cpp = dst_mt->cpp;

   intel_miptree_map(brw, src_mt,
                     level, slice,
                     0, 0,
                     width, height,
                     GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
                     &src, &src_stride);

   intel_miptree_map(brw, dst_mt,
                     level, slice,
                     0, 0,
                     width, height,
                     GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
                     BRW_MAP_DIRECT_BIT,
                     &dst, &dst_stride);

   DBG("sw blit %s mt %p %p/%d -> %s mt %p %p/%d (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src, src_stride,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst, dst_stride,
       width, height);

   int row_size = cpp * width;
   if (src_stride == row_size &&
       dst_stride == row_size) {
      memcpy(dst, src, row_size * height);
   } else {
      for (int i = 0; i < height; i++) {
         memcpy(dst, src, row_size);
         dst += dst_stride;
         src += src_stride;
      }
   }

   intel_miptree_unmap(brw, dst_mt, level, slice);
   intel_miptree_unmap(brw, src_mt, level, slice);

   /* Don't forget to copy the stencil data over, too.  We could have skipped
    * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
    * shuffling the two data sources in/out of temporary storage instead of
    * the direct mapping we get this way.
    */
   if (dst_mt->stencil_mt) {
      assert(src_mt->stencil_mt);
      intel_miptree_copy_slice_sw(brw, dst_mt->stencil_mt, src_mt->stencil_mt,
                                  level, slice, width, height);
   }
}

static void
intel_miptree_copy_slice(struct brw_context *brw,
                         struct intel_mipmap_tree *dst_mt,
                         struct intel_mipmap_tree *src_mt,
                         int level,
                         int face,
                         int depth)
{
   gl_format format = src_mt->format;
   uint32_t width = src_mt->level[level].width;
   uint32_t height = src_mt->level[level].height;
   int slice;

   if (face > 0)
      slice = face;
   else
      slice = depth;

   assert(depth < src_mt->level[level].depth);
   assert(src_mt->format == dst_mt->format);

   if (dst_mt->compressed) {
      height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h;
      width = ALIGN(width, dst_mt->align_w);
   }

   /* If it's a packed depth/stencil buffer with separate stencil, the blit
    * below won't apply since we can't do the depth's Y tiling or the
    * stencil's W tiling in the blitter.
    */
   if (src_mt->stencil_mt) {
      intel_miptree_copy_slice_sw(brw,
                                  dst_mt, src_mt,
                                  level, slice,
                                  width, height);
      return;
   }

   uint32_t dst_x, dst_y, src_x, src_y;
   intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y);
   intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y);

   DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src_x, src_y, src_mt->region->pitch,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst_x, dst_y, dst_mt->region->pitch,
       width, height);

   if (!intel_miptree_blit(brw,
                           src_mt, level, slice, 0, 0, false,
                           dst_mt, level, slice, 0, 0, false,
                           width, height, GL_COPY)) {
      perf_debug("miptree validate blit for %s failed\n",
                 _mesa_get_format_name(format));

      intel_miptree_copy_slice_sw(brw, dst_mt, src_mt, level, slice,
                                  width, height);
   }
}

/**
 * Copies the image's current data to the given miptree, and associates that
 * miptree with the image.
 *
 * If \c invalidate is true, then the actual image data does not need to be
 * copied, but the image still needs to be associated to the new miptree (this
 * is set to true if we're about to clear the image).
 */
void
intel_miptree_copy_teximage(struct brw_context *brw,
                            struct intel_texture_image *intelImage,
                            struct intel_mipmap_tree *dst_mt,
                            bool invalidate)
{
   struct intel_mipmap_tree *src_mt = intelImage->mt;
   struct intel_texture_object *intel_obj =
      intel_texture_object(intelImage->base.Base.TexObject);
   int level = intelImage->base.Base.Level;
   int face = intelImage->base.Base.Face;
   GLuint depth = intelImage->base.Base.Depth;

   if (!invalidate) {
      for (int slice = 0; slice < depth; slice++) {
         intel_miptree_copy_slice(brw, dst_mt, src_mt, level, face, slice);
      }
   }

   intel_miptree_reference(&intelImage->mt, dst_mt);
   intel_obj->needs_validate = true;
}

bool
intel_miptree_alloc_mcs(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
                        GLuint num_samples)
{
   assert(brw->gen >= 7); /* MCS only used on Gen7+ */
   assert(mt->mcs_mt == NULL);

   /* Choose the correct format for the MCS buffer.  All that really matters
    * is that we allocate the right buffer size, since we'll always be
    * accessing this miptree using MCS-specific hardware mechanisms, which
    * infer the correct format based on num_samples.
    */
   gl_format format;
   switch (num_samples) {
   case 4:
      /* 8 bits/pixel are required for MCS data when using 4x MSAA (2 bits for
       * each sample).
       */
      format = MESA_FORMAT_R8;
      break;
   case 8:
      /* 32 bits/pixel are required for MCS data when using 8x MSAA (3 bits
       * for each sample, plus 8 padding bits).
       */
      format = MESA_FORMAT_R_UINT32;
      break;
   default:
      assert(!"Unrecognized sample count in intel_miptree_alloc_mcs");
      return false;
   }

   /* From the Ivy Bridge PRM, Vol4 Part1 p76, "MCS Base Address":
    *
    *     "The MCS surface must be stored as Tile Y."
    */
   mt->mcs_state = INTEL_MCS_STATE_MSAA;
   mt->mcs_mt = intel_miptree_create(brw,
                                     mt->target,
                                     format,
                                     mt->first_level,
                                     mt->last_level,
                                     mt->logical_width0,
                                     mt->logical_height0,
                                     mt->logical_depth0,
                                     true,
                                     0 /* num_samples */,
                                     INTEL_MIPTREE_TILING_Y);

   /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
    *
    *     When MCS buffer is enabled and bound to MSRT, it is required that it
    *     is cleared prior to any rendering.
    *
    * Since we don't use the MCS buffer for any purpose other than rendering,
    * it makes sense to just clear it immediately upon allocation.
    *
    * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
    */
   void *data = intel_miptree_map_raw(brw, mt->mcs_mt);
   memset(data, 0xff, mt->mcs_mt->region->bo->size);
   intel_miptree_unmap_raw(brw, mt->mcs_mt);

   return mt->mcs_mt;
}


bool
intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt)
{
   assert(mt->mcs_mt == NULL);

   /* The format of the MCS buffer is opaque to the driver; all that matters
    * is that we get its size and pitch right.  We'll pretend that the format
    * is R32.  Since an MCS tile covers 128 blocks horizontally, and a Y-tiled
    * R32 buffer is 32 pixels across, we'll need to scale the width down by
    * the block width and then a further factor of 4.  Since an MCS tile
    * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high,
    * we'll need to scale the height down by the block height and then a
    * further factor of 8.
    */
   const gl_format format = MESA_FORMAT_R_UINT32;
   unsigned block_width_px;
   unsigned block_height;
   intel_get_non_msrt_mcs_alignment(brw, mt, &block_width_px, &block_height);
   unsigned width_divisor = block_width_px * 4;
   unsigned height_divisor = block_height * 8;
   unsigned mcs_width =
      ALIGN(mt->logical_width0, width_divisor) / width_divisor;
   unsigned mcs_height =
      ALIGN(mt->logical_height0, height_divisor) / height_divisor;
   assert(mt->logical_depth0 == 1);
   mt->mcs_mt = intel_miptree_create(brw,
                                     mt->target,
                                     format,
                                     mt->first_level,
                                     mt->last_level,
                                     mcs_width,
                                     mcs_height,
                                     mt->logical_depth0,
                                     true,
                                     0 /* num_samples */,
                                     INTEL_MIPTREE_TILING_Y);

   return mt->mcs_mt;
}
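
/* Worked example: for a hypothetical 1920x1080 32 bpp X-tiled color
 * buffer, the alignment helper gives 16x2 blocks, so width_divisor = 64
 * and height_divisor = 16, and the MCS miptree is allocated as
 * ALIGN(1920, 64) / 64 = 30 by ALIGN(1080, 16) / 16 = 68 "pixels" of
 * MESA_FORMAT_R_UINT32.
 */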


/**
 * Helper for intel_miptree_alloc_hiz() that sets
 * \c mt->level[level].slice[layer].has_hiz. Return true if and only if
 * \c has_hiz was set.
 */
static bool
intel_miptree_slice_enable_hiz(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               uint32_t level,
                               uint32_t layer)
{
   assert(mt->hiz_mt);

   if (brw->is_haswell) {
      const struct intel_mipmap_level *l = &mt->level[level];

      /* Disable HiZ for LOD > 0 unless the width is 8 aligned
       * and the height is 4 aligned.  This allows our HiZ support
       * to fulfill Haswell restrictions for HiZ ops.  For LOD == 0,
       * we can grow the width & height to allow the HiZ op to
       * force the proper size alignments.
       */
      if (level > 0 && ((l->width & 7) || (l->height & 3))) {
         return false;
      }
   }

   mt->level[level].slice[layer].has_hiz = true;
   return true;
}


bool
intel_miptree_alloc_hiz(struct brw_context *brw,
                        struct intel_mipmap_tree *mt)
{
   assert(mt->hiz_mt == NULL);
   mt->hiz_mt = intel_miptree_create(brw,
                                     mt->target,
                                     mt->format,
                                     mt->first_level,
                                     mt->last_level,
                                     mt->logical_width0,
                                     mt->logical_height0,
                                     mt->logical_depth0,
                                     true,
                                     mt->num_samples,
                                     INTEL_MIPTREE_TILING_ANY);

   if (!mt->hiz_mt)
      return false;

   /* Mark that all slices need a HiZ resolve. */
   struct intel_resolve_map *head = &mt->hiz_map;
   for (int level = mt->first_level; level <= mt->last_level; ++level) {
      for (int layer = 0; layer < mt->level[level].depth; ++layer) {
         if (!intel_miptree_slice_enable_hiz(brw, mt, level, layer))
            continue;

         head->next = malloc(sizeof(*head->next));
         head->next->prev = head;
         head->next->next = NULL;
         head = head->next;

         head->level = level;
         head->layer = layer;
         head->need = GEN6_HIZ_OP_HIZ_RESOLVE;
      }
   }

   return true;
}

/**
 * Does the miptree slice have hiz enabled?
 */
bool
intel_miptree_slice_has_hiz(struct intel_mipmap_tree *mt,
                            uint32_t level,
                            uint32_t layer)
{
   intel_miptree_check_level_layer(mt, level, layer);
   return mt->level[level].slice[layer].has_hiz;
}

void
intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,
                                          uint32_t level,
                                          uint32_t layer)
{
   if (!intel_miptree_slice_has_hiz(mt, level, layer))
      return;

   intel_resolve_map_set(&mt->hiz_map,
                         level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
}


void
intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt,
                                            uint32_t level,
                                            uint32_t layer)
{
   if (!intel_miptree_slice_has_hiz(mt, level, layer))
      return;

   intel_resolve_map_set(&mt->hiz_map,
                         level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
}

void
intel_miptree_set_all_slices_need_depth_resolve(struct intel_mipmap_tree *mt,
                                                uint32_t level)
{
   uint32_t layer;
   uint32_t end_layer = mt->level[level].depth;

   for (layer = 0; layer < end_layer; layer++) {
      intel_miptree_slice_set_needs_depth_resolve(mt, level, layer);
   }
}
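
/* Hypothetical usage sketch: a caller that has just written every layer of
 * a level through the HiZ path (a full-surface depth clear, say) could flag
 * the whole level with
 *
 *    intel_miptree_set_all_slices_need_depth_resolve(mt, level);
 *
 * so that a later intel_miptree_all_slices_resolve_depth() rewrites the
 * real depth buffer before anything samples it.
 */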

static bool
intel_miptree_slice_resolve(struct brw_context *brw,
                            struct intel_mipmap_tree *mt,
                            uint32_t level,
                            uint32_t layer,
                            enum gen6_hiz_op need)
{
   intel_miptree_check_level_layer(mt, level, layer);

   struct intel_resolve_map *item =
      intel_resolve_map_get(&mt->hiz_map, level, layer);

   if (!item || item->need != need)
      return false;

   intel_hiz_exec(brw, mt, level, layer, need);
   intel_resolve_map_remove(item);
   return true;
}

bool
intel_miptree_slice_resolve_hiz(struct brw_context *brw,
                                struct intel_mipmap_tree *mt,
                                uint32_t level,
                                uint32_t layer)
{
   return intel_miptree_slice_resolve(brw, mt, level, layer,
                                      GEN6_HIZ_OP_HIZ_RESOLVE);
}

bool
intel_miptree_slice_resolve_depth(struct brw_context *brw,
                                  struct intel_mipmap_tree *mt,
                                  uint32_t level,
                                  uint32_t layer)
{
   return intel_miptree_slice_resolve(brw, mt, level, layer,
                                      GEN6_HIZ_OP_DEPTH_RESOLVE);
}

static bool
intel_miptree_all_slices_resolve(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 enum gen6_hiz_op need)
{
   bool did_resolve = false;
   struct intel_resolve_map *i, *next;

   for (i = mt->hiz_map.next; i; i = next) {
      next = i->next;
      if (i->need != need)
         continue;

      intel_hiz_exec(brw, mt, i->level, i->layer, need);
      intel_resolve_map_remove(i);
      did_resolve = true;
   }

   return did_resolve;
}

bool
intel_miptree_all_slices_resolve_hiz(struct brw_context *brw,
                                     struct intel_mipmap_tree *mt)
{
   return intel_miptree_all_slices_resolve(brw, mt,
                                           GEN6_HIZ_OP_HIZ_RESOLVE);
}

bool
intel_miptree_all_slices_resolve_depth(struct brw_context *brw,
                                       struct intel_mipmap_tree *mt)
{
   return intel_miptree_all_slices_resolve(brw, mt,
                                           GEN6_HIZ_OP_DEPTH_RESOLVE);
}


void
intel_miptree_resolve_color(struct brw_context *brw,
                            struct intel_mipmap_tree *mt)
{
   switch (mt->mcs_state) {
   case INTEL_MCS_STATE_NONE:
   case INTEL_MCS_STATE_MSAA:
   case INTEL_MCS_STATE_RESOLVED:
      /* No resolve needed */
      break;
   case INTEL_MCS_STATE_UNRESOLVED:
   case INTEL_MCS_STATE_CLEAR:
      brw_blorp_resolve_color(brw, mt);
      break;
   }
}


/**
 * Make it possible to share the region backing the given miptree with another
 * process or another miptree.
 *
 * Fast color clears are unsafe with shared buffers, so we need to resolve and
 * then discard the MCS buffer, if present.  We also set the mcs_state to
 * INTEL_MCS_STATE_NONE to ensure that no MCS buffer gets allocated in the
 * future.
 */
void
intel_miptree_make_shareable(struct brw_context *brw,
                             struct intel_mipmap_tree *mt)
{
   /* MCS buffers are also used for multisample buffers, but we can't resolve
    * away a multisample MCS buffer because it's an integral part of how the
    * pixel data is stored.  Fortunately this code path should never be
    * reached for multisample buffers.
    */
   assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE);

   if (mt->mcs_mt) {
      intel_miptree_resolve_color(brw, mt);
      intel_miptree_release(&mt->mcs_mt);
      mt->mcs_state = INTEL_MCS_STATE_NONE;
   }
}


/**
 * \brief Get pointer offset into stencil buffer.
 *
 * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
 * must decode the tile's layout in software.
 *
 * See
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
 *     Format.
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
 *
 * Even though the returned offset is always positive, the return type is
 * signed due to
 *    commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
 *    mesa: Fix return type of _mesa_get_format_bytes() (#37351)
 */
static intptr_t
intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
{
   uint32_t tile_size = 4096;
   uint32_t tile_width = 64;
   uint32_t tile_height = 64;
   uint32_t row_size = 64 * stride;

   uint32_t tile_x = x / tile_width;
   uint32_t tile_y = y / tile_height;

   /* The byte's address relative to the tile's base address. */
   uint32_t byte_x = x % tile_width;
   uint32_t byte_y = y % tile_height;

   uintptr_t u = tile_y * row_size
               + tile_x * tile_size
               + 512 * (byte_x / 8)
               +  64 * (byte_y / 8)
               +  32 * ((byte_y / 4) % 2)
               +  16 * ((byte_x / 4) % 2)
               +   8 * ((byte_y / 2) % 2)
               +   4 * ((byte_x / 2) % 2)
               +   2 * (byte_y % 2)
               +   1 * (byte_x % 2);

   if (swizzled) {
      /* adjust for bit6 swizzling */
      if (((byte_x / 8) % 2) == 1) {
         if (((byte_y / 8) % 2) == 0) {
            u += 64;
         } else {
            u -= 64;
         }
      }
   }

   return u;
}
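
/* Worked example: with swizzling disabled, the byte at (x, y) = (3, 5)
 * lands at offset 32 * ((5 / 4) % 2) + 4 * ((3 / 2) % 2) + 2 * (5 % 2)
 * + 1 * (3 % 2) = 32 + 4 + 2 + 1 = 39 within the first tile.
 */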
1535
1536 static void
1537 intel_miptree_updownsample(struct brw_context *brw,
1538 struct intel_mipmap_tree *src,
1539 struct intel_mipmap_tree *dst,
1540 unsigned width,
1541 unsigned height)
1542 {
1543 int src_x0 = 0;
1544 int src_y0 = 0;
1545 int dst_x0 = 0;
1546 int dst_y0 = 0;
1547
1548 brw_blorp_blit_miptrees(brw,
1549 src, 0 /* level */, 0 /* layer */,
1550 dst, 0 /* level */, 0 /* layer */,
1551 src_x0, src_y0,
1552 width, height,
1553 dst_x0, dst_y0,
1554 width, height,
1555 false, false /*mirror x, y*/);
1556
1557 if (src->stencil_mt) {
1558 brw_blorp_blit_miptrees(brw,
1559 src->stencil_mt, 0 /* level */, 0 /* layer */,
1560 dst->stencil_mt, 0 /* level */, 0 /* layer */,
1561 src_x0, src_y0,
1562 width, height,
1563 dst_x0, dst_y0,
1564 width, height,
1565 false, false /*mirror x, y*/);
1566 }
1567 }
1568
1569 static void
1570 assert_is_flat(struct intel_mipmap_tree *mt)
1571 {
1572 assert(mt->target == GL_TEXTURE_2D);
1573 assert(mt->first_level == 0);
1574 assert(mt->last_level == 0);
1575 }
1576
1577 /**
1578 * \brief Downsample from mt to mt->singlesample_mt.
1579 *
1580 * If the miptree needs no downsample, then skip.
1581 */
1582 void
1583 intel_miptree_downsample(struct brw_context *brw,
1584 struct intel_mipmap_tree *mt)
1585 {
1586 /* Only flat, renderbuffer-like miptrees are supported. */
1587 assert_is_flat(mt);
1588
1589 if (!mt->need_downsample)
1590 return;
1591 intel_miptree_updownsample(brw,
1592 mt, mt->singlesample_mt,
1593 mt->logical_width0,
1594 mt->logical_height0);
1595 mt->need_downsample = false;
1596 }
1597
1598 /**
1599 * \brief Upsample from mt->singlesample_mt to mt.
1600 *
1601 * The upsample is done unconditionally.
1602 */
1603 void
1604 intel_miptree_upsample(struct brw_context *brw,
1605 struct intel_mipmap_tree *mt)
1606 {
1607 /* Only flat, renderbuffer-like miptrees are supported. */
1608 assert_is_flat(mt);
1609 assert(!mt->need_downsample);
1610
1611 intel_miptree_updownsample(brw,
1612 mt->singlesample_mt, mt,
1613 mt->logical_width0,
1614 mt->logical_height0);
1615 }
1616
1617 void *
1618 intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt)
1619 {
1620 struct gl_context *ctx = &brw->ctx;
1621 /* CPU accesses to color buffers don't understand fast color clears, so
1622 * resolve any pending fast color clears before we map.
1623 */
1624 intel_miptree_resolve_color(brw, mt);
1625
1626 drm_intel_bo *bo = mt->region->bo;
1627
1628 if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
1629 if (drm_intel_bo_busy(bo)) {
1630 perf_debug("Mapping a busy BO, causing a stall on the GPU.\n");
1631 }
1632 }
1633
1634 intel_flush(ctx);
1635
1636 if (mt->region->tiling != I915_TILING_NONE)
1637 drm_intel_gem_bo_map_gtt(bo);
1638 else
1639 drm_intel_bo_map(bo, true);
1640
1641 return bo->virtual;
1642 }
1643
1644 void
1645 intel_miptree_unmap_raw(struct brw_context *brw,
1646 struct intel_mipmap_tree *mt)
1647 {
1648 drm_intel_bo_unmap(mt->region->bo);
1649 }
1650
1651 static void
1652 intel_miptree_map_gtt(struct brw_context *brw,
1653 struct intel_mipmap_tree *mt,
1654 struct intel_miptree_map *map,
1655 unsigned int level, unsigned int slice)
1656 {
1657 unsigned int bw, bh;
1658 void *base;
1659 unsigned int image_x, image_y;
1660 int x = map->x;
1661 int y = map->y;
1662
1663 /* For compressed formats, the stride is the number of bytes per
1664 * row of blocks. intel_miptree_get_image_offset() already does
1665 * the divide.
1666 */
1667 _mesa_get_format_block_size(mt->format, &bw, &bh);
1668 assert(y % bh == 0);
1669 y /= bh;
1670
1671 base = intel_miptree_map_raw(brw, mt) + mt->offset;
1672
1673 if (base == NULL)
1674 map->ptr = NULL;
1675 else {
1676 /* Note that in the case of cube maps, the caller must have passed the
1677 * slice number referencing the face.
1678 */
1679 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1680 x += image_x;
1681 y += image_y;
1682
1683 map->stride = mt->region->pitch;
1684 map->ptr = base + y * map->stride + x * mt->cpp;
1685 }
1686
1687 DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1688 map->x, map->y, map->w, map->h,
1689 mt, _mesa_get_format_name(mt->format),
1690 x, y, map->ptr, map->stride);
1691 }
1692
1693 static void
1694 intel_miptree_unmap_gtt(struct brw_context *brw,
1695 struct intel_mipmap_tree *mt,
1696 struct intel_miptree_map *map,
1697 unsigned int level,
1698 unsigned int slice)
1699 {
1700 intel_miptree_unmap_raw(brw, mt);
1701 }
1702
1703 static void
1704 intel_miptree_map_blit(struct brw_context *brw,
1705 struct intel_mipmap_tree *mt,
1706 struct intel_miptree_map *map,
1707 unsigned int level, unsigned int slice)
1708 {
1709 map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
1710 0, 0,
1711 map->w, map->h, 1,
1712 false, 0,
1713 INTEL_MIPTREE_TILING_NONE);
1714 if (!map->mt) {
1715 fprintf(stderr, "Failed to allocate blit temporary\n");
1716 goto fail;
1717 }
1718 map->stride = map->mt->region->pitch;
1719
1720 if (!intel_miptree_blit(brw,
1721 mt, level, slice,
1722 map->x, map->y, false,
1723 map->mt, 0, 0,
1724 0, 0, false,
1725 map->w, map->h, GL_COPY)) {
1726 fprintf(stderr, "Failed to blit\n");
1727 goto fail;
1728 }
1729
1730 intel_batchbuffer_flush(brw);
1731 map->ptr = intel_miptree_map_raw(brw, map->mt);
1732
1733 DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1734 map->x, map->y, map->w, map->h,
1735 mt, _mesa_get_format_name(mt->format),
1736 level, slice, map->ptr, map->stride);
1737
1738 return;
1739
1740 fail:
1741 intel_miptree_release(&map->mt);
1742 map->ptr = NULL;
1743 map->stride = 0;
1744 }
1745
1746 static void
1747 intel_miptree_unmap_blit(struct brw_context *brw,
1748 struct intel_mipmap_tree *mt,
1749 struct intel_miptree_map *map,
1750 unsigned int level,
1751 unsigned int slice)
1752 {
1753 struct gl_context *ctx = &brw->ctx;
1754
1755 intel_miptree_unmap_raw(brw, map->mt);
1756
1757 if (map->mode & GL_MAP_WRITE_BIT) {
1758 bool ok = intel_miptree_blit(brw,
1759 map->mt, 0, 0,
1760 0, 0, false,
1761 mt, level, slice,
1762 map->x, map->y, false,
1763 map->w, map->h, GL_COPY);
1764 WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
1765 }
1766
1767 intel_miptree_release(&map->mt);
1768 }
1769
1770 static void
1771 intel_miptree_map_s8(struct brw_context *brw,
1772 struct intel_mipmap_tree *mt,
1773 struct intel_miptree_map *map,
1774 unsigned int level, unsigned int slice)
1775 {
1776 map->stride = map->w;
1777 map->buffer = map->ptr = malloc(map->stride * map->h);
1778 if (!map->buffer)
1779 return;
1780
1781 /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no
1782 * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
1783 * invalidate is set, since we'll be writing the whole rectangle from our
1784 * temporary buffer back out.
1785 */
1786 if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
1787 uint8_t *untiled_s8_map = map->ptr;
1788 uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
1789 unsigned int image_x, image_y;
1790
1791 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1792
1793 for (uint32_t y = 0; y < map->h; y++) {
1794 for (uint32_t x = 0; x < map->w; x++) {
1795 ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
1796 x + image_x + map->x,
1797 y + image_y + map->y,
1798 brw->has_swizzling);
1799 untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
1800 }
1801 }
1802
1803 intel_miptree_unmap_raw(brw, mt);
1804
1805 DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __FUNCTION__,
1806 map->x, map->y, map->w, map->h,
1807 mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
1808 } else {
1809 DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
1810 map->x, map->y, map->w, map->h,
1811 mt, map->ptr, map->stride);
1812 }
1813 }
1814
1815 static void
1816 intel_miptree_unmap_s8(struct brw_context *brw,
1817 struct intel_mipmap_tree *mt,
1818 struct intel_miptree_map *map,
1819 unsigned int level,
1820 unsigned int slice)
1821 {
1822 if (map->mode & GL_MAP_WRITE_BIT) {
1823 unsigned int image_x, image_y;
1824 uint8_t *untiled_s8_map = map->ptr;
1825 uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
1826
1827 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1828
1829 for (uint32_t y = 0; y < map->h; y++) {
1830 for (uint32_t x = 0; x < map->w; x++) {
1831 ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
1832 x + map->x,
1833 y + map->y,
1834 brw->has_swizzling);
1835 tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
1836 }
1837 }
1838
1839 intel_miptree_unmap_raw(brw, mt);
1840 }
1841
1842 free(map->buffer);
1843 }
1844
1845 static void
1846 intel_miptree_map_etc(struct brw_context *brw,
1847 struct intel_mipmap_tree *mt,
1848 struct intel_miptree_map *map,
1849 unsigned int level,
1850 unsigned int slice)
1851 {
1852 assert(mt->etc_format != MESA_FORMAT_NONE);
1853 if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
1854 assert(mt->format == MESA_FORMAT_RGBX8888_REV);
1855 }
1856
1857 assert(map->mode & GL_MAP_WRITE_BIT);
1858 assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
1859
1860 map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
1861 map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
1862 map->w, map->h, 1));
1863 map->ptr = map->buffer;
1864 }
1865
1866 static void
1867 intel_miptree_unmap_etc(struct brw_context *brw,
1868 struct intel_mipmap_tree *mt,
1869 struct intel_miptree_map *map,
1870 unsigned int level,
1871 unsigned int slice)
1872 {
1873 uint32_t image_x;
1874 uint32_t image_y;
1875 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1876
1877 image_x += map->x;
1878 image_y += map->y;
1879
1880 uint8_t *dst = intel_miptree_map_raw(brw, mt)
1881 + image_y * mt->region->pitch
1882 + image_x * mt->region->cpp;
1883
1884 if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
1885 _mesa_etc1_unpack_rgba8888(dst, mt->region->pitch,
1886 map->ptr, map->stride,
1887 map->w, map->h);
1888 else
1889 _mesa_unpack_etc2_format(dst, mt->region->pitch,
1890 map->ptr, map->stride,
1891 map->w, map->h, mt->etc_format);
1892
1893 intel_miptree_unmap_raw(brw, mt);
1894 free(map->buffer);
1895 }
1896
1897 /**
1898 * Mapping function for packed depth/stencil miptrees backed by real separate
1899 * miptrees for depth and stencil.
1900 *
1901 * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
1902 * separate from the depth buffer. Yet at the GL API level, we have to expose
1903 * packed depth/stencil textures and FBO attachments, and Mesa core expects to
1904 * be able to map that memory for texture storage and glReadPixels-type
1905 * operations. We give Mesa core that access by mallocing a temporary and
1906 * copying the data between the actual backing store and the temporary.
1907 */
1908 static void
1909 intel_miptree_map_depthstencil(struct brw_context *brw,
1910 struct intel_mipmap_tree *mt,
1911 struct intel_miptree_map *map,
1912 unsigned int level, unsigned int slice)
1913 {
1914 struct intel_mipmap_tree *z_mt = mt;
1915 struct intel_mipmap_tree *s_mt = mt->stencil_mt;
1916 bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT;
1917 int packed_bpp = map_z32f_x24s8 ? 8 : 4;
1918
1919 map->stride = map->w * packed_bpp;
1920 map->buffer = map->ptr = malloc(map->stride * map->h);
1921 if (!map->buffer)
1922 return;
1923
1924    /* At least one of READ_BIT and WRITE_BIT is set.  READ_BIT implies no
1925     * INVALIDATE_RANGE_BIT.  WRITE_BIT also needs the original values read in,
1926     * unless invalidate is set, since we'll be writing the whole rectangle
1927     * from our temporary buffer back out.
1928     */
1929 if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
1930 uint32_t *packed_map = map->ptr;
1931 uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
1932 uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
1933 unsigned int s_image_x, s_image_y;
1934 unsigned int z_image_x, z_image_y;
1935
1936 intel_miptree_get_image_offset(s_mt, level, slice,
1937 &s_image_x, &s_image_y);
1938 intel_miptree_get_image_offset(z_mt, level, slice,
1939 &z_image_x, &z_image_y);
1940
1941 for (uint32_t y = 0; y < map->h; y++) {
1942 for (uint32_t x = 0; x < map->w; x++) {
1943 int map_x = map->x + x, map_y = map->y + y;
1944 ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
1945 map_x + s_image_x,
1946 map_y + s_image_y,
1947 brw->has_swizzling);
1948 ptrdiff_t z_offset = ((map_y + z_image_y) *
1949 (z_mt->region->pitch / 4) +
1950 (map_x + z_image_x));
1951 uint8_t s = s_map[s_offset];
1952 uint32_t z = z_map[z_offset];
1953
1954 if (map_z32f_x24s8) {
1955 packed_map[(y * map->w + x) * 2 + 0] = z;
1956 packed_map[(y * map->w + x) * 2 + 1] = s;
1957 } else {
1958 packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
1959 }
1960 }
1961 }
1962
1963 intel_miptree_unmap_raw(brw, s_mt);
1964 intel_miptree_unmap_raw(brw, z_mt);
1965
1966 DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
1967 __FUNCTION__,
1968 map->x, map->y, map->w, map->h,
1969 z_mt, map->x + z_image_x, map->y + z_image_y,
1970 s_mt, map->x + s_image_x, map->y + s_image_y,
1971 map->ptr, map->stride);
1972 } else {
1973 DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
1974 map->x, map->y, map->w, map->h,
1975 mt, map->ptr, map->stride);
1976 }
1977 }
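/* Editorial sketch of the two packed layouts built by the loop above:
 *
 *   Z24_S8 view (map_z32f_x24s8 == false), one dword per texel:
 *      packed = (s << 24) | (z & 0x00ffffff);   // 0xSSZZZZZZ
 *
 *   Z32F_X24S8 view (map_z32f_x24s8 == true), two dwords per texel:
 *      dword 0: the 32-bit float depth value
 *      dword 1: stencil in the low byte, upper 24 bits undefined (X24)
 *
 * intel_miptree_unmap_depthstencil() below performs the exact inverse,
 * scattering each texel back into the separate depth and stencil BOs.
 */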
1978
1979 static void
1980 intel_miptree_unmap_depthstencil(struct brw_context *brw,
1981 struct intel_mipmap_tree *mt,
1982 struct intel_miptree_map *map,
1983 unsigned int level,
1984 unsigned int slice)
1985 {
1986 struct intel_mipmap_tree *z_mt = mt;
1987 struct intel_mipmap_tree *s_mt = mt->stencil_mt;
1988 bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT;
1989
1990 if (map->mode & GL_MAP_WRITE_BIT) {
1991 uint32_t *packed_map = map->ptr;
1992 uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
1993 uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
1994 unsigned int s_image_x, s_image_y;
1995 unsigned int z_image_x, z_image_y;
1996
1997 intel_miptree_get_image_offset(s_mt, level, slice,
1998 &s_image_x, &s_image_y);
1999 intel_miptree_get_image_offset(z_mt, level, slice,
2000 &z_image_x, &z_image_y);
2001
2002 for (uint32_t y = 0; y < map->h; y++) {
2003 for (uint32_t x = 0; x < map->w; x++) {
2004             ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
2005                                                  x + s_image_x + map->x,
2006                                                  y + s_image_y + map->y,
2007                                                  brw->has_swizzling);
2008             ptrdiff_t z_offset = ((y + z_image_y + map->y) *
2009                                   (z_mt->region->pitch / 4) +
2010                                   (x + z_image_x + map->x));
2011
2012 if (map_z32f_x24s8) {
2013 z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
2014 s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
2015 } else {
2016 uint32_t packed = packed_map[y * map->w + x];
2017 s_map[s_offset] = packed >> 24;
2018 z_map[z_offset] = packed;
2019 }
2020 }
2021 }
2022
2023 intel_miptree_unmap_raw(brw, s_mt);
2024 intel_miptree_unmap_raw(brw, z_mt);
2025
2026 DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
2027 __FUNCTION__,
2028 map->x, map->y, map->w, map->h,
2029 z_mt, _mesa_get_format_name(z_mt->format),
2030 map->x + z_image_x, map->y + z_image_y,
2031 s_mt, map->x + s_image_x, map->y + s_image_y,
2032 map->ptr, map->stride);
2033 }
2034
2035 free(map->buffer);
2036 }
2037
2038 /**
2039 * Create and attach a map to the miptree at (level, slice). Return the
2040 * attached map.
2041 */
2042 static struct intel_miptree_map*
2043 intel_miptree_attach_map(struct intel_mipmap_tree *mt,
2044 unsigned int level,
2045 unsigned int slice,
2046 unsigned int x,
2047 unsigned int y,
2048 unsigned int w,
2049 unsigned int h,
2050 GLbitfield mode)
2051 {
2052 struct intel_miptree_map *map = calloc(1, sizeof(*map));
2053
2054 if (!map)
2055 return NULL;
2056
2057 assert(mt->level[level].slice[slice].map == NULL);
2058 mt->level[level].slice[slice].map = map;
2059
2060 map->mode = mode;
2061 map->x = x;
2062 map->y = y;
2063 map->w = w;
2064 map->h = h;
2065
2066 return map;
2067 }
2068
2069 /**
2070 * Release the map at (level, slice).
2071 */
2072 static void
2073 intel_miptree_release_map(struct intel_mipmap_tree *mt,
2074 unsigned int level,
2075 unsigned int slice)
2076 {
2077 struct intel_miptree_map **map;
2078
2079 map = &mt->level[level].slice[slice].map;
2080 free(*map);
2081 *map = NULL;
2082 }
2083
2084 static void
2085 intel_miptree_map_singlesample(struct brw_context *brw,
2086 struct intel_mipmap_tree *mt,
2087 unsigned int level,
2088 unsigned int slice,
2089 unsigned int x,
2090 unsigned int y,
2091 unsigned int w,
2092 unsigned int h,
2093 GLbitfield mode,
2094 void **out_ptr,
2095 int *out_stride)
2096 {
2097 struct intel_miptree_map *map;
2098
2099 assert(mt->num_samples <= 1);
2100
2101 map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
2102    if (!map) {
2103 *out_ptr = NULL;
2104 *out_stride = 0;
2105 return;
2106 }
2107
2108 intel_miptree_slice_resolve_depth(brw, mt, level, slice);
2109 if (map->mode & GL_MAP_WRITE_BIT) {
2110 intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice);
2111 }
2112
2113 if (mt->format == MESA_FORMAT_S8) {
2114 intel_miptree_map_s8(brw, mt, map, level, slice);
2115 } else if (mt->etc_format != MESA_FORMAT_NONE &&
2116 !(mode & BRW_MAP_DIRECT_BIT)) {
2117 intel_miptree_map_etc(brw, mt, map, level, slice);
2118 } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
2119 intel_miptree_map_depthstencil(brw, mt, map, level, slice);
2120 }
2121 /* See intel_miptree_blit() for details on the 32k pitch limit. */
2122 else if (brw->has_llc &&
2123 !(mode & GL_MAP_WRITE_BIT) &&
2124 !mt->compressed &&
2125 (mt->region->tiling == I915_TILING_X ||
2126 (brw->gen >= 6 && mt->region->tiling == I915_TILING_Y)) &&
2127 mt->region->pitch < 32768) {
2128 intel_miptree_map_blit(brw, mt, map, level, slice);
2129 } else if (mt->region->tiling != I915_TILING_NONE &&
2130 mt->region->bo->size >= brw->max_gtt_map_object_size) {
2131 assert(mt->region->pitch < 32768);
2132 intel_miptree_map_blit(brw, mt, map, level, slice);
2133 } else {
2134 intel_miptree_map_gtt(brw, mt, map, level, slice);
2135 }
2136
2137 *out_ptr = map->ptr;
2138 *out_stride = map->stride;
2139
2140 if (map->ptr == NULL)
2141 intel_miptree_release_map(mt, level, slice);
2142 }
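/* Editorial summary of the mapping-strategy ladder above, in priority
 * order:
 *
 *   1. MESA_FORMAT_S8            -> detiling copy through a staging buffer
 *   2. ETC shadow miptrees       -> compressed staging, decompressed on unmap
 *   3. separate stencil          -> pack depth+stencil into a temporary
 *   4. LLC read-only tiled maps, or tiled BOs too big to map through the
 *      GTT aperture              -> blit to a linear staging BO
 *   5. everything else           -> direct GTT mapping
 *
 * BRW_MAP_DIRECT_BIT skips the format conversions in (2) and (3) so that
 * callers which understand the real storage can see it as-is.
 */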
2143
2144 static void
2145 intel_miptree_unmap_singlesample(struct brw_context *brw,
2146 struct intel_mipmap_tree *mt,
2147 unsigned int level,
2148 unsigned int slice)
2149 {
2150 struct intel_miptree_map *map = mt->level[level].slice[slice].map;
2151
2152 assert(mt->num_samples <= 1);
2153
2154 if (!map)
2155 return;
2156
2157 DBG("%s: mt %p (%s) level %d slice %d\n", __FUNCTION__,
2158 mt, _mesa_get_format_name(mt->format), level, slice);
2159
2160 if (mt->format == MESA_FORMAT_S8) {
2161 intel_miptree_unmap_s8(brw, mt, map, level, slice);
2162 } else if (mt->etc_format != MESA_FORMAT_NONE &&
2163 !(map->mode & BRW_MAP_DIRECT_BIT)) {
2164 intel_miptree_unmap_etc(brw, mt, map, level, slice);
2165 } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
2166 intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
2167 } else if (map->mt) {
2168 intel_miptree_unmap_blit(brw, mt, map, level, slice);
2169 } else {
2170 intel_miptree_unmap_gtt(brw, mt, map, level, slice);
2171 }
2172
2173 intel_miptree_release_map(mt, level, slice);
2174 }
2175
2176 static void
2177 intel_miptree_map_multisample(struct brw_context *brw,
2178 struct intel_mipmap_tree *mt,
2179 unsigned int level,
2180 unsigned int slice,
2181 unsigned int x,
2182 unsigned int y,
2183 unsigned int w,
2184 unsigned int h,
2185 GLbitfield mode,
2186 void **out_ptr,
2187 int *out_stride)
2188 {
2189 struct gl_context *ctx = &brw->ctx;
2190 struct intel_miptree_map *map;
2191
2192 assert(mt->num_samples > 1);
2193
2194 /* Only flat, renderbuffer-like miptrees are supported. */
2195 if (mt->target != GL_TEXTURE_2D ||
2196 mt->first_level != 0 ||
2197 mt->last_level != 0) {
2198       _mesa_problem(ctx, "attempt to map a multisample miptree for "
2199                     "which (target, first_level, last_level) != "
2200                     "(GL_TEXTURE_2D, 0, 0)");
2201 goto fail;
2202 }
2203
2204 map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
2205 if (!map)
2206 goto fail;
2207
2208 if (!mt->singlesample_mt) {
2209 mt->singlesample_mt =
2210 intel_miptree_create_for_renderbuffer(brw,
2211 mt->format,
2212 mt->logical_width0,
2213 mt->logical_height0,
2214 0 /*num_samples*/);
2215 if (!mt->singlesample_mt)
2216 goto fail;
2217
2218 map->singlesample_mt_is_tmp = true;
2219 mt->need_downsample = true;
2220 }
2221
2222 intel_miptree_downsample(brw, mt);
2223 intel_miptree_map_singlesample(brw, mt->singlesample_mt,
2224 level, slice,
2225 x, y, w, h,
2226 mode,
2227 out_ptr, out_stride);
2228 return;
2229
2230 fail:
2231 intel_miptree_release_map(mt, level, slice);
2232 *out_ptr = NULL;
2233 *out_stride = 0;
2234 }
2235
2236 static void
2237 intel_miptree_unmap_multisample(struct brw_context *brw,
2238 struct intel_mipmap_tree *mt,
2239 unsigned int level,
2240 unsigned int slice)
2241 {
2242 struct intel_miptree_map *map = mt->level[level].slice[slice].map;
2243
2244 assert(mt->num_samples > 1);
2245
2246 if (!map)
2247 return;
2248
2249 intel_miptree_unmap_singlesample(brw, mt->singlesample_mt, level, slice);
2250
2251 mt->need_downsample = false;
2252 if (map->mode & GL_MAP_WRITE_BIT)
2253 intel_miptree_upsample(brw, mt);
2254
2255 if (map->singlesample_mt_is_tmp)
2256 intel_miptree_release(&mt->singlesample_mt);
2257
2258 intel_miptree_release_map(mt, level, slice);
2259 }
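/* Editorial note: the multisample map/unmap pair implements a
 * resolve-based protocol.  Mapping downsamples the MSAA surface into
 * singlesample_mt and maps that instead; unmapping upsamples the
 * (possibly modified) singlesample data back only when the map was
 * writable, and releases the temporary singlesample miptree if one was
 * created just for this map.
 */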
2260
2261 void
2262 intel_miptree_map(struct brw_context *brw,
2263 struct intel_mipmap_tree *mt,
2264 unsigned int level,
2265 unsigned int slice,
2266 unsigned int x,
2267 unsigned int y,
2268 unsigned int w,
2269 unsigned int h,
2270 GLbitfield mode,
2271 void **out_ptr,
2272 int *out_stride)
2273 {
2274 if (mt->num_samples <= 1)
2275 intel_miptree_map_singlesample(brw, mt,
2276 level, slice,
2277 x, y, w, h,
2278 mode,
2279 out_ptr, out_stride);
2280 else
2281 intel_miptree_map_multisample(brw, mt,
2282 level, slice,
2283 x, y, w, h,
2284 mode,
2285 out_ptr, out_stride);
2286 }
2287
2288 void
2289 intel_miptree_unmap(struct brw_context *brw,
2290 struct intel_mipmap_tree *mt,
2291 unsigned int level,
2292 unsigned int slice)
2293 {
2294 if (mt->num_samples <= 1)
2295 intel_miptree_unmap_singlesample(brw, mt, level, slice);
2296 else
2297 intel_miptree_unmap_multisample(brw, mt, level, slice);
2298 }
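/* Editorial usage sketch for the public entry points above (the
 * caller-side variable names are illustrative, not from this file):
 *
 *    void *ptr;
 *    int stride;
 *
 *    intel_miptree_map(brw, mt, level, slice, x, y, w, h,
 *                      GL_MAP_READ_BIT, &ptr, &stride);
 *    if (ptr) {
 *       for (unsigned row = 0; row < h; row++)
 *          memcpy(dst + row * dst_stride,
 *                 (char *) ptr + row * stride, w * cpp);
 *    }
 *    intel_miptree_unmap(brw, mt, level, slice);
 *
 * Callers must always honor the returned stride: depending on which path
 * was taken, ptr may point at a malloc'd temporary, a blit staging BO, or
 * a GTT mapping, each with a different row pitch.
 */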