i915: Fork the shared code from i965.
mesa.git: src/mesa/drivers/dri/i915/intel_mipmap_tree.c
1 /**************************************************************************
2 *
3 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <GL/gl.h>
29 #include <GL/internal/dri_interface.h>
30
31 #include "intel_batchbuffer.h"
32 #include "intel_chipset.h"
33 #include "intel_context.h"
34 #include "intel_mipmap_tree.h"
35 #include "intel_regions.h"
36 #include "intel_resolve_map.h"
37 #include "intel_tex_layout.h"
38 #include "intel_tex.h"
39 #include "intel_blit.h"
40
41 #ifndef I915
42 #include "brw_blorp.h"
43 #endif
44
45 #include "main/enums.h"
46 #include "main/formats.h"
47 #include "main/glformats.h"
48 #include "main/texcompress_etc.h"
49 #include "main/teximage.h"
50
51 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
52
53 static GLenum
54 target_to_target(GLenum target)
55 {
56 switch (target) {
57 case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
58 case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
59 case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
60 case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
61 case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
62 case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
63 return GL_TEXTURE_CUBE_MAP_ARB;
64 default:
65 return target;
66 }
67 }
68
69
70 /**
71 * Determine which MSAA layout should be used by the MSAA surface being
72 * created, based on the chip generation and the surface type.
73 */
74 static enum intel_msaa_layout
75 compute_msaa_layout(struct intel_context *intel, gl_format format, GLenum target)
76 {
77 /* Prior to Gen7, all MSAA surfaces used IMS layout. */
78 if (intel->gen < 7)
79 return INTEL_MSAA_LAYOUT_IMS;
80
81 /* In Gen7, IMS layout is only used for depth and stencil buffers. */
82 switch (_mesa_get_format_base_format(format)) {
83 case GL_DEPTH_COMPONENT:
84 case GL_STENCIL_INDEX:
85 case GL_DEPTH_STENCIL:
86 return INTEL_MSAA_LAYOUT_IMS;
87 default:
88 /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
89 *
90 * This field must be set to 0 for all SINT MSRTs when all RT channels
91 * are not written
92 *
93 * In practice this means that we have to disable MCS for all signed
94 * integer MSAA buffers. The alternative, to disable MCS only when one
95 * of the render target channels is disabled, is impractical because it
96 * would require converting between CMS and UMS MSAA layouts on the fly,
97 * which is expensive.
98 */
99 if (_mesa_get_format_datatype(format) == GL_INT) {
100 /* TODO: is this workaround needed for future chipsets? */
101 assert(intel->gen == 7);
102 return INTEL_MSAA_LAYOUT_UMS;
103 } else {
104 /* For now, if we're going to be texturing from this surface,
105 * force UMS, so that the shader doesn't have to do different things
106 * based on whether there's a multisample control surface that needs to be sampled first.
107 * We can't just blindly read the MCS surface in all cases because:
108 *
109 * From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
110 *
111 * If this field is disabled and the sampling engine <ld_mcs> message
112 * is issued on this surface, the MCS surface may be accessed. Software
113 * must ensure that the surface is defined to avoid GTT errors.
114 */
115 if (target == GL_TEXTURE_2D_MULTISAMPLE ||
116 target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
117 return INTEL_MSAA_LAYOUT_UMS;
118 } else {
119 return INTEL_MSAA_LAYOUT_CMS;
120 }
121 }
122 }
123 }
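
/* A hedged summary of the decision tree above (illustrative only, not a
 * table from the PRM):
 *
 *    gen < 7                              -> IMS
 *    gen 7, depth/stencil format          -> IMS
 *    gen 7, signed-integer color          -> UMS
 *    gen 7, color, multisample texture    -> UMS
 *    gen 7, color, renderbuffer-only      -> CMS
 */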
124
125
126 /**
127 * For single-sampled render targets ("non-MSRT"), the MCS buffer is a
128 * scaled-down bitfield representation of the color buffer which is capable of
129 * recording when blocks of the color buffer are equal to the clear value.
130 * This function returns the block size that will be used by the MCS buffer
131 * corresponding to a certain color miptree.
132 *
133 * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
134 * beneath the "Fast Color Clear" bullet (p327):
135 *
136 * The following table describes the RT alignment
137 *
138 *                  Pixels  Lines
139 *    TiledY RT CL
140 *        bpp
141 *         32          8      4
142 *         64          4      4
143 *        128          2      4
144 *    TiledX RT CL
145 *        bpp
146 *         32         16      2
147 *         64          8      2
148 *        128          4      2
149 *
150 * This alignment has the following uses:
151 *
152 * - For figuring out the size of the MCS buffer. Each 4k tile in the MCS
153 * buffer contains 128 blocks horizontally and 256 blocks vertically.
154 *
155 * - For figuring out alignment restrictions for a fast clear operation. Fast
156 * clear operations must always clear aligned multiples of 16 blocks
157 * horizontally and 32 blocks vertically.
158 *
159 * - For scaling down the coordinates sent through the render pipeline during
160 * a fast clear. X coordinates must be scaled down by 8 times the block
161 * width, and Y coordinates by 16 times the block height.
162 *
163 * - For scaling down the coordinates sent through the render pipeline during
164 * a "Render Target Resolve" operation. X coordinates must be scaled down
165 * by half the block width, and Y coordinates by half the block height.
166 */
167 void
168 intel_get_non_msrt_mcs_alignment(struct intel_context *intel,
169 struct intel_mipmap_tree *mt,
170 unsigned *width_px, unsigned *height)
171 {
172 switch (mt->region->tiling) {
173 default:
174 assert(!"Non-MSRT MCS requires X or Y tiling");
175 /* In release builds, fall through */
176 case I915_TILING_Y:
177 *width_px = 32 / mt->cpp;
178 *height = 4;
179 break;
180 case I915_TILING_X:
181 *width_px = 64 / mt->cpp;
182 *height = 2;
183 }
184 }
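
/* A minimal sketch (not part of the original driver) of how the alignment
 * above feeds the fast-clear rules listed in the comment: clear rectangles
 * must cover whole multiples of 16x32 blocks, and the rectangle sent down
 * the pipeline is scaled down by 8x the block width and 16x the block
 * height. All names here are hypothetical.
 */
static inline void
fast_clear_rect_sketch(unsigned block_w_px, unsigned block_h,
                       unsigned *x0, unsigned *y0,
                       unsigned *x1, unsigned *y1)
{
   unsigned x_align = block_w_px * 16, y_align = block_h * 32;

   /* Round the rectangle out to the required block alignment. */
   *x0 -= *x0 % x_align;
   *y0 -= *y0 % y_align;
   *x1 = ALIGN(*x1, x_align);
   *y1 = ALIGN(*y1, y_align);

   /* Scale down the coordinates actually sent through the pipeline. */
   *x0 /= block_w_px * 8;
   *x1 /= block_w_px * 8;
   *y0 /= block_h * 16;
   *y1 /= block_h * 16;
}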
185
186
187 /**
188 * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
189 * can be used.
190 *
191 * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
192 * beneath the "Fast Color Clear" bullet (p326):
193 *
194 * - Support is limited to tiled render targets.
195 * - Support is for non-mip-mapped and non-array surface types only.
196 *
197 * And then later, on p327:
198 *
199 * - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
200 * 64bpp, and 128bpp.
201 */
202 bool
203 intel_is_non_msrt_mcs_buffer_supported(struct intel_context *intel,
204 struct intel_mipmap_tree *mt)
205 {
206 #ifdef I915
207 /* MCS is not supported on the i915 (pre-Gen4) driver */
208 return false;
209 #else
210 struct brw_context *brw = brw_context(&intel->ctx);
211
212 /* MCS support does not exist prior to Gen7 */
213 if (intel->gen < 7)
214 return false;
215
216 /* MCS is only supported for color buffers */
217 switch (_mesa_get_format_base_format(mt->format)) {
218 case GL_DEPTH_COMPONENT:
219 case GL_DEPTH_STENCIL:
220 case GL_STENCIL_INDEX:
221 return false;
222 }
223
224 if (mt->region->tiling != I915_TILING_X &&
225 mt->region->tiling != I915_TILING_Y)
226 return false;
227 if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
228 return false;
229 if (mt->first_level != 0 || mt->last_level != 0)
230 return false;
231 if (mt->physical_depth0 != 1)
232 return false;
233
234 /* There's no point in using an MCS buffer if the surface isn't in a
235 * renderable format.
236 */
237 if (!brw->format_supported_as_render_target[mt->format])
238 return false;
239
240 return true;
241 #endif
242 }
243
244
245 /**
246 * @param for_bo Indicates that the caller is
247 * intel_miptree_create_for_bo(). If true, then do not create
248 * \c stencil_mt.
249 */
250 struct intel_mipmap_tree *
251 intel_miptree_create_layout(struct intel_context *intel,
252 GLenum target,
253 gl_format format,
254 GLuint first_level,
255 GLuint last_level,
256 GLuint width0,
257 GLuint height0,
258 GLuint depth0,
259 bool for_bo,
260 GLuint num_samples)
261 {
262 struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
263
264 DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__,
265 _mesa_lookup_enum_by_nr(target),
266 _mesa_get_format_name(format),
267 first_level, last_level, mt);
268
269 mt->target = target_to_target(target);
270 mt->format = format;
271 mt->first_level = first_level;
272 mt->last_level = last_level;
273 mt->logical_width0 = width0;
274 mt->logical_height0 = height0;
275 mt->logical_depth0 = depth0;
276 #ifndef I915
277 mt->mcs_state = INTEL_MCS_STATE_NONE;
278 #endif
279
280 /* The cpp is bytes per (1, blockheight)-sized block for compressed
281 * textures. This is why you'll see divides by blockheight all over
282 */
283 unsigned bw, bh;
284 _mesa_get_format_block_size(format, &bw, &bh);
285 assert(_mesa_get_format_bytes(mt->format) % bw == 0);
286 mt->cpp = _mesa_get_format_bytes(mt->format) / bw;
287
288 mt->num_samples = num_samples;
289 mt->compressed = _mesa_is_format_compressed(format);
290 mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE;
291 mt->refcount = 1;
292
293 if (num_samples > 1) {
294 /* Adjust width/height/depth for MSAA */
295 mt->msaa_layout = compute_msaa_layout(intel, format, mt->target);
296 if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
297 /* In the Sandy Bridge PRM, volume 4, part 1, page 31, it says:
298 *
299 * "Any of the other messages (sample*, LOD, load4) used with a
300 * (4x) multisampled surface will in-effect sample a surface with
301 * double the height and width as that indicated in the surface
302 * state. Each pixel position on the original-sized surface is
303 * replaced with a 2x2 of samples with the following arrangement:
304 *
305 * sample 0 sample 2
306 * sample 1 sample 3"
307 *
308 * Thus, when sampling from a multisampled texture, it behaves as
309 * though the layout in memory for (x,y,sample) is:
310 *
311 * (0,0,0) (0,0,2) (1,0,0) (1,0,2)
312 * (0,0,1) (0,0,3) (1,0,1) (1,0,3)
313 *
314 * (0,1,0) (0,1,2) (1,1,0) (1,1,2)
315 * (0,1,1) (0,1,3) (1,1,1) (1,1,3)
316 *
317 * However, the actual layout of multisampled data in memory is:
318 *
319 * (0,0,0) (1,0,0) (0,0,1) (1,0,1)
320 * (0,1,0) (1,1,0) (0,1,1) (1,1,1)
321 *
322 * (0,0,2) (1,0,2) (0,0,3) (1,0,3)
323 * (0,1,2) (1,1,2) (0,1,3) (1,1,3)
324 *
325 * This pattern repeats for each 2x2 pixel block.
326 *
327 * As a result, when calculating the size of our 4-sample buffer for
328 * an odd width or height, we have to align before scaling up because
329 * sample 3 is in that bottom right 2x2 block.
330 */
331 switch (num_samples) {
332 case 4:
333 width0 = ALIGN(width0, 2) * 2;
334 height0 = ALIGN(height0, 2) * 2;
335 break;
336 case 8:
337 width0 = ALIGN(width0, 2) * 4;
338 height0 = ALIGN(height0, 2) * 2;
339 break;
340 default:
341 /* num_samples should already have been quantized to 0, 1, 4, or
342 * 8.
343 */
344 assert(false);
345 }
346 } else {
347 /* Non-interleaved */
348 depth0 *= num_samples;
349 }
350 }
351
352 /* array_spacing_lod0 is only used for non-IMS MSAA surfaces. TODO: can we
353 * use it elsewhere?
354 */
355 switch (mt->msaa_layout) {
356 case INTEL_MSAA_LAYOUT_NONE:
357 case INTEL_MSAA_LAYOUT_IMS:
358 mt->array_spacing_lod0 = false;
359 break;
360 case INTEL_MSAA_LAYOUT_UMS:
361 case INTEL_MSAA_LAYOUT_CMS:
362 mt->array_spacing_lod0 = true;
363 break;
364 }
365
366 if (target == GL_TEXTURE_CUBE_MAP) {
367 assert(depth0 == 1);
368 depth0 = 6;
369 }
370
371 mt->physical_width0 = width0;
372 mt->physical_height0 = height0;
373 mt->physical_depth0 = depth0;
374
375 if (!for_bo &&
376 _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
377 (intel->must_use_separate_stencil ||
378 (intel->has_separate_stencil &&
379 intel->vtbl.is_hiz_depth_format(intel, format)))) {
380 mt->stencil_mt = intel_miptree_create(intel,
381 mt->target,
382 MESA_FORMAT_S8,
383 mt->first_level,
384 mt->last_level,
385 mt->logical_width0,
386 mt->logical_height0,
387 mt->logical_depth0,
388 true,
389 num_samples,
390 INTEL_MIPTREE_TILING_ANY);
391 if (!mt->stencil_mt) {
392 intel_miptree_release(&mt);
393 return NULL;
394 }
395
396 /* Fix up the Z miptree format for how we're splitting out separate
397 * stencil. Gen7 expects there to be no stencil bits in its depth buffer.
398 */
399 if (mt->format == MESA_FORMAT_S8_Z24) {
400 mt->format = MESA_FORMAT_X8_Z24;
401 } else if (mt->format == MESA_FORMAT_Z32_FLOAT_X24S8) {
402 mt->format = MESA_FORMAT_Z32_FLOAT;
403 mt->cpp = 4;
404 } else {
405 _mesa_problem(NULL, "Unknown format %s in separate stencil mt\n",
406 _mesa_get_format_name(mt->format));
407 }
408 }
409
410 intel_get_texture_alignment_unit(intel, mt->format,
411 &mt->align_w, &mt->align_h);
412
413 #ifdef I915
415 if (intel->is_945)
416 i945_miptree_layout(mt);
417 else
418 i915_miptree_layout(mt);
419 #else
420 brw_miptree_layout(intel, mt);
421 #endif
422
423 return mt;
424 }
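
/* A minimal sketch (not part of the driver) of the 4x IMS
 * (x, y, sample) -> physical (x', y') mapping implied by the layout table
 * in the comment above: sample bit 0 selects the column and sample bit 1
 * selects the row within each 2x2 block.
 */
static inline void
ims_4x_encode_sketch(uint32_t x, uint32_t y, uint32_t s,
                     uint32_t *px, uint32_t *py)
{
   *px = ((x & ~1u) << 1) | ((s & 1u) << 1) | (x & 1u);
   *py = ((y & ~1u) << 1) | (s & 2u) | (y & 1u);
}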
425
426 /**
427 * \brief Helper function for intel_miptree_create().
428 */
429 static uint32_t
430 intel_miptree_choose_tiling(struct intel_context *intel,
431 gl_format format,
432 uint32_t width0,
433 uint32_t num_samples,
434 enum intel_miptree_tiling_mode requested,
435 struct intel_mipmap_tree *mt)
436 {
437
438 if (format == MESA_FORMAT_S8) {
439 /* The stencil buffer is W tiled. However, we request from the kernel a
440 * non-tiled buffer because the GTT is incapable of W fencing.
441 */
442 return I915_TILING_NONE;
443 }
444
445 /* Some usages may want only one type of tiling, like depth miptrees (Y
446 * tiled), or temporary BOs for uploading data once (linear).
447 */
448 switch (requested) {
449 case INTEL_MIPTREE_TILING_ANY:
450 break;
451 case INTEL_MIPTREE_TILING_Y:
452 return I915_TILING_Y;
453 case INTEL_MIPTREE_TILING_NONE:
454 return I915_TILING_NONE;
455 }
456
457 if (num_samples > 1) {
458 /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
459 * Surface"):
460 *
461 * [DevSNB+]: For multi-sample render targets, this field must be
462 * 1. MSRTs can only be tiled.
463 *
464 * Our usual reason for preferring X tiling (fast blits using the
465 * blitting engine) doesn't apply to MSAA, since we'll generally be
466 * downsampling or upsampling when blitting between the MSAA buffer
467 * and another buffer, and the blitting engine doesn't support that.
468 * So use Y tiling, since it makes better use of the cache.
469 */
470 return I915_TILING_Y;
471 }
472
473 GLenum base_format = _mesa_get_format_base_format(format);
474 if (intel->gen >= 4 &&
475 (base_format == GL_DEPTH_COMPONENT ||
476 base_format == GL_DEPTH_STENCIL_EXT))
477 return I915_TILING_Y;
478
479 int minimum_pitch = mt->total_width * mt->cpp;
480
481 /* If the width is much smaller than a tile, don't bother tiling. */
482 if (minimum_pitch < 64)
483 return I915_TILING_NONE;
484
485 if (ALIGN(minimum_pitch, 512) >= 32768) {
486 perf_debug("%dx%d miptree too large to blit, falling back to untiled",
487 mt->total_width, mt->total_height);
488 return I915_TILING_NONE;
489 }
490
491 /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */
492 if (intel->gen < 6)
493 return I915_TILING_X;
494
495 return I915_TILING_Y | I915_TILING_X;
496 }
497
498 struct intel_mipmap_tree *
499 intel_miptree_create(struct intel_context *intel,
500 GLenum target,
501 gl_format format,
502 GLuint first_level,
503 GLuint last_level,
504 GLuint width0,
505 GLuint height0,
506 GLuint depth0,
507 bool expect_accelerated_upload,
508 GLuint num_samples,
509 enum intel_miptree_tiling_mode requested_tiling)
510 {
511 struct intel_mipmap_tree *mt;
512 gl_format tex_format = format;
513 gl_format etc_format = MESA_FORMAT_NONE;
514 GLuint total_width, total_height;
515
516 if (!intel->is_baytrail) {
517 switch (format) {
518 case MESA_FORMAT_ETC1_RGB8:
519 format = MESA_FORMAT_RGBX8888_REV;
520 break;
521 case MESA_FORMAT_ETC2_RGB8:
522 format = MESA_FORMAT_RGBX8888_REV;
523 break;
524 case MESA_FORMAT_ETC2_SRGB8:
525 case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
526 case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
527 format = MESA_FORMAT_SARGB8;
528 break;
529 case MESA_FORMAT_ETC2_RGBA8_EAC:
530 case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
531 format = MESA_FORMAT_RGBA8888_REV;
532 break;
533 case MESA_FORMAT_ETC2_R11_EAC:
534 format = MESA_FORMAT_R16;
535 break;
536 case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
537 format = MESA_FORMAT_SIGNED_R16;
538 break;
539 case MESA_FORMAT_ETC2_RG11_EAC:
540 format = MESA_FORMAT_GR1616;
541 break;
542 case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
543 format = MESA_FORMAT_SIGNED_GR1616;
544 break;
545 default:
546 /* Non ETC1 / ETC2 format */
547 break;
548 }
549 }
550
551 etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
552
553 mt = intel_miptree_create_layout(intel, target, format,
554 first_level, last_level, width0,
555 height0, depth0,
556 false, num_samples);
557 /*
558 * total_width == 0 || total_height == 0 indicates the null texture
559 */
560 if (!mt || !mt->total_width || !mt->total_height) {
561 intel_miptree_release(&mt);
562 return NULL;
563 }
564
565 total_width = mt->total_width;
566 total_height = mt->total_height;
567
568 if (format == MESA_FORMAT_S8) {
569 /* Align to size of W tile, 64x64. */
570 total_width = ALIGN(total_width, 64);
571 total_height = ALIGN(total_height, 64);
572 }
573
574 uint32_t tiling = intel_miptree_choose_tiling(intel, format, width0,
575 num_samples, requested_tiling,
576 mt);
577 bool y_or_x = tiling == (I915_TILING_Y | I915_TILING_X);
578
579 mt->etc_format = etc_format;
580 mt->region = intel_region_alloc(intel->intelScreen,
581 y_or_x ? I915_TILING_Y : tiling,
582 mt->cpp,
583 total_width,
584 total_height,
585 expect_accelerated_upload);
586
587 /* If the region is too large to fit in the aperture, we need to use the
588 * BLT engine to support it. The BLT paths can't currently handle Y-tiling,
589 * so we need to fall back to X.
590 */
591 if (y_or_x && mt->region->bo->size >= intel->max_gtt_map_object_size) {
592 perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n",
593 mt->total_width, mt->total_height);
594 intel_region_release(&mt->region);
595
596 mt->region = intel_region_alloc(intel->intelScreen,
597 I915_TILING_X,
598 mt->cpp,
599 total_width,
600 total_height,
601 expect_accelerated_upload);
602 }
603
604 mt->offset = 0;
605
606 if (!mt->region) {
607 intel_miptree_release(&mt);
608 return NULL;
609 }
610
611 #ifndef I915
612 /* If this miptree is capable of supporting fast color clears, set
613 * mcs_state appropriately to ensure that fast clears will occur.
614 * Allocation of the MCS miptree will be deferred until the first fast
615 * clear actually occurs.
616 */
617 if (intel_is_non_msrt_mcs_buffer_supported(intel, mt))
618 mt->mcs_state = INTEL_MCS_STATE_RESOLVED;
619 #endif
620
621 return mt;
622 }
623
624 struct intel_mipmap_tree *
625 intel_miptree_create_for_bo(struct intel_context *intel,
626 drm_intel_bo *bo,
627 gl_format format,
628 uint32_t offset,
629 uint32_t width,
630 uint32_t height,
631 int pitch,
632 uint32_t tiling)
633 {
634 struct intel_mipmap_tree *mt;
635
636 struct intel_region *region = calloc(1, sizeof(*region));
637 if (!region)
638 return NULL;
639
640 /* Nothing will be able to use this miptree with the BO if the offset isn't
641 * aligned.
642 */
643 if (tiling != I915_TILING_NONE)
644 assert(offset % 4096 == 0);
645
646 /* miptrees can't handle negative pitch. If you need flipping of images,
647 * that's outside of the scope of the mt.
648 */
649 assert(pitch >= 0);
650
651 mt = intel_miptree_create_layout(intel, GL_TEXTURE_2D, format,
652 0, 0,
653 width, height, 1,
654 true, 0 /* num_samples */);
655 if (!mt)
656 return mt;
657
658 region->cpp = mt->cpp;
659 region->width = width;
660 region->height = height;
661 region->pitch = pitch;
662 region->refcount = 1;
663 drm_intel_bo_reference(bo);
664 region->bo = bo;
665 region->tiling = tiling;
666
667 mt->region = region;
668 mt->offset = offset;
669
670 return mt;
671 }
672
673
674 /**
675 * For a singlesample DRI2 buffer, this simply wraps the given region with a miptree.
676 *
677 * For a multisample DRI2 buffer, this wraps the given region with
678 * a singlesample miptree, then creates a multisample miptree into which the
679 * singlesample miptree is embedded as a child.
680 */
681 struct intel_mipmap_tree*
682 intel_miptree_create_for_dri2_buffer(struct intel_context *intel,
683 unsigned dri_attachment,
684 gl_format format,
685 uint32_t num_samples,
686 struct intel_region *region)
687 {
688 struct intel_mipmap_tree *singlesample_mt = NULL;
689 struct intel_mipmap_tree *multisample_mt = NULL;
690
691 /* Only the front and back buffers, which are color buffers, are shared
692 * through DRI2.
693 */
694 assert(dri_attachment == __DRI_BUFFER_BACK_LEFT ||
695 dri_attachment == __DRI_BUFFER_FRONT_LEFT ||
696 dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT);
697 assert(_mesa_get_format_base_format(format) == GL_RGB ||
698 _mesa_get_format_base_format(format) == GL_RGBA);
699
700 singlesample_mt = intel_miptree_create_for_bo(intel,
701 region->bo,
702 format,
703 0,
704 region->width,
705 region->height,
706 region->pitch,
707 region->tiling);
708 if (!singlesample_mt)
709 return NULL;
710 singlesample_mt->region->name = region->name;
711
712 #ifndef I915
713 /* If this miptree is capable of supporting fast color clears, set
714 * mcs_state appropriately to ensure that fast clears will occur.
715 * Allocation of the MCS miptree will be deferred until the first fast
716 * clear actually occurs.
717 */
718 if (intel_is_non_msrt_mcs_buffer_supported(intel, singlesample_mt))
719 singlesample_mt->mcs_state = INTEL_MCS_STATE_RESOLVED;
720 #endif
721
722 if (num_samples == 0)
723 return singlesample_mt;
724
725 multisample_mt = intel_miptree_create_for_renderbuffer(intel,
726 format,
727 region->width,
728 region->height,
729 num_samples);
730 if (!multisample_mt) {
731 intel_miptree_release(&singlesample_mt);
732 return NULL;
733 }
734
735 multisample_mt->singlesample_mt = singlesample_mt;
736 multisample_mt->need_downsample = false;
737
738 if (intel->is_front_buffer_rendering &&
739 (dri_attachment == __DRI_BUFFER_FRONT_LEFT ||
740 dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT)) {
741 intel_miptree_upsample(intel, multisample_mt);
742 }
743
744 return multisample_mt;
745 }
746
747 struct intel_mipmap_tree*
748 intel_miptree_create_for_renderbuffer(struct intel_context *intel,
749 gl_format format,
750 uint32_t width,
751 uint32_t height,
752 uint32_t num_samples)
753 {
754 struct intel_mipmap_tree *mt;
755 uint32_t depth = 1;
756 bool ok;
757
758 mt = intel_miptree_create(intel, GL_TEXTURE_2D, format, 0, 0,
759 width, height, depth, true, num_samples,
760 INTEL_MIPTREE_TILING_ANY);
761 if (!mt)
762 goto fail;
763
764 if (intel->vtbl.is_hiz_depth_format(intel, format)) {
765 ok = intel_miptree_alloc_hiz(intel, mt);
766 if (!ok)
767 goto fail;
768 }
769
770 if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
771 ok = intel_miptree_alloc_mcs(intel, mt, num_samples);
772 if (!ok)
773 goto fail;
774 }
775
776 return mt;
777
778 fail:
779 intel_miptree_release(&mt);
780 return NULL;
781 }
782
783 void
784 intel_miptree_reference(struct intel_mipmap_tree **dst,
785 struct intel_mipmap_tree *src)
786 {
787 if (*dst == src)
788 return;
789
790 intel_miptree_release(dst);
791
792 if (src) {
793 src->refcount++;
794 DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount);
795 }
796
797 *dst = src;
798 }
799
800
801 void
802 intel_miptree_release(struct intel_mipmap_tree **mt)
803 {
804 if (!*mt)
805 return;
806
807 DBG("%s %p refcount will be %d\n", __FUNCTION__, *mt, (*mt)->refcount - 1);
808 if (--(*mt)->refcount <= 0) {
809 GLuint i;
810
811 DBG("%s deleting %p\n", __FUNCTION__, *mt);
812
813 intel_region_release(&((*mt)->region));
814 intel_miptree_release(&(*mt)->stencil_mt);
815 intel_miptree_release(&(*mt)->hiz_mt);
816 #ifndef I915
817 intel_miptree_release(&(*mt)->mcs_mt);
818 #endif
819 intel_miptree_release(&(*mt)->singlesample_mt);
820 intel_resolve_map_clear(&(*mt)->hiz_map);
821
822 for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
823 free((*mt)->level[i].slice);
824 }
825
826 free(*mt);
827 }
828 *mt = NULL;
829 }
830
831 void
832 intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
833 int *width, int *height, int *depth)
834 {
835 switch (image->TexObject->Target) {
836 case GL_TEXTURE_1D_ARRAY:
837 *width = image->Width;
838 *height = 1;
839 *depth = image->Height;
840 break;
841 default:
842 *width = image->Width;
843 *height = image->Height;
844 *depth = image->Depth;
845 break;
846 }
847 }
848
849 /**
850 * Can the image be pulled into a unified mipmap tree? This mirrors
851 * the completeness test in a lot of ways.
852 *
853 * Not sure whether I want to pass gl_texture_image here.
854 */
855 bool
856 intel_miptree_match_image(struct intel_mipmap_tree *mt,
857 struct gl_texture_image *image)
858 {
859 struct intel_texture_image *intelImage = intel_texture_image(image);
860 GLuint level = intelImage->base.Base.Level;
861 int width, height, depth;
862
863 /* glTexImage* choose the texture object based on the target passed in, and
864 * objects can't change targets over their lifetimes, so this should be
865 * true.
866 */
867 assert(target_to_target(image->TexObject->Target) == mt->target);
868
869 gl_format mt_format = mt->format;
870 if (mt->format == MESA_FORMAT_X8_Z24 && mt->stencil_mt)
871 mt_format = MESA_FORMAT_S8_Z24;
872 if (mt->format == MESA_FORMAT_Z32_FLOAT && mt->stencil_mt)
873 mt_format = MESA_FORMAT_Z32_FLOAT_X24S8;
874 if (mt->etc_format != MESA_FORMAT_NONE)
875 mt_format = mt->etc_format;
876
877 if (image->TexFormat != mt_format)
878 return false;
879
880 intel_miptree_get_dimensions_for_image(image, &width, &height, &depth);
881
882 if (mt->target == GL_TEXTURE_CUBE_MAP)
883 depth = 6;
884
885 /* Test image dimensions against the base level image adjusted for
886 * minification. This will also catch images not present in the
887 * tree, changed targets, etc.
888 */
889 if (mt->target == GL_TEXTURE_2D_MULTISAMPLE ||
890 mt->target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
891 /* nonzero level here is always bogus */
892 assert(level == 0);
893
894 if (width != mt->logical_width0 ||
895 height != mt->logical_height0 ||
896 depth != mt->logical_depth0) {
897 return false;
898 }
899 }
900 else {
901 /* all normal textures, renderbuffers, etc */
902 if (width != mt->level[level].width ||
903 height != mt->level[level].height ||
904 depth != mt->level[level].depth) {
905 return false;
906 }
907 }
908
909 if (image->NumSamples != mt->num_samples)
910 return false;
911
912 return true;
913 }
914
915
916 void
917 intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
918 GLuint level,
919 GLuint x, GLuint y,
920 GLuint w, GLuint h, GLuint d)
921 {
922 mt->level[level].width = w;
923 mt->level[level].height = h;
924 mt->level[level].depth = d;
925 mt->level[level].level_x = x;
926 mt->level[level].level_y = y;
927
928 DBG("%s level %d size: %d,%d,%d offset %d,%d\n", __FUNCTION__,
929 level, w, h, d, x, y);
930
931 assert(mt->level[level].slice == NULL);
932
933 mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice));
934 mt->level[level].slice[0].x_offset = mt->level[level].level_x;
935 mt->level[level].slice[0].y_offset = mt->level[level].level_y;
936 }
937
938
939 void
940 intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
941 GLuint level, GLuint img,
942 GLuint x, GLuint y)
943 {
944 if (img == 0 && level == 0)
945 assert(x == 0 && y == 0);
946
947 assert(img < mt->level[level].depth);
948
949 mt->level[level].slice[img].x_offset = mt->level[level].level_x + x;
950 mt->level[level].slice[img].y_offset = mt->level[level].level_y + y;
951
952 DBG("%s level %d img %d pos %d,%d\n",
953 __FUNCTION__, level, img,
954 mt->level[level].slice[img].x_offset,
955 mt->level[level].slice[img].y_offset);
956 }
957
958 void
959 intel_miptree_get_image_offset(struct intel_mipmap_tree *mt,
960 GLuint level, GLuint slice,
961 GLuint *x, GLuint *y)
962 {
963 assert(slice < mt->level[level].depth);
964
965 *x = mt->level[level].slice[slice].x_offset;
966 *y = mt->level[level].slice[slice].y_offset;
967 }
968
969 /**
970 * Rendering with tiled buffers requires that the base address of the buffer
971 * be aligned to a page boundary. For renderbuffers, and sometimes with
972 * textures, we may want the surface to point at a texture image level that
973 * isn't at a page boundary.
974 *
975 * This function returns an appropriately-aligned base offset
976 * according to the tiling restrictions, plus any required x/y offset
977 * from there.
978 */
979 uint32_t
980 intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt,
981 GLuint level, GLuint slice,
982 uint32_t *tile_x,
983 uint32_t *tile_y)
984 {
985 struct intel_region *region = mt->region;
986 uint32_t x, y;
987 uint32_t mask_x, mask_y;
988
989 intel_region_get_tile_masks(region, &mask_x, &mask_y, false);
990 intel_miptree_get_image_offset(mt, level, slice, &x, &y);
991
992 *tile_x = x & mask_x;
993 *tile_y = y & mask_y;
994
995 return intel_region_get_aligned_offset(region, x & ~mask_x, y & ~mask_y,
996 false);
997 }
998
999 static void
1000 intel_miptree_copy_slice_sw(struct intel_context *intel,
1001 struct intel_mipmap_tree *dst_mt,
1002 struct intel_mipmap_tree *src_mt,
1003 int level,
1004 int slice,
1005 int width,
1006 int height)
1007 {
1008 void *src, *dst;
1009 int src_stride, dst_stride;
1010 int cpp = dst_mt->cpp;
1011
1012 intel_miptree_map(intel, src_mt,
1013 level, slice,
1014 0, 0,
1015 width, height,
1016 GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
1017 &src, &src_stride);
1018
1019 intel_miptree_map(intel, dst_mt,
1020 level, slice,
1021 0, 0,
1022 width, height,
1023 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
1024 BRW_MAP_DIRECT_BIT,
1025 &dst, &dst_stride);
1026
1027 DBG("sw blit %s mt %p %p/%d -> %s mt %p %p/%d (%dx%d)\n",
1028 _mesa_get_format_name(src_mt->format),
1029 src_mt, src, src_stride,
1030 _mesa_get_format_name(dst_mt->format),
1031 dst_mt, dst, dst_stride,
1032 width, height);
1033
1034 int row_size = cpp * width;
1035 if (src_stride == row_size &&
1036 dst_stride == row_size) {
1037 memcpy(dst, src, row_size * height);
1038 } else {
1039 for (int i = 0; i < height; i++) {
1040 memcpy(dst, src, row_size);
1041 dst += dst_stride;
1042 src += src_stride;
1043 }
1044 }
1045
1046 intel_miptree_unmap(intel, dst_mt, level, slice);
1047 intel_miptree_unmap(intel, src_mt, level, slice);
1048
1049 /* Don't forget to copy the stencil data over, too. We could have skipped
1050 * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
1051 * shuffling the two data sources in/out of temporary storage instead of
1052 * the direct mapping we get this way.
1053 */
1054 if (dst_mt->stencil_mt) {
1055 assert(src_mt->stencil_mt);
1056 intel_miptree_copy_slice_sw(intel, dst_mt->stencil_mt, src_mt->stencil_mt,
1057 level, slice, width, height);
1058 }
1059 }
1060
1061 static void
1062 intel_miptree_copy_slice(struct intel_context *intel,
1063 struct intel_mipmap_tree *dst_mt,
1064 struct intel_mipmap_tree *src_mt,
1065 int level,
1066 int face,
1067 int depth)
1069 {
1070 gl_format format = src_mt->format;
1071 uint32_t width = src_mt->level[level].width;
1072 uint32_t height = src_mt->level[level].height;
1073 int slice;
1074
1075 if (face > 0)
1076 slice = face;
1077 else
1078 slice = depth;
1079
1080 assert(depth < src_mt->level[level].depth);
1081 assert(src_mt->format == dst_mt->format);
1082
1083 if (dst_mt->compressed) {
1084 height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h;
1085 width = ALIGN(width, dst_mt->align_w);
1086 }
1087
1088 /* If it's a packed depth/stencil buffer with separate stencil, the blit
1089 * below won't apply since we can't do the depth's Y tiling or the
1090 * stencil's W tiling in the blitter.
1091 */
1092 if (src_mt->stencil_mt) {
1093 intel_miptree_copy_slice_sw(intel,
1094 dst_mt, src_mt,
1095 level, slice,
1096 width, height);
1097 return;
1098 }
1099
1100 uint32_t dst_x, dst_y, src_x, src_y;
1101 intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y);
1102 intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y);
1103
1104 DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
1105 _mesa_get_format_name(src_mt->format),
1106 src_mt, src_x, src_y, src_mt->region->pitch,
1107 _mesa_get_format_name(dst_mt->format),
1108 dst_mt, dst_x, dst_y, dst_mt->region->pitch,
1109 width, height);
1110
1111 if (!intel_miptree_blit(intel,
1112 src_mt, level, slice, 0, 0, false,
1113 dst_mt, level, slice, 0, 0, false,
1114 width, height, GL_COPY)) {
1115 perf_debug("miptree validate blit for %s failed\n",
1116 _mesa_get_format_name(format));
1117
1118 intel_miptree_copy_slice_sw(intel, dst_mt, src_mt, level, slice,
1119 width, height);
1120 }
1121 }
1122
1123 /**
1124 * Copies the image's current data to the given miptree, and associates that
1125 * miptree with the image.
1126 *
1127 * If \c invalidate is true, then the actual image data does not need to be
1128 * copied, but the image still needs to be associated to the new miptree (this
1129 * is set to true if we're about to clear the image).
1130 */
1131 void
1132 intel_miptree_copy_teximage(struct intel_context *intel,
1133 struct intel_texture_image *intelImage,
1134 struct intel_mipmap_tree *dst_mt,
1135 bool invalidate)
1136 {
1137 struct intel_mipmap_tree *src_mt = intelImage->mt;
1138 struct intel_texture_object *intel_obj =
1139 intel_texture_object(intelImage->base.Base.TexObject);
1140 int level = intelImage->base.Base.Level;
1141 int face = intelImage->base.Base.Face;
1142 GLuint depth = intelImage->base.Base.Depth;
1143
1144 if (!invalidate) {
1145 for (int slice = 0; slice < depth; slice++) {
1146 intel_miptree_copy_slice(intel, dst_mt, src_mt, level, face, slice);
1147 }
1148 }
1149
1150 intel_miptree_reference(&intelImage->mt, dst_mt);
1151 intel_obj->needs_validate = true;
1152 }
1153
1154 bool
1155 intel_miptree_alloc_mcs(struct intel_context *intel,
1156 struct intel_mipmap_tree *mt,
1157 GLuint num_samples)
1158 {
1159 assert(intel->gen >= 7); /* MCS only used on Gen7+ */
1160 #ifdef I915
1161 return false;
1162 #else
1163 assert(mt->mcs_mt == NULL);
1164
1165 /* Choose the correct format for the MCS buffer. All that really matters
1166 * is that we allocate the right buffer size, since we'll always be
1167 * accessing this miptree using MCS-specific hardware mechanisms, which
1168 * infer the correct format based on num_samples.
1169 */
1170 gl_format format;
1171 switch (num_samples) {
1172 case 4:
1173 /* 8 bits/pixel are required for MCS data when using 4x MSAA (2 bits for
1174 * each sample).
1175 */
1176 format = MESA_FORMAT_R8;
1177 break;
1178 case 8:
1179 /* 32 bits/pixel are required for MCS data when using 8x MSAA (3 bits
1180 * for each sample, plus 8 padding bits).
1181 */
1182 format = MESA_FORMAT_R_UINT32;
1183 break;
1184 default:
1185 assert(!"Unrecognized sample count in intel_miptree_alloc_mcs");
1186 return false;
1187 }
1188
1189 /* From the Ivy Bridge PRM, Vol4 Part1 p76, "MCS Base Address":
1190 *
1191 * "The MCS surface must be stored as Tile Y."
1192 */
1193 mt->mcs_state = INTEL_MCS_STATE_MSAA;
1194 mt->mcs_mt = intel_miptree_create(intel,
1195 mt->target,
1196 format,
1197 mt->first_level,
1198 mt->last_level,
1199 mt->logical_width0,
1200 mt->logical_height0,
1201 mt->logical_depth0,
1202 true,
1203 0 /* num_samples */,
1204 INTEL_MIPTREE_TILING_Y);
1205
1206 /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
1207 *
1208 * When MCS buffer is enabled and bound to MSRT, it is required that it
1209 * is cleared prior to any rendering.
1210 *
1211 * Since we don't use the MCS buffer for any purpose other than rendering,
1212 * it makes sense to just clear it immediately upon allocation.
1213 *
1214 * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
1215 */
1216 void *data = intel_miptree_map_raw(intel, mt->mcs_mt);
1217 memset(data, 0xff, mt->mcs_mt->region->bo->size);
1218 intel_miptree_unmap_raw(intel, mt->mcs_mt);
1219
1220 return mt->mcs_mt;
1221 #endif
1222 }
1223
1224
1225 bool
1226 intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel,
1227 struct intel_mipmap_tree *mt)
1228 {
1229 #ifdef I915
1230 assert(!"MCS not supported on i915");
1231 return false;
1232 #else
1233 assert(mt->mcs_mt == NULL);
1234
1235 /* The format of the MCS buffer is opaque to the driver; all that matters
1236 * is that we get its size and pitch right. We'll pretend that the format
1237 * is R32. Since an MCS tile covers 128 blocks horizontally, and a Y-tiled
1238 * R32 buffer is 32 pixels across, we'll need to scale the width down by
1239 * the block width and then a further factor of 4. Since an MCS tile
1240 * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high,
1241 * we'll need to scale the height down by the block height and then a
1242 * further factor of 8.
1243 */
1244 const gl_format format = MESA_FORMAT_R_UINT32;
1245 unsigned block_width_px;
1246 unsigned block_height;
1247 intel_get_non_msrt_mcs_alignment(intel, mt, &block_width_px, &block_height);
1248 unsigned width_divisor = block_width_px * 4;
1249 unsigned height_divisor = block_height * 8;
1250 unsigned mcs_width =
1251 ALIGN(mt->logical_width0, width_divisor) / width_divisor;
1252 unsigned mcs_height =
1253 ALIGN(mt->logical_height0, height_divisor) / height_divisor;
1254 assert(mt->logical_depth0 == 1);
1255 mt->mcs_mt = intel_miptree_create(intel,
1256 mt->target,
1257 format,
1258 mt->first_level,
1259 mt->last_level,
1260 mcs_width,
1261 mcs_height,
1262 mt->logical_depth0,
1263 true,
1264 0 /* num_samples */,
1265 INTEL_MIPTREE_TILING_Y);
1266
1267 return mt->mcs_mt;
1268 #endif
1269 }
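
/* Worked example of the divisors above (illustrative, values assumed): for
 * a 1024x768 RGBA8 render target, cpp = 4, so a Y-tiled MCS block is
 * 8x4 pixels (32/cpp x 4). Then:
 *
 *    width_divisor  = 8 * 4 = 32    mcs_width  = ALIGN(1024, 32) / 32 = 32
 *    height_divisor = 4 * 8 = 32    mcs_height = ALIGN(768, 32) / 32  = 24
 *
 * i.e. a 32x24 R32 miptree tracks the clear state of the whole surface.
 */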
1270
1271
1272 /**
1273 * Helper for intel_miptree_alloc_hiz() that sets
1274 * \c mt->level[level].slice[layer].has_hiz. Return true if and only if
1275 * \c has_hiz was set.
1276 */
1277 static bool
1278 intel_miptree_slice_enable_hiz(struct intel_context *intel,
1279 struct intel_mipmap_tree *mt,
1280 uint32_t level,
1281 uint32_t layer)
1282 {
1283 assert(mt->hiz_mt);
1284
1285 if (intel->is_haswell) {
1286 /* Disable HiZ for some slices to work around a hardware bug.
1287 *
1288 * Haswell hardware fails to respect
1289 * 3DSTATE_DEPTH_BUFFER.Depth_Coordinate_Offset_X/Y during HiZ
1290 * ambiguate operations. The failure is inconsistent and affected by
1291 * other GPU contexts. Running a heavy GPU workload in a separate
1292 * process causes the failure rate to drop to nearly 0.
1293 *
1294 * To work around the bug, we enable HiZ only when we can guarantee that
1295 * the Depth Coordinate Offset fields will be set to 0. The function
1296 * brw_get_depthstencil_tile_masks() is used to calculate the fields,
1297 * and the function is sometimes called in such a way that the presence
1298 * of an attached stencil buffer changes the function's return value.
1299 *
1300 * The largest tile size considered by brw_get_depthstencil_tile_masks()
1301 * is that of the stencil buffer. Therefore, if this hiz slice's
1302 * corresponding depth slice has an offset that is aligned to the
1303 * stencil buffer tile size, 64x64 pixels, then
1304 * 3DSTATE_DEPTH_BUFFER.Depth_Coordinate_Offset_X/Y is set to 0.
1305 */
1306 uint32_t depth_x_offset = mt->level[level].slice[layer].x_offset;
1307 uint32_t depth_y_offset = mt->level[level].slice[layer].y_offset;
1308 if ((depth_x_offset & 63) || (depth_y_offset & 63)) {
1309 return false;
1310 }
1311 }
1312
1313 mt->level[level].slice[layer].has_hiz = true;
1314 return true;
1315 }
1316
1317
1318
1319 bool
1320 intel_miptree_alloc_hiz(struct intel_context *intel,
1321 struct intel_mipmap_tree *mt)
1322 {
1323 assert(mt->hiz_mt == NULL);
1324 mt->hiz_mt = intel_miptree_create(intel,
1325 mt->target,
1326 mt->format,
1327 mt->first_level,
1328 mt->last_level,
1329 mt->logical_width0,
1330 mt->logical_height0,
1331 mt->logical_depth0,
1332 true,
1333 mt->num_samples,
1334 INTEL_MIPTREE_TILING_ANY);
1335
1336 if (!mt->hiz_mt)
1337 return false;
1338
1339 /* Mark that all slices need a HiZ resolve. */
1340 struct intel_resolve_map *head = &mt->hiz_map;
1341 for (int level = mt->first_level; level <= mt->last_level; ++level) {
1342 for (int layer = 0; layer < mt->level[level].depth; ++layer) {
1343 if (!intel_miptree_slice_enable_hiz(intel, mt, level, layer))
1344 continue;
1345
1346 head->next = malloc(sizeof(*head->next));
1347 head->next->prev = head;
1348 head->next->next = NULL;
1349 head = head->next;
1350
1351 head->level = level;
1352 head->layer = layer;
1353 head->need = GEN6_HIZ_OP_HIZ_RESOLVE;
1354 }
1355 }
1356
1357 return true;
1358 }
1359
1360 /**
1361 * Does the miptree slice have hiz enabled?
1362 */
1363 bool
1364 intel_miptree_slice_has_hiz(struct intel_mipmap_tree *mt,
1365 uint32_t level,
1366 uint32_t layer)
1367 {
1368 intel_miptree_check_level_layer(mt, level, layer);
1369 return mt->level[level].slice[layer].has_hiz;
1370 }
1371
1372 void
1373 intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,
1374 uint32_t level,
1375 uint32_t layer)
1376 {
1377 if (!intel_miptree_slice_has_hiz(mt, level, layer))
1378 return;
1379
1380 intel_resolve_map_set(&mt->hiz_map,
1381 level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
1382 }
1383
1384
1385 void
1386 intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt,
1387 uint32_t level,
1388 uint32_t layer)
1389 {
1390 if (!intel_miptree_slice_has_hiz(mt, level, layer))
1391 return;
1392
1393 intel_resolve_map_set(&mt->hiz_map,
1394 level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
1395 }
1396
1397 static bool
1398 intel_miptree_slice_resolve(struct intel_context *intel,
1399 struct intel_mipmap_tree *mt,
1400 uint32_t level,
1401 uint32_t layer,
1402 enum gen6_hiz_op need)
1403 {
1404 intel_miptree_check_level_layer(mt, level, layer);
1405
1406 struct intel_resolve_map *item =
1407 intel_resolve_map_get(&mt->hiz_map, level, layer);
1408
1409 if (!item || item->need != need)
1410 return false;
1411
1412 intel_hiz_exec(intel, mt, level, layer, need);
1413 intel_resolve_map_remove(item);
1414 return true;
1415 }
1416
1417 bool
1418 intel_miptree_slice_resolve_hiz(struct intel_context *intel,
1419 struct intel_mipmap_tree *mt,
1420 uint32_t level,
1421 uint32_t layer)
1422 {
1423 return intel_miptree_slice_resolve(intel, mt, level, layer,
1424 GEN6_HIZ_OP_HIZ_RESOLVE);
1425 }
1426
1427 bool
1428 intel_miptree_slice_resolve_depth(struct intel_context *intel,
1429 struct intel_mipmap_tree *mt,
1430 uint32_t level,
1431 uint32_t layer)
1432 {
1433 return intel_miptree_slice_resolve(intel, mt, level, layer,
1434 GEN6_HIZ_OP_DEPTH_RESOLVE);
1435 }
1436
1437 static bool
1438 intel_miptree_all_slices_resolve(struct intel_context *intel,
1439 struct intel_mipmap_tree *mt,
1440 enum gen6_hiz_op need)
1441 {
1442 bool did_resolve = false;
1443 struct intel_resolve_map *i, *next;
1444
1445 for (i = mt->hiz_map.next; i; i = next) {
1446 next = i->next;
1447 if (i->need != need)
1448 continue;
1449
1450 intel_hiz_exec(intel, mt, i->level, i->layer, need);
1451 intel_resolve_map_remove(i);
1452 did_resolve = true;
1453 }
1454
1455 return did_resolve;
1456 }
1457
1458 bool
1459 intel_miptree_all_slices_resolve_hiz(struct intel_context *intel,
1460 struct intel_mipmap_tree *mt)
1461 {
1462 return intel_miptree_all_slices_resolve(intel, mt,
1463 GEN6_HIZ_OP_HIZ_RESOLVE);
1464 }
1465
1466 bool
1467 intel_miptree_all_slices_resolve_depth(struct intel_context *intel,
1468 struct intel_mipmap_tree *mt)
1469 {
1470 return intel_miptree_all_slices_resolve(intel, mt,
1471 GEN6_HIZ_OP_DEPTH_RESOLVE);
1472 }
1473
1474
1475 void
1476 intel_miptree_resolve_color(struct intel_context *intel,
1477 struct intel_mipmap_tree *mt)
1478 {
1479 #ifdef I915
1480 /* Fast color clear is not supported on the i915 (pre-Gen4) driver */
1481 #else
1482 switch (mt->mcs_state) {
1483 case INTEL_MCS_STATE_NONE:
1484 case INTEL_MCS_STATE_MSAA:
1485 case INTEL_MCS_STATE_RESOLVED:
1486 /* No resolve needed */
1487 break;
1488 case INTEL_MCS_STATE_UNRESOLVED:
1489 case INTEL_MCS_STATE_CLEAR:
1490 brw_blorp_resolve_color(intel, mt);
1491 break;
1492 }
1493 #endif
1494 }
1495
1496
1497 /**
1498 * Make it possible to share the region backing the given miptree with another
1499 * process or another miptree.
1500 *
1501 * Fast color clears are unsafe with shared buffers, so we need to resolve and
1502 * then discard the MCS buffer, if present. We also set the mcs_state to
1503 * INTEL_MCS_STATE_NONE to ensure that no MCS buffer gets allocated in the
1504 * future.
1505 */
1506 void
1507 intel_miptree_make_shareable(struct intel_context *intel,
1508 struct intel_mipmap_tree *mt)
1509 {
1510 #ifdef I915
1511 /* Nothing needs to be done for I915 */
1512 (void) intel;
1513 (void) mt;
1514 #else
1515 /* MCS buffers are also used for multisample buffers, but we can't resolve
1516 * away a multisample MCS buffer because it's an integral part of how the
1517 * pixel data is stored. Fortunately this code path should never be
1518 * reached for multisample buffers.
1519 */
1520 assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE);
1521
1522 if (mt->mcs_mt) {
1523 intel_miptree_resolve_color(intel, mt);
1524 intel_miptree_release(&mt->mcs_mt);
1525 mt->mcs_state = INTEL_MCS_STATE_NONE;
1526 }
1527 #endif
1528 }
1529
1530
1531 /**
1532 * \brief Get pointer offset into stencil buffer.
1533 *
1534 * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
1535 * must decode the tile's layout in software.
1536 *
1537 * See
1538 * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
1539 * Format.
1540 * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
1541 *
1542 * Even though the returned offset is always positive, the return type is
1543 * signed due to
1544 * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
1545 * mesa: Fix return type of _mesa_get_format_bytes() (#37351)
1546 */
1547 static intptr_t
1548 intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
1549 {
1550 uint32_t tile_size = 4096;
1551 uint32_t tile_width = 64;
1552 uint32_t tile_height = 64;
1553 uint32_t row_size = 64 * stride;
1554
1555 uint32_t tile_x = x / tile_width;
1556 uint32_t tile_y = y / tile_height;
1557
1558 /* The byte's address relative to the tile's base address. */
1559 uint32_t byte_x = x % tile_width;
1560 uint32_t byte_y = y % tile_height;
1561
1562 uintptr_t u = tile_y * row_size
1563 + tile_x * tile_size
1564 + 512 * (byte_x / 8)
1565 + 64 * (byte_y / 8)
1566 + 32 * ((byte_y / 4) % 2)
1567 + 16 * ((byte_x / 4) % 2)
1568 + 8 * ((byte_y / 2) % 2)
1569 + 4 * ((byte_x / 2) % 2)
1570 + 2 * (byte_y % 2)
1571 + 1 * (byte_x % 2);
1572
1573 if (swizzled) {
1574 /* adjust for bit6 swizzling */
1575 if (((byte_x / 8) % 2) == 1) {
1576 if (((byte_y / 8) % 2) == 0) {
1577 u += 64;
1578 } else {
1579 u -= 64;
1580 }
1581 }
1582 }
1583
1584 return u;
1585 }
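
/* A few sample values of the interleave above (swizzled = false, x and y
 * within the first tile), showing how the low x and y bits alternate in
 * the byte address:
 *
 *    (0,0) -> 0    (1,0) -> 1    (0,1) -> 2    (2,0) -> 4    (0,2) -> 8
 *    (4,0) -> 16   (0,4) -> 32   (0,8) -> 64   (8,0) -> 512
 */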
1586
1587 static void
1588 intel_miptree_updownsample(struct intel_context *intel,
1589 struct intel_mipmap_tree *src,
1590 struct intel_mipmap_tree *dst,
1591 unsigned width,
1592 unsigned height)
1593 {
1594 #ifndef I915
1595 int src_x0 = 0;
1596 int src_y0 = 0;
1597 int dst_x0 = 0;
1598 int dst_y0 = 0;
1599
1600 brw_blorp_blit_miptrees(intel,
1601 src, 0 /* level */, 0 /* layer */,
1602 dst, 0 /* level */, 0 /* layer */,
1603 src_x0, src_y0,
1604 width, height,
1605 dst_x0, dst_y0,
1606 width, height,
1607 false, false /*mirror x, y*/);
1608
1609 if (src->stencil_mt) {
1610 brw_blorp_blit_miptrees(intel,
1611 src->stencil_mt, 0 /* level */, 0 /* layer */,
1612 dst->stencil_mt, 0 /* level */, 0 /* layer */,
1613 src_x0, src_y0,
1614 width, height,
1615 dst_x0, dst_y0,
1616 width, height,
1617 false, false /*mirror x, y*/);
1618 }
1619 #endif /* I915 */
1620 }
1621
1622 static void
1623 assert_is_flat(struct intel_mipmap_tree *mt)
1624 {
1625 assert(mt->target == GL_TEXTURE_2D);
1626 assert(mt->first_level == 0);
1627 assert(mt->last_level == 0);
1628 }
1629
1630 /**
1631 * \brief Downsample from mt to mt->singlesample_mt.
1632 *
1633 * If the miptree needs no downsample, then skip.
1634 */
1635 void
1636 intel_miptree_downsample(struct intel_context *intel,
1637 struct intel_mipmap_tree *mt)
1638 {
1639 /* Only flat, renderbuffer-like miptrees are supported. */
1640 assert_is_flat(mt);
1641
1642 if (!mt->need_downsample)
1643 return;
1644 intel_miptree_updownsample(intel,
1645 mt, mt->singlesample_mt,
1646 mt->logical_width0,
1647 mt->logical_height0);
1648 mt->need_downsample = false;
1649 }
1650
1651 /**
1652 * \brief Upsample from mt->singlesample_mt to mt.
1653 *
1654 * The upsample is done unconditionally.
1655 */
1656 void
1657 intel_miptree_upsample(struct intel_context *intel,
1658 struct intel_mipmap_tree *mt)
1659 {
1660 /* Only flat, renderbuffer-like miptrees are supported. */
1661 assert_is_flat(mt);
1662 assert(!mt->need_downsample);
1663
1664 intel_miptree_updownsample(intel,
1665 mt->singlesample_mt, mt,
1666 mt->logical_width0,
1667 mt->logical_height0);
1668 }
1669
1670 void *
1671 intel_miptree_map_raw(struct intel_context *intel, struct intel_mipmap_tree *mt)
1672 {
1673 /* CPU accesses to color buffers don't understand fast color clears, so
1674 * resolve any pending fast color clears before we map.
1675 */
1676 intel_miptree_resolve_color(intel, mt);
1677
1678 drm_intel_bo *bo = mt->region->bo;
1679
1680 if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
1681 if (drm_intel_bo_busy(bo)) {
1682 perf_debug("Mapping a busy BO, causing a stall on the GPU.\n");
1683 }
1684 }
1685
1686 intel_flush(&intel->ctx);
1687
1688 if (mt->region->tiling != I915_TILING_NONE)
1689 drm_intel_gem_bo_map_gtt(bo);
1690 else
1691 drm_intel_bo_map(bo, true);
1692
1693 return bo->virtual;
1694 }
1695
1696 void
1697 intel_miptree_unmap_raw(struct intel_context *intel,
1698 struct intel_mipmap_tree *mt)
1699 {
1700 drm_intel_bo_unmap(mt->region->bo);
1701 }
1702
1703 static void
1704 intel_miptree_map_gtt(struct intel_context *intel,
1705 struct intel_mipmap_tree *mt,
1706 struct intel_miptree_map *map,
1707 unsigned int level, unsigned int slice)
1708 {
1709 unsigned int bw, bh;
1710 void *base;
1711 unsigned int image_x, image_y;
1712 int x = map->x;
1713 int y = map->y;
1714
1715 /* For compressed formats, the stride is the number of bytes per
1716 * row of blocks. intel_miptree_get_image_offset() already does
1717 * the divide.
1718 */
1719 _mesa_get_format_block_size(mt->format, &bw, &bh);
1720 assert(y % bh == 0);
1721 y /= bh;
1722
1723 base = intel_miptree_map_raw(intel, mt) + mt->offset;
1724
1725 if (base == NULL)
1726 map->ptr = NULL;
1727 else {
1728 /* Note that in the case of cube maps, the caller must have passed the
1729 * slice number referencing the face.
1730 */
1731 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1732 x += image_x;
1733 y += image_y;
1734
1735 map->stride = mt->region->pitch;
1736 map->ptr = base + y * map->stride + x * mt->cpp;
1737 }
1738
1739 DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1740 map->x, map->y, map->w, map->h,
1741 mt, _mesa_get_format_name(mt->format),
1742 x, y, map->ptr, map->stride);
1743 }
1744
1745 static void
1746 intel_miptree_unmap_gtt(struct intel_context *intel,
1747 struct intel_mipmap_tree *mt,
1748 struct intel_miptree_map *map,
1749 unsigned int level,
1750 unsigned int slice)
1751 {
1752 intel_miptree_unmap_raw(intel, mt);
1753 }
1754
1755 static void
1756 intel_miptree_map_blit(struct intel_context *intel,
1757 struct intel_mipmap_tree *mt,
1758 struct intel_miptree_map *map,
1759 unsigned int level, unsigned int slice)
1760 {
1761 map->mt = intel_miptree_create(intel, GL_TEXTURE_2D, mt->format,
1762 0, 0,
1763 map->w, map->h, 1,
1764 false, 0,
1765 INTEL_MIPTREE_TILING_NONE);
1766 if (!map->mt) {
1767 fprintf(stderr, "Failed to allocate blit temporary\n");
1768 goto fail;
1769 }
1770 map->stride = map->mt->region->pitch;
1771
1772 if (!intel_miptree_blit(intel,
1773 mt, level, slice,
1774 map->x, map->y, false,
1775 map->mt, 0, 0,
1776 0, 0, false,
1777 map->w, map->h, GL_COPY)) {
1778 fprintf(stderr, "Failed to blit\n");
1779 goto fail;
1780 }
1781
1782 intel_batchbuffer_flush(intel);
1783 map->ptr = intel_miptree_map_raw(intel, map->mt);
1784
1785 DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1786 map->x, map->y, map->w, map->h,
1787 mt, _mesa_get_format_name(mt->format),
1788 level, slice, map->ptr, map->stride);
1789
1790 return;
1791
1792 fail:
1793 intel_miptree_release(&map->mt);
1794 map->ptr = NULL;
1795 map->stride = 0;
1796 }
1797
1798 static void
1799 intel_miptree_unmap_blit(struct intel_context *intel,
1800 struct intel_mipmap_tree *mt,
1801 struct intel_miptree_map *map,
1802 unsigned int level,
1803 unsigned int slice)
1804 {
1805 struct gl_context *ctx = &intel->ctx;
1806
1807 intel_miptree_unmap_raw(intel, map->mt);
1808
1809 if (map->mode & GL_MAP_WRITE_BIT) {
1810 bool ok = intel_miptree_blit(intel,
1811 map->mt, 0, 0,
1812 0, 0, false,
1813 mt, level, slice,
1814 map->x, map->y, false,
1815 map->w, map->h, GL_COPY);
1816 WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
1817 }
1818
1819 intel_miptree_release(&map->mt);
1820 }
1821
1822 static void
1823 intel_miptree_map_s8(struct intel_context *intel,
1824 struct intel_mipmap_tree *mt,
1825 struct intel_miptree_map *map,
1826 unsigned int level, unsigned int slice)
1827 {
1828 map->stride = map->w;
1829 map->buffer = map->ptr = malloc(map->stride * map->h);
1830 if (!map->buffer)
1831 return;
1832
1833 /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no
1834 * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
1835 * invalidate is set, since we'll be writing the whole rectangle from our
1836 * temporary buffer back out.
1837 */
1838 if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
1839 uint8_t *untiled_s8_map = map->ptr;
1840 uint8_t *tiled_s8_map = intel_miptree_map_raw(intel, mt);
1841 unsigned int image_x, image_y;
1842
1843 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1844
1845 for (uint32_t y = 0; y < map->h; y++) {
1846 for (uint32_t x = 0; x < map->w; x++) {
1847 ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
1848 x + image_x + map->x,
1849 y + image_y + map->y,
1850 intel->has_swizzling);
1851 untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
1852 }
1853 }
1854
1855 intel_miptree_unmap_raw(intel, mt);
1856
1857 DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __FUNCTION__,
1858 map->x, map->y, map->w, map->h,
1859 mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
1860 } else {
1861 DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
1862 map->x, map->y, map->w, map->h,
1863 mt, map->ptr, map->stride);
1864 }
1865 }
1866
1867 static void
1868 intel_miptree_unmap_s8(struct intel_context *intel,
1869 struct intel_mipmap_tree *mt,
1870 struct intel_miptree_map *map,
1871 unsigned int level,
1872 unsigned int slice)
1873 {
1874 if (map->mode & GL_MAP_WRITE_BIT) {
1875 unsigned int image_x, image_y;
1876 uint8_t *untiled_s8_map = map->ptr;
1877 uint8_t *tiled_s8_map = intel_miptree_map_raw(intel, mt);
1878
1879 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1880
1881 for (uint32_t y = 0; y < map->h; y++) {
1882 for (uint32_t x = 0; x < map->w; x++) {
1883 ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
1884 x + map->x,
1885 y + map->y,
1886 intel->has_swizzling);
1887 tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
1888 }
1889 }
1890
1891 intel_miptree_unmap_raw(intel, mt);
1892 }
1893
1894 free(map->buffer);
1895 }
1896
1897 static void
1898 intel_miptree_map_etc(struct intel_context *intel,
1899 struct intel_mipmap_tree *mt,
1900 struct intel_miptree_map *map,
1901 unsigned int level,
1902 unsigned int slice)
1903 {
1904 assert(mt->etc_format != MESA_FORMAT_NONE);
1905 if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
1906 assert(mt->format == MESA_FORMAT_RGBX8888_REV);
1907 }
1908
1909 assert(map->mode & GL_MAP_WRITE_BIT);
1910 assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
1911
1912 map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
1913 map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
1914 map->w, map->h, 1));
1915 map->ptr = map->buffer;
1916 }
1917
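/**
 * Decompress the ETC data the caller wrote into the temporary into the
 * miptree's uncompressed backing store, then free the temporary.
 */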
1918 static void
1919 intel_miptree_unmap_etc(struct intel_context *intel,
1920 struct intel_mipmap_tree *mt,
1921 struct intel_miptree_map *map,
1922 unsigned int level,
1923 unsigned int slice)
1924 {
1925 uint32_t image_x;
1926 uint32_t image_y;
1927 intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1928
1929 image_x += map->x;
1930 image_y += map->y;
1931
1932 uint8_t *dst = intel_miptree_map_raw(intel, mt)
1933 + image_y * mt->region->pitch
1934 + image_x * mt->region->cpp;
1935
1936 if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
1937 _mesa_etc1_unpack_rgba8888(dst, mt->region->pitch,
1938 map->ptr, map->stride,
1939 map->w, map->h);
1940 else
1941 _mesa_unpack_etc2_format(dst, mt->region->pitch,
1942 map->ptr, map->stride,
1943 map->w, map->h, mt->etc_format);
1944
1945 intel_miptree_unmap_raw(intel, mt);
1946 free(map->buffer);
1947 }
1948
1949 /**
1950 * Mapping function for packed depth/stencil miptrees backed by real separate
1951 * miptrees for depth and stencil.
1952 *
1953 * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
1954 * separate from the depth buffer. Yet at the GL API level, we have to expose
1955 * packed depth/stencil textures and FBO attachments, and Mesa core expects to
1956 * be able to map that memory for texture storage and glReadPixels-type
1957 * operations. We give Mesa core that access by mallocing a temporary and
1958 * copying the data between the actual backing store and the temporary.
1959 */
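/* Layout of the packed temporary, as implemented below (illustrative
 * sketch; the Z24 case matches the S8_Z24 packing with depth in the low
 * 24 bits, and the Z32F case is the two-dword Z32_FLOAT_X24S8 layout):
 *
 *    Z24/S8,  4 bytes per pixel:  dword = (s << 24) | (z & 0x00ffffff)
 *    Z32F/S8, 8 bytes per pixel:  dword 0 = z (float bits),
 *                                 dword 1 = s (in the low byte)
 */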
1960 static void
1961 intel_miptree_map_depthstencil(struct intel_context *intel,
1962 struct intel_mipmap_tree *mt,
1963 struct intel_miptree_map *map,
1964 unsigned int level, unsigned int slice)
1965 {
1966 struct intel_mipmap_tree *z_mt = mt;
1967 struct intel_mipmap_tree *s_mt = mt->stencil_mt;
1968 bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT;
1969 int packed_bpp = map_z32f_x24s8 ? 8 : 4;
1970
1971 map->stride = map->w * packed_bpp;
1972 map->buffer = map->ptr = malloc(map->stride * map->h);
1973 if (!map->buffer)
1974 return;
1975
1976 /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no
1977 * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
1978 * invalidate is set, since we'll be writing the whole rectangle from our
1979 * temporary buffer back out.
1980 */
1981 if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
1982 uint32_t *packed_map = map->ptr;
1983 uint8_t *s_map = intel_miptree_map_raw(intel, s_mt);
1984 uint32_t *z_map = intel_miptree_map_raw(intel, z_mt);
1985 unsigned int s_image_x, s_image_y;
1986 unsigned int z_image_x, z_image_y;
1987
1988 intel_miptree_get_image_offset(s_mt, level, slice,
1989 &s_image_x, &s_image_y);
1990 intel_miptree_get_image_offset(z_mt, level, slice,
1991 &z_image_x, &z_image_y);
1992
1993 for (uint32_t y = 0; y < map->h; y++) {
1994 for (uint32_t x = 0; x < map->w; x++) {
1995 int map_x = map->x + x, map_y = map->y + y;
1996 ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
1997 map_x + s_image_x,
1998 map_y + s_image_y,
1999 intel->has_swizzling);
2000 ptrdiff_t z_offset = ((map_y + z_image_y) *
2001 (z_mt->region->pitch / 4) +
2002 (map_x + z_image_x));
2003 uint8_t s = s_map[s_offset];
2004 uint32_t z = z_map[z_offset];
2005
2006 if (map_z32f_x24s8) {
2007 packed_map[(y * map->w + x) * 2 + 0] = z;
2008 packed_map[(y * map->w + x) * 2 + 1] = s;
2009 } else {
2010 packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
2011 }
2012 }
2013 }
2014
2015 intel_miptree_unmap_raw(intel, s_mt);
2016 intel_miptree_unmap_raw(intel, z_mt);
2017
2018 DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
2019 __FUNCTION__,
2020 map->x, map->y, map->w, map->h,
2021 z_mt, map->x + z_image_x, map->y + z_image_y,
2022 s_mt, map->x + s_image_x, map->y + s_image_y,
2023 map->ptr, map->stride);
2024 } else {
2025 DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
2026 map->x, map->y, map->w, map->h,
2027 mt, map->ptr, map->stride);
2028 }
2029 }
2030
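/**
 * Scatter a mapped packed depth/stencil temporary back into the separate
 * depth and stencil miptrees (when the map was writable) and free it.
 */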
2031 static void
2032 intel_miptree_unmap_depthstencil(struct intel_context *intel,
2033 struct intel_mipmap_tree *mt,
2034 struct intel_miptree_map *map,
2035 unsigned int level,
2036 unsigned int slice)
2037 {
2038 struct intel_mipmap_tree *z_mt = mt;
2039 struct intel_mipmap_tree *s_mt = mt->stencil_mt;
2040 bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT;
2041
2042 if (map->mode & GL_MAP_WRITE_BIT) {
2043 uint32_t *packed_map = map->ptr;
2044 uint8_t *s_map = intel_miptree_map_raw(intel, s_mt);
2045 uint32_t *z_map = intel_miptree_map_raw(intel, z_mt);
2046 unsigned int s_image_x, s_image_y;
2047 unsigned int z_image_x, z_image_y;
2048
2049 intel_miptree_get_image_offset(s_mt, level, slice,
2050 &s_image_x, &s_image_y);
2051 intel_miptree_get_image_offset(z_mt, level, slice,
2052 &z_image_x, &z_image_y);
2053
2054 for (uint32_t y = 0; y < map->h; y++) {
2055 for (uint32_t x = 0; x < map->w; x++) {
2056 ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
2057 x + s_image_x + map->x,
2058 y + s_image_y + map->y,
2059 intel->has_swizzling);
 2060           ptrdiff_t z_offset = ((y + z_image_y + map->y) *
 2061                                 (z_mt->region->pitch / 4) +
 2062                                 (x + z_image_x + map->x));
2063
2064 if (map_z32f_x24s8) {
2065 z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
2066 s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
2067 } else {
2068 uint32_t packed = packed_map[y * map->w + x];
2069 s_map[s_offset] = packed >> 24;
2070 z_map[z_offset] = packed;
2071 }
2072 }
2073 }
2074
2075 intel_miptree_unmap_raw(intel, s_mt);
2076 intel_miptree_unmap_raw(intel, z_mt);
2077
2078 DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
2079 __FUNCTION__,
2080 map->x, map->y, map->w, map->h,
2081 z_mt, _mesa_get_format_name(z_mt->format),
2082 map->x + z_image_x, map->y + z_image_y,
2083 s_mt, map->x + s_image_x, map->y + s_image_y,
2084 map->ptr, map->stride);
2085 }
2086
2087 free(map->buffer);
2088 }
2089
2090 /**
2091 * Create and attach a map to the miptree at (level, slice). Return the
2092 * attached map.
2093 */
2094 static struct intel_miptree_map*
2095 intel_miptree_attach_map(struct intel_mipmap_tree *mt,
2096 unsigned int level,
2097 unsigned int slice,
2098 unsigned int x,
2099 unsigned int y,
2100 unsigned int w,
2101 unsigned int h,
2102 GLbitfield mode)
2103 {
2104 struct intel_miptree_map *map = calloc(1, sizeof(*map));
2105
2106 if (!map)
2107 return NULL;
2108
2109 assert(mt->level[level].slice[slice].map == NULL);
2110 mt->level[level].slice[slice].map = map;
2111
2112 map->mode = mode;
2113 map->x = x;
2114 map->y = y;
2115 map->w = w;
2116 map->h = h;
2117
2118 return map;
2119 }
2120
2121 /**
2122 * Release the map at (level, slice).
2123 */
2124 static void
2125 intel_miptree_release_map(struct intel_mipmap_tree *mt,
2126 unsigned int level,
2127 unsigned int slice)
2128 {
2129 struct intel_miptree_map **map;
2130
2131 map = &mt->level[level].slice[slice].map;
2132 free(*map);
2133 *map = NULL;
2134 }
2135
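/**
 * Map a single-sampled miptree slice, picking the cheapest strategy that
 * is correct for the format and tiling: the S8, ETC, and depth/stencil
 * wrappers above for formats that need CPU repacking; a blit to a linear
 * temporary for read-only maps of tiled surfaces on LLC systems and for
 * objects too large to map through the GTT; and a direct GTT map
 * otherwise.
 */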
2136 static void
2137 intel_miptree_map_singlesample(struct intel_context *intel,
2138 struct intel_mipmap_tree *mt,
2139 unsigned int level,
2140 unsigned int slice,
2141 unsigned int x,
2142 unsigned int y,
2143 unsigned int w,
2144 unsigned int h,
2145 GLbitfield mode,
2146 void **out_ptr,
2147 int *out_stride)
2148 {
2149 struct intel_miptree_map *map;
2150
2151 assert(mt->num_samples <= 1);
2152
2153 map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
 2154    if (!map) {
2155 *out_ptr = NULL;
2156 *out_stride = 0;
2157 return;
2158 }
2159
2160 intel_miptree_slice_resolve_depth(intel, mt, level, slice);
2161 if (map->mode & GL_MAP_WRITE_BIT) {
2162 intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice);
2163 }
2164
2165 if (mt->format == MESA_FORMAT_S8) {
2166 intel_miptree_map_s8(intel, mt, map, level, slice);
2167 } else if (mt->etc_format != MESA_FORMAT_NONE &&
2168 !(mode & BRW_MAP_DIRECT_BIT)) {
2169 intel_miptree_map_etc(intel, mt, map, level, slice);
2170 } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
2171 intel_miptree_map_depthstencil(intel, mt, map, level, slice);
2172 }
2173 /* See intel_miptree_blit() for details on the 32k pitch limit. */
2174 else if (intel->has_llc &&
2175 !(mode & GL_MAP_WRITE_BIT) &&
2176 !mt->compressed &&
2177 (mt->region->tiling == I915_TILING_X ||
2178 (intel->gen >= 6 && mt->region->tiling == I915_TILING_Y)) &&
2179 mt->region->pitch < 32768) {
2180 intel_miptree_map_blit(intel, mt, map, level, slice);
2181 } else if (mt->region->tiling != I915_TILING_NONE &&
2182 mt->region->bo->size >= intel->max_gtt_map_object_size) {
2183 assert(mt->region->pitch < 32768);
2184 intel_miptree_map_blit(intel, mt, map, level, slice);
2185 } else {
2186 intel_miptree_map_gtt(intel, mt, map, level, slice);
2187 }
2188
2189 *out_ptr = map->ptr;
2190 *out_stride = map->stride;
2191
2192 if (map->ptr == NULL)
2193 intel_miptree_release_map(mt, level, slice);
2194 }
2195
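/**
 * Tear down a single-sampled map, dispatching to the unmap path matching
 * the strategy chosen at map time (a set map->mt indicates the blit
 * path's linear temporary).
 */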
2196 static void
2197 intel_miptree_unmap_singlesample(struct intel_context *intel,
2198 struct intel_mipmap_tree *mt,
2199 unsigned int level,
2200 unsigned int slice)
2201 {
2202 struct intel_miptree_map *map = mt->level[level].slice[slice].map;
2203
2204 assert(mt->num_samples <= 1);
2205
2206 if (!map)
2207 return;
2208
2209 DBG("%s: mt %p (%s) level %d slice %d\n", __FUNCTION__,
2210 mt, _mesa_get_format_name(mt->format), level, slice);
2211
2212 if (mt->format == MESA_FORMAT_S8) {
2213 intel_miptree_unmap_s8(intel, mt, map, level, slice);
2214 } else if (mt->etc_format != MESA_FORMAT_NONE &&
2215 !(map->mode & BRW_MAP_DIRECT_BIT)) {
2216 intel_miptree_unmap_etc(intel, mt, map, level, slice);
2217 } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
2218 intel_miptree_unmap_depthstencil(intel, mt, map, level, slice);
2219 } else if (map->mt) {
2220 intel_miptree_unmap_blit(intel, mt, map, level, slice);
2221 } else {
2222 intel_miptree_unmap_gtt(intel, mt, map, level, slice);
2223 }
2224
2225 intel_miptree_release_map(mt, level, slice);
2226 }
2227
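/**
 * Map a multisampled miptree by downsampling into a single-sampled
 * temporary and mapping that instead.  Only flat, renderbuffer-like
 * trees (GL_TEXTURE_2D with a single level) are supported.
 */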
2228 static void
2229 intel_miptree_map_multisample(struct intel_context *intel,
2230 struct intel_mipmap_tree *mt,
2231 unsigned int level,
2232 unsigned int slice,
2233 unsigned int x,
2234 unsigned int y,
2235 unsigned int w,
2236 unsigned int h,
2237 GLbitfield mode,
2238 void **out_ptr,
2239 int *out_stride)
2240 {
2241 struct intel_miptree_map *map;
2242
2243 assert(mt->num_samples > 1);
2244
2245 /* Only flat, renderbuffer-like miptrees are supported. */
2246 if (mt->target != GL_TEXTURE_2D ||
2247 mt->first_level != 0 ||
2248 mt->last_level != 0) {
2249 _mesa_problem(&intel->ctx, "attempt to map a multisample miptree for "
 2250                     "which (target, first_level, last_level) != "
2251 "(GL_TEXTURE_2D, 0, 0)");
2252 goto fail;
2253 }
2254
2255 map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
2256 if (!map)
2257 goto fail;
2258
2259 if (!mt->singlesample_mt) {
2260 mt->singlesample_mt =
2261 intel_miptree_create_for_renderbuffer(intel,
2262 mt->format,
2263 mt->logical_width0,
2264 mt->logical_height0,
2265 0 /*num_samples*/);
2266 if (!mt->singlesample_mt)
2267 goto fail;
2268
2269 map->singlesample_mt_is_tmp = true;
2270 mt->need_downsample = true;
2271 }
2272
2273 intel_miptree_downsample(intel, mt);
2274 intel_miptree_map_singlesample(intel, mt->singlesample_mt,
2275 level, slice,
2276 x, y, w, h,
2277 mode,
2278 out_ptr, out_stride);
2279 return;
2280
2281 fail:
2282 intel_miptree_release_map(mt, level, slice);
2283 *out_ptr = NULL;
2284 *out_stride = 0;
2285 }
2286
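/**
 * Unmap the single-sampled temporary and, if it was written, upsample
 * its contents back into the real multisampled miptree.
 */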
2287 static void
2288 intel_miptree_unmap_multisample(struct intel_context *intel,
2289 struct intel_mipmap_tree *mt,
2290 unsigned int level,
2291 unsigned int slice)
2292 {
2293 struct intel_miptree_map *map = mt->level[level].slice[slice].map;
2294
2295 assert(mt->num_samples > 1);
2296
2297 if (!map)
2298 return;
2299
2300 intel_miptree_unmap_singlesample(intel, mt->singlesample_mt, level, slice);
2301
2302 mt->need_downsample = false;
2303 if (map->mode & GL_MAP_WRITE_BIT)
2304 intel_miptree_upsample(intel, mt);
2305
2306 if (map->singlesample_mt_is_tmp)
2307 intel_miptree_release(&mt->singlesample_mt);
2308
2309 intel_miptree_release_map(mt, level, slice);
2310 }
2311
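/**
 * Map a window of a miptree slice for CPU access.  On success, *out_ptr
 * points at the upper-left texel of the window and *out_stride is the
 * distance in bytes between rows; on failure, *out_ptr is NULL.
 *
 * A minimal usage sketch (illustrative only; "intel" and "mt" are assumed
 * to be a valid context and miptree):
 *
 *    void *ptr;
 *    int stride;
 *    intel_miptree_map(intel, mt, level, slice, x, y, w, h,
 *                      GL_MAP_READ_BIT, &ptr, &stride);
 *    if (ptr) {
 *       // ... read w x h texels, rows "stride" bytes apart ...
 *       intel_miptree_unmap(intel, mt, level, slice);
 *    }
 */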
2312 void
2313 intel_miptree_map(struct intel_context *intel,
2314 struct intel_mipmap_tree *mt,
2315 unsigned int level,
2316 unsigned int slice,
2317 unsigned int x,
2318 unsigned int y,
2319 unsigned int w,
2320 unsigned int h,
2321 GLbitfield mode,
2322 void **out_ptr,
2323 int *out_stride)
2324 {
2325 if (mt->num_samples <= 1)
2326 intel_miptree_map_singlesample(intel, mt,
2327 level, slice,
2328 x, y, w, h,
2329 mode,
2330 out_ptr, out_stride);
2331 else
2332 intel_miptree_map_multisample(intel, mt,
2333 level, slice,
2334 x, y, w, h,
2335 mode,
2336 out_ptr, out_stride);
2337 }
2338
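/**
 * Unmap the (level, slice) mapping established by intel_miptree_map(),
 * writing back any data the caller was allowed to modify.
 */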
2339 void
2340 intel_miptree_unmap(struct intel_context *intel,
2341 struct intel_mipmap_tree *mt,
2342 unsigned int level,
2343 unsigned int slice)
2344 {
2345 if (mt->num_samples <= 1)
2346 intel_miptree_unmap_singlesample(intel, mt, level, slice);
2347 else
2348 intel_miptree_unmap_multisample(intel, mt, level, slice);
2349 }