src/mesa/drivers/dri/i965/intel_mipmap_tree.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 #include <GL/gl.h>
  29 #include <GL/internal/dri_interface.h>
  30
  31 #include "intel_batchbuffer.h"
  32 #include "intel_chipset.h"
  33 #include "intel_mipmap_tree.h"
  34 #include "intel_regions.h"
  35 #include "intel_resolve_map.h"
  36 #include "intel_tex.h"
  37 #include "intel_blit.h"
  38
  39 #include "brw_blorp.h"
  40 #include "brw_context.h"
  41
  42 #include "main/enums.h"
  43 #include "main/formats.h"
  44 #include "main/glformats.h"
  45 #include "main/texcompress_etc.h"
  46 #include "main/teximage.h"
  47 #include "main/streaming-load-memcpy.h"
  48
  49 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
  50
  51 static GLenum
  52 target_to_target(GLenum target)
  53 {
  54    switch (target) {
  55    case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
  56    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
  57    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
  58    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
  59    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
  60    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
  61       return GL_TEXTURE_CUBE_MAP_ARB;
  62    default:
  63       return target;
  64    }
  65 }
  66
  67
  68 /**
  69  * Determine which MSAA layout should be used by the MSAA surface being
  70  * created, based on the chip generation and the surface type.
  71  */
  72 static enum intel_msaa_layout
  73 compute_msaa_layout(struct brw_context *brw, gl_format format, GLenum target)
  74 {
  75    /* Prior to Gen7, all MSAA surfaces used IMS layout. */
  76    if (brw->gen < 7)
  77       return INTEL_MSAA_LAYOUT_IMS;
  78
  79    /* In Gen7, IMS layout is only used for depth and stencil buffers. */
  80    switch (_mesa_get_format_base_format(format)) {
  81    case GL_DEPTH_COMPONENT:
  82    case GL_STENCIL_INDEX:
  83    case GL_DEPTH_STENCIL:
  84       return INTEL_MSAA_LAYOUT_IMS;
  85    default:
  86       /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
  87        *
  88        *   This field must be set to 0 for all SINT MSRTs when all RT channels
  89        *   are not written
  90        *
  91        * In practice this means that we have to disable MCS for all signed
  92        * integer MSAA buffers.  The alternative, to disable MCS only when one
  93        * of the render target channels is disabled, is impractical because it
  94        * would require converting between CMS and UMS MSAA layouts on the fly,
  95        * which is expensive.
  96        */
  97       if (_mesa_get_format_datatype(format) == GL_INT) {
  98          /* TODO: is this workaround needed for future chipsets? */
  99          assert(brw->gen == 7);
 100          return INTEL_MSAA_LAYOUT_UMS;
 101       } else {
 102          return INTEL_MSAA_LAYOUT_CMS;
 103       }
 104    }
 105 }
 106
 107
 108 /**
 109  * For single-sampled render targets ("non-MSRT"), the MCS buffer is a
 110  * scaled-down bitfield representation of the color buffer which is capable of
 111  * recording when blocks of the color buffer are equal to the clear value.
 112  * This function returns the block size that will be used by the MCS buffer
 113  * corresponding to a certain color miptree.
 114  *
 115  * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 116  * beneath the "Fast Color Clear" bullet (p327):
 117  *
 118  *     The following table describes the RT alignment
 119  *
 120  *                       Pixels  Lines
 121  *         TiledY RT CL
 122  *             bpp
 123  *              32          8      4
 124  *              64          4      4
 125  *             128          2      4
 126  *         TiledX RT CL
 127  *             bpp
 128  *              32         16      2
 129  *              64          8      2
 130  *             128          4      2
 131  *
 132  * This alignment has the following uses:
 133  *
 134  * - For figuring out the size of the MCS buffer.  Each 4k tile in the MCS
 135  *   buffer contains 128 blocks horizontally and 256 blocks vertically.
 136  *
 137  * - For figuring out alignment restrictions for a fast clear operation.  Fast
 138  *   clear operations must always clear aligned multiples of 16 blocks
 139  *   horizontally and 32 blocks vertically.
 140  *
 141  * - For scaling down the coordinates sent through the render pipeline during
 142  *   a fast clear.  X coordinates must be scaled down by 8 times the block
 143  *   width, and Y coordinates by 16 times the block height.
 144  *
 145  * - For scaling down the coordinates sent through the render pipeline during
 146  *   a "Render Target Resolve" operation.  X coordinates must be scaled down
 147  *   by half the block width, and Y coordinates by half the block height.
 148  */
 149 void
 150 intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
 151                                  struct intel_mipmap_tree *mt,
 152                                  unsigned *width_px, unsigned *height)
 153 {
 154    switch (mt->region->tiling) {
 155    default:
 156       assert(!"Non-MSRT MCS requires X or Y tiling");
 157       /* In release builds, fall through */
 158    case I915_TILING_Y:
 159       *width_px = 32 / mt->cpp;
 160       *height = 4;
 161       break;
 162    case I915_TILING_X:
 163       *width_px = 64 / mt->cpp;
 164       *height = 2;
 165    }
 166 }
 167
 168
 169 /**
 170  * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
 171  * can be used.
 172  *
 173  * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 174  * beneath the "Fast Color Clear" bullet (p326):
 175  *
 176  *     - Support is limited to tiled render targets.
 177  *     - Support is for non-mip-mapped and non-array surface types only.
 178  *
 179  * And then later, on p327:
 180  *
 181  *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
 182  *       64bpp, and 128bpp.
 183  */
 184 bool
 185 intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
 186                                        struct intel_mipmap_tree *mt)
 187 {
 188    /* MCS support does not exist prior to Gen7 */
 189    if (brw->gen < 7 || brw->gen >= 8)
 190       return false;
 191
 192    /* MCS is only supported for color buffers */
 193    switch (_mesa_get_format_base_format(mt->format)) {
 194    case GL_DEPTH_COMPONENT:
 195    case GL_DEPTH_STENCIL:
 196    case GL_STENCIL_INDEX:
 197       return false;
 198    }
 199
 200    if (mt->region->tiling != I915_TILING_X &&
 201        mt->region->tiling != I915_TILING_Y)
 202       return false;
 203    if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
 204       return false;
 205    if (mt->first_level != 0 || mt->last_level != 0)
 206       return false;
 207    if (mt->physical_depth0 != 1)
 208       return false;
 209
 210    /* There's no point in using an MCS buffer if the surface isn't in a
 211     * renderable format.
 212     */
 213    if (!brw->format_supported_as_render_target[mt->format])
 214       return false;
 215
 216    return true;
 217 }
 218
 219
 220 /**
 221  * @param for_bo Indicates that the caller is
 222  *        intel_miptree_create_for_bo(). If true, then do not create
 223  *        \c stencil_mt.
 224  */
 225 struct intel_mipmap_tree *
 226 intel_miptree_create_layout(struct brw_context *brw,
 227                             GLenum target,
 228                             gl_format format,
 229                             GLuint first_level,
 230                             GLuint last_level,
 231                             GLuint width0,
 232                             GLuint height0,
 233                             GLuint depth0,
 234                             bool for_bo,
 235                             GLuint num_samples)
 236 {
 237    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
 238    if (!mt)
 239       return NULL;
 240
 241    DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__,
 242        _mesa_lookup_enum_by_nr(target),
 243        _mesa_get_format_name(format),
 244        first_level, last_level, mt);
 245
 246    mt->target = target_to_target(target);
 247    mt->format = format;
 248    mt->first_level = first_level;
 249    mt->last_level = last_level;
 250    mt->logical_width0 = width0;
 251    mt->logical_height0 = height0;
 252    mt->logical_depth0 = depth0;
 253    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
 254
 255    /* The cpp is bytes per (1, blockheight)-sized block for compressed
 256     * textures.  This is why you'll see divides by blockheight all over
 257     */
 258    unsigned bw, bh;
 259    _mesa_get_format_block_size(format, &bw, &bh);
 260    assert(_mesa_get_format_bytes(mt->format) % bw == 0);
 261    mt->cpp = _mesa_get_format_bytes(mt->format) / bw;
 262
 263    mt->num_samples = num_samples;
 264    mt->compressed = _mesa_is_format_compressed(format);
 265    mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE;
 266    mt->refcount = 1;
 267
 268    if (num_samples > 1) {
 269       /* Adjust width/height/depth for MSAA */
 270       mt->msaa_layout = compute_msaa_layout(brw, format, mt->target);
 271       if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
 272          /* In the Sandy Bridge PRM, volume 4, part 1, page 31, it says:
 273           *
 274           *     "Any of the other messages (sample*, LOD, load4) used with a
 275           *      (4x) multisampled surface will in-effect sample a surface with
 276           *      double the height and width as that indicated in the surface
 277           *      state. Each pixel position on the original-sized surface is
 278           *      replaced with a 2x2 of samples with the following arrangement:
 279           *
 280           *         sample 0 sample 2
 281           *         sample 1 sample 3"
 282           *
 283           * Thus, when sampling from a multisampled texture, it behaves as
 284           * though the layout in memory for (x,y,sample) is:
 285           *
 286           *      (0,0,0) (0,0,2)   (1,0,0) (1,0,2)
 287           *      (0,0,1) (0,0,3)   (1,0,1) (1,0,3)
 288           *
 289           *      (0,1,0) (0,1,2)   (1,1,0) (1,1,2)
 290           *      (0,1,1) (0,1,3)   (1,1,1) (1,1,3)
 291           *
 292           * However, the actual layout of multisampled data in memory is:
 293           *
 294           *      (0,0,0) (1,0,0)   (0,0,1) (1,0,1)
 295           *      (0,1,0) (1,1,0)   (0,1,1) (1,1,1)
 296           *
 297           *      (0,0,2) (1,0,2)   (0,0,3) (1,0,3)
 298           *      (0,1,2) (1,1,2)   (0,1,3) (1,1,3)
 299           *
 300           * This pattern repeats for each 2x2 pixel block.
 301           *
 302           * As a result, when calculating the size of our 4-sample buffer for
 303           * an odd width or height, we have to align before scaling up because
 304           * sample 3 is in that bottom right 2x2 block.
 305           */
 306          switch (num_samples) {
 307          case 4:
 308             width0 = ALIGN(width0, 2) * 2;
 309             height0 = ALIGN(height0, 2) * 2;
 310             break;
 311          case 8:
 312             width0 = ALIGN(width0, 2) * 4;
 313             height0 = ALIGN(height0, 2) * 2;
 314             break;
 315          default:
 316             /* num_samples should already have been quantized to 0, 1, 4, or
 317              * 8.
 318              */
 319             assert(false);
 320          }
 321       } else {
 322          /* Non-interleaved */
 323          depth0 *= num_samples;
 324       }
 325    }
 326
 327    /* array_spacing_lod0 is only used for non-IMS MSAA surfaces.  TODO: can we
 328     * use it elsewhere?
 329     */
 330    switch (mt->msaa_layout) {
 331    case INTEL_MSAA_LAYOUT_NONE:
 332    case INTEL_MSAA_LAYOUT_IMS:
 333       mt->array_spacing_lod0 = false;
 334       break;
 335    case INTEL_MSAA_LAYOUT_UMS:
 336    case INTEL_MSAA_LAYOUT_CMS:
 337       mt->array_spacing_lod0 = true;
 338       break;
 339    }
 340
 341    if (target == GL_TEXTURE_CUBE_MAP) {
 342       assert(depth0 == 1);
 343       depth0 = 6;
 344    }
 345
 346    mt->physical_width0 = width0;
 347    mt->physical_height0 = height0;
 348    mt->physical_depth0 = depth0;
 349
 350    if (!for_bo &&
 351        _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
 352        (brw->must_use_separate_stencil ||
 353         (brw->has_separate_stencil && brw_is_hiz_depth_format(brw, format)))) {
 354       mt->stencil_mt = intel_miptree_create(brw,
 355                                             mt->target,
 356                                             MESA_FORMAT_S8,
 357                                             mt->first_level,
 358                                             mt->last_level,
 359                                             mt->logical_width0,
 360                                             mt->logical_height0,
 361                                             mt->logical_depth0,
 362                                             true,
 363                                             num_samples,
 364                                             INTEL_MIPTREE_TILING_ANY);
 365       if (!mt->stencil_mt) {
 366          intel_miptree_release(&mt);
 367          return NULL;
 368       }
 369
 370       /* Fix up the Z miptree format for how we're splitting out separate
 371        * stencil.  Gen7 expects there to be no stencil bits in its depth buffer.
 372        */
 373       if (mt->format == MESA_FORMAT_S8_Z24) {
 374          mt->format = MESA_FORMAT_X8_Z24;
 375       } else if (mt->format == MESA_FORMAT_Z32_FLOAT_X24S8) {
 376          mt->format = MESA_FORMAT_Z32_FLOAT;
 377          mt->cpp = 4;
 378       } else {
 379          _mesa_problem(NULL, "Unknown format %s in separate stencil mt\n",
 380                        _mesa_get_format_name(mt->format));
 381       }
 382    }
 383
 384    brw_miptree_layout(brw, mt);
 385
 386    return mt;
 387 }
 388
 389 /**
 390  * \brief Helper function for intel_miptree_create().
 391  */
 392 static uint32_t
 393 intel_miptree_choose_tiling(struct brw_context *brw,
 394                             gl_format format,
 395                             uint32_t width0,
 396                             uint32_t num_samples,
 397                             enum intel_miptree_tiling_mode requested,
 398                             struct intel_mipmap_tree *mt)
 399 {
 400    if (format == MESA_FORMAT_S8) {
 401       /* The stencil buffer is W tiled. However, we request from the kernel a
 402        * non-tiled buffer because the GTT is incapable of W fencing.
 403        */
 404       return I915_TILING_NONE;
 405    }
 406
 407    /* Some usages may want only one type of tiling, like depth miptrees (Y
 408     * tiled), or temporary BOs for uploading data once (linear).
 409     */
 410    switch (requested) {
 411    case INTEL_MIPTREE_TILING_ANY:
 412       break;
 413    case INTEL_MIPTREE_TILING_Y:
 414       return I915_TILING_Y;
 415    case INTEL_MIPTREE_TILING_NONE:
 416       return I915_TILING_NONE;
 417    }
 418
 419    if (num_samples > 1) {
 420       /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
 421        * Surface"):
 422        *
 423        *   [DevSNB+]: For multi-sample render targets, this field must be
 424        *   1. MSRTs can only be tiled.
 425        *
 426        * Our usual reason for preferring X tiling (fast blits using the
 427        * blitting engine) doesn't apply to MSAA, since we'll generally be
 428        * downsampling or upsampling when blitting between the MSAA buffer
 429        * and another buffer, and the blitting engine doesn't support that.
 430        * So use Y tiling, since it makes better use of the cache.
 431        */
 432       return I915_TILING_Y;
 433    }
 434
 435    GLenum base_format = _mesa_get_format_base_format(format);
 436    if (base_format == GL_DEPTH_COMPONENT ||
 437        base_format == GL_DEPTH_STENCIL_EXT)
 438       return I915_TILING_Y;
 439
 440    int minimum_pitch = mt->total_width * mt->cpp;
 441
 442    /* If the width is much smaller than a tile, don't bother tiling. */
 443    if (minimum_pitch < 64)
 444       return I915_TILING_NONE;
 445
 446    if (ALIGN(minimum_pitch, 512) >= 32768) {
 447       perf_debug("%dx%d miptree too large to blit, falling back to untiled",
 448                  mt->total_width, mt->total_height);
 449       return I915_TILING_NONE;
 450    }
 451
 452    /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */
 453    if (brw->gen < 6)
 454       return I915_TILING_X;
 455
 456    /* From the Sandybridge PRM, Volume 1, Part 2, page 32:
 457     * "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either TileX
 458     *  or Linear."
 459     * 128 bits per pixel translates to 16 bytes per pixel.  This is necessary
 460     * all the way back to 965, but is explicitly permitted on Gen7.
 461     */
 462    if (brw->gen != 7 && mt->cpp >= 16)
 463       return I915_TILING_X;
 464
 465    /* From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most
 466     * messages), on p64, under the heading "Surface Vertical Alignment":
 467     *
 468     *     This field must be set to VALIGN_4 for all tiled Y Render Target
 469     *     surfaces.
 470     *
 471     * So if the surface is renderable and uses a vertical alignment of 2,
 472     * force it to be X tiled.  This is somewhat conservative (it's possible
 473     * that the client won't ever render to this surface), but it's difficult
 474     * to know that ahead of time.  And besides, since we use a vertical
 475     * alignment of 4 as often as we can, this shouldn't happen very often.
 476     */
 477    if (brw->gen == 7 && mt->align_h == 2 &&
 478        brw->format_supported_as_render_target[format]) {
 479       return I915_TILING_X;
 480    }
 481
 482    return I915_TILING_Y | I915_TILING_X;
 483 }
 484
 485 struct intel_mipmap_tree *
 486 intel_miptree_create(struct brw_context *brw,
 487                      GLenum target,
 488                      gl_format format,
 489                      GLuint first_level,
 490                      GLuint last_level,
 491                      GLuint width0,
 492                      GLuint height0,
 493                      GLuint depth0,
 494                      bool expect_accelerated_upload,
 495                      GLuint num_samples,
 496                      enum intel_miptree_tiling_mode requested_tiling)
 497 {
 498    struct intel_mipmap_tree *mt;
 499    gl_format tex_format = format;
 500    gl_format etc_format = MESA_FORMAT_NONE;
 501    GLuint total_width, total_height;
 502
 503    if (!brw->is_baytrail) {
 504       switch (format) {
 505       case MESA_FORMAT_ETC1_RGB8:
 506          format = MESA_FORMAT_RGBX8888_REV;
 507          break;
 508       case MESA_FORMAT_ETC2_RGB8:
 509          format = MESA_FORMAT_RGBX8888_REV;
 510          break;
 511       case MESA_FORMAT_ETC2_SRGB8:
 512       case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
 513       case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
 514          format = MESA_FORMAT_SARGB8;
 515          break;
 516       case MESA_FORMAT_ETC2_RGBA8_EAC:
 517       case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
 518          format = MESA_FORMAT_RGBA8888_REV;
 519          break;
 520       case MESA_FORMAT_ETC2_R11_EAC:
 521          format = MESA_FORMAT_R16;
 522          break;
 523       case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
 524          format = MESA_FORMAT_SIGNED_R16;
 525          break;
 526       case MESA_FORMAT_ETC2_RG11_EAC:
 527          format = MESA_FORMAT_GR1616;
 528          break;
 529       case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
 530          format = MESA_FORMAT_SIGNED_GR1616;
 531          break;
 532       default:
 533          /* Non ETC1 / ETC2 format */
 534          break;
 535       }
 536    }
 537
 538    etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
 539
 540    mt = intel_miptree_create_layout(brw, target, format,
 541                                       first_level, last_level, width0,
 542                                       height0, depth0,
 543                                       false, num_samples);
 544    /*
 545     * pitch == 0 || height == 0  indicates the null texture
 546     */
 547    if (!mt || !mt->total_width || !mt->total_height) {
 548       intel_miptree_release(&mt);
 549       return NULL;
 550    }
 551
 552    total_width = mt->total_width;
 553    total_height = mt->total_height;
 554
 555    if (format == MESA_FORMAT_S8) {
 556       /* Align to size of W tile, 64x64. */
 557       total_width = ALIGN(total_width, 64);
 558       total_height = ALIGN(total_height, 64);
 559    }
 560
 561    uint32_t tiling = intel_miptree_choose_tiling(brw, format, width0,
 562                                                  num_samples, requested_tiling,
 563                                                  mt);
 564    bool y_or_x = tiling == (I915_TILING_Y | I915_TILING_X);
 565
 566    mt->etc_format = etc_format;
 567    mt->region = intel_region_alloc(brw->intelScreen,
 568                                    y_or_x ? I915_TILING_Y : tiling,
 569                                    mt->cpp,
 570                                    total_width,
 571                                    total_height,
 572                                    expect_accelerated_upload);
 573
 574    /* If the region is too large to fit in the aperture, we need to use the
 575     * BLT engine to support it.  The BLT paths can't currently handle Y-tiling,
 576     * so we need to fall back to X.
 577     */
 578    if (y_or_x && mt->region->bo->size >= brw->max_gtt_map_object_size) {
 579       perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n",
 580                  mt->total_width, mt->total_height);
 581       intel_region_release(&mt->region);
 582
 583       mt->region = intel_region_alloc(brw->intelScreen,
 584                                       I915_TILING_X,
 585                                       mt->cpp,
 586                                       total_width,
 587                                       total_height,
 588                                       expect_accelerated_upload);
 589    }
 590
 591    mt->offset = 0;
 592
 593    if (!mt->region) {
 594        intel_miptree_release(&mt);
 595        return NULL;
 596    }
 597
 598
 599    if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
 600       if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) {
 601          intel_miptree_release(&mt);
 602          return NULL;
 603       }
 604    }
 605
 606    /* If this miptree is capable of supporting fast color clears, set
 607     * fast_clear_state appropriately to ensure that fast clears will occur.
 608     * Allocation of the MCS miptree will be deferred until the first fast
 609     * clear actually occurs.
 610     */
 611    if (intel_is_non_msrt_mcs_buffer_supported(brw, mt))
 612       mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 613
 614    return mt;
 615 }
 616
 617 struct intel_mipmap_tree *
 618 intel_miptree_create_for_bo(struct brw_context *brw,
 619                             drm_intel_bo *bo,
 620                             gl_format format,
 621                             uint32_t offset,
 622                             uint32_t width,
 623                             uint32_t height,
 624                             int pitch,
 625                             uint32_t tiling)
 626 {
 627    struct intel_mipmap_tree *mt;
 628
 629    struct intel_region *region = calloc(1, sizeof(*region));
 630    if (!region)
 631       return NULL;
 632
 633    /* Nothing will be able to use this miptree with the BO if the offset isn't
 634     * aligned.
 635     */
 636    if (tiling != I915_TILING_NONE)
 637       assert(offset % 4096 == 0);
 638
 639    /* miptrees can't handle negative pitch.  If you need flipping of images,
 640     * that's outside of the scope of the mt.
 641     */
 642    assert(pitch >= 0);
 643
 644    mt = intel_miptree_create_layout(brw, GL_TEXTURE_2D, format,
 645                                     0, 0,
 646                                     width, height, 1,
 647                                     true, 0 /* num_samples */);
 648    if (!mt) {
 649       free(region);
 650       return mt;
 651    }
 652
 653    region->cpp = mt->cpp;
 654    region->width = width;
 655    region->height = height;
 656    region->pitch = pitch;
 657    region->refcount = 1;
 658    drm_intel_bo_reference(bo);
 659    region->bo = bo;
 660    region->tiling = tiling;
 661
 662    mt->region = region;
 663    mt->offset = offset;
 664
 665    return mt;
 666 }
 667
 668
 669 /**
 670  * For a singlesample DRI2 buffer, this simply wraps the given region with a miptree.
 671  *
 672  * For a multisample DRI2 buffer, this wraps the given region with
 673  * a singlesample miptree, then creates a multisample miptree into which the
 674  * singlesample miptree is embedded as a child.
 675  */
 676 struct intel_mipmap_tree*
 677 intel_miptree_create_for_dri2_buffer(struct brw_context *brw,
 678                                      unsigned dri_attachment,
 679                                      gl_format format,
 680                                      uint32_t num_samples,
 681                                      struct intel_region *region)
 682 {
 683    struct intel_mipmap_tree *singlesample_mt = NULL;
 684    struct intel_mipmap_tree *multisample_mt = NULL;
 685
 686    /* Only the front and back buffers, which are color buffers, are shared
 687     * through DRI2.
 688     */
 689    assert(dri_attachment == __DRI_BUFFER_BACK_LEFT ||
 690           dri_attachment == __DRI_BUFFER_FRONT_LEFT ||
 691           dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT);
 692    assert(_mesa_get_format_base_format(format) == GL_RGB ||
 693           _mesa_get_format_base_format(format) == GL_RGBA);
 694
 695    singlesample_mt = intel_miptree_create_for_bo(brw,
 696                                                  region->bo,
 697                                                  format,
 698                                                  0,
 699                                                  region->width,
 700                                                  region->height,
 701                                                  region->pitch,
 702                                                  region->tiling);
 703    if (!singlesample_mt)
 704       return NULL;
 705    singlesample_mt->region->name = region->name;
 706
 707    /* If this miptree is capable of supporting fast color clears, set
 708     * fast_clear_state appropriately to ensure that fast clears will occur.
 709     * Allocation of the MCS miptree will be deferred until the first fast
 710     * clear actually occurs.
 711     */
 712    if (intel_is_non_msrt_mcs_buffer_supported(brw, singlesample_mt))
 713       singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 714
 715    if (num_samples == 0)
 716       return singlesample_mt;
 717
 718    multisample_mt = intel_miptree_create_for_renderbuffer(brw,
 719                                                           format,
 720                                                           region->width,
 721                                                           region->height,
 722                                                           num_samples);
 723    if (!multisample_mt) {
 724       intel_miptree_release(&singlesample_mt);
 725       return NULL;
 726    }
 727
 728    multisample_mt->singlesample_mt = singlesample_mt;
 729    multisample_mt->need_downsample = false;
 730
 731    if (brw->is_front_buffer_rendering &&
 732        (dri_attachment == __DRI_BUFFER_FRONT_LEFT ||
 733         dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT)) {
 734       intel_miptree_upsample(brw, multisample_mt);
 735    }
 736
 737    return multisample_mt;
 738 }
 739
 740 /**
 741  * For a singlesample image buffer, this simply wraps the given region with a miptree.
 742  *
 743  * For a multisample image buffer, this wraps the given region with
 744  * a singlesample miptree, then creates a multisample miptree into which the
 745  * singlesample miptree is embedded as a child.
 746  */
 747 struct intel_mipmap_tree*
 748 intel_miptree_create_for_image_buffer(struct brw_context *intel,
 749                                       enum __DRIimageBufferMask buffer_type,
 750                                       gl_format format,
 751                                       uint32_t num_samples,
 752                                       struct intel_region *region)
 753 {
 754    struct intel_mipmap_tree *singlesample_mt = NULL;
 755    struct intel_mipmap_tree *multisample_mt = NULL;
 756
 757    /* Only the front and back buffers, which are color buffers, are allocated
 758     * through the image loader.
 759     */
 760    assert(_mesa_get_format_base_format(format) == GL_RGB ||
 761           _mesa_get_format_base_format(format) == GL_RGBA);
 762
 763    singlesample_mt = intel_miptree_create_for_bo(intel,
 764                                                  region->bo,
 765                                                  format,
 766                                                  0,
 767                                                  region->width,
 768                                                  region->height,
 769                                                  region->pitch,
 770                                                  region->tiling);
 771    if (!singlesample_mt)
 772       return NULL;
 773
 774    intel_region_reference(&singlesample_mt->region, region);
 775
 776    if (num_samples == 0)
 777       return singlesample_mt;
 778
 779    multisample_mt = intel_miptree_create_for_renderbuffer(intel,
 780                                                           format,
 781                                                           region->width,
 782                                                           region->height,
 783                                                           num_samples);
 784    if (!multisample_mt) {
 785       intel_miptree_release(&singlesample_mt);
 786       return NULL;
 787    }
 788
 789    multisample_mt->singlesample_mt = singlesample_mt;
 790    multisample_mt->need_downsample = false;
 791
 792    intel_region_reference(&multisample_mt->region, region);
 793
 794    if (intel->is_front_buffer_rendering && buffer_type == __DRI_IMAGE_BUFFER_FRONT) {
 795       intel_miptree_upsample(intel, multisample_mt);
 796    }
 797
 798    return multisample_mt;
 799 }
 800
 801 struct intel_mipmap_tree*
 802 intel_miptree_create_for_renderbuffer(struct brw_context *brw,
 803                                       gl_format format,
 804                                       uint32_t width,
 805                                       uint32_t height,
 806                                       uint32_t num_samples)
 807 {
 808    struct intel_mipmap_tree *mt;
 809    uint32_t depth = 1;
 810    bool ok;
 811
 812    mt = intel_miptree_create(brw, GL_TEXTURE_2D, format, 0, 0,
 813                              width, height, depth, true, num_samples,
 814                              INTEL_MIPTREE_TILING_ANY);
 815    if (!mt)
 816       goto fail;
 817
 818    if (brw_is_hiz_depth_format(brw, format)) {
 819       ok = intel_miptree_alloc_hiz(brw, mt);
 820       if (!ok)
 821          goto fail;
 822    }
 823
 824    return mt;
 825
 826 fail:
 827    intel_miptree_release(&mt);
 828    return NULL;
 829 }
 830
 831 void
 832 intel_miptree_reference(struct intel_mipmap_tree **dst,
 833                         struct intel_mipmap_tree *src)
 834 {
 835    if (*dst == src)
 836       return;
 837
 838    intel_miptree_release(dst);
 839
 840    if (src) {
 841       src->refcount++;
 842       DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount);
 843    }
 844
 845    *dst = src;
 846 }
 847
 848
 849 void
 850 intel_miptree_release(struct intel_mipmap_tree **mt)
 851 {
 852    if (!*mt)
 853       return;
 854
 855    DBG("%s %p refcount will be %d\n", __FUNCTION__, *mt, (*mt)->refcount - 1);
 856    if (--(*mt)->refcount <= 0) {
 857       GLuint i;
 858
 859       DBG("%s deleting %p\n", __FUNCTION__, *mt);
 860
 861       intel_region_release(&((*mt)->region));
 862       intel_miptree_release(&(*mt)->stencil_mt);
 863       intel_miptree_release(&(*mt)->hiz_mt);
 864       intel_miptree_release(&(*mt)->mcs_mt);
 865       intel_miptree_release(&(*mt)->singlesample_mt);
 866       intel_resolve_map_clear(&(*mt)->hiz_map);
 867
 868       for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
 869          free((*mt)->level[i].slice);
 870       }
 871
 872       free(*mt);
 873    }
 874    *mt = NULL;
 875 }
 876
 877 void
 878 intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
 879                                        int *width, int *height, int *depth)
 880 {
 881    switch (image->TexObject->Target) {
 882    case GL_TEXTURE_1D_ARRAY:
 883       *width = image->Width;
 884       *height = 1;
 885       *depth = image->Height;
 886       break;
 887    default:
 888       *width = image->Width;
 889       *height = image->Height;
 890       *depth = image->Depth;
 891       break;
 892    }
 893 }
 894
 895 /**
 896  * Can the image be pulled into a unified mipmap tree?  This mirrors
 897  * the completeness test in a lot of ways.
 898  *
 899  * Not sure whether I want to pass gl_texture_image here.
 900  */
 901 bool
 902 intel_miptree_match_image(struct intel_mipmap_tree *mt,
 903                           struct gl_texture_image *image)
 904 {
 905    struct intel_texture_image *intelImage = intel_texture_image(image);
 906    GLuint level = intelImage->base.Base.Level;
 907    int width, height, depth;
 908
 909    /* glTexImage* choose the texture object based on the target passed in, and
 910     * objects can't change targets over their lifetimes, so this should be
 911     * true.
 912     */
 913    assert(target_to_target(image->TexObject->Target) == mt->target);
 914
 915    gl_format mt_format = mt->format;
 916    if (mt->format == MESA_FORMAT_X8_Z24 && mt->stencil_mt)
 917       mt_format = MESA_FORMAT_S8_Z24;
 918    if (mt->format == MESA_FORMAT_Z32_FLOAT && mt->stencil_mt)
 919       mt_format = MESA_FORMAT_Z32_FLOAT_X24S8;
 920    if (mt->etc_format != MESA_FORMAT_NONE)
 921       mt_format = mt->etc_format;
 922
 923    if (image->TexFormat != mt_format)
 924       return false;
 925
 926    intel_miptree_get_dimensions_for_image(image, &width, &height, &depth);
 927
 928    if (mt->target == GL_TEXTURE_CUBE_MAP)
 929       depth = 6;
 930
 931    /* Test image dimensions against the base level image adjusted for
 932     * minification.  This will also catch images not present in the
 933     * tree, changed targets, etc.
 934     */
 935    if (mt->target == GL_TEXTURE_2D_MULTISAMPLE ||
 936          mt->target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
 937       /* nonzero level here is always bogus */
 938       assert(level == 0);
 939
 940       if (width != mt->logical_width0 ||
 941             height != mt->logical_height0 ||
 942             depth != mt->logical_depth0) {
 943          return false;
 944       }
 945    }
 946    else {
 947       /* all normal textures, renderbuffers, etc */
 948       if (width != mt->level[level].width ||
 949           height != mt->level[level].height ||
 950           depth != mt->level[level].depth) {
 951          return false;
 952       }
 953    }
 954
 955    if (image->NumSamples != mt->num_samples)
 956       return false;
 957
 958    return true;
 959 }
 960
 961
 962 void
 963 intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
 964                              GLuint level,
 965                              GLuint x, GLuint y,
 966                              GLuint w, GLuint h, GLuint d)
 967 {
 968    mt->level[level].width = w;
 969    mt->level[level].height = h;
 970    mt->level[level].depth = d;
 971    mt->level[level].level_x = x;
 972    mt->level[level].level_y = y;
 973
 974    DBG("%s level %d size: %d,%d,%d offset %d,%d\n", __FUNCTION__,
 975        level, w, h, d, x, y);
 976
 977    assert(mt->level[level].slice == NULL);
 978
 979    mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice));
 980    mt->level[level].slice[0].x_offset = mt->level[level].level_x;
 981    mt->level[level].slice[0].y_offset = mt->level[level].level_y;
 982 }
 983
 984
 985 void
 986 intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
 987                                GLuint level, GLuint img,
 988                                GLuint x, GLuint y)
 989 {
 990    if (img == 0 && level == 0)
 991       assert(x == 0 && y == 0);
 992
 993    assert(img < mt->level[level].depth);
 994
 995    mt->level[level].slice[img].x_offset = mt->level[level].level_x + x;
 996    mt->level[level].slice[img].y_offset = mt->level[level].level_y + y;
 997
 998    DBG("%s level %d img %d pos %d,%d\n",
 999        __FUNCTION__, level, img,
1000        mt->level[level].slice[img].x_offset,
1001        mt->level[level].slice[img].y_offset);
1002 }
1003
1004 void
1005 intel_miptree_get_image_offset(struct intel_mipmap_tree *mt,
1006                                GLuint level, GLuint slice,
1007                                GLuint *x, GLuint *y)
1008 {
1009    assert(slice < mt->level[level].depth);
1010
1011    *x = mt->level[level].slice[slice].x_offset;
1012    *y = mt->level[level].slice[slice].y_offset;
1013 }
1014
1015 /**
1016  * Rendering with tiled buffers requires that the base address of the buffer
1017  * be aligned to a page boundary.  For renderbuffers, and sometimes with
1018  * textures, we may want the surface to point at a texture image level that
1019  * isn't at a page boundary.
1020  *
1021  * This function returns an appropriately-aligned base offset
1022  * according to the tiling restrictions, plus any required x/y offset
1023  * from there.
1024  */
1025 uint32_t
1026 intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt,
1027                                GLuint level, GLuint slice,
1028                                uint32_t *tile_x,
1029                                uint32_t *tile_y)
1030 {
1031    struct intel_region *region = mt->region;
1032    uint32_t x, y;
1033    uint32_t mask_x, mask_y;
1034
1035    intel_region_get_tile_masks(region, &mask_x, &mask_y, false);
1036    intel_miptree_get_image_offset(mt, level, slice, &x, &y);
1037
1038    *tile_x = x & mask_x;
1039    *tile_y = y & mask_y;
1040
1041    return intel_region_get_aligned_offset(region, x & ~mask_x, y & ~mask_y,
1042                                           false);
1043 }
1044
1045 static void
1046 intel_miptree_copy_slice_sw(struct brw_context *brw,
1047                             struct intel_mipmap_tree *dst_mt,
1048                             struct intel_mipmap_tree *src_mt,
1049                             int level,
1050                             int slice,
1051                             int width,
1052                             int height)
1053 {
1054    void *src, *dst;
1055    int src_stride, dst_stride;
1056    int cpp = dst_mt->cpp;
1057
1058    intel_miptree_map(brw, src_mt,
1059                      level, slice,
1060                      0, 0,
1061                      width, height,
1062                      GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
1063                      &src, &src_stride);
1064
1065    intel_miptree_map(brw, dst_mt,
1066                      level, slice,
1067                      0, 0,
1068                      width, height,
1069                      GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
1070                      BRW_MAP_DIRECT_BIT,
1071                      &dst, &dst_stride);
1072
1073    DBG("sw blit %s mt %p %p/%d -> %s mt %p %p/%d (%dx%d)\n",
1074        _mesa_get_format_name(src_mt->format),
1075        src_mt, src, src_stride,
1076        _mesa_get_format_name(dst_mt->format),
1077        dst_mt, dst, dst_stride,
1078        width, height);
1079
1080    int row_size = cpp * width;
1081    if (src_stride == row_size &&
1082        dst_stride == row_size) {
1083       memcpy(dst, src, row_size * height);
1084    } else {
1085       for (int i = 0; i < height; i++) {
1086          memcpy(dst, src, row_size);
1087          dst += dst_stride;
1088          src += src_stride;
1089       }
1090    }
1091
1092    intel_miptree_unmap(brw, dst_mt, level, slice);
1093    intel_miptree_unmap(brw, src_mt, level, slice);
1094
1095    /* Don't forget to copy the stencil data over, too.  We could have skipped
1096     * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
1097     * shuffling the two data sources in/out of temporary storage instead of
1098     * the direct mapping we get this way.
1099     */
1100    if (dst_mt->stencil_mt) {
1101       assert(src_mt->stencil_mt);
1102       intel_miptree_copy_slice_sw(brw, dst_mt->stencil_mt, src_mt->stencil_mt,
1103                                   level, slice, width, height);
1104    }
1105 }
1106
1107 static void
1108 intel_miptree_copy_slice(struct brw_context *brw,
1109                          struct intel_mipmap_tree *dst_mt,
1110                          struct intel_mipmap_tree *src_mt,
1111                          int level,
1112                          int face,
1113                          int depth)
1114
1115 {
1116    gl_format format = src_mt->format;
1117    uint32_t width = src_mt->level[level].width;
1118    uint32_t height = src_mt->level[level].height;
1119    int slice;
1120
1121    if (face > 0)
1122       slice = face;
1123    else
1124       slice = depth;
1125
1126    assert(depth < src_mt->level[level].depth);
1127    assert(src_mt->format == dst_mt->format);
1128
1129    if (dst_mt->compressed) {
1130       height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h;
1131       width = ALIGN(width, dst_mt->align_w);
1132    }
1133
1134    /* If it's a packed depth/stencil buffer with separate stencil, the blit
1135     * below won't apply since we can't do the depth's Y tiling or the
1136     * stencil's W tiling in the blitter.
1137     */
1138    if (src_mt->stencil_mt) {
1139       intel_miptree_copy_slice_sw(brw,
1140                                   dst_mt, src_mt,
1141                                   level, slice,
1142                                   width, height);
1143       return;
1144    }
1145
1146    uint32_t dst_x, dst_y, src_x, src_y;
1147    intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y);
1148    intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y);
1149
1150    DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
1151        _mesa_get_format_name(src_mt->format),
1152        src_mt, src_x, src_y, src_mt->region->pitch,
1153        _mesa_get_format_name(dst_mt->format),
1154        dst_mt, dst_x, dst_y, dst_mt->region->pitch,
1155        width, height);
1156
1157    if (!intel_miptree_blit(brw,
1158                            src_mt, level, slice, 0, 0, false,
1159                            dst_mt, level, slice, 0, 0, false,
1160                            width, height, GL_COPY)) {
1161       perf_debug("miptree validate blit for %s failed\n",
1162                  _mesa_get_format_name(format));
1163
1164       intel_miptree_copy_slice_sw(brw, dst_mt, src_mt, level, slice,
1165                                   width, height);
1166    }
1167 }
1168
1169 /**
1170  * Copies the image's current data to the given miptree, and associates that
1171  * miptree with the image.
1172  *
1173  * If \c invalidate is true, then the actual image data does not need to be
1174  * copied, but the image still needs to be associated to the new miptree (this
1175  * is set to true if we're about to clear the image).
1176  */
1177 void
1178 intel_miptree_copy_teximage(struct brw_context *brw,
1179                             struct intel_texture_image *intelImage,
1180                             struct intel_mipmap_tree *dst_mt,
1181                             bool invalidate)
1182 {
1183    struct intel_mipmap_tree *src_mt = intelImage->mt;
1184    struct intel_texture_object *intel_obj =
1185       intel_texture_object(intelImage->base.Base.TexObject);
1186    int level = intelImage->base.Base.Level;
1187    int face = intelImage->base.Base.Face;
1188    GLuint depth = intelImage->base.Base.Depth;
1189
1190    if (!invalidate) {
1191       for (int slice = 0; slice < depth; slice++) {
1192          intel_miptree_copy_slice(brw, dst_mt, src_mt, level, face, slice);
1193       }
1194    }
1195
1196    intel_miptree_reference(&intelImage->mt, dst_mt);
1197    intel_obj->needs_validate = true;
1198 }
1199
1200 bool
1201 intel_miptree_alloc_mcs(struct brw_context *brw,
1202                         struct intel_mipmap_tree *mt,
1203                         GLuint num_samples)
1204 {
1205    assert(brw->gen >= 7); /* MCS only used on Gen7+ */
1206    assert(mt->mcs_mt == NULL);
1207
1208    /* Choose the correct format for the MCS buffer.  All that really matters
1209     * is that we allocate the right buffer size, since we'll always be
1210     * accessing this miptree using MCS-specific hardware mechanisms, which
1211     * infer the correct format based on num_samples.
1212     */
1213    gl_format format;
1214    switch (num_samples) {
1215    case 4:
1216       /* 8 bits/pixel are required for MCS data when using 4x MSAA (2 bits for
1217        * each sample).
1218        */
1219       format = MESA_FORMAT_R8;
1220       break;
1221    case 8:
1222       /* 32 bits/pixel are required for MCS data when using 8x MSAA (3 bits
1223        * for each sample, plus 8 padding bits).
1224        */
1225       format = MESA_FORMAT_R_UINT32;
1226       break;
1227    default:
1228       assert(!"Unrecognized sample count in intel_miptree_alloc_mcs");
1229       return false;
1230    };
1231
1232    /* From the Ivy Bridge PRM, Vol4 Part1 p76, "MCS Base Address":
1233     *
1234     *     "The MCS surface must be stored as Tile Y."
1235     */
1236    mt->mcs_mt = intel_miptree_create(brw,
1237                                      mt->target,
1238                                      format,
1239                                      mt->first_level,
1240                                      mt->last_level,
1241                                      mt->logical_width0,
1242                                      mt->logical_height0,
1243                                      mt->logical_depth0,
1244                                      true,
1245                                      0 /* num_samples */,
1246                                      INTEL_MIPTREE_TILING_Y);
1247
1248    /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
1249     *
1250     *     When MCS buffer is enabled and bound to MSRT, it is required that it
1251     *     is cleared prior to any rendering.
1252     *
1253     * Since we don't use the MCS buffer for any purpose other than rendering,
1254     * it makes sense to just clear it immediately upon allocation.
1255     *
1256     * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
1257     */
1258    void *data = intel_miptree_map_raw(brw, mt->mcs_mt);
1259    memset(data, 0xff, mt->mcs_mt->region->bo->size);
1260    intel_miptree_unmap_raw(brw, mt->mcs_mt);
1261    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
1262
1263    return mt->mcs_mt;
1264 }
1265
1266
1267 bool
1268 intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
1269                                  struct intel_mipmap_tree *mt)
1270 {
1271    assert(mt->mcs_mt == NULL);
1272
1273    /* The format of the MCS buffer is opaque to the driver; all that matters
1274     * is that we get its size and pitch right.  We'll pretend that the format
1275     * is R32.  Since an MCS tile covers 128 blocks horizontally, and a Y-tiled
1276     * R32 buffer is 32 pixels across, we'll need to scale the width down by
1277     * the block width and then a further factor of 4.  Since an MCS tile
1278     * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high,
1279     * we'll need to scale the height down by the block height and then a
1280     * further factor of 8.
1281     */
1282    const gl_format format = MESA_FORMAT_R_UINT32;
1283    unsigned block_width_px;
1284    unsigned block_height;
1285    intel_get_non_msrt_mcs_alignment(brw, mt, &block_width_px, &block_height);
1286    unsigned width_divisor = block_width_px * 4;
1287    unsigned height_divisor = block_height * 8;
1288    unsigned mcs_width =
1289       ALIGN(mt->logical_width0, width_divisor) / width_divisor;
1290    unsigned mcs_height =
1291       ALIGN(mt->logical_height0, height_divisor) / height_divisor;
1292    assert(mt->logical_depth0 == 1);
1293    mt->mcs_mt = intel_miptree_create(brw,
1294                                      mt->target,
1295                                      format,
1296                                      mt->first_level,
1297                                      mt->last_level,
1298                                      mcs_width,
1299                                      mcs_height,
1300                                      mt->logical_depth0,
1301                                      true,
1302                                      0 /* num_samples */,
1303                                      INTEL_MIPTREE_TILING_Y);
1304
1305    return mt->mcs_mt;
1306 }
1307
1308
1309 /**
1310  * Helper for intel_miptree_alloc_hiz() that sets
1311  * \c mt->level[level].slice[layer].has_hiz. Return true if and only if
1312  * \c has_hiz was set.
1313  */
1314 static bool
1315 intel_miptree_slice_enable_hiz(struct brw_context *brw,
1316                                struct intel_mipmap_tree *mt,
1317                                uint32_t level,
1318                                uint32_t layer)
1319 {
1320    assert(mt->hiz_mt);
1321
1322    if (brw->is_haswell) {
1323       const struct intel_mipmap_level *l = &mt->level[level];
1324
1325       /* Disable HiZ for LOD > 0 unless the width is 8 aligned
1326        * and the height is 4 aligned. This allows our HiZ support
1327        * to fulfill Haswell restrictions for HiZ ops. For LOD == 0,
1328        * we can grow the width & height to allow the HiZ op to
1329        * force the proper size alignments.
1330        */
1331       if (level > 0 && ((l->width & 7) || (l->height & 3))) {
1332          return false;
1333       }
1334    }
1335
1336    mt->level[level].slice[layer].has_hiz = true;
1337    return true;
1338 }
1339
1340
1341
1342 bool
1343 intel_miptree_alloc_hiz(struct brw_context *brw,
1344                         struct intel_mipmap_tree *mt)
1345 {
1346    assert(mt->hiz_mt == NULL);
1347    mt->hiz_mt = intel_miptree_create(brw,
1348                                      mt->target,
1349                                      mt->format,
1350                                      mt->first_level,
1351                                      mt->last_level,
1352                                      mt->logical_width0,
1353                                      mt->logical_height0,
1354                                      mt->logical_depth0,
1355                                      true,
1356                                      mt->num_samples,
1357                                      INTEL_MIPTREE_TILING_ANY);
1358
1359    if (!mt->hiz_mt)
1360       return false;
1361
1362    /* Mark that all slices need a HiZ resolve. */
1363    struct intel_resolve_map *head = &mt->hiz_map;
1364    for (int level = mt->first_level; level <= mt->last_level; ++level) {
1365       for (int layer = 0; layer < mt->level[level].depth; ++layer) {
1366          if (!intel_miptree_slice_enable_hiz(brw, mt, level, layer))
1367             continue;
1368
1369          head->next = malloc(sizeof(*head->next));
1370          head->next->prev = head;
1371          head->next->next = NULL;
1372          head = head->next;
1373
1374          head->level = level;
1375          head->layer = layer;
1376          head->need = GEN6_HIZ_OP_HIZ_RESOLVE;
1377       }
1378    }
1379
1380    return true;
1381 }
1382
1383 /**
1384  * Does the miptree slice have hiz enabled?
1385  */
1386 bool
1387 intel_miptree_slice_has_hiz(struct intel_mipmap_tree *mt,
1388                             uint32_t level,
1389                             uint32_t layer)
1390 {
1391    intel_miptree_check_level_layer(mt, level, layer);
1392    return mt->level[level].slice[layer].has_hiz;
1393 }
1394
1395 void
1396 intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,
1397                                           uint32_t level,
1398                                           uint32_t layer)
1399 {
1400    if (!intel_miptree_slice_has_hiz(mt, level, layer))
1401       return;
1402
1403    intel_resolve_map_set(&mt->hiz_map,
1404                          level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
1405 }
1406
1407
1408 void
1409 intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt,
1410                                             uint32_t level,
1411                                             uint32_t layer)
1412 {
1413    if (!intel_miptree_slice_has_hiz(mt, level, layer))
1414       return;
1415
1416    intel_resolve_map_set(&mt->hiz_map,
1417                          level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
1418 }
1419
1420 void
1421 intel_miptree_set_all_slices_need_depth_resolve(struct intel_mipmap_tree *mt,
1422                                                 uint32_t level)
1423 {
1424    uint32_t layer;
1425    uint32_t end_layer = mt->level[level].depth;
1426
1427    for (layer = 0; layer < end_layer; layer++) {
1428       intel_miptree_slice_set_needs_depth_resolve(mt, level, layer);
1429    }
1430 }
1431
1432 static bool
1433 intel_miptree_slice_resolve(struct brw_context *brw,
1434                             struct intel_mipmap_tree *mt,
1435                             uint32_t level,
1436                             uint32_t layer,
1437                             enum gen6_hiz_op need)
1438 {
1439    intel_miptree_check_level_layer(mt, level, layer);
1440
1441    struct intel_resolve_map *item =
1442          intel_resolve_map_get(&mt->hiz_map, level, layer);
1443
1444    if (!item || item->need != need)
1445       return false;
1446
1447    intel_hiz_exec(brw, mt, level, layer, need);
1448    intel_resolve_map_remove(item);
1449    return true;
1450 }
1451
1452 bool
1453 intel_miptree_slice_resolve_hiz(struct brw_context *brw,
1454                                 struct intel_mipmap_tree *mt,
1455                                 uint32_t level,
1456                                 uint32_t layer)
1457 {
1458    return intel_miptree_slice_resolve(brw, mt, level, layer,
1459                                       GEN6_HIZ_OP_HIZ_RESOLVE);
1460 }
1461
1462 bool
1463 intel_miptree_slice_resolve_depth(struct brw_context *brw,
1464                                   struct intel_mipmap_tree *mt,
1465                                   uint32_t level,
1466                                   uint32_t layer)
1467 {
1468    return intel_miptree_slice_resolve(brw, mt, level, layer,
1469                                       GEN6_HIZ_OP_DEPTH_RESOLVE);
1470 }
1471
1472 static bool
1473 intel_miptree_all_slices_resolve(struct brw_context *brw,
1474                                  struct intel_mipmap_tree *mt,
1475                                  enum gen6_hiz_op need)
1476 {
1477    bool did_resolve = false;
1478    struct intel_resolve_map *i, *next;
1479
1480    for (i = mt->hiz_map.next; i; i = next) {
1481       next = i->next;
1482       if (i->need != need)
1483          continue;
1484
1485       intel_hiz_exec(brw, mt, i->level, i->layer, need);
1486       intel_resolve_map_remove(i);
1487       did_resolve = true;
1488    }
1489
1490    return did_resolve;
1491 }
1492
1493 bool
1494 intel_miptree_all_slices_resolve_hiz(struct brw_context *brw,
1495                                      struct intel_mipmap_tree *mt)
1496 {
1497    return intel_miptree_all_slices_resolve(brw, mt,
1498                                            GEN6_HIZ_OP_HIZ_RESOLVE);
1499 }
1500
1501 bool
1502 intel_miptree_all_slices_resolve_depth(struct brw_context *brw,
1503                                        struct intel_mipmap_tree *mt)
1504 {
1505    return intel_miptree_all_slices_resolve(brw, mt,
1506                                            GEN6_HIZ_OP_DEPTH_RESOLVE);
1507 }
1508
1509
1510 void
1511 intel_miptree_resolve_color(struct brw_context *brw,
1512                             struct intel_mipmap_tree *mt)
1513 {
1514    switch (mt->fast_clear_state) {
1515    case INTEL_FAST_CLEAR_STATE_NO_MCS:
1516    case INTEL_FAST_CLEAR_STATE_RESOLVED:
1517       /* No resolve needed */
1518       break;
1519    case INTEL_FAST_CLEAR_STATE_UNRESOLVED:
1520    case INTEL_FAST_CLEAR_STATE_CLEAR:
1521       /* Fast color clear resolves only make sense for non-MSAA buffers. */
1522       if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE)
1523          brw_blorp_resolve_color(brw, mt);
1524       break;
1525    }
1526 }
1527
1528
1529 /**
1530  * Make it possible to share the region backing the given miptree with another
1531  * process or another miptree.
1532  *
1533  * Fast color clears are unsafe with shared buffers, so we need to resolve and
1534  * then discard the MCS buffer, if present.  We also set the fast_clear_state
1535  * to INTEL_FAST_CLEAR_STATE_NO_MCS to ensure that no MCS buffer gets
1536  * allocated in the future.
1537  */
1538 void
1539 intel_miptree_make_shareable(struct brw_context *brw,
1540                              struct intel_mipmap_tree *mt)
1541 {
1542    /* MCS buffers are also used for multisample buffers, but we can't resolve
1543     * away a multisample MCS buffer because it's an integral part of how the
1544     * pixel data is stored.  Fortunately this code path should never be
1545     * reached for multisample buffers.
1546     */
1547    assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE);
1548
1549    if (mt->mcs_mt) {
1550       intel_miptree_resolve_color(brw, mt);
1551       intel_miptree_release(&mt->mcs_mt);
1552       mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
1553    }
1554 }
1555
1556
/**
 * \brief Compute the byte offset of pixel (x, y) in a W-tiled stencil buffer.
 *
 * The stencil buffer is W tiled and the GTT cannot fence W tiling, so the
 * tile layout has to be decoded in software.
 *
 * See
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
 *     Format.
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
 *
 * \param stride    buffer stride; one row of tiles occupies 64 * stride bytes
 * \param x, y      pixel coordinates
 * \param swizzled  apply the bit-6 swizzle adjustment
 *
 * The returned offset is always positive, yet the return type is signed due
 * to
 *    commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
 *    mesa: Fix return type of  _mesa_get_format_bytes() (#37351)
 */
static intptr_t
intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
{
   const uint32_t tile_size = 4096;
   const uint32_t tile_width = 64;
   const uint32_t tile_height = 64;
   const uint32_t row_size = 64 * stride;

   /* Which tile the pixel falls in. */
   const uint32_t tile_x = x / tile_width;
   const uint32_t tile_y = y / tile_height;

   /* The byte's address relative to the tile's base address. */
   const uint32_t byte_x = x % tile_width;
   const uint32_t byte_y = y % tile_height;

   /* Inside a tile the low three bits of byte_x and byte_y interleave
    * (x0, y0, x1, y1, x2, y2 from least significant upward), followed by
    * byte_y / 8 in units of 64 and byte_x / 8 in units of 512.
    */
   const uint32_t within_tile = ((byte_x >> 3) << 9)
                              + ((byte_y >> 3) << 6)
                              + (((byte_y >> 2) & 1) << 5)
                              + (((byte_x >> 2) & 1) << 4)
                              + (((byte_y >> 1) & 1) << 3)
                              + (((byte_x >> 1) & 1) << 2)
                              + ((byte_y & 1) << 1)
                              + (byte_x & 1);

   uintptr_t u = tile_y * row_size + tile_x * tile_size + within_tile;

   /* Adjust for bit6 swizzling. */
   if (swizzled && ((byte_x >> 3) & 1)) {
      if ((byte_y >> 3) & 1)
         u -= 64;
      else
         u += 64;
   }

   return u;
}
1612
1613 static void
1614 intel_miptree_updownsample(struct brw_context *brw,
1615                            struct intel_mipmap_tree *src,
1616                            struct intel_mipmap_tree *dst,
1617                            unsigned width,
1618                            unsigned height)
1619 {
1620    int src_x0 = 0;
1621    int src_y0 = 0;
1622    int dst_x0 = 0;
1623    int dst_y0 = 0;
1624
1625    brw_blorp_blit_miptrees(brw,
1626                            src, 0 /* level */, 0 /* layer */,
1627                            dst, 0 /* level */, 0 /* layer */,
1628                            src_x0, src_y0,
1629                            width, height,
1630                            dst_x0, dst_y0,
1631                            width, height,
1632                            GL_NEAREST, false, false /*mirror x, y*/);
1633
1634    if (src->stencil_mt) {
1635       brw_blorp_blit_miptrees(brw,
1636                               src->stencil_mt, 0 /* level */, 0 /* layer */,
1637                               dst->stencil_mt, 0 /* level */, 0 /* layer */,
1638                               src_x0, src_y0,
1639                               width, height,
1640                               dst_x0, dst_y0,
1641                               width, height,
1642                               GL_NEAREST, false, false /*mirror x, y*/);
1643    }
1644 }
1645
1646 static void
1647 assert_is_flat(struct intel_mipmap_tree *mt)
1648 {
1649    assert(mt->target == GL_TEXTURE_2D);
1650    assert(mt->first_level == 0);
1651    assert(mt->last_level == 0);
1652 }
1653
1654 /**
1655  * \brief Downsample from mt to mt->singlesample_mt.
1656  *
1657  * If the miptree needs no downsample, then skip.
1658  */
1659 void
1660 intel_miptree_downsample(struct brw_context *brw,
1661                          struct intel_mipmap_tree *mt)
1662 {
1663    /* Only flat, renderbuffer-like miptrees are supported. */
1664    assert_is_flat(mt);
1665
1666    if (!mt->need_downsample)
1667       return;
1668    intel_miptree_updownsample(brw,
1669                               mt, mt->singlesample_mt,
1670                               mt->logical_width0,
1671                               mt->logical_height0);
1672    mt->need_downsample = false;
1673 }
1674
1675 /**
1676  * \brief Upsample from mt->singlesample_mt to mt.
1677  *
1678  * The upsample is done unconditionally.
1679  */
1680 void
1681 intel_miptree_upsample(struct brw_context *brw,
1682                        struct intel_mipmap_tree *mt)
1683 {
1684    /* Only flat, renderbuffer-like miptrees are supported. */
1685    assert_is_flat(mt);
1686    assert(!mt->need_downsample);
1687
1688    intel_miptree_updownsample(brw,
1689                               mt->singlesample_mt, mt,
1690                               mt->logical_width0,
1691                               mt->logical_height0);
1692 }
1693
1694 void *
1695 intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt)
1696 {
1697    /* CPU accesses to color buffers don't understand fast color clears, so
1698     * resolve any pending fast color clears before we map.
1699     */
1700    intel_miptree_resolve_color(brw, mt);
1701
1702    drm_intel_bo *bo = mt->region->bo;
1703
1704    if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
1705       if (drm_intel_bo_busy(bo)) {
1706          perf_debug("Mapping a busy miptree, causing a stall on the GPU.\n");
1707       }
1708    }
1709
1710    intel_batchbuffer_flush(brw);
1711
1712    if (mt->region->tiling != I915_TILING_NONE)
1713       drm_intel_gem_bo_map_gtt(bo);
1714    else
1715       drm_intel_bo_map(bo, true);
1716
1717    return bo->virtual;
1718 }
1719
1720 void
1721 intel_miptree_unmap_raw(struct brw_context *brw,
1722                         struct intel_mipmap_tree *mt)
1723 {
1724    drm_intel_bo_unmap(mt->region->bo);
1725 }
1726
1727 static void
1728 intel_miptree_map_gtt(struct brw_context *brw,
1729                       struct intel_mipmap_tree *mt,
1730                       struct intel_miptree_map *map,
1731                       unsigned int level, unsigned int slice)
1732 {
1733    unsigned int bw, bh;
1734    void *base;
1735    unsigned int image_x, image_y;
1736    int x = map->x;
1737    int y = map->y;
1738
1739    /* For compressed formats, the stride is the number of bytes per
1740     * row of blocks.  intel_miptree_get_image_offset() already does
1741     * the divide.
1742     */
1743    _mesa_get_format_block_size(mt->format, &bw, &bh);
1744    assert(y % bh == 0);
1745    y /= bh;
1746
1747    base = intel_miptree_map_raw(brw, mt) + mt->offset;
1748
1749    if (base == NULL)
1750       map->ptr = NULL;
1751    else {
1752       /* Note that in the case of cube maps, the caller must have passed the
1753        * slice number referencing the face.
1754       */
1755       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1756       x += image_x;
1757       y += image_y;
1758
1759       map->stride = mt->region->pitch;
1760       map->ptr = base + y * map->stride + x * mt->cpp;
1761    }
1762
1763    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1764        map->x, map->y, map->w, map->h,
1765        mt, _mesa_get_format_name(mt->format),
1766        x, y, map->ptr, map->stride);
1767 }
1768
/**
 * Undo intel_miptree_map_gtt(): simply drops the raw mapping of \c mt.
 *
 * \c map, \c level and \c slice are unused here.
 */
static void
intel_miptree_unmap_gtt(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
                        struct intel_miptree_map *map,
                        unsigned int level,
                        unsigned int slice)
{
   /* No temporary was created, so there is nothing to copy back. */
   intel_miptree_unmap_raw(brw, mt);
}
1778
1779 static void
1780 intel_miptree_map_blit(struct brw_context *brw,
1781                        struct intel_mipmap_tree *mt,
1782                        struct intel_miptree_map *map,
1783                        unsigned int level, unsigned int slice)
1784 {
1785    map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
1786                                   0, 0,
1787                                   map->w, map->h, 1,
1788                                   false, 0,
1789                                   INTEL_MIPTREE_TILING_NONE);
1790    if (!map->mt) {
1791       fprintf(stderr, "Failed to allocate blit temporary\n");
1792       goto fail;
1793    }
1794    map->stride = map->mt->region->pitch;
1795
1796    if (!intel_miptree_blit(brw,
1797                            mt, level, slice,
1798                            map->x, map->y, false,
1799                            map->mt, 0, 0,
1800                            0, 0, false,
1801                            map->w, map->h, GL_COPY)) {
1802       fprintf(stderr, "Failed to blit\n");
1803       goto fail;
1804    }
1805
1806    map->ptr = intel_miptree_map_raw(brw, map->mt);
1807
1808    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1809        map->x, map->y, map->w, map->h,
1810        mt, _mesa_get_format_name(mt->format),
1811        level, slice, map->ptr, map->stride);
1812
1813    return;
1814
1815 fail:
1816    intel_miptree_release(&map->mt);
1817    map->ptr = NULL;
1818    map->stride = 0;
1819 }
1820
1821 static void
1822 intel_miptree_unmap_blit(struct brw_context *brw,
1823                          struct intel_mipmap_tree *mt,
1824                          struct intel_miptree_map *map,
1825                          unsigned int level,
1826                          unsigned int slice)
1827 {
1828    struct gl_context *ctx = &brw->ctx;
1829
1830    intel_miptree_unmap_raw(brw, map->mt);
1831
1832    if (map->mode & GL_MAP_WRITE_BIT) {
1833       bool ok = intel_miptree_blit(brw,
1834                                    map->mt, 0, 0,
1835                                    0, 0, false,
1836                                    mt, level, slice,
1837                                    map->x, map->y, false,
1838                                    map->w, map->h, GL_COPY);
1839       WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
1840    }
1841
1842    intel_miptree_release(&map->mt);
1843 }
1844
1845 #ifdef __SSE4_1__
1846 /**
1847  * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
1848  */
1849 static void
1850 intel_miptree_map_movntdqa(struct brw_context *brw,
1851                            struct intel_mipmap_tree *mt,
1852                            struct intel_miptree_map *map,
1853                            unsigned int level, unsigned int slice)
1854 {
1855    assert(map->mode & GL_MAP_READ_BIT);
1856    assert(!(map->mode & GL_MAP_WRITE_BIT));
1857
1858    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1859        map->x, map->y, map->w, map->h,
1860        mt, _mesa_get_format_name(mt->format),
1861        level, slice, map->ptr, map->stride);
1862
1863    /* Map the original image */
1864    uint32_t image_x;
1865    uint32_t image_y;
1866    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1867    image_x += map->x;
1868    image_y += map->y;
1869
1870    void *src = intel_miptree_map_raw(brw, mt);
1871    if (!src)
1872       return;
1873    src += image_y * mt->region->pitch;
1874    src += image_x * mt->region->cpp;
1875
1876    /* Due to the pixel offsets for the particular image being mapped, our
1877     * src pointer may not be 16-byte aligned.  However, if the pitch is
1878     * divisible by 16, then the amount by which it's misaligned will remain
1879     * consistent from row to row.
1880     */
1881    assert((mt->region->pitch % 16) == 0);
1882    const int misalignment = ((uintptr_t) src) & 15;
1883
1884    /* Create an untiled temporary buffer for the mapping. */
1885    const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w);
1886
1887    map->stride = ALIGN(misalignment + width_bytes, 16);
1888
1889    map->buffer = malloc(map->stride * map->h);
1890    /* Offset the destination so it has the same misalignment as src. */
1891    map->ptr = map->buffer + misalignment;
1892
1893    assert((((uintptr_t) map->ptr) & 15) == misalignment);
1894
1895    for (uint32_t y = 0; y < map->h; y++) {
1896       void *dst_ptr = map->ptr + y * map->stride;
1897       void *src_ptr = src + y * mt->region->pitch;
1898
1899       _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
1900    }
1901
1902    intel_miptree_unmap_raw(brw, mt);
1903 }
1904
1905 static void
1906 intel_miptree_unmap_movntdqa(struct brw_context *brw,
1907                              struct intel_mipmap_tree *mt,
1908                              struct intel_miptree_map *map,
1909                              unsigned int level,
1910                              unsigned int slice)
1911 {
1912    free(map->buffer);
1913    map->buffer = NULL;
1914    map->ptr = NULL;
1915 }
1916 #endif
1917
1918 static void
1919 intel_miptree_map_s8(struct brw_context *brw,
1920                      struct intel_mipmap_tree *mt,
1921                      struct intel_miptree_map *map,
1922                      unsigned int level, unsigned int slice)
1923 {
1924    map->stride = map->w;
1925    map->buffer = map->ptr = malloc(map->stride * map->h);
1926    if (!map->buffer)
1927       return;
1928
1929    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
1930     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
1931     * invalidate is set, since we'll be writing the whole rectangle from our
1932     * temporary buffer back out.
1933     */
1934    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
1935       uint8_t *untiled_s8_map = map->ptr;
1936       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
1937       unsigned int image_x, image_y;
1938
1939       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1940
1941       for (uint32_t y = 0; y < map->h; y++) {
1942          for (uint32_t x = 0; x < map->w; x++) {
1943             ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
1944                                                x + image_x + map->x,
1945                                                y + image_y + map->y,
1946                                                brw->has_swizzling);
1947             untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
1948          }
1949       }
1950
1951       intel_miptree_unmap_raw(brw, mt);
1952
1953       DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __FUNCTION__,
1954           map->x, map->y, map->w, map->h,
1955           mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
1956    } else {
1957       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
1958           map->x, map->y, map->w, map->h,
1959           mt, map->ptr, map->stride);
1960    }
1961 }
1962
1963 static void
1964 intel_miptree_unmap_s8(struct brw_context *brw,
1965                        struct intel_mipmap_tree *mt,
1966                        struct intel_miptree_map *map,
1967                        unsigned int level,
1968                        unsigned int slice)
1969 {
1970    if (map->mode & GL_MAP_WRITE_BIT) {
1971       unsigned int image_x, image_y;
1972       uint8_t *untiled_s8_map = map->ptr;
1973       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
1974
1975       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1976
1977       for (uint32_t y = 0; y < map->h; y++) {
1978          for (uint32_t x = 0; x < map->w; x++) {
1979             ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
1980                                                x + map->x,
1981                                                y + map->y,
1982                                                brw->has_swizzling);
1983             tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
1984          }
1985       }
1986
1987       intel_miptree_unmap_raw(brw, mt);
1988    }
1989
1990    free(map->buffer);
1991 }
1992
1993 static void
1994 intel_miptree_map_etc(struct brw_context *brw,
1995                       struct intel_mipmap_tree *mt,
1996                       struct intel_miptree_map *map,
1997                       unsigned int level,
1998                       unsigned int slice)
1999 {
2000    assert(mt->etc_format != MESA_FORMAT_NONE);
2001    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
2002       assert(mt->format == MESA_FORMAT_RGBX8888_REV);
2003    }
2004
2005    assert(map->mode & GL_MAP_WRITE_BIT);
2006    assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
2007
2008    map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
2009    map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
2010                                                 map->w, map->h, 1));
2011    map->ptr = map->buffer;
2012 }
2013
2014 static void
2015 intel_miptree_unmap_etc(struct brw_context *brw,
2016                         struct intel_mipmap_tree *mt,
2017                         struct intel_miptree_map *map,
2018                         unsigned int level,
2019                         unsigned int slice)
2020 {
2021    uint32_t image_x;
2022    uint32_t image_y;
2023    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2024
2025    image_x += map->x;
2026    image_y += map->y;
2027
2028    uint8_t *dst = intel_miptree_map_raw(brw, mt)
2029                 + image_y * mt->region->pitch
2030                 + image_x * mt->region->cpp;
2031
2032    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
2033       _mesa_etc1_unpack_rgba8888(dst, mt->region->pitch,
2034                                  map->ptr, map->stride,
2035                                  map->w, map->h);
2036    else
2037       _mesa_unpack_etc2_format(dst, mt->region->pitch,
2038                                map->ptr, map->stride,
2039                                map->w, map->h, mt->etc_format);
2040
2041    intel_miptree_unmap_raw(brw, mt);
2042    free(map->buffer);
2043 }
2044
2045 /**
2046  * Mapping function for packed depth/stencil miptrees backed by real separate
2047  * miptrees for depth and stencil.
2048  *
2049  * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
2050  * separate from the depth buffer.  Yet at the GL API level, we have to expose
2051  * packed depth/stencil textures and FBO attachments, and Mesa core expects to
2052  * be able to map that memory for texture storage and glReadPixels-type
2053  * operations.  We give Mesa core that access by mallocing a temporary and
2054  * copying the data between the actual backing store and the temporary.
2055  */
2056 static void
2057 intel_miptree_map_depthstencil(struct brw_context *brw,
2058                                struct intel_mipmap_tree *mt,
2059                                struct intel_miptree_map *map,
2060                                unsigned int level, unsigned int slice)
2061 {
2062    struct intel_mipmap_tree *z_mt = mt;
2063    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
2064    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT;
2065    int packed_bpp = map_z32f_x24s8 ? 8 : 4;
2066
2067    map->stride = map->w * packed_bpp;
2068    map->buffer = map->ptr = malloc(map->stride * map->h);
2069    if (!map->buffer)
2070       return;
2071
2072    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
2073     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
2074     * invalidate is set, since we'll be writing the whole rectangle from our
2075     * temporary buffer back out.
2076     */
2077    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
2078       uint32_t *packed_map = map->ptr;
2079       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
2080       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
2081       unsigned int s_image_x, s_image_y;
2082       unsigned int z_image_x, z_image_y;
2083
2084       intel_miptree_get_image_offset(s_mt, level, slice,
2085                                      &s_image_x, &s_image_y);
2086       intel_miptree_get_image_offset(z_mt, level, slice,
2087                                      &z_image_x, &z_image_y);
2088
2089       for (uint32_t y = 0; y < map->h; y++) {
2090          for (uint32_t x = 0; x < map->w; x++) {
2091             int map_x = map->x + x, map_y = map->y + y;
2092             ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
2093                                                  map_x + s_image_x,
2094                                                  map_y + s_image_y,
2095                                                  brw->has_swizzling);
2096             ptrdiff_t z_offset = ((map_y + z_image_y) *
2097                                   (z_mt->region->pitch / 4) +
2098                                   (map_x + z_image_x));
2099             uint8_t s = s_map[s_offset];
2100             uint32_t z = z_map[z_offset];
2101
2102             if (map_z32f_x24s8) {
2103                packed_map[(y * map->w + x) * 2 + 0] = z;
2104                packed_map[(y * map->w + x) * 2 + 1] = s;
2105             } else {
2106                packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
2107             }
2108          }
2109       }
2110
2111       intel_miptree_unmap_raw(brw, s_mt);
2112       intel_miptree_unmap_raw(brw, z_mt);
2113
2114       DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
2115           __FUNCTION__,
2116           map->x, map->y, map->w, map->h,
2117           z_mt, map->x + z_image_x, map->y + z_image_y,
2118           s_mt, map->x + s_image_x, map->y + s_image_y,
2119           map->ptr, map->stride);
2120    } else {
2121       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
2122           map->x, map->y, map->w, map->h,
2123           mt, map->ptr, map->stride);
2124    }
2125 }
2126
2127 static void
2128 intel_miptree_unmap_depthstencil(struct brw_context *brw,
2129                                  struct intel_mipmap_tree *mt,
2130                                  struct intel_miptree_map *map,
2131                                  unsigned int level,
2132                                  unsigned int slice)
2133 {
2134    struct intel_mipmap_tree *z_mt = mt;
2135    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
2136    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT;
2137
2138    if (map->mode & GL_MAP_WRITE_BIT) {
2139       uint32_t *packed_map = map->ptr;
2140       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
2141       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
2142       unsigned int s_image_x, s_image_y;
2143       unsigned int z_image_x, z_image_y;
2144
2145       intel_miptree_get_image_offset(s_mt, level, slice,
2146                                      &s_image_x, &s_image_y);
2147       intel_miptree_get_image_offset(z_mt, level, slice,
2148                                      &z_image_x, &z_image_y);
2149
2150       for (uint32_t y = 0; y < map->h; y++) {
2151          for (uint32_t x = 0; x < map->w; x++) {
2152             ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
2153                                                  x + s_image_x + map->x,
2154                                                  y + s_image_y + map->y,
2155                                                  brw->has_swizzling);
2156             ptrdiff_t z_offset = ((y + z_image_y) *
2157                                   (z_mt->region->pitch / 4) +
2158                                   (x + z_image_x));
2159
2160             if (map_z32f_x24s8) {
2161                z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
2162                s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
2163             } else {
2164                uint32_t packed = packed_map[y * map->w + x];
2165                s_map[s_offset] = packed >> 24;
2166                z_map[z_offset] = packed;
2167             }
2168          }
2169       }
2170
2171       intel_miptree_unmap_raw(brw, s_mt);
2172       intel_miptree_unmap_raw(brw, z_mt);
2173
2174       DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
2175           __FUNCTION__,
2176           map->x, map->y, map->w, map->h,
2177           z_mt, _mesa_get_format_name(z_mt->format),
2178           map->x + z_image_x, map->y + z_image_y,
2179           s_mt, map->x + s_image_x, map->y + s_image_y,
2180           map->ptr, map->stride);
2181    }
2182
2183    free(map->buffer);
2184 }
2185
2186 /**
2187  * Create and attach a map to the miptree at (level, slice). Return the
2188  * attached map.
2189  */
2190 static struct intel_miptree_map*
2191 intel_miptree_attach_map(struct intel_mipmap_tree *mt,
2192                          unsigned int level,
2193                          unsigned int slice,
2194                          unsigned int x,
2195                          unsigned int y,
2196                          unsigned int w,
2197                          unsigned int h,
2198                          GLbitfield mode)
2199 {
2200    struct intel_miptree_map *map = calloc(1, sizeof(*map));
2201
2202    if (!map)
2203       return NULL;
2204
2205    assert(mt->level[level].slice[slice].map == NULL);
2206    mt->level[level].slice[slice].map = map;
2207
2208    map->mode = mode;
2209    map->x = x;
2210    map->y = y;
2211    map->w = w;
2212    map->h = h;
2213
2214    return map;
2215 }
2216
2217 /**
2218  * Release the map at (level, slice).
2219  */
2220 static void
2221 intel_miptree_release_map(struct intel_mipmap_tree *mt,
2222                          unsigned int level,
2223                          unsigned int slice)
2224 {
2225    struct intel_miptree_map **map;
2226
2227    map = &mt->level[level].slice[slice].map;
2228    free(*map);
2229    *map = NULL;
2230 }
2231
2232 static void
2233 intel_miptree_map_singlesample(struct brw_context *brw,
2234                                struct intel_mipmap_tree *mt,
2235                                unsigned int level,
2236                                unsigned int slice,
2237                                unsigned int x,
2238                                unsigned int y,
2239                                unsigned int w,
2240                                unsigned int h,
2241                                GLbitfield mode,
2242                                void **out_ptr,
2243                                int *out_stride)
2244 {
2245    struct intel_miptree_map *map;
2246
2247    assert(mt->num_samples <= 1);
2248
2249    map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
2250    if (!map){
2251       *out_ptr = NULL;
2252       *out_stride = 0;
2253       return;
2254    }
2255
2256    intel_miptree_slice_resolve_depth(brw, mt, level, slice);
2257    if (map->mode & GL_MAP_WRITE_BIT) {
2258       intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice);
2259    }
2260
2261    if (mt->format == MESA_FORMAT_S8) {
2262       intel_miptree_map_s8(brw, mt, map, level, slice);
2263    } else if (mt->etc_format != MESA_FORMAT_NONE &&
2264               !(mode & BRW_MAP_DIRECT_BIT)) {
2265       intel_miptree_map_etc(brw, mt, map, level, slice);
2266    } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
2267       intel_miptree_map_depthstencil(brw, mt, map, level, slice);
2268    }
2269    /* See intel_miptree_blit() for details on the 32k pitch limit. */
2270    else if (brw->has_llc &&
2271             !(mode & GL_MAP_WRITE_BIT) &&
2272             !mt->compressed &&
2273             (mt->region->tiling == I915_TILING_X ||
2274              (brw->gen >= 6 && mt->region->tiling == I915_TILING_Y)) &&
2275             mt->region->pitch < 32768) {
2276       intel_miptree_map_blit(brw, mt, map, level, slice);
2277    } else if (mt->region->tiling != I915_TILING_NONE &&
2278               mt->region->bo->size >= brw->max_gtt_map_object_size) {
2279       assert(mt->region->pitch < 32768);
2280       intel_miptree_map_blit(brw, mt, map, level, slice);
2281 #ifdef __SSE4_1__
2282    } else if (!(mode & GL_MAP_WRITE_BIT) && !mt->compressed) {
2283       intel_miptree_map_movntdqa(brw, mt, map, level, slice);
2284 #endif
2285    } else {
2286       intel_miptree_map_gtt(brw, mt, map, level, slice);
2287    }
2288
2289    *out_ptr = map->ptr;
2290    *out_stride = map->stride;
2291
2292    if (map->ptr == NULL)
2293       intel_miptree_release_map(mt, level, slice);
2294 }
2295
2296 static void
2297 intel_miptree_unmap_singlesample(struct brw_context *brw,
2298                                  struct intel_mipmap_tree *mt,
2299                                  unsigned int level,
2300                                  unsigned int slice)
2301 {
2302    struct intel_miptree_map *map = mt->level[level].slice[slice].map;
2303
2304    assert(mt->num_samples <= 1);
2305
2306    if (!map)
2307       return;
2308
2309    DBG("%s: mt %p (%s) level %d slice %d\n", __FUNCTION__,
2310        mt, _mesa_get_format_name(mt->format), level, slice);
2311
2312    if (mt->format == MESA_FORMAT_S8) {
2313       intel_miptree_unmap_s8(brw, mt, map, level, slice);
2314    } else if (mt->etc_format != MESA_FORMAT_NONE &&
2315               !(map->mode & BRW_MAP_DIRECT_BIT)) {
2316       intel_miptree_unmap_etc(brw, mt, map, level, slice);
2317    } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
2318       intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
2319    } else if (map->mt) {
2320       intel_miptree_unmap_blit(brw, mt, map, level, slice);
2321 #ifdef __SSE4_1__
2322    } else if (map->buffer) {
2323       intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
2324 #endif
2325    } else {
2326       intel_miptree_unmap_gtt(brw, mt, map, level, slice);
2327    }
2328
2329    intel_miptree_release_map(mt, level, slice);
2330 }
2331
2332 static void
2333 intel_miptree_map_multisample(struct brw_context *brw,
2334                               struct intel_mipmap_tree *mt,
2335                               unsigned int level,
2336                               unsigned int slice,
2337                               unsigned int x,
2338                               unsigned int y,
2339                               unsigned int w,
2340                               unsigned int h,
2341                               GLbitfield mode,
2342                               void **out_ptr,
2343                               int *out_stride)
2344 {
2345    struct gl_context *ctx = &brw->ctx;
2346    struct intel_miptree_map *map;
2347
2348    assert(mt->num_samples > 1);
2349
2350    /* Only flat, renderbuffer-like miptrees are supported. */
2351    if (mt->target != GL_TEXTURE_2D ||
2352        mt->first_level != 0 ||
2353        mt->last_level != 0) {
2354       _mesa_problem(ctx, "attempt to map a multisample miptree for "
2355                     "which (target, first_level, last_level != "
2356                     "(GL_TEXTURE_2D, 0, 0)");
2357       goto fail;
2358    }
2359
2360    map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
2361    if (!map)
2362       goto fail;
2363
2364    if (!mt->singlesample_mt) {
2365       mt->singlesample_mt =
2366          intel_miptree_create_for_renderbuffer(brw,
2367                                                mt->format,
2368                                                mt->logical_width0,
2369                                                mt->logical_height0,
2370                                                0 /*num_samples*/);
2371       if (!mt->singlesample_mt)
2372          goto fail;
2373
2374       map->singlesample_mt_is_tmp = true;
2375       mt->need_downsample = true;
2376    }
2377
2378    intel_miptree_downsample(brw, mt);
2379    intel_miptree_map_singlesample(brw, mt->singlesample_mt,
2380                                   level, slice,
2381                                   x, y, w, h,
2382                                   mode,
2383                                   out_ptr, out_stride);
2384    return;
2385
2386 fail:
2387    intel_miptree_release_map(mt, level, slice);
2388    *out_ptr = NULL;
2389    *out_stride = 0;
2390 }
2391
2392 static void
2393 intel_miptree_unmap_multisample(struct brw_context *brw,
2394                                 struct intel_mipmap_tree *mt,
2395                                 unsigned int level,
2396                                 unsigned int slice)
2397 {
2398    struct intel_miptree_map *map = mt->level[level].slice[slice].map;
2399
2400    assert(mt->num_samples > 1);
2401
2402    if (!map)
2403       return;
2404
2405    intel_miptree_unmap_singlesample(brw, mt->singlesample_mt, level, slice);
2406
2407    mt->need_downsample = false;
2408    if (map->mode & GL_MAP_WRITE_BIT)
2409       intel_miptree_upsample(brw, mt);
2410
2411    if (map->singlesample_mt_is_tmp)
2412       intel_miptree_release(&mt->singlesample_mt);
2413
2414    intel_miptree_release_map(mt, level, slice);
2415 }
2416
2417 void
2418 intel_miptree_map(struct brw_context *brw,
2419                   struct intel_mipmap_tree *mt,
2420                   unsigned int level,
2421                   unsigned int slice,
2422                   unsigned int x,
2423                   unsigned int y,
2424                   unsigned int w,
2425                   unsigned int h,
2426                   GLbitfield mode,
2427                   void **out_ptr,
2428                   int *out_stride)
2429 {
2430    if (mt->num_samples <= 1)
2431       intel_miptree_map_singlesample(brw, mt,
2432                                      level, slice,
2433                                      x, y, w, h,
2434                                      mode,
2435                                      out_ptr, out_stride);
2436    else
2437       intel_miptree_map_multisample(brw, mt,
2438                                     level, slice,
2439                                     x, y, w, h,
2440                                     mode,
2441                                     out_ptr, out_stride);
2442 }
2443
2444 void
2445 intel_miptree_unmap(struct brw_context *brw,
2446                     struct intel_mipmap_tree *mt,
2447                     unsigned int level,
2448                     unsigned int slice)
2449 {
2450    if (mt->num_samples <= 1)
2451       intel_miptree_unmap_singlesample(brw, mt, level, slice);
2452    else
2453       intel_miptree_unmap_multisample(brw, mt, level, slice);
2454 }