src/mesa/drivers/dri/i965/intel_mipmap_tree.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 #include <GL/gl.h>
  29 #include <GL/internal/dri_interface.h>
  30
  31 #include "intel_batchbuffer.h"
  32 #include "intel_chipset.h"
  33 #include "intel_mipmap_tree.h"
  34 #include "intel_regions.h"
  35 #include "intel_resolve_map.h"
  36 #include "intel_tex.h"
  37 #include "intel_blit.h"
  38
  39 #include "brw_blorp.h"
  40 #include "brw_context.h"
  41
  42 #include "main/enums.h"
  43 #include "main/formats.h"
  44 #include "main/glformats.h"
  45 #include "main/texcompress_etc.h"
  46 #include "main/teximage.h"
  47 #include "main/streaming-load-memcpy.h"
  48
  49 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
  50
  51 static GLenum
  52 target_to_target(GLenum target)
  53 {
  54    switch (target) {
  55    case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
  56    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
  57    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
  58    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
  59    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
  60    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
  61       return GL_TEXTURE_CUBE_MAP_ARB;
  62    default:
  63       return target;
  64    }
  65 }
  66
  67
  68 /**
  69  * Determine which MSAA layout should be used by the MSAA surface being
  70  * created, based on the chip generation and the surface type.
  71  */
  72 static enum intel_msaa_layout
  73 compute_msaa_layout(struct brw_context *brw, gl_format format, GLenum target)
  74 {
  75    /* Prior to Gen7, all MSAA surfaces used IMS layout. */
  76    if (brw->gen < 7)
  77       return INTEL_MSAA_LAYOUT_IMS;
  78
  79    /* In Gen7, IMS layout is only used for depth and stencil buffers. */
  80    switch (_mesa_get_format_base_format(format)) {
  81    case GL_DEPTH_COMPONENT:
  82    case GL_STENCIL_INDEX:
  83    case GL_DEPTH_STENCIL:
  84       return INTEL_MSAA_LAYOUT_IMS;
  85    default:
  86       /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
  87        *
  88        *   This field must be set to 0 for all SINT MSRTs when all RT channels
  89        *   are not written
  90        *
  91        * In practice this means that we have to disable MCS for all signed
  92        * integer MSAA buffers.  The alternative, to disable MCS only when one
  93        * of the render target channels is disabled, is impractical because it
  94        * would require converting between CMS and UMS MSAA layouts on the fly,
  95        * which is expensive.
  96        */
  97       if (_mesa_get_format_datatype(format) == GL_INT) {
  98          /* TODO: is this workaround needed for future chipsets? */
  99          assert(brw->gen == 7);
 100          return INTEL_MSAA_LAYOUT_UMS;
 101       } else {
 102          return INTEL_MSAA_LAYOUT_CMS;
 103       }
 104    }
 105 }
 106
 107
 108 /**
 109  * For single-sampled render targets ("non-MSRT"), the MCS buffer is a
 110  * scaled-down bitfield representation of the color buffer which is capable of
 111  * recording when blocks of the color buffer are equal to the clear value.
 112  * This function returns the block size that will be used by the MCS buffer
 113  * corresponding to a certain color miptree.
 114  *
 115  * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 116  * beneath the "Fast Color Clear" bullet (p327):
 117  *
 118  *     The following table describes the RT alignment
 119  *
 120  *                       Pixels  Lines
 121  *         TiledY RT CL
 122  *             bpp
 123  *              32          8      4
 124  *              64          4      4
 125  *             128          2      4
 126  *         TiledX RT CL
 127  *             bpp
 128  *              32         16      2
 129  *              64          8      2
 130  *             128          4      2
 131  *
 132  * This alignment has the following uses:
 133  *
 134  * - For figuring out the size of the MCS buffer.  Each 4k tile in the MCS
 135  *   buffer contains 128 blocks horizontally and 256 blocks vertically.
 136  *
 137  * - For figuring out alignment restrictions for a fast clear operation.  Fast
 138  *   clear operations must always clear aligned multiples of 16 blocks
 139  *   horizontally and 32 blocks vertically.
 140  *
 141  * - For scaling down the coordinates sent through the render pipeline during
 142  *   a fast clear.  X coordinates must be scaled down by 8 times the block
 143  *   width, and Y coordinates by 16 times the block height.
 144  *
 145  * - For scaling down the coordinates sent through the render pipeline during
 146  *   a "Render Target Resolve" operation.  X coordinates must be scaled down
 147  *   by half the block width, and Y coordinates by half the block height.
 148  */
 149 void
 150 intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
 151                                  struct intel_mipmap_tree *mt,
 152                                  unsigned *width_px, unsigned *height)
 153 {
 154    switch (mt->region->tiling) {
 155    default:
 156       assert(!"Non-MSRT MCS requires X or Y tiling");
 157       /* In release builds, fall through */
 158    case I915_TILING_Y:
 159       *width_px = 32 / mt->cpp;
 160       *height = 4;
 161       break;
 162    case I915_TILING_X:
 163       *width_px = 64 / mt->cpp;
 164       *height = 2;
 165    }
 166 }
 167
 168
 169 /**
 170  * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
 171  * can be used.
 172  *
 173  * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 174  * beneath the "Fast Color Clear" bullet (p326):
 175  *
 176  *     - Support is limited to tiled render targets.
 177  *     - Support is for non-mip-mapped and non-array surface types only.
 178  *
 179  * And then later, on p327:
 180  *
 181  *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
 182  *       64bpp, and 128bpp.
 183  */
 184 bool
 185 intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
 186                                        struct intel_mipmap_tree *mt)
 187 {
 188    /* MCS support does not exist prior to Gen7 */
 189    if (brw->gen < 7 || brw->gen >= 8)
 190       return false;
 191
 192    /* MCS is only supported for color buffers */
 193    switch (_mesa_get_format_base_format(mt->format)) {
 194    case GL_DEPTH_COMPONENT:
 195    case GL_DEPTH_STENCIL:
 196    case GL_STENCIL_INDEX:
 197       return false;
 198    }
 199
 200    if (mt->region->tiling != I915_TILING_X &&
 201        mt->region->tiling != I915_TILING_Y)
 202       return false;
 203    if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
 204       return false;
 205    if (mt->first_level != 0 || mt->last_level != 0)
 206       return false;
 207    if (mt->physical_depth0 != 1)
 208       return false;
 209
 210    /* There's no point in using an MCS buffer if the surface isn't in a
 211     * renderable format.
 212     */
 213    if (!brw->format_supported_as_render_target[mt->format])
 214       return false;
 215
 216    return true;
 217 }
 218
 219
 220 /**
 221  * @param for_bo Indicates that the caller is
 222  *        intel_miptree_create_for_bo(). If true, then do not create
 223  *        \c stencil_mt.
 224  */
 225 struct intel_mipmap_tree *
 226 intel_miptree_create_layout(struct brw_context *brw,
 227                             GLenum target,
 228                             gl_format format,
 229                             GLuint first_level,
 230                             GLuint last_level,
 231                             GLuint width0,
 232                             GLuint height0,
 233                             GLuint depth0,
 234                             bool for_bo,
 235                             GLuint num_samples)
 236 {
 237    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
 238    if (!mt)
 239       return NULL;
 240
 241    DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__,
 242        _mesa_lookup_enum_by_nr(target),
 243        _mesa_get_format_name(format),
 244        first_level, last_level, mt);
 245
 246    mt->target = target_to_target(target);
 247    mt->format = format;
 248    mt->first_level = first_level;
 249    mt->last_level = last_level;
 250    mt->logical_width0 = width0;
 251    mt->logical_height0 = height0;
 252    mt->logical_depth0 = depth0;
 253    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
 254
 255    /* The cpp is bytes per (1, blockheight)-sized block for compressed
 256     * textures.  This is why you'll see divides by blockheight all over
 257     */
 258    unsigned bw, bh;
 259    _mesa_get_format_block_size(format, &bw, &bh);
 260    assert(_mesa_get_format_bytes(mt->format) % bw == 0);
 261    mt->cpp = _mesa_get_format_bytes(mt->format) / bw;
 262
 263    mt->num_samples = num_samples;
 264    mt->compressed = _mesa_is_format_compressed(format);
 265    mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE;
 266    mt->refcount = 1;
 267
 268    if (num_samples > 1) {
 269       /* Adjust width/height/depth for MSAA */
 270       mt->msaa_layout = compute_msaa_layout(brw, format, mt->target);
 271       if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
 272          /* In the Sandy Bridge PRM, volume 4, part 1, page 31, it says:
 273           *
 274           *     "Any of the other messages (sample*, LOD, load4) used with a
 275           *      (4x) multisampled surface will in-effect sample a surface with
 276           *      double the height and width as that indicated in the surface
 277           *      state. Each pixel position on the original-sized surface is
 278           *      replaced with a 2x2 of samples with the following arrangement:
 279           *
 280           *         sample 0 sample 2
 281           *         sample 1 sample 3"
 282           *
 283           * Thus, when sampling from a multisampled texture, it behaves as
 284           * though the layout in memory for (x,y,sample) is:
 285           *
 286           *      (0,0,0) (0,0,2)   (1,0,0) (1,0,2)
 287           *      (0,0,1) (0,0,3)   (1,0,1) (1,0,3)
 288           *
 289           *      (0,1,0) (0,1,2)   (1,1,0) (1,1,2)
 290           *      (0,1,1) (0,1,3)   (1,1,1) (1,1,3)
 291           *
 292           * However, the actual layout of multisampled data in memory is:
 293           *
 294           *      (0,0,0) (1,0,0)   (0,0,1) (1,0,1)
 295           *      (0,1,0) (1,1,0)   (0,1,1) (1,1,1)
 296           *
 297           *      (0,0,2) (1,0,2)   (0,0,3) (1,0,3)
 298           *      (0,1,2) (1,1,2)   (0,1,3) (1,1,3)
 299           *
 300           * This pattern repeats for each 2x2 pixel block.
 301           *
 302           * As a result, when calculating the size of our 4-sample buffer for
 303           * an odd width or height, we have to align before scaling up because
 304           * sample 3 is in that bottom right 2x2 block.
 305           */
 306          switch (num_samples) {
 307          case 4:
 308             width0 = ALIGN(width0, 2) * 2;
 309             height0 = ALIGN(height0, 2) * 2;
 310             break;
 311          case 8:
 312             width0 = ALIGN(width0, 2) * 4;
 313             height0 = ALIGN(height0, 2) * 2;
 314             break;
 315          default:
 316             /* num_samples should already have been quantized to 0, 1, 4, or
 317              * 8.
 318              */
 319             assert(false);
 320          }
 321       } else {
 322          /* Non-interleaved */
 323          depth0 *= num_samples;
 324       }
 325    }
 326
 327    /* array_spacing_lod0 is only used for non-IMS MSAA surfaces.  TODO: can we
 328     * use it elsewhere?
 329     */
 330    switch (mt->msaa_layout) {
 331    case INTEL_MSAA_LAYOUT_NONE:
 332    case INTEL_MSAA_LAYOUT_IMS:
 333       mt->array_spacing_lod0 = false;
 334       break;
 335    case INTEL_MSAA_LAYOUT_UMS:
 336    case INTEL_MSAA_LAYOUT_CMS:
 337       mt->array_spacing_lod0 = true;
 338       break;
 339    }
 340
 341    if (target == GL_TEXTURE_CUBE_MAP) {
 342       assert(depth0 == 1);
 343       depth0 = 6;
 344    }
 345
 346    mt->physical_width0 = width0;
 347    mt->physical_height0 = height0;
 348    mt->physical_depth0 = depth0;
 349
 350    if (!for_bo &&
 351        _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
 352        (brw->must_use_separate_stencil ||
 353         (brw->has_separate_stencil && brw_is_hiz_depth_format(brw, format)))) {
 354       mt->stencil_mt = intel_miptree_create(brw,
 355                                             mt->target,
 356                                             MESA_FORMAT_S8,
 357                                             mt->first_level,
 358                                             mt->last_level,
 359                                             mt->logical_width0,
 360                                             mt->logical_height0,
 361                                             mt->logical_depth0,
 362                                             true,
 363                                             num_samples,
 364                                             INTEL_MIPTREE_TILING_ANY);
 365       if (!mt->stencil_mt) {
 366          intel_miptree_release(&mt);
 367          return NULL;
 368       }
 369
 370       /* Fix up the Z miptree format for how we're splitting out separate
 371        * stencil.  Gen7 expects there to be no stencil bits in its depth buffer.
 372        */
 373       if (mt->format == MESA_FORMAT_S8_Z24) {
 374          mt->format = MESA_FORMAT_X8_Z24;
 375       } else if (mt->format == MESA_FORMAT_Z32_FLOAT_X24S8) {
 376          mt->format = MESA_FORMAT_Z32_FLOAT;
 377          mt->cpp = 4;
 378       } else {
 379          _mesa_problem(NULL, "Unknown format %s in separate stencil mt\n",
 380                        _mesa_get_format_name(mt->format));
 381       }
 382    }
 383
 384    brw_miptree_layout(brw, mt);
 385
 386    return mt;
 387 }
 388
 389 /**
 390  * \brief Helper function for intel_miptree_create().
 391  */
 392 static uint32_t
 393 intel_miptree_choose_tiling(struct brw_context *brw,
 394                             gl_format format,
 395                             uint32_t width0,
 396                             uint32_t num_samples,
 397                             enum intel_miptree_tiling_mode requested,
 398                             struct intel_mipmap_tree *mt)
 399 {
 400    if (format == MESA_FORMAT_S8) {
 401       /* The stencil buffer is W tiled. However, we request from the kernel a
 402        * non-tiled buffer because the GTT is incapable of W fencing.
 403        */
 404       return I915_TILING_NONE;
 405    }
 406
 407    /* Some usages may want only one type of tiling, like depth miptrees (Y
 408     * tiled), or temporary BOs for uploading data once (linear).
 409     */
 410    switch (requested) {
 411    case INTEL_MIPTREE_TILING_ANY:
 412       break;
 413    case INTEL_MIPTREE_TILING_Y:
 414       return I915_TILING_Y;
 415    case INTEL_MIPTREE_TILING_NONE:
 416       return I915_TILING_NONE;
 417    }
 418
 419    if (num_samples > 1) {
 420       /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
 421        * Surface"):
 422        *
 423        *   [DevSNB+]: For multi-sample render targets, this field must be
 424        *   1. MSRTs can only be tiled.
 425        *
 426        * Our usual reason for preferring X tiling (fast blits using the
 427        * blitting engine) doesn't apply to MSAA, since we'll generally be
 428        * downsampling or upsampling when blitting between the MSAA buffer
 429        * and another buffer, and the blitting engine doesn't support that.
 430        * So use Y tiling, since it makes better use of the cache.
 431        */
 432       return I915_TILING_Y;
 433    }
 434
 435    GLenum base_format = _mesa_get_format_base_format(format);
 436    if (base_format == GL_DEPTH_COMPONENT ||
 437        base_format == GL_DEPTH_STENCIL_EXT)
 438       return I915_TILING_Y;
 439
 440    int minimum_pitch = mt->total_width * mt->cpp;
 441
 442    /* If the width is much smaller than a tile, don't bother tiling. */
 443    if (minimum_pitch < 64)
 444       return I915_TILING_NONE;
 445
 446    if (ALIGN(minimum_pitch, 512) >= 32768) {
 447       perf_debug("%dx%d miptree too large to blit, falling back to untiled",
 448                  mt->total_width, mt->total_height);
 449       return I915_TILING_NONE;
 450    }
 451
 452    /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */
 453    if (brw->gen < 6)
 454       return I915_TILING_X;
 455
 456    /* From the Sandybridge PRM, Volume 1, Part 2, page 32:
 457     * "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either TileX
 458     *  or Linear."
 459     * 128 bits per pixel translates to 16 bytes per pixel.  This is necessary
 460     * all the way back to 965, but is explicitly permitted on Gen7.
 461     */
 462    if (brw->gen != 7 && mt->cpp >= 16)
 463       return I915_TILING_X;
 464
 465    /* From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most
 466     * messages), on p64, under the heading "Surface Vertical Alignment":
 467     *
 468     *     This field must be set to VALIGN_4 for all tiled Y Render Target
 469     *     surfaces.
 470     *
 471     * So if the surface is renderable and uses a vertical alignment of 2,
 472     * force it to be X tiled.  This is somewhat conservative (it's possible
 473     * that the client won't ever render to this surface), but it's difficult
 474     * to know that ahead of time.  And besides, since we use a vertical
 475     * alignment of 4 as often as we can, this shouldn't happen very often.
 476     */
 477    if (brw->gen == 7 && mt->align_h == 2 &&
 478        brw->format_supported_as_render_target[format]) {
 479       return I915_TILING_X;
 480    }
 481
 482    return I915_TILING_Y | I915_TILING_X;
 483 }
 484
 485 struct intel_mipmap_tree *
 486 intel_miptree_create(struct brw_context *brw,
 487                      GLenum target,
 488                      gl_format format,
 489                      GLuint first_level,
 490                      GLuint last_level,
 491                      GLuint width0,
 492                      GLuint height0,
 493                      GLuint depth0,
 494                      bool expect_accelerated_upload,
 495                      GLuint num_samples,
 496                      enum intel_miptree_tiling_mode requested_tiling)
 497 {
 498    struct intel_mipmap_tree *mt;
 499    gl_format tex_format = format;
 500    gl_format etc_format = MESA_FORMAT_NONE;
 501    GLuint total_width, total_height;
 502
 503    if (!brw->is_baytrail) {
 504       switch (format) {
 505       case MESA_FORMAT_ETC1_RGB8:
 506          format = MESA_FORMAT_RGBX8888_REV;
 507          break;
 508       case MESA_FORMAT_ETC2_RGB8:
 509          format = MESA_FORMAT_RGBX8888_REV;
 510          break;
 511       case MESA_FORMAT_ETC2_SRGB8:
 512       case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
 513       case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
 514          format = MESA_FORMAT_SARGB8;
 515          break;
 516       case MESA_FORMAT_ETC2_RGBA8_EAC:
 517       case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
 518          format = MESA_FORMAT_RGBA8888_REV;
 519          break;
 520       case MESA_FORMAT_ETC2_R11_EAC:
 521          format = MESA_FORMAT_R16;
 522          break;
 523       case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
 524          format = MESA_FORMAT_SIGNED_R16;
 525          break;
 526       case MESA_FORMAT_ETC2_RG11_EAC:
 527          format = MESA_FORMAT_GR1616;
 528          break;
 529       case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
 530          format = MESA_FORMAT_SIGNED_GR1616;
 531          break;
 532       default:
 533          /* Non ETC1 / ETC2 format */
 534          break;
 535       }
 536    }
 537
 538    etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
 539
 540    mt = intel_miptree_create_layout(brw, target, format,
 541                                       first_level, last_level, width0,
 542                                       height0, depth0,
 543                                       false, num_samples);
 544    /*
 545     * pitch == 0 || height == 0  indicates the null texture
 546     */
 547    if (!mt || !mt->total_width || !mt->total_height) {
 548       intel_miptree_release(&mt);
 549       return NULL;
 550    }
 551
 552    total_width = mt->total_width;
 553    total_height = mt->total_height;
 554
 555    if (format == MESA_FORMAT_S8) {
 556       /* Align to size of W tile, 64x64. */
 557       total_width = ALIGN(total_width, 64);
 558       total_height = ALIGN(total_height, 64);
 559    }
 560
 561    uint32_t tiling = intel_miptree_choose_tiling(brw, format, width0,
 562                                                  num_samples, requested_tiling,
 563                                                  mt);
 564    bool y_or_x = tiling == (I915_TILING_Y | I915_TILING_X);
 565
 566    mt->etc_format = etc_format;
 567    mt->region = intel_region_alloc(brw->intelScreen,
 568                                    y_or_x ? I915_TILING_Y : tiling,
 569                                    mt->cpp,
 570                                    total_width,
 571                                    total_height,
 572                                    expect_accelerated_upload);
 573
 574    /* If the region is too large to fit in the aperture, we need to use the
 575     * BLT engine to support it.  The BLT paths can't currently handle Y-tiling,
 576     * so we need to fall back to X.
 577     */
 578    if (y_or_x && mt->region->bo->size >= brw->max_gtt_map_object_size) {
 579       perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n",
 580                  mt->total_width, mt->total_height);
 581       intel_region_release(&mt->region);
 582
 583       mt->region = intel_region_alloc(brw->intelScreen,
 584                                       I915_TILING_X,
 585                                       mt->cpp,
 586                                       total_width,
 587                                       total_height,
 588                                       expect_accelerated_upload);
 589    }
 590
 591    mt->offset = 0;
 592
 593    if (!mt->region) {
 594        intel_miptree_release(&mt);
 595        return NULL;
 596    }
 597
 598
 599    if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
 600       if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) {
 601          intel_miptree_release(&mt);
 602          return NULL;
 603       }
 604    }
 605
 606    /* If this miptree is capable of supporting fast color clears, set
 607     * fast_clear_state appropriately to ensure that fast clears will occur.
 608     * Allocation of the MCS miptree will be deferred until the first fast
 609     * clear actually occurs.
 610     */
 611    if (intel_is_non_msrt_mcs_buffer_supported(brw, mt))
 612       mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 613
 614    return mt;
 615 }
 616
 617 struct intel_mipmap_tree *
 618 intel_miptree_create_for_bo(struct brw_context *brw,
 619                             drm_intel_bo *bo,
 620                             gl_format format,
 621                             uint32_t offset,
 622                             uint32_t width,
 623                             uint32_t height,
 624                             int pitch,
 625                             uint32_t tiling)
 626 {
 627    struct intel_mipmap_tree *mt;
 628
 629    struct intel_region *region = calloc(1, sizeof(*region));
 630    if (!region)
 631       return NULL;
 632
 633    /* Nothing will be able to use this miptree with the BO if the offset isn't
 634     * aligned.
 635     */
 636    if (tiling != I915_TILING_NONE)
 637       assert(offset % 4096 == 0);
 638
 639    /* miptrees can't handle negative pitch.  If you need flipping of images,
 640     * that's outside of the scope of the mt.
 641     */
 642    assert(pitch >= 0);
 643
 644    mt = intel_miptree_create_layout(brw, GL_TEXTURE_2D, format,
 645                                     0, 0,
 646                                     width, height, 1,
 647                                     true, 0 /* num_samples */);
 648    if (!mt) {
 649       free(region);
 650       return mt;
 651    }
 652
 653    region->cpp = mt->cpp;
 654    region->width = width;
 655    region->height = height;
 656    region->pitch = pitch;
 657    region->refcount = 1;
 658    drm_intel_bo_reference(bo);
 659    region->bo = bo;
 660    region->tiling = tiling;
 661
 662    mt->region = region;
 663    mt->offset = offset;
 664
 665    return mt;
 666 }
 667
 668
 669 /**
 670  * For a singlesample DRI2 buffer, this simply wraps the given region with a miptree.
 671  *
 672  * For a multisample DRI2 buffer, this wraps the given region with
 673  * a singlesample miptree, then creates a multisample miptree into which the
 674  * singlesample miptree is embedded as a child.
 675  */
 676 struct intel_mipmap_tree*
 677 intel_miptree_create_for_dri2_buffer(struct brw_context *brw,
 678                                      unsigned dri_attachment,
 679                                      gl_format format,
 680                                      uint32_t num_samples,
 681                                      struct intel_region *region)
 682 {
 683    struct intel_mipmap_tree *singlesample_mt = NULL;
 684    struct intel_mipmap_tree *multisample_mt = NULL;
 685
 686    /* Only the front and back buffers, which are color buffers, are shared
 687     * through DRI2.
 688     */
 689    assert(dri_attachment == __DRI_BUFFER_BACK_LEFT ||
 690           dri_attachment == __DRI_BUFFER_FRONT_LEFT ||
 691           dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT);
 692    assert(_mesa_get_format_base_format(format) == GL_RGB ||
 693           _mesa_get_format_base_format(format) == GL_RGBA);
 694
 695    singlesample_mt = intel_miptree_create_for_bo(brw,
 696                                                  region->bo,
 697                                                  format,
 698                                                  0,
 699                                                  region->width,
 700                                                  region->height,
 701                                                  region->pitch,
 702                                                  region->tiling);
 703    if (!singlesample_mt)
 704       return NULL;
 705    singlesample_mt->region->name = region->name;
 706
 707    /* If this miptree is capable of supporting fast color clears, set
 708     * fast_clear_state appropriately to ensure that fast clears will occur.
 709     * Allocation of the MCS miptree will be deferred until the first fast
 710     * clear actually occurs.
 711     */
 712    if (intel_is_non_msrt_mcs_buffer_supported(brw, singlesample_mt))
 713       singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 714
 715    if (num_samples == 0)
 716       return singlesample_mt;
 717
 718    multisample_mt = intel_miptree_create_for_renderbuffer(brw,
 719                                                           format,
 720                                                           region->width,
 721                                                           region->height,
 722                                                           num_samples);
 723    if (!multisample_mt) {
 724       intel_miptree_release(&singlesample_mt);
 725       return NULL;
 726    }
 727
 728    multisample_mt->singlesample_mt = singlesample_mt;
 729    multisample_mt->need_downsample = false;
 730
 731    if (brw->is_front_buffer_rendering &&
 732        (dri_attachment == __DRI_BUFFER_FRONT_LEFT ||
 733         dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT)) {
 734       intel_miptree_upsample(brw, multisample_mt);
 735    }
 736
 737    return multisample_mt;
 738 }
 739
 740 /**
 741  * For a singlesample image buffer, this simply wraps the given region with a miptree.
 742  *
 743  * For a multisample image buffer, this wraps the given region with
 744  * a singlesample miptree, then creates a multisample miptree into which the
 745  * singlesample miptree is embedded as a child.
 746  */
 747 struct intel_mipmap_tree*
 748 intel_miptree_create_for_image_buffer(struct brw_context *intel,
 749                                       enum __DRIimageBufferMask buffer_type,
 750                                       gl_format format,
 751                                       uint32_t num_samples,
 752                                       struct intel_region *region)
 753 {
 754    struct intel_mipmap_tree *singlesample_mt = NULL;
 755    struct intel_mipmap_tree *multisample_mt = NULL;
 756
 757    /* Only the front and back buffers, which are color buffers, are allocated
 758     * through the image loader.
 759     */
 760    assert(_mesa_get_format_base_format(format) == GL_RGB ||
 761           _mesa_get_format_base_format(format) == GL_RGBA);
 762
 763    singlesample_mt = intel_miptree_create_for_bo(intel,
 764                                                  region->bo,
 765                                                  format,
 766                                                  0,
 767                                                  region->width,
 768                                                  region->height,
 769                                                  region->pitch,
 770                                                  region->tiling);
 771    if (!singlesample_mt)
 772       return NULL;
 773
 774    /* If this miptree is capable of supporting fast color clears, set
 775     * mcs_state appropriately to ensure that fast clears will occur.
 776     * Allocation of the MCS miptree will be deferred until the first fast
 777     * clear actually occurs.
 778     */
 779    if (intel_is_non_msrt_mcs_buffer_supported(intel, singlesample_mt))
 780       singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 781
 782    if (num_samples == 0)
 783       return singlesample_mt;
 784
 785    multisample_mt = intel_miptree_create_for_renderbuffer(intel,
 786                                                           format,
 787                                                           region->width,
 788                                                           region->height,
 789                                                           num_samples);
 790    if (!multisample_mt) {
 791       intel_miptree_release(&singlesample_mt);
 792       return NULL;
 793    }
 794
 795    multisample_mt->singlesample_mt = singlesample_mt;
 796    multisample_mt->need_downsample = false;
 797
 798    if (intel->is_front_buffer_rendering && buffer_type == __DRI_IMAGE_BUFFER_FRONT) {
 799       intel_miptree_upsample(intel, multisample_mt);
 800    }
 801
 802    return multisample_mt;
 803 }
 804
 805 struct intel_mipmap_tree*
 806 intel_miptree_create_for_renderbuffer(struct brw_context *brw,
 807                                       gl_format format,
 808                                       uint32_t width,
 809                                       uint32_t height,
 810                                       uint32_t num_samples)
 811 {
 812    struct intel_mipmap_tree *mt;
 813    uint32_t depth = 1;
 814    bool ok;
 815
 816    mt = intel_miptree_create(brw, GL_TEXTURE_2D, format, 0, 0,
 817                              width, height, depth, true, num_samples,
 818                              INTEL_MIPTREE_TILING_ANY);
 819    if (!mt)
 820       goto fail;
 821
 822    if (brw_is_hiz_depth_format(brw, format)) {
 823       ok = intel_miptree_alloc_hiz(brw, mt);
 824       if (!ok)
 825          goto fail;
 826    }
 827
 828    return mt;
 829
 830 fail:
 831    intel_miptree_release(&mt);
 832    return NULL;
 833 }
 834
 835 void
 836 intel_miptree_reference(struct intel_mipmap_tree **dst,
 837                         struct intel_mipmap_tree *src)
 838 {
 839    if (*dst == src)
 840       return;
 841
 842    intel_miptree_release(dst);
 843
 844    if (src) {
 845       src->refcount++;
 846       DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount);
 847    }
 848
 849    *dst = src;
 850 }
 851
 852
 853 void
 854 intel_miptree_release(struct intel_mipmap_tree **mt)
 855 {
 856    if (!*mt)
 857       return;
 858
 859    DBG("%s %p refcount will be %d\n", __FUNCTION__, *mt, (*mt)->refcount - 1);
 860    if (--(*mt)->refcount <= 0) {
 861       GLuint i;
 862
 863       DBG("%s deleting %p\n", __FUNCTION__, *mt);
 864
 865       intel_region_release(&((*mt)->region));
 866       intel_miptree_release(&(*mt)->stencil_mt);
 867       intel_miptree_release(&(*mt)->hiz_mt);
 868       intel_miptree_release(&(*mt)->mcs_mt);
 869       intel_miptree_release(&(*mt)->singlesample_mt);
 870       intel_resolve_map_clear(&(*mt)->hiz_map);
 871
 872       for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
 873          free((*mt)->level[i].slice);
 874       }
 875
 876       free(*mt);
 877    }
 878    *mt = NULL;
 879 }
 880
 881 void
 882 intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
 883                                        int *width, int *height, int *depth)
 884 {
 885    switch (image->TexObject->Target) {
 886    case GL_TEXTURE_1D_ARRAY:
 887       *width = image->Width;
 888       *height = 1;
 889       *depth = image->Height;
 890       break;
 891    default:
 892       *width = image->Width;
 893       *height = image->Height;
 894       *depth = image->Depth;
 895       break;
 896    }
 897 }
 898
 899 /**
 900  * Can the image be pulled into a unified mipmap tree?  This mirrors
 901  * the completeness test in a lot of ways.
 902  *
 903  * Not sure whether I want to pass gl_texture_image here.
 904  */
 905 bool
 906 intel_miptree_match_image(struct intel_mipmap_tree *mt,
 907                           struct gl_texture_image *image)
 908 {
 909    struct intel_texture_image *intelImage = intel_texture_image(image);
 910    GLuint level = intelImage->base.Base.Level;
 911    int width, height, depth;
 912
 913    /* glTexImage* choose the texture object based on the target passed in, and
 914     * objects can't change targets over their lifetimes, so this should be
 915     * true.
 916     */
 917    assert(target_to_target(image->TexObject->Target) == mt->target);
 918
 919    gl_format mt_format = mt->format;
 920    if (mt->format == MESA_FORMAT_X8_Z24 && mt->stencil_mt)
 921       mt_format = MESA_FORMAT_S8_Z24;
 922    if (mt->format == MESA_FORMAT_Z32_FLOAT && mt->stencil_mt)
 923       mt_format = MESA_FORMAT_Z32_FLOAT_X24S8;
 924    if (mt->etc_format != MESA_FORMAT_NONE)
 925       mt_format = mt->etc_format;
 926
 927    if (image->TexFormat != mt_format)
 928       return false;
 929
 930    intel_miptree_get_dimensions_for_image(image, &width, &height, &depth);
 931
 932    if (mt->target == GL_TEXTURE_CUBE_MAP)
 933       depth = 6;
 934
 935    /* Test image dimensions against the base level image adjusted for
 936     * minification.  This will also catch images not present in the
 937     * tree, changed targets, etc.
 938     */
 939    if (mt->target == GL_TEXTURE_2D_MULTISAMPLE ||
 940          mt->target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
 941       /* nonzero level here is always bogus */
 942       assert(level == 0);
 943
 944       if (width != mt->logical_width0 ||
 945             height != mt->logical_height0 ||
 946             depth != mt->logical_depth0) {
 947          return false;
 948       }
 949    }
 950    else {
 951       /* all normal textures, renderbuffers, etc */
 952       if (width != mt->level[level].width ||
 953           height != mt->level[level].height ||
 954           depth != mt->level[level].depth) {
 955          return false;
 956       }
 957    }
 958
 959    if (image->NumSamples != mt->num_samples)
 960       return false;
 961
 962    return true;
 963 }
 964
 965
 966 void
 967 intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
 968                              GLuint level,
 969                              GLuint x, GLuint y,
 970                              GLuint w, GLuint h, GLuint d)
 971 {
 972    mt->level[level].width = w;
 973    mt->level[level].height = h;
 974    mt->level[level].depth = d;
 975    mt->level[level].level_x = x;
 976    mt->level[level].level_y = y;
 977
 978    DBG("%s level %d size: %d,%d,%d offset %d,%d\n", __FUNCTION__,
 979        level, w, h, d, x, y);
 980
 981    assert(mt->level[level].slice == NULL);
 982
 983    mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice));
 984    mt->level[level].slice[0].x_offset = mt->level[level].level_x;
 985    mt->level[level].slice[0].y_offset = mt->level[level].level_y;
 986 }
 987
 988
 989 void
 990 intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
 991                                GLuint level, GLuint img,
 992                                GLuint x, GLuint y)
 993 {
 994    if (img == 0 && level == 0)
 995       assert(x == 0 && y == 0);
 996
 997    assert(img < mt->level[level].depth);
 998
 999    mt->level[level].slice[img].x_offset = mt->level[level].level_x + x;
1000    mt->level[level].slice[img].y_offset = mt->level[level].level_y + y;
1001
1002    DBG("%s level %d img %d pos %d,%d\n",
1003        __FUNCTION__, level, img,
1004        mt->level[level].slice[img].x_offset,
1005        mt->level[level].slice[img].y_offset);
1006 }
1007
1008 void
1009 intel_miptree_get_image_offset(struct intel_mipmap_tree *mt,
1010                                GLuint level, GLuint slice,
1011                                GLuint *x, GLuint *y)
1012 {
1013    assert(slice < mt->level[level].depth);
1014
1015    *x = mt->level[level].slice[slice].x_offset;
1016    *y = mt->level[level].slice[slice].y_offset;
1017 }
1018
1019 /**
1020  * Rendering with tiled buffers requires that the base address of the buffer
1021  * be aligned to a page boundary.  For renderbuffers, and sometimes with
1022  * textures, we may want the surface to point at a texture image level that
1023  * isn't at a page boundary.
1024  *
1025  * This function returns an appropriately-aligned base offset
1026  * according to the tiling restrictions, plus any required x/y offset
1027  * from there.
1028  */
1029 uint32_t
1030 intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt,
1031                                GLuint level, GLuint slice,
1032                                uint32_t *tile_x,
1033                                uint32_t *tile_y)
1034 {
1035    struct intel_region *region = mt->region;
1036    uint32_t x, y;
1037    uint32_t mask_x, mask_y;
1038
1039    intel_region_get_tile_masks(region, &mask_x, &mask_y, false);
1040    intel_miptree_get_image_offset(mt, level, slice, &x, &y);
1041
1042    *tile_x = x & mask_x;
1043    *tile_y = y & mask_y;
1044
1045    return intel_region_get_aligned_offset(region, x & ~mask_x, y & ~mask_y,
1046                                           false);
1047 }
1048
1049 static void
1050 intel_miptree_copy_slice_sw(struct brw_context *brw,
1051                             struct intel_mipmap_tree *dst_mt,
1052                             struct intel_mipmap_tree *src_mt,
1053                             int level,
1054                             int slice,
1055                             int width,
1056                             int height)
1057 {
1058    void *src, *dst;
1059    int src_stride, dst_stride;
1060    int cpp = dst_mt->cpp;
1061
1062    intel_miptree_map(brw, src_mt,
1063                      level, slice,
1064                      0, 0,
1065                      width, height,
1066                      GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
1067                      &src, &src_stride);
1068
1069    intel_miptree_map(brw, dst_mt,
1070                      level, slice,
1071                      0, 0,
1072                      width, height,
1073                      GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
1074                      BRW_MAP_DIRECT_BIT,
1075                      &dst, &dst_stride);
1076
1077    DBG("sw blit %s mt %p %p/%d -> %s mt %p %p/%d (%dx%d)\n",
1078        _mesa_get_format_name(src_mt->format),
1079        src_mt, src, src_stride,
1080        _mesa_get_format_name(dst_mt->format),
1081        dst_mt, dst, dst_stride,
1082        width, height);
1083
1084    int row_size = cpp * width;
1085    if (src_stride == row_size &&
1086        dst_stride == row_size) {
1087       memcpy(dst, src, row_size * height);
1088    } else {
1089       for (int i = 0; i < height; i++) {
1090          memcpy(dst, src, row_size);
1091          dst += dst_stride;
1092          src += src_stride;
1093       }
1094    }
1095
1096    intel_miptree_unmap(brw, dst_mt, level, slice);
1097    intel_miptree_unmap(brw, src_mt, level, slice);
1098
1099    /* Don't forget to copy the stencil data over, too.  We could have skipped
1100     * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
1101     * shuffling the two data sources in/out of temporary storage instead of
1102     * the direct mapping we get this way.
1103     */
1104    if (dst_mt->stencil_mt) {
1105       assert(src_mt->stencil_mt);
1106       intel_miptree_copy_slice_sw(brw, dst_mt->stencil_mt, src_mt->stencil_mt,
1107                                   level, slice, width, height);
1108    }
1109 }
1110
1111 static void
1112 intel_miptree_copy_slice(struct brw_context *brw,
1113                          struct intel_mipmap_tree *dst_mt,
1114                          struct intel_mipmap_tree *src_mt,
1115                          int level,
1116                          int face,
1117                          int depth)
1118
1119 {
1120    gl_format format = src_mt->format;
1121    uint32_t width = src_mt->level[level].width;
1122    uint32_t height = src_mt->level[level].height;
1123    int slice;
1124
1125    if (face > 0)
1126       slice = face;
1127    else
1128       slice = depth;
1129
1130    assert(depth < src_mt->level[level].depth);
1131    assert(src_mt->format == dst_mt->format);
1132
1133    if (dst_mt->compressed) {
1134       height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h;
1135       width = ALIGN(width, dst_mt->align_w);
1136    }
1137
1138    /* If it's a packed depth/stencil buffer with separate stencil, the blit
1139     * below won't apply since we can't do the depth's Y tiling or the
1140     * stencil's W tiling in the blitter.
1141     */
1142    if (src_mt->stencil_mt) {
1143       intel_miptree_copy_slice_sw(brw,
1144                                   dst_mt, src_mt,
1145                                   level, slice,
1146                                   width, height);
1147       return;
1148    }
1149
1150    uint32_t dst_x, dst_y, src_x, src_y;
1151    intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y);
1152    intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y);
1153
1154    DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
1155        _mesa_get_format_name(src_mt->format),
1156        src_mt, src_x, src_y, src_mt->region->pitch,
1157        _mesa_get_format_name(dst_mt->format),
1158        dst_mt, dst_x, dst_y, dst_mt->region->pitch,
1159        width, height);
1160
1161    if (!intel_miptree_blit(brw,
1162                            src_mt, level, slice, 0, 0, false,
1163                            dst_mt, level, slice, 0, 0, false,
1164                            width, height, GL_COPY)) {
1165       perf_debug("miptree validate blit for %s failed\n",
1166                  _mesa_get_format_name(format));
1167
1168       intel_miptree_copy_slice_sw(brw, dst_mt, src_mt, level, slice,
1169                                   width, height);
1170    }
1171 }
1172
1173 /**
1174  * Copies the image's current data to the given miptree, and associates that
1175  * miptree with the image.
1176  *
1177  * If \c invalidate is true, then the actual image data does not need to be
1178  * copied, but the image still needs to be associated to the new miptree (this
1179  * is set to true if we're about to clear the image).
1180  */
1181 void
1182 intel_miptree_copy_teximage(struct brw_context *brw,
1183                             struct intel_texture_image *intelImage,
1184                             struct intel_mipmap_tree *dst_mt,
1185                             bool invalidate)
1186 {
1187    struct intel_mipmap_tree *src_mt = intelImage->mt;
1188    struct intel_texture_object *intel_obj =
1189       intel_texture_object(intelImage->base.Base.TexObject);
1190    int level = intelImage->base.Base.Level;
1191    int face = intelImage->base.Base.Face;
1192    GLuint depth = intelImage->base.Base.Depth;
1193
1194    if (!invalidate) {
1195       for (int slice = 0; slice < depth; slice++) {
1196          intel_miptree_copy_slice(brw, dst_mt, src_mt, level, face, slice);
1197       }
1198    }
1199
1200    intel_miptree_reference(&intelImage->mt, dst_mt);
1201    intel_obj->needs_validate = true;
1202 }
1203
1204 bool
1205 intel_miptree_alloc_mcs(struct brw_context *brw,
1206                         struct intel_mipmap_tree *mt,
1207                         GLuint num_samples)
1208 {
1209    assert(brw->gen >= 7); /* MCS only used on Gen7+ */
1210    assert(mt->mcs_mt == NULL);
1211
1212    /* Choose the correct format for the MCS buffer.  All that really matters
1213     * is that we allocate the right buffer size, since we'll always be
1214     * accessing this miptree using MCS-specific hardware mechanisms, which
1215     * infer the correct format based on num_samples.
1216     */
1217    gl_format format;
1218    switch (num_samples) {
1219    case 4:
1220       /* 8 bits/pixel are required for MCS data when using 4x MSAA (2 bits for
1221        * each sample).
1222        */
1223       format = MESA_FORMAT_R8;
1224       break;
1225    case 8:
1226       /* 32 bits/pixel are required for MCS data when using 8x MSAA (3 bits
1227        * for each sample, plus 8 padding bits).
1228        */
1229       format = MESA_FORMAT_R_UINT32;
1230       break;
1231    default:
1232       assert(!"Unrecognized sample count in intel_miptree_alloc_mcs");
1233       return false;
1234    };
1235
1236    /* From the Ivy Bridge PRM, Vol4 Part1 p76, "MCS Base Address":
1237     *
1238     *     "The MCS surface must be stored as Tile Y."
1239     */
1240    mt->mcs_mt = intel_miptree_create(brw,
1241                                      mt->target,
1242                                      format,
1243                                      mt->first_level,
1244                                      mt->last_level,
1245                                      mt->logical_width0,
1246                                      mt->logical_height0,
1247                                      mt->logical_depth0,
1248                                      true,
1249                                      0 /* num_samples */,
1250                                      INTEL_MIPTREE_TILING_Y);
1251
1252    /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
1253     *
1254     *     When MCS buffer is enabled and bound to MSRT, it is required that it
1255     *     is cleared prior to any rendering.
1256     *
1257     * Since we don't use the MCS buffer for any purpose other than rendering,
1258     * it makes sense to just clear it immediately upon allocation.
1259     *
1260     * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
1261     */
1262    void *data = intel_miptree_map_raw(brw, mt->mcs_mt);
1263    memset(data, 0xff, mt->mcs_mt->region->bo->size);
1264    intel_miptree_unmap_raw(brw, mt->mcs_mt);
1265    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
1266
1267    return mt->mcs_mt;
1268 }
1269
1270
1271 bool
1272 intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
1273                                  struct intel_mipmap_tree *mt)
1274 {
1275    assert(mt->mcs_mt == NULL);
1276
1277    /* The format of the MCS buffer is opaque to the driver; all that matters
1278     * is that we get its size and pitch right.  We'll pretend that the format
1279     * is R32.  Since an MCS tile covers 128 blocks horizontally, and a Y-tiled
1280     * R32 buffer is 32 pixels across, we'll need to scale the width down by
1281     * the block width and then a further factor of 4.  Since an MCS tile
1282     * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high,
1283     * we'll need to scale the height down by the block height and then a
1284     * further factor of 8.
1285     */
1286    const gl_format format = MESA_FORMAT_R_UINT32;
1287    unsigned block_width_px;
1288    unsigned block_height;
1289    intel_get_non_msrt_mcs_alignment(brw, mt, &block_width_px, &block_height);
1290    unsigned width_divisor = block_width_px * 4;
1291    unsigned height_divisor = block_height * 8;
1292    unsigned mcs_width =
1293       ALIGN(mt->logical_width0, width_divisor) / width_divisor;
1294    unsigned mcs_height =
1295       ALIGN(mt->logical_height0, height_divisor) / height_divisor;
1296    assert(mt->logical_depth0 == 1);
1297    mt->mcs_mt = intel_miptree_create(brw,
1298                                      mt->target,
1299                                      format,
1300                                      mt->first_level,
1301                                      mt->last_level,
1302                                      mcs_width,
1303                                      mcs_height,
1304                                      mt->logical_depth0,
1305                                      true,
1306                                      0 /* num_samples */,
1307                                      INTEL_MIPTREE_TILING_Y);
1308
1309    return mt->mcs_mt;
1310 }
1311
1312
1313 /**
1314  * Helper for intel_miptree_alloc_hiz() that sets
1315  * \c mt->level[level].slice[layer].has_hiz. Return true if and only if
1316  * \c has_hiz was set.
1317  */
1318 static bool
1319 intel_miptree_slice_enable_hiz(struct brw_context *brw,
1320                                struct intel_mipmap_tree *mt,
1321                                uint32_t level,
1322                                uint32_t layer)
1323 {
1324    assert(mt->hiz_mt);
1325
1326    if (brw->is_haswell) {
1327       const struct intel_mipmap_level *l = &mt->level[level];
1328
1329       /* Disable HiZ for LOD > 0 unless the width is 8 aligned
1330        * and the height is 4 aligned. This allows our HiZ support
1331        * to fulfill Haswell restrictions for HiZ ops. For LOD == 0,
1332        * we can grow the width & height to allow the HiZ op to
1333        * force the proper size alignments.
1334        */
1335       if (level > 0 && ((l->width & 7) || (l->height & 3))) {
1336          return false;
1337       }
1338    }
1339
1340    mt->level[level].slice[layer].has_hiz = true;
1341    return true;
1342 }
1343
1344
1345
1346 bool
1347 intel_miptree_alloc_hiz(struct brw_context *brw,
1348                         struct intel_mipmap_tree *mt)
1349 {
1350    assert(mt->hiz_mt == NULL);
1351    mt->hiz_mt = intel_miptree_create(brw,
1352                                      mt->target,
1353                                      mt->format,
1354                                      mt->first_level,
1355                                      mt->last_level,
1356                                      mt->logical_width0,
1357                                      mt->logical_height0,
1358                                      mt->logical_depth0,
1359                                      true,
1360                                      mt->num_samples,
1361                                      INTEL_MIPTREE_TILING_ANY);
1362
1363    if (!mt->hiz_mt)
1364       return false;
1365
1366    /* Mark that all slices need a HiZ resolve. */
1367    struct intel_resolve_map *head = &mt->hiz_map;
1368    for (int level = mt->first_level; level <= mt->last_level; ++level) {
1369       for (int layer = 0; layer < mt->level[level].depth; ++layer) {
1370          if (!intel_miptree_slice_enable_hiz(brw, mt, level, layer))
1371             continue;
1372
1373          head->next = malloc(sizeof(*head->next));
1374          head->next->prev = head;
1375          head->next->next = NULL;
1376          head = head->next;
1377
1378          head->level = level;
1379          head->layer = layer;
1380          head->need = GEN6_HIZ_OP_HIZ_RESOLVE;
1381       }
1382    }
1383
1384    return true;
1385 }
1386
1387 /**
1388  * Does the miptree slice have hiz enabled?
1389  */
1390 bool
1391 intel_miptree_slice_has_hiz(struct intel_mipmap_tree *mt,
1392                             uint32_t level,
1393                             uint32_t layer)
1394 {
1395    intel_miptree_check_level_layer(mt, level, layer);
1396    return mt->level[level].slice[layer].has_hiz;
1397 }
1398
1399 void
1400 intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,
1401                                           uint32_t level,
1402                                           uint32_t layer)
1403 {
1404    if (!intel_miptree_slice_has_hiz(mt, level, layer))
1405       return;
1406
1407    intel_resolve_map_set(&mt->hiz_map,
1408                          level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
1409 }
1410
1411
1412 void
1413 intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt,
1414                                             uint32_t level,
1415                                             uint32_t layer)
1416 {
1417    if (!intel_miptree_slice_has_hiz(mt, level, layer))
1418       return;
1419
1420    intel_resolve_map_set(&mt->hiz_map,
1421                          level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
1422 }
1423
1424 void
1425 intel_miptree_set_all_slices_need_depth_resolve(struct intel_mipmap_tree *mt,
1426                                                 uint32_t level)
1427 {
1428    uint32_t layer;
1429    uint32_t end_layer = mt->level[level].depth;
1430
1431    for (layer = 0; layer < end_layer; layer++) {
1432       intel_miptree_slice_set_needs_depth_resolve(mt, level, layer);
1433    }
1434 }
1435
1436 static bool
1437 intel_miptree_slice_resolve(struct brw_context *brw,
1438                             struct intel_mipmap_tree *mt,
1439                             uint32_t level,
1440                             uint32_t layer,
1441                             enum gen6_hiz_op need)
1442 {
1443    intel_miptree_check_level_layer(mt, level, layer);
1444
1445    struct intel_resolve_map *item =
1446          intel_resolve_map_get(&mt->hiz_map, level, layer);
1447
1448    if (!item || item->need != need)
1449       return false;
1450
1451    intel_hiz_exec(brw, mt, level, layer, need);
1452    intel_resolve_map_remove(item);
1453    return true;
1454 }
1455
1456 bool
1457 intel_miptree_slice_resolve_hiz(struct brw_context *brw,
1458                                 struct intel_mipmap_tree *mt,
1459                                 uint32_t level,
1460                                 uint32_t layer)
1461 {
1462    return intel_miptree_slice_resolve(brw, mt, level, layer,
1463                                       GEN6_HIZ_OP_HIZ_RESOLVE);
1464 }
1465
1466 bool
1467 intel_miptree_slice_resolve_depth(struct brw_context *brw,
1468                                   struct intel_mipmap_tree *mt,
1469                                   uint32_t level,
1470                                   uint32_t layer)
1471 {
1472    return intel_miptree_slice_resolve(brw, mt, level, layer,
1473                                       GEN6_HIZ_OP_DEPTH_RESOLVE);
1474 }
1475
1476 static bool
1477 intel_miptree_all_slices_resolve(struct brw_context *brw,
1478                                  struct intel_mipmap_tree *mt,
1479                                  enum gen6_hiz_op need)
1480 {
1481    bool did_resolve = false;
1482    struct intel_resolve_map *i, *next;
1483
1484    for (i = mt->hiz_map.next; i; i = next) {
1485       next = i->next;
1486       if (i->need != need)
1487          continue;
1488
1489       intel_hiz_exec(brw, mt, i->level, i->layer, need);
1490       intel_resolve_map_remove(i);
1491       did_resolve = true;
1492    }
1493
1494    return did_resolve;
1495 }
1496
1497 bool
1498 intel_miptree_all_slices_resolve_hiz(struct brw_context *brw,
1499                                      struct intel_mipmap_tree *mt)
1500 {
1501    return intel_miptree_all_slices_resolve(brw, mt,
1502                                            GEN6_HIZ_OP_HIZ_RESOLVE);
1503 }
1504
1505 bool
1506 intel_miptree_all_slices_resolve_depth(struct brw_context *brw,
1507                                        struct intel_mipmap_tree *mt)
1508 {
1509    return intel_miptree_all_slices_resolve(brw, mt,
1510                                            GEN6_HIZ_OP_DEPTH_RESOLVE);
1511 }
1512
1513
1514 void
1515 intel_miptree_resolve_color(struct brw_context *brw,
1516                             struct intel_mipmap_tree *mt)
1517 {
1518    switch (mt->fast_clear_state) {
1519    case INTEL_FAST_CLEAR_STATE_NO_MCS:
1520    case INTEL_FAST_CLEAR_STATE_RESOLVED:
1521       /* No resolve needed */
1522       break;
1523    case INTEL_FAST_CLEAR_STATE_UNRESOLVED:
1524    case INTEL_FAST_CLEAR_STATE_CLEAR:
1525       /* Fast color clear resolves only make sense for non-MSAA buffers. */
1526       if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE)
1527          brw_blorp_resolve_color(brw, mt);
1528       break;
1529    }
1530 }
1531
1532
1533 /**
1534  * Make it possible to share the region backing the given miptree with another
1535  * process or another miptree.
1536  *
1537  * Fast color clears are unsafe with shared buffers, so we need to resolve and
1538  * then discard the MCS buffer, if present.  We also set the fast_clear_state
1539  * to INTEL_FAST_CLEAR_STATE_NO_MCS to ensure that no MCS buffer gets
1540  * allocated in the future.
1541  */
1542 void
1543 intel_miptree_make_shareable(struct brw_context *brw,
1544                              struct intel_mipmap_tree *mt)
1545 {
1546    /* MCS buffers are also used for multisample buffers, but we can't resolve
1547     * away a multisample MCS buffer because it's an integral part of how the
1548     * pixel data is stored.  Fortunately this code path should never be
1549     * reached for multisample buffers.
1550     */
1551    assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE);
1552
1553    if (mt->mcs_mt) {
1554       intel_miptree_resolve_color(brw, mt);
1555       intel_miptree_release(&mt->mcs_mt);
1556       mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
1557    }
1558 }
1559
1560
/**
 * \brief Get pointer offset into stencil buffer.
 *
 * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
 * must decode the tile's layout in software.
 *
 * See
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
 *     Format.
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
 *
 * \param stride    scales the byte size of one row of tiles (64 * stride)
 * \param x, y      coordinates of the byte to locate
 * \param swizzled  apply the bit-6 address swizzle
 *
 * Even though the returned offset is always positive, the return type is
 * signed due to
 *    commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
 *    mesa: Fix return type of  _mesa_get_format_bytes() (#37351)
 */
static intptr_t
intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
{
   /* A tile is 64 x 64 bytes, i.e. 4096 bytes. */
   const uint32_t tile_dim = 64;
   const uint32_t bytes_per_tile = 4096;
   const uint32_t bytes_per_tile_row = tile_dim * stride;

   /* Position of the byte relative to the base of its tile. */
   const uint32_t col = x % tile_dim;
   const uint32_t row = y % tile_dim;

   /* Start of the containing tile. */
   uint32_t offset = (y / tile_dim) * bytes_per_tile_row
                   + (x / tile_dim) * bytes_per_tile;

   /* Within the tile, the column and row bits are interleaved as
    * c5 c4 c3 | r5 r4 r3 | r2 c2 r1 c1 r0 c0  (most to least significant).
    */
   offset += (col >> 3) << 9;
   offset += (row >> 3) << 6;
   offset += ((row >> 2) & 1) << 5;
   offset += ((col >> 2) & 1) << 4;
   offset += ((row >> 1) & 1) << 3;
   offset += ((col >> 1) & 1) << 2;
   offset += (row & 1) << 1;
   offset += col & 1;

   uintptr_t u = offset;

   /* adjust for bit6 swizzling */
   if (swizzled && ((col >> 3) & 1)) {
      if ((row >> 3) & 1)
         u -= 64;
      else
         u += 64;
   }

   return u;
}
1616
1617 static void
1618 intel_miptree_updownsample(struct brw_context *brw,
1619                            struct intel_mipmap_tree *src,
1620                            struct intel_mipmap_tree *dst,
1621                            unsigned width,
1622                            unsigned height)
1623 {
1624    int src_x0 = 0;
1625    int src_y0 = 0;
1626    int dst_x0 = 0;
1627    int dst_y0 = 0;
1628
1629    brw_blorp_blit_miptrees(brw,
1630                            src, 0 /* level */, 0 /* layer */,
1631                            dst, 0 /* level */, 0 /* layer */,
1632                            src_x0, src_y0,
1633                            width, height,
1634                            dst_x0, dst_y0,
1635                            width, height,
1636                            GL_NEAREST, false, false /*mirror x, y*/);
1637
1638    if (src->stencil_mt) {
1639       brw_blorp_blit_miptrees(brw,
1640                               src->stencil_mt, 0 /* level */, 0 /* layer */,
1641                               dst->stencil_mt, 0 /* level */, 0 /* layer */,
1642                               src_x0, src_y0,
1643                               width, height,
1644                               dst_x0, dst_y0,
1645                               width, height,
1646                               GL_NEAREST, false, false /*mirror x, y*/);
1647    }
1648 }
1649
/**
 * Debug check that \c mt is "flat": a GL_TEXTURE_2D miptree whose first and
 * last levels are both 0.
 *
 * Consists solely of assert() calls.
 */
static void
assert_is_flat(struct intel_mipmap_tree *mt)
{
   assert(mt->target == GL_TEXTURE_2D);
   assert(mt->first_level == 0);
   assert(mt->last_level == 0);
}
1657
1658 /**
1659  * \brief Downsample from mt to mt->singlesample_mt.
1660  *
1661  * If the miptree needs no downsample, then skip.
1662  */
1663 void
1664 intel_miptree_downsample(struct brw_context *brw,
1665                          struct intel_mipmap_tree *mt)
1666 {
1667    /* Only flat, renderbuffer-like miptrees are supported. */
1668    assert_is_flat(mt);
1669
1670    if (!mt->need_downsample)
1671       return;
1672    intel_miptree_updownsample(brw,
1673                               mt, mt->singlesample_mt,
1674                               mt->logical_width0,
1675                               mt->logical_height0);
1676    mt->need_downsample = false;
1677 }
1678
1679 /**
1680  * \brief Upsample from mt->singlesample_mt to mt.
1681  *
1682  * The upsample is done unconditionally.
1683  */
1684 void
1685 intel_miptree_upsample(struct brw_context *brw,
1686                        struct intel_mipmap_tree *mt)
1687 {
1688    /* Only flat, renderbuffer-like miptrees are supported. */
1689    assert_is_flat(mt);
1690    assert(!mt->need_downsample);
1691
1692    intel_miptree_updownsample(brw,
1693                               mt->singlesample_mt, mt,
1694                               mt->logical_width0,
1695                               mt->logical_height0);
1696 }
1697
1698 void *
1699 intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt)
1700 {
1701    /* CPU accesses to color buffers don't understand fast color clears, so
1702     * resolve any pending fast color clears before we map.
1703     */
1704    intel_miptree_resolve_color(brw, mt);
1705
1706    drm_intel_bo *bo = mt->region->bo;
1707
1708    if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
1709       if (drm_intel_bo_busy(bo)) {
1710          perf_debug("Mapping a busy miptree, causing a stall on the GPU.\n");
1711       }
1712    }
1713
1714    intel_batchbuffer_flush(brw);
1715
1716    if (mt->region->tiling != I915_TILING_NONE)
1717       drm_intel_gem_bo_map_gtt(bo);
1718    else
1719       drm_intel_bo_map(bo, true);
1720
1721    return bo->virtual;
1722 }
1723
1724 void
1725 intel_miptree_unmap_raw(struct brw_context *brw,
1726                         struct intel_mipmap_tree *mt)
1727 {
1728    drm_intel_bo_unmap(mt->region->bo);
1729 }
1730
1731 static void
1732 intel_miptree_map_gtt(struct brw_context *brw,
1733                       struct intel_mipmap_tree *mt,
1734                       struct intel_miptree_map *map,
1735                       unsigned int level, unsigned int slice)
1736 {
1737    unsigned int bw, bh;
1738    void *base;
1739    unsigned int image_x, image_y;
1740    int x = map->x;
1741    int y = map->y;
1742
1743    /* For compressed formats, the stride is the number of bytes per
1744     * row of blocks.  intel_miptree_get_image_offset() already does
1745     * the divide.
1746     */
1747    _mesa_get_format_block_size(mt->format, &bw, &bh);
1748    assert(y % bh == 0);
1749    y /= bh;
1750
1751    base = intel_miptree_map_raw(brw, mt) + mt->offset;
1752
1753    if (base == NULL)
1754       map->ptr = NULL;
1755    else {
1756       /* Note that in the case of cube maps, the caller must have passed the
1757        * slice number referencing the face.
1758       */
1759       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1760       x += image_x;
1761       y += image_y;
1762
1763       map->stride = mt->region->pitch;
1764       map->ptr = base + y * map->stride + x * mt->cpp;
1765    }
1766
1767    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1768        map->x, map->y, map->w, map->h,
1769        mt, _mesa_get_format_name(mt->format),
1770        x, y, map->ptr, map->stride);
1771 }
1772
/**
 * \brief Undo intel_miptree_map_gtt().
 *
 * A direct mapping has no temporary to copy back or free, so this only
 * unmaps the buffer object of \c mt; \c map, \c level and \c slice are
 * not used.
 */
static void
intel_miptree_unmap_gtt(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
                        struct intel_miptree_map *map,
                        unsigned int level,
                        unsigned int slice)
{
   intel_miptree_unmap_raw(brw, mt);
}
1782
1783 static void
1784 intel_miptree_map_blit(struct brw_context *brw,
1785                        struct intel_mipmap_tree *mt,
1786                        struct intel_miptree_map *map,
1787                        unsigned int level, unsigned int slice)
1788 {
1789    map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
1790                                   0, 0,
1791                                   map->w, map->h, 1,
1792                                   false, 0,
1793                                   INTEL_MIPTREE_TILING_NONE);
1794    if (!map->mt) {
1795       fprintf(stderr, "Failed to allocate blit temporary\n");
1796       goto fail;
1797    }
1798    map->stride = map->mt->region->pitch;
1799
1800    if (!intel_miptree_blit(brw,
1801                            mt, level, slice,
1802                            map->x, map->y, false,
1803                            map->mt, 0, 0,
1804                            0, 0, false,
1805                            map->w, map->h, GL_COPY)) {
1806       fprintf(stderr, "Failed to blit\n");
1807       goto fail;
1808    }
1809
1810    map->ptr = intel_miptree_map_raw(brw, map->mt);
1811
1812    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1813        map->x, map->y, map->w, map->h,
1814        mt, _mesa_get_format_name(mt->format),
1815        level, slice, map->ptr, map->stride);
1816
1817    return;
1818
1819 fail:
1820    intel_miptree_release(&map->mt);
1821    map->ptr = NULL;
1822    map->stride = 0;
1823 }
1824
1825 static void
1826 intel_miptree_unmap_blit(struct brw_context *brw,
1827                          struct intel_mipmap_tree *mt,
1828                          struct intel_miptree_map *map,
1829                          unsigned int level,
1830                          unsigned int slice)
1831 {
1832    struct gl_context *ctx = &brw->ctx;
1833
1834    intel_miptree_unmap_raw(brw, map->mt);
1835
1836    if (map->mode & GL_MAP_WRITE_BIT) {
1837       bool ok = intel_miptree_blit(brw,
1838                                    map->mt, 0, 0,
1839                                    0, 0, false,
1840                                    mt, level, slice,
1841                                    map->x, map->y, false,
1842                                    map->w, map->h, GL_COPY);
1843       WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
1844    }
1845
1846    intel_miptree_release(&map->mt);
1847 }
1848
1849 #ifdef __SSE4_1__
1850 /**
1851  * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
1852  */
1853 static void
1854 intel_miptree_map_movntdqa(struct brw_context *brw,
1855                            struct intel_mipmap_tree *mt,
1856                            struct intel_miptree_map *map,
1857                            unsigned int level, unsigned int slice)
1858 {
1859    assert(map->mode & GL_MAP_READ_BIT);
1860    assert(!(map->mode & GL_MAP_WRITE_BIT));
1861
1862    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1863        map->x, map->y, map->w, map->h,
1864        mt, _mesa_get_format_name(mt->format),
1865        level, slice, map->ptr, map->stride);
1866
1867    /* Map the original image */
1868    uint32_t image_x;
1869    uint32_t image_y;
1870    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1871    image_x += map->x;
1872    image_y += map->y;
1873
1874    void *src = intel_miptree_map_raw(brw, mt);
1875    if (!src)
1876       return;
1877    src += image_y * mt->region->pitch;
1878    src += image_x * mt->region->cpp;
1879
1880    /* Due to the pixel offsets for the particular image being mapped, our
1881     * src pointer may not be 16-byte aligned.  However, if the pitch is
1882     * divisible by 16, then the amount by which it's misaligned will remain
1883     * consistent from row to row.
1884     */
1885    assert((mt->region->pitch % 16) == 0);
1886    const int misalignment = ((uintptr_t) src) & 15;
1887
1888    /* Create an untiled temporary buffer for the mapping. */
1889    const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w);
1890
1891    map->stride = ALIGN(misalignment + width_bytes, 16);
1892
1893    map->buffer = malloc(map->stride * map->h);
1894    /* Offset the destination so it has the same misalignment as src. */
1895    map->ptr = map->buffer + misalignment;
1896
1897    assert((((uintptr_t) map->ptr) & 15) == misalignment);
1898
1899    for (uint32_t y = 0; y < map->h; y++) {
1900       void *dst_ptr = map->ptr + y * map->stride;
1901       void *src_ptr = src + y * mt->region->pitch;
1902
1903       _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
1904    }
1905
1906    intel_miptree_unmap_raw(brw, mt);
1907 }
1908
1909 static void
1910 intel_miptree_unmap_movntdqa(struct brw_context *brw,
1911                              struct intel_mipmap_tree *mt,
1912                              struct intel_miptree_map *map,
1913                              unsigned int level,
1914                              unsigned int slice)
1915 {
1916    free(map->buffer);
1917    map->buffer = NULL;
1918    map->ptr = NULL;
1919 }
1920 #endif
1921
1922 static void
1923 intel_miptree_map_s8(struct brw_context *brw,
1924                      struct intel_mipmap_tree *mt,
1925                      struct intel_miptree_map *map,
1926                      unsigned int level, unsigned int slice)
1927 {
1928    map->stride = map->w;
1929    map->buffer = map->ptr = malloc(map->stride * map->h);
1930    if (!map->buffer)
1931       return;
1932
1933    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
1934     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
1935     * invalidate is set, since we'll be writing the whole rectangle from our
1936     * temporary buffer back out.
1937     */
1938    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
1939       uint8_t *untiled_s8_map = map->ptr;
1940       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
1941       unsigned int image_x, image_y;
1942
1943       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1944
1945       for (uint32_t y = 0; y < map->h; y++) {
1946          for (uint32_t x = 0; x < map->w; x++) {
1947             ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
1948                                                x + image_x + map->x,
1949                                                y + image_y + map->y,
1950                                                brw->has_swizzling);
1951             untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
1952          }
1953       }
1954
1955       intel_miptree_unmap_raw(brw, mt);
1956
1957       DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __FUNCTION__,
1958           map->x, map->y, map->w, map->h,
1959           mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
1960    } else {
1961       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
1962           map->x, map->y, map->w, map->h,
1963           mt, map->ptr, map->stride);
1964    }
1965 }
1966
1967 static void
1968 intel_miptree_unmap_s8(struct brw_context *brw,
1969                        struct intel_mipmap_tree *mt,
1970                        struct intel_miptree_map *map,
1971                        unsigned int level,
1972                        unsigned int slice)
1973 {
1974    if (map->mode & GL_MAP_WRITE_BIT) {
1975       unsigned int image_x, image_y;
1976       uint8_t *untiled_s8_map = map->ptr;
1977       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
1978
1979       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1980
1981       for (uint32_t y = 0; y < map->h; y++) {
1982          for (uint32_t x = 0; x < map->w; x++) {
1983             ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
1984                                                x + map->x,
1985                                                y + map->y,
1986                                                brw->has_swizzling);
1987             tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
1988          }
1989       }
1990
1991       intel_miptree_unmap_raw(brw, mt);
1992    }
1993
1994    free(map->buffer);
1995 }
1996
1997 static void
1998 intel_miptree_map_etc(struct brw_context *brw,
1999                       struct intel_mipmap_tree *mt,
2000                       struct intel_miptree_map *map,
2001                       unsigned int level,
2002                       unsigned int slice)
2003 {
2004    assert(mt->etc_format != MESA_FORMAT_NONE);
2005    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
2006       assert(mt->format == MESA_FORMAT_RGBX8888_REV);
2007    }
2008
2009    assert(map->mode & GL_MAP_WRITE_BIT);
2010    assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
2011
2012    map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
2013    map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
2014                                                 map->w, map->h, 1));
2015    map->ptr = map->buffer;
2016 }
2017
2018 static void
2019 intel_miptree_unmap_etc(struct brw_context *brw,
2020                         struct intel_mipmap_tree *mt,
2021                         struct intel_miptree_map *map,
2022                         unsigned int level,
2023                         unsigned int slice)
2024 {
2025    uint32_t image_x;
2026    uint32_t image_y;
2027    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2028
2029    image_x += map->x;
2030    image_y += map->y;
2031
2032    uint8_t *dst = intel_miptree_map_raw(brw, mt)
2033                 + image_y * mt->region->pitch
2034                 + image_x * mt->region->cpp;
2035
2036    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
2037       _mesa_etc1_unpack_rgba8888(dst, mt->region->pitch,
2038                                  map->ptr, map->stride,
2039                                  map->w, map->h);
2040    else
2041       _mesa_unpack_etc2_format(dst, mt->region->pitch,
2042                                map->ptr, map->stride,
2043                                map->w, map->h, mt->etc_format);
2044
2045    intel_miptree_unmap_raw(brw, mt);
2046    free(map->buffer);
2047 }
2048
2049 /**
2050  * Mapping function for packed depth/stencil miptrees backed by real separate
2051  * miptrees for depth and stencil.
2052  *
2053  * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
2054  * separate from the depth buffer.  Yet at the GL API level, we have to expose
2055  * packed depth/stencil textures and FBO attachments, and Mesa core expects to
2056  * be able to map that memory for texture storage and glReadPixels-type
2057  * operations.  We give Mesa core that access by mallocing a temporary and
2058  * copying the data between the actual backing store and the temporary.
2059  */
2060 static void
2061 intel_miptree_map_depthstencil(struct brw_context *brw,
2062                                struct intel_mipmap_tree *mt,
2063                                struct intel_miptree_map *map,
2064                                unsigned int level, unsigned int slice)
2065 {
2066    struct intel_mipmap_tree *z_mt = mt;
2067    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
2068    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT;
2069    int packed_bpp = map_z32f_x24s8 ? 8 : 4;
2070
2071    map->stride = map->w * packed_bpp;
2072    map->buffer = map->ptr = malloc(map->stride * map->h);
2073    if (!map->buffer)
2074       return;
2075
2076    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
2077     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
2078     * invalidate is set, since we'll be writing the whole rectangle from our
2079     * temporary buffer back out.
2080     */
2081    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
2082       uint32_t *packed_map = map->ptr;
2083       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
2084       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
2085       unsigned int s_image_x, s_image_y;
2086       unsigned int z_image_x, z_image_y;
2087
2088       intel_miptree_get_image_offset(s_mt, level, slice,
2089                                      &s_image_x, &s_image_y);
2090       intel_miptree_get_image_offset(z_mt, level, slice,
2091                                      &z_image_x, &z_image_y);
2092
2093       for (uint32_t y = 0; y < map->h; y++) {
2094          for (uint32_t x = 0; x < map->w; x++) {
2095             int map_x = map->x + x, map_y = map->y + y;
2096             ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
2097                                                  map_x + s_image_x,
2098                                                  map_y + s_image_y,
2099                                                  brw->has_swizzling);
2100             ptrdiff_t z_offset = ((map_y + z_image_y) *
2101                                   (z_mt->region->pitch / 4) +
2102                                   (map_x + z_image_x));
2103             uint8_t s = s_map[s_offset];
2104             uint32_t z = z_map[z_offset];
2105
2106             if (map_z32f_x24s8) {
2107                packed_map[(y * map->w + x) * 2 + 0] = z;
2108                packed_map[(y * map->w + x) * 2 + 1] = s;
2109             } else {
2110                packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
2111             }
2112          }
2113       }
2114
2115       intel_miptree_unmap_raw(brw, s_mt);
2116       intel_miptree_unmap_raw(brw, z_mt);
2117
2118       DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
2119           __FUNCTION__,
2120           map->x, map->y, map->w, map->h,
2121           z_mt, map->x + z_image_x, map->y + z_image_y,
2122           s_mt, map->x + s_image_x, map->y + s_image_y,
2123           map->ptr, map->stride);
2124    } else {
2125       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
2126           map->x, map->y, map->w, map->h,
2127           mt, map->ptr, map->stride);
2128    }
2129 }
2130
2131 static void
2132 intel_miptree_unmap_depthstencil(struct brw_context *brw,
2133                                  struct intel_mipmap_tree *mt,
2134                                  struct intel_miptree_map *map,
2135                                  unsigned int level,
2136                                  unsigned int slice)
2137 {
2138    struct intel_mipmap_tree *z_mt = mt;
2139    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
2140    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT;
2141
2142    if (map->mode & GL_MAP_WRITE_BIT) {
2143       uint32_t *packed_map = map->ptr;
2144       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
2145       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
2146       unsigned int s_image_x, s_image_y;
2147       unsigned int z_image_x, z_image_y;
2148
2149       intel_miptree_get_image_offset(s_mt, level, slice,
2150                                      &s_image_x, &s_image_y);
2151       intel_miptree_get_image_offset(z_mt, level, slice,
2152                                      &z_image_x, &z_image_y);
2153
2154       for (uint32_t y = 0; y < map->h; y++) {
2155          for (uint32_t x = 0; x < map->w; x++) {
2156             ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
2157                                                  x + s_image_x + map->x,
2158                                                  y + s_image_y + map->y,
2159                                                  brw->has_swizzling);
2160             ptrdiff_t z_offset = ((y + z_image_y) *
2161                                   (z_mt->region->pitch / 4) +
2162                                   (x + z_image_x));
2163
2164             if (map_z32f_x24s8) {
2165                z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
2166                s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
2167             } else {
2168                uint32_t packed = packed_map[y * map->w + x];
2169                s_map[s_offset] = packed >> 24;
2170                z_map[z_offset] = packed;
2171             }
2172          }
2173       }
2174
2175       intel_miptree_unmap_raw(brw, s_mt);
2176       intel_miptree_unmap_raw(brw, z_mt);
2177
2178       DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
2179           __FUNCTION__,
2180           map->x, map->y, map->w, map->h,
2181           z_mt, _mesa_get_format_name(z_mt->format),
2182           map->x + z_image_x, map->y + z_image_y,
2183           s_mt, map->x + s_image_x, map->y + s_image_y,
2184           map->ptr, map->stride);
2185    }
2186
2187    free(map->buffer);
2188 }
2189
2190 /**
2191  * Create and attach a map to the miptree at (level, slice). Return the
2192  * attached map.
2193  */
2194 static struct intel_miptree_map*
2195 intel_miptree_attach_map(struct intel_mipmap_tree *mt,
2196                          unsigned int level,
2197                          unsigned int slice,
2198                          unsigned int x,
2199                          unsigned int y,
2200                          unsigned int w,
2201                          unsigned int h,
2202                          GLbitfield mode)
2203 {
2204    struct intel_miptree_map *map = calloc(1, sizeof(*map));
2205
2206    if (!map)
2207       return NULL;
2208
2209    assert(mt->level[level].slice[slice].map == NULL);
2210    mt->level[level].slice[slice].map = map;
2211
2212    map->mode = mode;
2213    map->x = x;
2214    map->y = y;
2215    map->w = w;
2216    map->h = h;
2217
2218    return map;
2219 }
2220
2221 /**
2222  * Release the map at (level, slice).
2223  */
2224 static void
2225 intel_miptree_release_map(struct intel_mipmap_tree *mt,
2226                          unsigned int level,
2227                          unsigned int slice)
2228 {
2229    struct intel_miptree_map **map;
2230
2231    map = &mt->level[level].slice[slice].map;
2232    free(*map);
2233    *map = NULL;
2234 }
2235
2236 static void
2237 intel_miptree_map_singlesample(struct brw_context *brw,
2238                                struct intel_mipmap_tree *mt,
2239                                unsigned int level,
2240                                unsigned int slice,
2241                                unsigned int x,
2242                                unsigned int y,
2243                                unsigned int w,
2244                                unsigned int h,
2245                                GLbitfield mode,
2246                                void **out_ptr,
2247                                int *out_stride)
2248 {
2249    struct intel_miptree_map *map;
2250
2251    assert(mt->num_samples <= 1);
2252
2253    map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
2254    if (!map){
2255       *out_ptr = NULL;
2256       *out_stride = 0;
2257       return;
2258    }
2259
2260    intel_miptree_slice_resolve_depth(brw, mt, level, slice);
2261    if (map->mode & GL_MAP_WRITE_BIT) {
2262       intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice);
2263    }
2264
2265    if (mt->format == MESA_FORMAT_S8) {
2266       intel_miptree_map_s8(brw, mt, map, level, slice);
2267    } else if (mt->etc_format != MESA_FORMAT_NONE &&
2268               !(mode & BRW_MAP_DIRECT_BIT)) {
2269       intel_miptree_map_etc(brw, mt, map, level, slice);
2270    } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
2271       intel_miptree_map_depthstencil(brw, mt, map, level, slice);
2272    }
2273    /* See intel_miptree_blit() for details on the 32k pitch limit. */
2274    else if (brw->has_llc &&
2275             !(mode & GL_MAP_WRITE_BIT) &&
2276             !mt->compressed &&
2277             (mt->region->tiling == I915_TILING_X ||
2278              (brw->gen >= 6 && mt->region->tiling == I915_TILING_Y)) &&
2279             mt->region->pitch < 32768) {
2280       intel_miptree_map_blit(brw, mt, map, level, slice);
2281    } else if (mt->region->tiling != I915_TILING_NONE &&
2282               mt->region->bo->size >= brw->max_gtt_map_object_size) {
2283       assert(mt->region->pitch < 32768);
2284       intel_miptree_map_blit(brw, mt, map, level, slice);
2285 #ifdef __SSE4_1__
2286    } else if (!(mode & GL_MAP_WRITE_BIT) && !mt->compressed) {
2287       intel_miptree_map_movntdqa(brw, mt, map, level, slice);
2288 #endif
2289    } else {
2290       intel_miptree_map_gtt(brw, mt, map, level, slice);
2291    }
2292
2293    *out_ptr = map->ptr;
2294    *out_stride = map->stride;
2295
2296    if (map->ptr == NULL)
2297       intel_miptree_release_map(mt, level, slice);
2298 }
2299
2300 static void
2301 intel_miptree_unmap_singlesample(struct brw_context *brw,
2302                                  struct intel_mipmap_tree *mt,
2303                                  unsigned int level,
2304                                  unsigned int slice)
2305 {
2306    struct intel_miptree_map *map = mt->level[level].slice[slice].map;
2307
2308    assert(mt->num_samples <= 1);
2309
2310    if (!map)
2311       return;
2312
2313    DBG("%s: mt %p (%s) level %d slice %d\n", __FUNCTION__,
2314        mt, _mesa_get_format_name(mt->format), level, slice);
2315
2316    if (mt->format == MESA_FORMAT_S8) {
2317       intel_miptree_unmap_s8(brw, mt, map, level, slice);
2318    } else if (mt->etc_format != MESA_FORMAT_NONE &&
2319               !(map->mode & BRW_MAP_DIRECT_BIT)) {
2320       intel_miptree_unmap_etc(brw, mt, map, level, slice);
2321    } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
2322       intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
2323    } else if (map->mt) {
2324       intel_miptree_unmap_blit(brw, mt, map, level, slice);
2325 #ifdef __SSE4_1__
2326    } else if (map->buffer) {
2327       intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
2328 #endif
2329    } else {
2330       intel_miptree_unmap_gtt(brw, mt, map, level, slice);
2331    }
2332
2333    intel_miptree_release_map(mt, level, slice);
2334 }
2335
2336 static void
2337 intel_miptree_map_multisample(struct brw_context *brw,
2338                               struct intel_mipmap_tree *mt,
2339                               unsigned int level,
2340                               unsigned int slice,
2341                               unsigned int x,
2342                               unsigned int y,
2343                               unsigned int w,
2344                               unsigned int h,
2345                               GLbitfield mode,
2346                               void **out_ptr,
2347                               int *out_stride)
2348 {
2349    struct gl_context *ctx = &brw->ctx;
2350    struct intel_miptree_map *map;
2351
2352    assert(mt->num_samples > 1);
2353
2354    /* Only flat, renderbuffer-like miptrees are supported. */
2355    if (mt->target != GL_TEXTURE_2D ||
2356        mt->first_level != 0 ||
2357        mt->last_level != 0) {
2358       _mesa_problem(ctx, "attempt to map a multisample miptree for "
2359                     "which (target, first_level, last_level != "
2360                     "(GL_TEXTURE_2D, 0, 0)");
2361       goto fail;
2362    }
2363
2364    map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
2365    if (!map)
2366       goto fail;
2367
2368    if (!mt->singlesample_mt) {
2369       mt->singlesample_mt =
2370          intel_miptree_create_for_renderbuffer(brw,
2371                                                mt->format,
2372                                                mt->logical_width0,
2373                                                mt->logical_height0,
2374                                                0 /*num_samples*/);
2375       if (!mt->singlesample_mt)
2376          goto fail;
2377
2378       map->singlesample_mt_is_tmp = true;
2379       mt->need_downsample = true;
2380    }
2381
2382    intel_miptree_downsample(brw, mt);
2383    intel_miptree_map_singlesample(brw, mt->singlesample_mt,
2384                                   level, slice,
2385                                   x, y, w, h,
2386                                   mode,
2387                                   out_ptr, out_stride);
2388    return;
2389
2390 fail:
2391    intel_miptree_release_map(mt, level, slice);
2392    *out_ptr = NULL;
2393    *out_stride = 0;
2394 }
2395
2396 static void
2397 intel_miptree_unmap_multisample(struct brw_context *brw,
2398                                 struct intel_mipmap_tree *mt,
2399                                 unsigned int level,
2400                                 unsigned int slice)
2401 {
2402    struct intel_miptree_map *map = mt->level[level].slice[slice].map;
2403
2404    assert(mt->num_samples > 1);
2405
2406    if (!map)
2407       return;
2408
2409    intel_miptree_unmap_singlesample(brw, mt->singlesample_mt, level, slice);
2410
2411    mt->need_downsample = false;
2412    if (map->mode & GL_MAP_WRITE_BIT)
2413       intel_miptree_upsample(brw, mt);
2414
2415    if (map->singlesample_mt_is_tmp)
2416       intel_miptree_release(&mt->singlesample_mt);
2417
2418    intel_miptree_release_map(mt, level, slice);
2419 }
2420
2421 void
2422 intel_miptree_map(struct brw_context *brw,
2423                   struct intel_mipmap_tree *mt,
2424                   unsigned int level,
2425                   unsigned int slice,
2426                   unsigned int x,
2427                   unsigned int y,
2428                   unsigned int w,
2429                   unsigned int h,
2430                   GLbitfield mode,
2431                   void **out_ptr,
2432                   int *out_stride)
2433 {
2434    if (mt->num_samples <= 1)
2435       intel_miptree_map_singlesample(brw, mt,
2436                                      level, slice,
2437                                      x, y, w, h,
2438                                      mode,
2439                                      out_ptr, out_stride);
2440    else
2441       intel_miptree_map_multisample(brw, mt,
2442                                     level, slice,
2443                                     x, y, w, h,
2444                                     mode,
2445                                     out_ptr, out_stride);
2446 }
2447
2448 void
2449 intel_miptree_unmap(struct brw_context *brw,
2450                     struct intel_mipmap_tree *mt,
2451                     unsigned int level,
2452                     unsigned int slice)
2453 {
2454    if (mt->num_samples <= 1)
2455       intel_miptree_unmap_singlesample(brw, mt, level, slice);
2456    else
2457       intel_miptree_unmap_multisample(brw, mt, level, slice);
2458 }