src/mesa/drivers/dri/i965/intel_mipmap_tree.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 #include <GL/gl.h>
  29 #include <GL/internal/dri_interface.h>
  30
  31 #include "intel_batchbuffer.h"
  32 #include "intel_chipset.h"
  33 #include "intel_mipmap_tree.h"
  34 #include "intel_regions.h"
  35 #include "intel_resolve_map.h"
  36 #include "intel_tex.h"
  37 #include "intel_blit.h"
  38
  39 #include "brw_blorp.h"
  40 #include "brw_context.h"
  41
  42 #include "main/enums.h"
  43 #include "main/formats.h"
  44 #include "main/glformats.h"
  45 #include "main/texcompress_etc.h"
  46 #include "main/teximage.h"
  47 #include "main/streaming-load-memcpy.h"
  48
  49 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
  50
  51 static GLenum
  52 target_to_target(GLenum target)
  53 {
  54    switch (target) {
  55    case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
  56    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
  57    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
  58    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
  59    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
  60    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
  61       return GL_TEXTURE_CUBE_MAP_ARB;
  62    default:
  63       return target;
  64    }
  65 }
  66
  67
  68 /**
  69  * Determine which MSAA layout should be used by the MSAA surface being
  70  * created, based on the chip generation and the surface type.
  71  */
  72 static enum intel_msaa_layout
  73 compute_msaa_layout(struct brw_context *brw, gl_format format, GLenum target)
  74 {
  75    /* Prior to Gen7, all MSAA surfaces used IMS layout. */
  76    if (brw->gen < 7)
  77       return INTEL_MSAA_LAYOUT_IMS;
  78
  79    /* In Gen7, IMS layout is only used for depth and stencil buffers. */
  80    switch (_mesa_get_format_base_format(format)) {
  81    case GL_DEPTH_COMPONENT:
  82    case GL_STENCIL_INDEX:
  83    case GL_DEPTH_STENCIL:
  84       return INTEL_MSAA_LAYOUT_IMS;
  85    default:
  86       /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
  87        *
  88        *   This field must be set to 0 for all SINT MSRTs when all RT channels
  89        *   are not written
  90        *
  91        * In practice this means that we have to disable MCS for all signed
  92        * integer MSAA buffers.  The alternative, to disable MCS only when one
  93        * of the render target channels is disabled, is impractical because it
  94        * would require converting between CMS and UMS MSAA layouts on the fly,
  95        * which is expensive.
  96        */
  97       if (_mesa_get_format_datatype(format) == GL_INT) {
  98          /* TODO: is this workaround needed for future chipsets? */
  99          assert(brw->gen == 7);
 100          return INTEL_MSAA_LAYOUT_UMS;
 101       } else {
 102          /* For now, if we're going to be texturing from this surface,
 103           * force UMS, so that the shader doesn't have to do different things
 104           * based on whether there's a multisample control surface needing sampled first.
 105           * We can't just blindly read the MCS surface in all cases because:
 106           *
 107           * From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
 108           *
 109           *    If this field is disabled and the sampling engine <ld_mcs> message
 110           *    is issued on this surface, the MCS surface may be accessed. Software
 111           *    must ensure that the surface is defined to avoid GTT errors.
 112           */
 113          if (target == GL_TEXTURE_2D_MULTISAMPLE ||
 114              target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
 115             return INTEL_MSAA_LAYOUT_UMS;
 116          } else {
 117             return INTEL_MSAA_LAYOUT_CMS;
 118          }
 119       }
 120    }
 121 }
 122
 123
 124 /**
 125  * For single-sampled render targets ("non-MSRT"), the MCS buffer is a
 126  * scaled-down bitfield representation of the color buffer which is capable of
 127  * recording when blocks of the color buffer are equal to the clear value.
 128  * This function returns the block size that will be used by the MCS buffer
 129  * corresponding to a certain color miptree.
 130  *
 131  * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 132  * beneath the "Fast Color Clear" bullet (p327):
 133  *
 134  *     The following table describes the RT alignment
 135  *
 136  *                       Pixels  Lines
 137  *         TiledY RT CL
 138  *             bpp
 139  *              32          8      4
 140  *              64          4      4
 141  *             128          2      4
 142  *         TiledX RT CL
 143  *             bpp
 144  *              32         16      2
 145  *              64          8      2
 146  *             128          4      2
 147  *
 148  * This alignment has the following uses:
 149  *
 150  * - For figuring out the size of the MCS buffer.  Each 4k tile in the MCS
 151  *   buffer contains 128 blocks horizontally and 256 blocks vertically.
 152  *
 153  * - For figuring out alignment restrictions for a fast clear operation.  Fast
 154  *   clear operations must always clear aligned multiples of 16 blocks
 155  *   horizontally and 32 blocks vertically.
 156  *
 157  * - For scaling down the coordinates sent through the render pipeline during
 158  *   a fast clear.  X coordinates must be scaled down by 8 times the block
 159  *   width, and Y coordinates by 16 times the block height.
 160  *
 161  * - For scaling down the coordinates sent through the render pipeline during
 162  *   a "Render Target Resolve" operation.  X coordinates must be scaled down
 163  *   by half the block width, and Y coordinates by half the block height.
 164  */
 165 void
 166 intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
 167                                  struct intel_mipmap_tree *mt,
 168                                  unsigned *width_px, unsigned *height)
 169 {
 170    switch (mt->region->tiling) {
 171    default:
 172       assert(!"Non-MSRT MCS requires X or Y tiling");
 173       /* In release builds, fall through */
 174    case I915_TILING_Y:
 175       *width_px = 32 / mt->cpp;
 176       *height = 4;
 177       break;
 178    case I915_TILING_X:
 179       *width_px = 64 / mt->cpp;
 180       *height = 2;
 181    }
 182 }
 183
 184
 185 /**
 186  * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
 187  * can be used.
 188  *
 189  * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 190  * beneath the "Fast Color Clear" bullet (p326):
 191  *
 192  *     - Support is limited to tiled render targets.
 193  *     - Support is for non-mip-mapped and non-array surface types only.
 194  *
 195  * And then later, on p327:
 196  *
 197  *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
 198  *       64bpp, and 128bpp.
 199  */
 200 bool
 201 intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
 202                                        struct intel_mipmap_tree *mt)
 203 {
 204    /* MCS support does not exist prior to Gen7 */
 205    if (brw->gen < 7 || brw->gen >= 8)
 206       return false;
 207
 208    /* MCS is only supported for color buffers */
 209    switch (_mesa_get_format_base_format(mt->format)) {
 210    case GL_DEPTH_COMPONENT:
 211    case GL_DEPTH_STENCIL:
 212    case GL_STENCIL_INDEX:
 213       return false;
 214    }
 215
 216    if (mt->region->tiling != I915_TILING_X &&
 217        mt->region->tiling != I915_TILING_Y)
 218       return false;
 219    if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
 220       return false;
 221    if (mt->first_level != 0 || mt->last_level != 0)
 222       return false;
 223    if (mt->physical_depth0 != 1)
 224       return false;
 225
 226    /* There's no point in using an MCS buffer if the surface isn't in a
 227     * renderable format.
 228     */
 229    if (!brw->format_supported_as_render_target[mt->format])
 230       return false;
 231
 232    return true;
 233 }
 234
 235
 236 /**
 237  * @param for_bo Indicates that the caller is
 238  *        intel_miptree_create_for_bo(). If true, then do not create
 239  *        \c stencil_mt.
 240  */
 241 struct intel_mipmap_tree *
 242 intel_miptree_create_layout(struct brw_context *brw,
 243                             GLenum target,
 244                             gl_format format,
 245                             GLuint first_level,
 246                             GLuint last_level,
 247                             GLuint width0,
 248                             GLuint height0,
 249                             GLuint depth0,
 250                             bool for_bo,
 251                             GLuint num_samples)
 252 {
 253    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
 254    if (!mt)
 255       return NULL;
 256
 257    DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__,
 258        _mesa_lookup_enum_by_nr(target),
 259        _mesa_get_format_name(format),
 260        first_level, last_level, mt);
 261
 262    mt->target = target_to_target(target);
 263    mt->format = format;
 264    mt->first_level = first_level;
 265    mt->last_level = last_level;
 266    mt->logical_width0 = width0;
 267    mt->logical_height0 = height0;
 268    mt->logical_depth0 = depth0;
 269    mt->mcs_state = INTEL_MCS_STATE_NONE;
 270
 271    /* The cpp is bytes per (1, blockheight)-sized block for compressed
 272     * textures.  This is why you'll see divides by blockheight all over
 273     */
 274    unsigned bw, bh;
 275    _mesa_get_format_block_size(format, &bw, &bh);
 276    assert(_mesa_get_format_bytes(mt->format) % bw == 0);
 277    mt->cpp = _mesa_get_format_bytes(mt->format) / bw;
 278
 279    mt->num_samples = num_samples;
 280    mt->compressed = _mesa_is_format_compressed(format);
 281    mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE;
 282    mt->refcount = 1;
 283
 284    if (num_samples > 1) {
 285       /* Adjust width/height/depth for MSAA */
 286       mt->msaa_layout = compute_msaa_layout(brw, format, mt->target);
 287       if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
 288          /* In the Sandy Bridge PRM, volume 4, part 1, page 31, it says:
 289           *
 290           *     "Any of the other messages (sample*, LOD, load4) used with a
 291           *      (4x) multisampled surface will in-effect sample a surface with
 292           *      double the height and width as that indicated in the surface
 293           *      state. Each pixel position on the original-sized surface is
 294           *      replaced with a 2x2 of samples with the following arrangement:
 295           *
 296           *         sample 0 sample 2
 297           *         sample 1 sample 3"
 298           *
 299           * Thus, when sampling from a multisampled texture, it behaves as
 300           * though the layout in memory for (x,y,sample) is:
 301           *
 302           *      (0,0,0) (0,0,2)   (1,0,0) (1,0,2)
 303           *      (0,0,1) (0,0,3)   (1,0,1) (1,0,3)
 304           *
 305           *      (0,1,0) (0,1,2)   (1,1,0) (1,1,2)
 306           *      (0,1,1) (0,1,3)   (1,1,1) (1,1,3)
 307           *
 308           * However, the actual layout of multisampled data in memory is:
 309           *
 310           *      (0,0,0) (1,0,0)   (0,0,1) (1,0,1)
 311           *      (0,1,0) (1,1,0)   (0,1,1) (1,1,1)
 312           *
 313           *      (0,0,2) (1,0,2)   (0,0,3) (1,0,3)
 314           *      (0,1,2) (1,1,2)   (0,1,3) (1,1,3)
 315           *
 316           * This pattern repeats for each 2x2 pixel block.
 317           *
 318           * As a result, when calculating the size of our 4-sample buffer for
 319           * an odd width or height, we have to align before scaling up because
 320           * sample 3 is in that bottom right 2x2 block.
 321           */
 322          switch (num_samples) {
 323          case 4:
 324             width0 = ALIGN(width0, 2) * 2;
 325             height0 = ALIGN(height0, 2) * 2;
 326             break;
 327          case 8:
 328             width0 = ALIGN(width0, 2) * 4;
 329             height0 = ALIGN(height0, 2) * 2;
 330             break;
 331          default:
 332             /* num_samples should already have been quantized to 0, 1, 4, or
 333              * 8.
 334              */
 335             assert(false);
 336          }
 337       } else {
 338          /* Non-interleaved */
 339          depth0 *= num_samples;
 340       }
 341    }
 342
 343    /* array_spacing_lod0 is only used for non-IMS MSAA surfaces.  TODO: can we
 344     * use it elsewhere?
 345     */
 346    switch (mt->msaa_layout) {
 347    case INTEL_MSAA_LAYOUT_NONE:
 348    case INTEL_MSAA_LAYOUT_IMS:
 349       mt->array_spacing_lod0 = false;
 350       break;
 351    case INTEL_MSAA_LAYOUT_UMS:
 352    case INTEL_MSAA_LAYOUT_CMS:
 353       mt->array_spacing_lod0 = true;
 354       break;
 355    }
 356
 357    if (target == GL_TEXTURE_CUBE_MAP) {
 358       assert(depth0 == 1);
 359       depth0 = 6;
 360    }
 361
 362    mt->physical_width0 = width0;
 363    mt->physical_height0 = height0;
 364    mt->physical_depth0 = depth0;
 365
 366    if (!for_bo &&
 367        _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
 368        (brw->must_use_separate_stencil ||
 369         (brw->has_separate_stencil && brw_is_hiz_depth_format(brw, format)))) {
 370       mt->stencil_mt = intel_miptree_create(brw,
 371                                             mt->target,
 372                                             MESA_FORMAT_S8,
 373                                             mt->first_level,
 374                                             mt->last_level,
 375                                             mt->logical_width0,
 376                                             mt->logical_height0,
 377                                             mt->logical_depth0,
 378                                             true,
 379                                             num_samples,
 380                                             INTEL_MIPTREE_TILING_ANY);
 381       if (!mt->stencil_mt) {
 382          intel_miptree_release(&mt);
 383          return NULL;
 384       }
 385
 386       /* Fix up the Z miptree format for how we're splitting out separate
 387        * stencil.  Gen7 expects there to be no stencil bits in its depth buffer.
 388        */
 389       if (mt->format == MESA_FORMAT_S8_Z24) {
 390          mt->format = MESA_FORMAT_X8_Z24;
 391       } else if (mt->format == MESA_FORMAT_Z32_FLOAT_X24S8) {
 392          mt->format = MESA_FORMAT_Z32_FLOAT;
 393          mt->cpp = 4;
 394       } else {
 395          _mesa_problem(NULL, "Unknown format %s in separate stencil mt\n",
 396                        _mesa_get_format_name(mt->format));
 397       }
 398    }
 399
 400    brw_miptree_layout(brw, mt);
 401
 402    return mt;
 403 }
 404
 405 /**
 406  * \brief Helper function for intel_miptree_create().
 407  */
 408 static uint32_t
 409 intel_miptree_choose_tiling(struct brw_context *brw,
 410                             gl_format format,
 411                             uint32_t width0,
 412                             uint32_t num_samples,
 413                             enum intel_miptree_tiling_mode requested,
 414                             struct intel_mipmap_tree *mt)
 415 {
 416    if (format == MESA_FORMAT_S8) {
 417       /* The stencil buffer is W tiled. However, we request from the kernel a
 418        * non-tiled buffer because the GTT is incapable of W fencing.
 419        */
 420       return I915_TILING_NONE;
 421    }
 422
 423    /* Some usages may want only one type of tiling, like depth miptrees (Y
 424     * tiled), or temporary BOs for uploading data once (linear).
 425     */
 426    switch (requested) {
 427    case INTEL_MIPTREE_TILING_ANY:
 428       break;
 429    case INTEL_MIPTREE_TILING_Y:
 430       return I915_TILING_Y;
 431    case INTEL_MIPTREE_TILING_NONE:
 432       return I915_TILING_NONE;
 433    }
 434
 435    if (num_samples > 1) {
 436       /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
 437        * Surface"):
 438        *
 439        *   [DevSNB+]: For multi-sample render targets, this field must be
 440        *   1. MSRTs can only be tiled.
 441        *
 442        * Our usual reason for preferring X tiling (fast blits using the
 443        * blitting engine) doesn't apply to MSAA, since we'll generally be
 444        * downsampling or upsampling when blitting between the MSAA buffer
 445        * and another buffer, and the blitting engine doesn't support that.
 446        * So use Y tiling, since it makes better use of the cache.
 447        */
 448       return I915_TILING_Y;
 449    }
 450
 451    GLenum base_format = _mesa_get_format_base_format(format);
 452    if (base_format == GL_DEPTH_COMPONENT ||
 453        base_format == GL_DEPTH_STENCIL_EXT)
 454       return I915_TILING_Y;
 455
 456    int minimum_pitch = mt->total_width * mt->cpp;
 457
 458    /* If the width is much smaller than a tile, don't bother tiling. */
 459    if (minimum_pitch < 64)
 460       return I915_TILING_NONE;
 461
 462    if (ALIGN(minimum_pitch, 512) >= 32768) {
 463       perf_debug("%dx%d miptree too large to blit, falling back to untiled",
 464                  mt->total_width, mt->total_height);
 465       return I915_TILING_NONE;
 466    }
 467
 468    /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */
 469    if (brw->gen < 6)
 470       return I915_TILING_X;
 471
 472    /* From the Sandybridge PRM, Volume 1, Part 2, page 32:
 473     * "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either TileX
 474     *  or Linear."
 475     * 128 bits per pixel translates to 16 bytes per pixel.  This is necessary
 476     * all the way back to 965, but is explicitly permitted on Gen7.
 477     */
 478    if (brw->gen != 7 && mt->cpp >= 16)
 479       return I915_TILING_X;
 480
 481    /* From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most
 482     * messages), on p64, under the heading "Surface Vertical Alignment":
 483     *
 484     *     This field must be set to VALIGN_4 for all tiled Y Render Target
 485     *     surfaces.
 486     *
 487     * So if the surface is renderable and uses a vertical alignment of 2,
 488     * force it to be X tiled.  This is somewhat conservative (it's possible
 489     * that the client won't ever render to this surface), but it's difficult
 490     * to know that ahead of time.  And besides, since we use a vertical
 491     * alignment of 4 as often as we can, this shouldn't happen very often.
 492     */
 493    if (brw->gen == 7 && mt->align_h == 2 &&
 494        brw->format_supported_as_render_target[format]) {
 495       return I915_TILING_X;
 496    }
 497
 498    return I915_TILING_Y | I915_TILING_X;
 499 }
 500
 501 struct intel_mipmap_tree *
 502 intel_miptree_create(struct brw_context *brw,
 503                      GLenum target,
 504                      gl_format format,
 505                      GLuint first_level,
 506                      GLuint last_level,
 507                      GLuint width0,
 508                      GLuint height0,
 509                      GLuint depth0,
 510                      bool expect_accelerated_upload,
 511                      GLuint num_samples,
 512                      enum intel_miptree_tiling_mode requested_tiling)
 513 {
 514    struct intel_mipmap_tree *mt;
 515    gl_format tex_format = format;
 516    gl_format etc_format = MESA_FORMAT_NONE;
 517    GLuint total_width, total_height;
 518
 519    if (!brw->is_baytrail) {
 520       switch (format) {
 521       case MESA_FORMAT_ETC1_RGB8:
 522          format = MESA_FORMAT_RGBX8888_REV;
 523          break;
 524       case MESA_FORMAT_ETC2_RGB8:
 525          format = MESA_FORMAT_RGBX8888_REV;
 526          break;
 527       case MESA_FORMAT_ETC2_SRGB8:
 528       case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
 529       case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
 530          format = MESA_FORMAT_SARGB8;
 531          break;
 532       case MESA_FORMAT_ETC2_RGBA8_EAC:
 533       case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
 534          format = MESA_FORMAT_RGBA8888_REV;
 535          break;
 536       case MESA_FORMAT_ETC2_R11_EAC:
 537          format = MESA_FORMAT_R16;
 538          break;
 539       case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
 540          format = MESA_FORMAT_SIGNED_R16;
 541          break;
 542       case MESA_FORMAT_ETC2_RG11_EAC:
 543          format = MESA_FORMAT_GR1616;
 544          break;
 545       case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
 546          format = MESA_FORMAT_SIGNED_GR1616;
 547          break;
 548       default:
 549          /* Non ETC1 / ETC2 format */
 550          break;
 551       }
 552    }
 553
 554    etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
 555
 556    mt = intel_miptree_create_layout(brw, target, format,
 557                                       first_level, last_level, width0,
 558                                       height0, depth0,
 559                                       false, num_samples);
 560    /*
 561     * pitch == 0 || height == 0  indicates the null texture
 562     */
 563    if (!mt || !mt->total_width || !mt->total_height) {
 564       intel_miptree_release(&mt);
 565       return NULL;
 566    }
 567
 568    total_width = mt->total_width;
 569    total_height = mt->total_height;
 570
 571    if (format == MESA_FORMAT_S8) {
 572       /* Align to size of W tile, 64x64. */
 573       total_width = ALIGN(total_width, 64);
 574       total_height = ALIGN(total_height, 64);
 575    }
 576
 577    uint32_t tiling = intel_miptree_choose_tiling(brw, format, width0,
 578                                                  num_samples, requested_tiling,
 579                                                  mt);
 580    bool y_or_x = tiling == (I915_TILING_Y | I915_TILING_X);
 581
 582    mt->etc_format = etc_format;
 583    mt->region = intel_region_alloc(brw->intelScreen,
 584                                    y_or_x ? I915_TILING_Y : tiling,
 585                                    mt->cpp,
 586                                    total_width,
 587                                    total_height,
 588                                    expect_accelerated_upload);
 589
 590    /* If the region is too large to fit in the aperture, we need to use the
 591     * BLT engine to support it.  The BLT paths can't currently handle Y-tiling,
 592     * so we need to fall back to X.
 593     */
 594    if (y_or_x && mt->region->bo->size >= brw->max_gtt_map_object_size) {
 595       perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n",
 596                  mt->total_width, mt->total_height);
 597       intel_region_release(&mt->region);
 598
 599       mt->region = intel_region_alloc(brw->intelScreen,
 600                                       I915_TILING_X,
 601                                       mt->cpp,
 602                                       total_width,
 603                                       total_height,
 604                                       expect_accelerated_upload);
 605    }
 606
 607    mt->offset = 0;
 608
 609    if (!mt->region) {
 610        intel_miptree_release(&mt);
 611        return NULL;
 612    }
 613
 614
 615    if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
 616       if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) {
 617          intel_miptree_release(&mt);
 618          return NULL;
 619       }
 620    }
 621
 622    /* If this miptree is capable of supporting fast color clears, set
 623     * mcs_state appropriately to ensure that fast clears will occur.
 624     * Allocation of the MCS miptree will be deferred until the first fast
 625     * clear actually occurs.
 626     */
 627    if (intel_is_non_msrt_mcs_buffer_supported(brw, mt))
 628       mt->mcs_state = INTEL_MCS_STATE_RESOLVED;
 629
 630    return mt;
 631 }
 632
 633 struct intel_mipmap_tree *
 634 intel_miptree_create_for_bo(struct brw_context *brw,
 635                             drm_intel_bo *bo,
 636                             gl_format format,
 637                             uint32_t offset,
 638                             uint32_t width,
 639                             uint32_t height,
 640                             int pitch,
 641                             uint32_t tiling)
 642 {
 643    struct intel_mipmap_tree *mt;
 644
 645    struct intel_region *region = calloc(1, sizeof(*region));
 646    if (!region)
 647       return NULL;
 648
 649    /* Nothing will be able to use this miptree with the BO if the offset isn't
 650     * aligned.
 651     */
 652    if (tiling != I915_TILING_NONE)
 653       assert(offset % 4096 == 0);
 654
 655    /* miptrees can't handle negative pitch.  If you need flipping of images,
 656     * that's outside of the scope of the mt.
 657     */
 658    assert(pitch >= 0);
 659
 660    mt = intel_miptree_create_layout(brw, GL_TEXTURE_2D, format,
 661                                     0, 0,
 662                                     width, height, 1,
 663                                     true, 0 /* num_samples */);
 664    if (!mt) {
 665       free(region);
 666       return mt;
 667    }
 668
 669    region->cpp = mt->cpp;
 670    region->width = width;
 671    region->height = height;
 672    region->pitch = pitch;
 673    region->refcount = 1;
 674    drm_intel_bo_reference(bo);
 675    region->bo = bo;
 676    region->tiling = tiling;
 677
 678    mt->region = region;
 679    mt->offset = offset;
 680
 681    return mt;
 682 }
 683
 684
 685 /**
 686  * For a singlesample DRI2 buffer, this simply wraps the given region with a miptree.
 687  *
 688  * For a multisample DRI2 buffer, this wraps the given region with
 689  * a singlesample miptree, then creates a multisample miptree into which the
 690  * singlesample miptree is embedded as a child.
 691  */
 692 struct intel_mipmap_tree*
 693 intel_miptree_create_for_dri2_buffer(struct brw_context *brw,
 694                                      unsigned dri_attachment,
 695                                      gl_format format,
 696                                      uint32_t num_samples,
 697                                      struct intel_region *region)
 698 {
 699    struct intel_mipmap_tree *singlesample_mt = NULL;
 700    struct intel_mipmap_tree *multisample_mt = NULL;
 701
 702    /* Only the front and back buffers, which are color buffers, are shared
 703     * through DRI2.
 704     */
 705    assert(dri_attachment == __DRI_BUFFER_BACK_LEFT ||
 706           dri_attachment == __DRI_BUFFER_FRONT_LEFT ||
 707           dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT);
 708    assert(_mesa_get_format_base_format(format) == GL_RGB ||
 709           _mesa_get_format_base_format(format) == GL_RGBA);
 710
 711    singlesample_mt = intel_miptree_create_for_bo(brw,
 712                                                  region->bo,
 713                                                  format,
 714                                                  0,
 715                                                  region->width,
 716                                                  region->height,
 717                                                  region->pitch,
 718                                                  region->tiling);
 719    if (!singlesample_mt)
 720       return NULL;
 721    singlesample_mt->region->name = region->name;
 722
 723    /* If this miptree is capable of supporting fast color clears, set
 724     * mcs_state appropriately to ensure that fast clears will occur.
 725     * Allocation of the MCS miptree will be deferred until the first fast
 726     * clear actually occurs.
 727     */
 728    if (intel_is_non_msrt_mcs_buffer_supported(brw, singlesample_mt))
 729       singlesample_mt->mcs_state = INTEL_MCS_STATE_RESOLVED;
 730
 731    if (num_samples == 0)
 732       return singlesample_mt;
 733
 734    multisample_mt = intel_miptree_create_for_renderbuffer(brw,
 735                                                           format,
 736                                                           region->width,
 737                                                           region->height,
 738                                                           num_samples);
 739    if (!multisample_mt) {
 740       intel_miptree_release(&singlesample_mt);
 741       return NULL;
 742    }
 743
 744    multisample_mt->singlesample_mt = singlesample_mt;
 745    multisample_mt->need_downsample = false;
 746
 747    if (brw->is_front_buffer_rendering &&
 748        (dri_attachment == __DRI_BUFFER_FRONT_LEFT ||
 749         dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT)) {
 750       intel_miptree_upsample(brw, multisample_mt);
 751    }
 752
 753    return multisample_mt;
 754 }
 755
 756 /**
 757  * For a singlesample image buffer, this simply wraps the given region with a miptree.
 758  *
 759  * For a multisample image buffer, this wraps the given region with
 760  * a singlesample miptree, then creates a multisample miptree into which the
 761  * singlesample miptree is embedded as a child.
 762  */
 763 struct intel_mipmap_tree*
 764 intel_miptree_create_for_image_buffer(struct brw_context *intel,
 765                                       enum __DRIimageBufferMask buffer_type,
 766                                       gl_format format,
 767                                       uint32_t num_samples,
 768                                       struct intel_region *region)
 769 {
 770    struct intel_mipmap_tree *singlesample_mt = NULL;
 771    struct intel_mipmap_tree *multisample_mt = NULL;
 772
 773    /* Only the front and back buffers, which are color buffers, are allocated
 774     * through the image loader.
 775     */
 776    assert(_mesa_get_format_base_format(format) == GL_RGB ||
 777           _mesa_get_format_base_format(format) == GL_RGBA);
 778
 779    singlesample_mt = intel_miptree_create_for_bo(intel,
 780                                                  region->bo,
 781                                                  format,
 782                                                  0,
 783                                                  region->width,
 784                                                  region->height,
 785                                                  region->pitch,
 786                                                  region->tiling);
 787    if (!singlesample_mt)
 788       return NULL;
 789
 790    intel_region_reference(&singlesample_mt->region, region);
 791
 792    if (num_samples == 0)
 793       return singlesample_mt;
 794
 795    multisample_mt = intel_miptree_create_for_renderbuffer(intel,
 796                                                           format,
 797                                                           region->width,
 798                                                           region->height,
 799                                                           num_samples);
 800    if (!multisample_mt) {
 801       intel_miptree_release(&singlesample_mt);
 802       return NULL;
 803    }
 804
 805    multisample_mt->singlesample_mt = singlesample_mt;
 806    multisample_mt->need_downsample = false;
 807
 808    intel_region_reference(&multisample_mt->region, region);
 809
 810    if (intel->is_front_buffer_rendering && buffer_type == __DRI_IMAGE_BUFFER_FRONT) {
 811       intel_miptree_upsample(intel, multisample_mt);
 812    }
 813
 814    return multisample_mt;
 815 }
 816
 817 struct intel_mipmap_tree*
 818 intel_miptree_create_for_renderbuffer(struct brw_context *brw,
 819                                       gl_format format,
 820                                       uint32_t width,
 821                                       uint32_t height,
 822                                       uint32_t num_samples)
 823 {
 824    struct intel_mipmap_tree *mt;
 825    uint32_t depth = 1;
 826    bool ok;
 827
 828    mt = intel_miptree_create(brw, GL_TEXTURE_2D, format, 0, 0,
 829                              width, height, depth, true, num_samples,
 830                              INTEL_MIPTREE_TILING_ANY);
 831    if (!mt)
 832       goto fail;
 833
 834    if (brw_is_hiz_depth_format(brw, format)) {
 835       ok = intel_miptree_alloc_hiz(brw, mt);
 836       if (!ok)
 837          goto fail;
 838    }
 839
 840    return mt;
 841
 842 fail:
 843    intel_miptree_release(&mt);
 844    return NULL;
 845 }
 846
 847 void
 848 intel_miptree_reference(struct intel_mipmap_tree **dst,
 849                         struct intel_mipmap_tree *src)
 850 {
 851    if (*dst == src)
 852       return;
 853
 854    intel_miptree_release(dst);
 855
 856    if (src) {
 857       src->refcount++;
 858       DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount);
 859    }
 860
 861    *dst = src;
 862 }
 863
 864
 865 void
 866 intel_miptree_release(struct intel_mipmap_tree **mt)
 867 {
 868    if (!*mt)
 869       return;
 870
 871    DBG("%s %p refcount will be %d\n", __FUNCTION__, *mt, (*mt)->refcount - 1);
 872    if (--(*mt)->refcount <= 0) {
 873       GLuint i;
 874
 875       DBG("%s deleting %p\n", __FUNCTION__, *mt);
 876
 877       intel_region_release(&((*mt)->region));
 878       intel_miptree_release(&(*mt)->stencil_mt);
 879       intel_miptree_release(&(*mt)->hiz_mt);
 880       intel_miptree_release(&(*mt)->mcs_mt);
 881       intel_miptree_release(&(*mt)->singlesample_mt);
 882       intel_resolve_map_clear(&(*mt)->hiz_map);
 883
 884       for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
 885          free((*mt)->level[i].slice);
 886       }
 887
 888       free(*mt);
 889    }
 890    *mt = NULL;
 891 }
 892
 893 void
 894 intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
 895                                        int *width, int *height, int *depth)
 896 {
 897    switch (image->TexObject->Target) {
 898    case GL_TEXTURE_1D_ARRAY:
 899       *width = image->Width;
 900       *height = 1;
 901       *depth = image->Height;
 902       break;
 903    default:
 904       *width = image->Width;
 905       *height = image->Height;
 906       *depth = image->Depth;
 907       break;
 908    }
 909 }
 910
 911 /**
 912  * Can the image be pulled into a unified mipmap tree?  This mirrors
 913  * the completeness test in a lot of ways.
 914  *
 915  * Not sure whether I want to pass gl_texture_image here.
 916  */
 917 bool
 918 intel_miptree_match_image(struct intel_mipmap_tree *mt,
 919                           struct gl_texture_image *image)
 920 {
 921    struct intel_texture_image *intelImage = intel_texture_image(image);
 922    GLuint level = intelImage->base.Base.Level;
 923    int width, height, depth;
 924
 925    /* glTexImage* choose the texture object based on the target passed in, and
 926     * objects can't change targets over their lifetimes, so this should be
 927     * true.
 928     */
 929    assert(target_to_target(image->TexObject->Target) == mt->target);
 930
 931    gl_format mt_format = mt->format;
 932    if (mt->format == MESA_FORMAT_X8_Z24 && mt->stencil_mt)
 933       mt_format = MESA_FORMAT_S8_Z24;
 934    if (mt->format == MESA_FORMAT_Z32_FLOAT && mt->stencil_mt)
 935       mt_format = MESA_FORMAT_Z32_FLOAT_X24S8;
 936    if (mt->etc_format != MESA_FORMAT_NONE)
 937       mt_format = mt->etc_format;
 938
 939    if (image->TexFormat != mt_format)
 940       return false;
 941
 942    intel_miptree_get_dimensions_for_image(image, &width, &height, &depth);
 943
 944    if (mt->target == GL_TEXTURE_CUBE_MAP)
 945       depth = 6;
 946
 947    /* Test image dimensions against the base level image adjusted for
 948     * minification.  This will also catch images not present in the
 949     * tree, changed targets, etc.
 950     */
 951    if (mt->target == GL_TEXTURE_2D_MULTISAMPLE ||
 952          mt->target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
 953       /* nonzero level here is always bogus */
 954       assert(level == 0);
 955
 956       if (width != mt->logical_width0 ||
 957             height != mt->logical_height0 ||
 958             depth != mt->logical_depth0) {
 959          return false;
 960       }
 961    }
 962    else {
 963       /* all normal textures, renderbuffers, etc */
 964       if (width != mt->level[level].width ||
 965           height != mt->level[level].height ||
 966           depth != mt->level[level].depth) {
 967          return false;
 968       }
 969    }
 970
 971    if (image->NumSamples != mt->num_samples)
 972       return false;
 973
 974    return true;
 975 }
 976
 977
 978 void
 979 intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
 980                              GLuint level,
 981                              GLuint x, GLuint y,
 982                              GLuint w, GLuint h, GLuint d)
 983 {
 984    mt->level[level].width = w;
 985    mt->level[level].height = h;
 986    mt->level[level].depth = d;
 987    mt->level[level].level_x = x;
 988    mt->level[level].level_y = y;
 989
 990    DBG("%s level %d size: %d,%d,%d offset %d,%d\n", __FUNCTION__,
 991        level, w, h, d, x, y);
 992
 993    assert(mt->level[level].slice == NULL);
 994
 995    mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice));
 996    mt->level[level].slice[0].x_offset = mt->level[level].level_x;
 997    mt->level[level].slice[0].y_offset = mt->level[level].level_y;
 998 }
 999
1000
1001 void
1002 intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
1003                                GLuint level, GLuint img,
1004                                GLuint x, GLuint y)
1005 {
1006    if (img == 0 && level == 0)
1007       assert(x == 0 && y == 0);
1008
1009    assert(img < mt->level[level].depth);
1010
1011    mt->level[level].slice[img].x_offset = mt->level[level].level_x + x;
1012    mt->level[level].slice[img].y_offset = mt->level[level].level_y + y;
1013
1014    DBG("%s level %d img %d pos %d,%d\n",
1015        __FUNCTION__, level, img,
1016        mt->level[level].slice[img].x_offset,
1017        mt->level[level].slice[img].y_offset);
1018 }
1019
1020 void
1021 intel_miptree_get_image_offset(struct intel_mipmap_tree *mt,
1022                                GLuint level, GLuint slice,
1023                                GLuint *x, GLuint *y)
1024 {
1025    assert(slice < mt->level[level].depth);
1026
1027    *x = mt->level[level].slice[slice].x_offset;
1028    *y = mt->level[level].slice[slice].y_offset;
1029 }
1030
1031 /**
1032  * Rendering with tiled buffers requires that the base address of the buffer
1033  * be aligned to a page boundary.  For renderbuffers, and sometimes with
1034  * textures, we may want the surface to point at a texture image level that
1035  * isn't at a page boundary.
1036  *
1037  * This function returns an appropriately-aligned base offset
1038  * according to the tiling restrictions, plus any required x/y offset
1039  * from there.
1040  */
1041 uint32_t
1042 intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt,
1043                                GLuint level, GLuint slice,
1044                                uint32_t *tile_x,
1045                                uint32_t *tile_y)
1046 {
1047    struct intel_region *region = mt->region;
1048    uint32_t x, y;
1049    uint32_t mask_x, mask_y;
1050
1051    intel_region_get_tile_masks(region, &mask_x, &mask_y, false);
1052    intel_miptree_get_image_offset(mt, level, slice, &x, &y);
1053
1054    *tile_x = x & mask_x;
1055    *tile_y = y & mask_y;
1056
1057    return intel_region_get_aligned_offset(region, x & ~mask_x, y & ~mask_y,
1058                                           false);
1059 }
1060
1061 static void
1062 intel_miptree_copy_slice_sw(struct brw_context *brw,
1063                             struct intel_mipmap_tree *dst_mt,
1064                             struct intel_mipmap_tree *src_mt,
1065                             int level,
1066                             int slice,
1067                             int width,
1068                             int height)
1069 {
1070    void *src, *dst;
1071    int src_stride, dst_stride;
1072    int cpp = dst_mt->cpp;
1073
1074    intel_miptree_map(brw, src_mt,
1075                      level, slice,
1076                      0, 0,
1077                      width, height,
1078                      GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
1079                      &src, &src_stride);
1080
1081    intel_miptree_map(brw, dst_mt,
1082                      level, slice,
1083                      0, 0,
1084                      width, height,
1085                      GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
1086                      BRW_MAP_DIRECT_BIT,
1087                      &dst, &dst_stride);
1088
1089    DBG("sw blit %s mt %p %p/%d -> %s mt %p %p/%d (%dx%d)\n",
1090        _mesa_get_format_name(src_mt->format),
1091        src_mt, src, src_stride,
1092        _mesa_get_format_name(dst_mt->format),
1093        dst_mt, dst, dst_stride,
1094        width, height);
1095
1096    int row_size = cpp * width;
1097    if (src_stride == row_size &&
1098        dst_stride == row_size) {
1099       memcpy(dst, src, row_size * height);
1100    } else {
1101       for (int i = 0; i < height; i++) {
1102          memcpy(dst, src, row_size);
1103          dst += dst_stride;
1104          src += src_stride;
1105       }
1106    }
1107
1108    intel_miptree_unmap(brw, dst_mt, level, slice);
1109    intel_miptree_unmap(brw, src_mt, level, slice);
1110
1111    /* Don't forget to copy the stencil data over, too.  We could have skipped
1112     * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
1113     * shuffling the two data sources in/out of temporary storage instead of
1114     * the direct mapping we get this way.
1115     */
1116    if (dst_mt->stencil_mt) {
1117       assert(src_mt->stencil_mt);
1118       intel_miptree_copy_slice_sw(brw, dst_mt->stencil_mt, src_mt->stencil_mt,
1119                                   level, slice, width, height);
1120    }
1121 }
1122
1123 static void
1124 intel_miptree_copy_slice(struct brw_context *brw,
1125                          struct intel_mipmap_tree *dst_mt,
1126                          struct intel_mipmap_tree *src_mt,
1127                          int level,
1128                          int face,
1129                          int depth)
1130
1131 {
1132    gl_format format = src_mt->format;
1133    uint32_t width = src_mt->level[level].width;
1134    uint32_t height = src_mt->level[level].height;
1135    int slice;
1136
1137    if (face > 0)
1138       slice = face;
1139    else
1140       slice = depth;
1141
1142    assert(depth < src_mt->level[level].depth);
1143    assert(src_mt->format == dst_mt->format);
1144
1145    if (dst_mt->compressed) {
1146       height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h;
1147       width = ALIGN(width, dst_mt->align_w);
1148    }
1149
1150    /* If it's a packed depth/stencil buffer with separate stencil, the blit
1151     * below won't apply since we can't do the depth's Y tiling or the
1152     * stencil's W tiling in the blitter.
1153     */
1154    if (src_mt->stencil_mt) {
1155       intel_miptree_copy_slice_sw(brw,
1156                                   dst_mt, src_mt,
1157                                   level, slice,
1158                                   width, height);
1159       return;
1160    }
1161
1162    uint32_t dst_x, dst_y, src_x, src_y;
1163    intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y);
1164    intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y);
1165
1166    DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
1167        _mesa_get_format_name(src_mt->format),
1168        src_mt, src_x, src_y, src_mt->region->pitch,
1169        _mesa_get_format_name(dst_mt->format),
1170        dst_mt, dst_x, dst_y, dst_mt->region->pitch,
1171        width, height);
1172
1173    if (!intel_miptree_blit(brw,
1174                            src_mt, level, slice, 0, 0, false,
1175                            dst_mt, level, slice, 0, 0, false,
1176                            width, height, GL_COPY)) {
1177       perf_debug("miptree validate blit for %s failed\n",
1178                  _mesa_get_format_name(format));
1179
1180       intel_miptree_copy_slice_sw(brw, dst_mt, src_mt, level, slice,
1181                                   width, height);
1182    }
1183 }
1184
1185 /**
1186  * Copies the image's current data to the given miptree, and associates that
1187  * miptree with the image.
1188  *
1189  * If \c invalidate is true, then the actual image data does not need to be
1190  * copied, but the image still needs to be associated to the new miptree (this
1191  * is set to true if we're about to clear the image).
1192  */
1193 void
1194 intel_miptree_copy_teximage(struct brw_context *brw,
1195                             struct intel_texture_image *intelImage,
1196                             struct intel_mipmap_tree *dst_mt,
1197                             bool invalidate)
1198 {
1199    struct intel_mipmap_tree *src_mt = intelImage->mt;
1200    struct intel_texture_object *intel_obj =
1201       intel_texture_object(intelImage->base.Base.TexObject);
1202    int level = intelImage->base.Base.Level;
1203    int face = intelImage->base.Base.Face;
1204    GLuint depth = intelImage->base.Base.Depth;
1205
1206    if (!invalidate) {
1207       for (int slice = 0; slice < depth; slice++) {
1208          intel_miptree_copy_slice(brw, dst_mt, src_mt, level, face, slice);
1209       }
1210    }
1211
1212    intel_miptree_reference(&intelImage->mt, dst_mt);
1213    intel_obj->needs_validate = true;
1214 }
1215
1216 bool
1217 intel_miptree_alloc_mcs(struct brw_context *brw,
1218                         struct intel_mipmap_tree *mt,
1219                         GLuint num_samples)
1220 {
1221    assert(brw->gen >= 7); /* MCS only used on Gen7+ */
1222    assert(mt->mcs_mt == NULL);
1223
1224    /* Choose the correct format for the MCS buffer.  All that really matters
1225     * is that we allocate the right buffer size, since we'll always be
1226     * accessing this miptree using MCS-specific hardware mechanisms, which
1227     * infer the correct format based on num_samples.
1228     */
1229    gl_format format;
1230    switch (num_samples) {
1231    case 4:
1232       /* 8 bits/pixel are required for MCS data when using 4x MSAA (2 bits for
1233        * each sample).
1234        */
1235       format = MESA_FORMAT_R8;
1236       break;
1237    case 8:
1238       /* 32 bits/pixel are required for MCS data when using 8x MSAA (3 bits
1239        * for each sample, plus 8 padding bits).
1240        */
1241       format = MESA_FORMAT_R_UINT32;
1242       break;
1243    default:
1244       assert(!"Unrecognized sample count in intel_miptree_alloc_mcs");
1245       return false;
1246    };
1247
1248    /* From the Ivy Bridge PRM, Vol4 Part1 p76, "MCS Base Address":
1249     *
1250     *     "The MCS surface must be stored as Tile Y."
1251     */
1252    mt->mcs_state = INTEL_MCS_STATE_MSAA;
1253    mt->mcs_mt = intel_miptree_create(brw,
1254                                      mt->target,
1255                                      format,
1256                                      mt->first_level,
1257                                      mt->last_level,
1258                                      mt->logical_width0,
1259                                      mt->logical_height0,
1260                                      mt->logical_depth0,
1261                                      true,
1262                                      0 /* num_samples */,
1263                                      INTEL_MIPTREE_TILING_Y);
1264
1265    /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
1266     *
1267     *     When MCS buffer is enabled and bound to MSRT, it is required that it
1268     *     is cleared prior to any rendering.
1269     *
1270     * Since we don't use the MCS buffer for any purpose other than rendering,
1271     * it makes sense to just clear it immediately upon allocation.
1272     *
1273     * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
1274     */
1275    void *data = intel_miptree_map_raw(brw, mt->mcs_mt);
1276    memset(data, 0xff, mt->mcs_mt->region->bo->size);
1277    intel_miptree_unmap_raw(brw, mt->mcs_mt);
1278
1279    return mt->mcs_mt;
1280 }
1281
1282
1283 bool
1284 intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
1285                                  struct intel_mipmap_tree *mt)
1286 {
1287    assert(mt->mcs_mt == NULL);
1288
1289    /* The format of the MCS buffer is opaque to the driver; all that matters
1290     * is that we get its size and pitch right.  We'll pretend that the format
1291     * is R32.  Since an MCS tile covers 128 blocks horizontally, and a Y-tiled
1292     * R32 buffer is 32 pixels across, we'll need to scale the width down by
1293     * the block width and then a further factor of 4.  Since an MCS tile
1294     * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high,
1295     * we'll need to scale the height down by the block height and then a
1296     * further factor of 8.
1297     */
1298    const gl_format format = MESA_FORMAT_R_UINT32;
1299    unsigned block_width_px;
1300    unsigned block_height;
1301    intel_get_non_msrt_mcs_alignment(brw, mt, &block_width_px, &block_height);
1302    unsigned width_divisor = block_width_px * 4;
1303    unsigned height_divisor = block_height * 8;
1304    unsigned mcs_width =
1305       ALIGN(mt->logical_width0, width_divisor) / width_divisor;
1306    unsigned mcs_height =
1307       ALIGN(mt->logical_height0, height_divisor) / height_divisor;
1308    assert(mt->logical_depth0 == 1);
1309    mt->mcs_mt = intel_miptree_create(brw,
1310                                      mt->target,
1311                                      format,
1312                                      mt->first_level,
1313                                      mt->last_level,
1314                                      mcs_width,
1315                                      mcs_height,
1316                                      mt->logical_depth0,
1317                                      true,
1318                                      0 /* num_samples */,
1319                                      INTEL_MIPTREE_TILING_Y);
1320
1321    return mt->mcs_mt;
1322 }
1323
1324
1325 /**
1326  * Helper for intel_miptree_alloc_hiz() that sets
1327  * \c mt->level[level].slice[layer].has_hiz. Return true if and only if
1328  * \c has_hiz was set.
1329  */
1330 static bool
1331 intel_miptree_slice_enable_hiz(struct brw_context *brw,
1332                                struct intel_mipmap_tree *mt,
1333                                uint32_t level,
1334                                uint32_t layer)
1335 {
1336    assert(mt->hiz_mt);
1337
1338    if (brw->is_haswell) {
1339       const struct intel_mipmap_level *l = &mt->level[level];
1340
1341       /* Disable HiZ for LOD > 0 unless the width is 8 aligned
1342        * and the height is 4 aligned. This allows our HiZ support
1343        * to fulfill Haswell restrictions for HiZ ops. For LOD == 0,
1344        * we can grow the width & height to allow the HiZ op to
1345        * force the proper size alignments.
1346        */
1347       if (level > 0 && ((l->width & 7) || (l->height & 3))) {
1348          return false;
1349       }
1350    }
1351
1352    mt->level[level].slice[layer].has_hiz = true;
1353    return true;
1354 }
1355
1356
1357
1358 bool
1359 intel_miptree_alloc_hiz(struct brw_context *brw,
1360                         struct intel_mipmap_tree *mt)
1361 {
1362    assert(mt->hiz_mt == NULL);
1363    mt->hiz_mt = intel_miptree_create(brw,
1364                                      mt->target,
1365                                      mt->format,
1366                                      mt->first_level,
1367                                      mt->last_level,
1368                                      mt->logical_width0,
1369                                      mt->logical_height0,
1370                                      mt->logical_depth0,
1371                                      true,
1372                                      mt->num_samples,
1373                                      INTEL_MIPTREE_TILING_ANY);
1374
1375    if (!mt->hiz_mt)
1376       return false;
1377
1378    /* Mark that all slices need a HiZ resolve. */
1379    struct intel_resolve_map *head = &mt->hiz_map;
1380    for (int level = mt->first_level; level <= mt->last_level; ++level) {
1381       for (int layer = 0; layer < mt->level[level].depth; ++layer) {
1382          if (!intel_miptree_slice_enable_hiz(brw, mt, level, layer))
1383             continue;
1384
1385          head->next = malloc(sizeof(*head->next));
1386          head->next->prev = head;
1387          head->next->next = NULL;
1388          head = head->next;
1389
1390          head->level = level;
1391          head->layer = layer;
1392          head->need = GEN6_HIZ_OP_HIZ_RESOLVE;
1393       }
1394    }
1395
1396    return true;
1397 }
1398
1399 /**
1400  * Does the miptree slice have hiz enabled?
1401  */
1402 bool
1403 intel_miptree_slice_has_hiz(struct intel_mipmap_tree *mt,
1404                             uint32_t level,
1405                             uint32_t layer)
1406 {
1407    intel_miptree_check_level_layer(mt, level, layer);
1408    return mt->level[level].slice[layer].has_hiz;
1409 }
1410
1411 void
1412 intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,
1413                                           uint32_t level,
1414                                           uint32_t layer)
1415 {
1416    if (!intel_miptree_slice_has_hiz(mt, level, layer))
1417       return;
1418
1419    intel_resolve_map_set(&mt->hiz_map,
1420                          level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
1421 }
1422
1423
1424 void
1425 intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt,
1426                                             uint32_t level,
1427                                             uint32_t layer)
1428 {
1429    if (!intel_miptree_slice_has_hiz(mt, level, layer))
1430       return;
1431
1432    intel_resolve_map_set(&mt->hiz_map,
1433                          level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
1434 }
1435
1436 void
1437 intel_miptree_set_all_slices_need_depth_resolve(struct intel_mipmap_tree *mt,
1438                                                 uint32_t level)
1439 {
1440    uint32_t layer;
1441    uint32_t end_layer = mt->level[level].depth;
1442
1443    for (layer = 0; layer < end_layer; layer++) {
1444       intel_miptree_slice_set_needs_depth_resolve(mt, level, layer);
1445    }
1446 }
1447
1448 static bool
1449 intel_miptree_slice_resolve(struct brw_context *brw,
1450                             struct intel_mipmap_tree *mt,
1451                             uint32_t level,
1452                             uint32_t layer,
1453                             enum gen6_hiz_op need)
1454 {
1455    intel_miptree_check_level_layer(mt, level, layer);
1456
1457    struct intel_resolve_map *item =
1458          intel_resolve_map_get(&mt->hiz_map, level, layer);
1459
1460    if (!item || item->need != need)
1461       return false;
1462
1463    intel_hiz_exec(brw, mt, level, layer, need);
1464    intel_resolve_map_remove(item);
1465    return true;
1466 }
1467
1468 bool
1469 intel_miptree_slice_resolve_hiz(struct brw_context *brw,
1470                                 struct intel_mipmap_tree *mt,
1471                                 uint32_t level,
1472                                 uint32_t layer)
1473 {
1474    return intel_miptree_slice_resolve(brw, mt, level, layer,
1475                                       GEN6_HIZ_OP_HIZ_RESOLVE);
1476 }
1477
1478 bool
1479 intel_miptree_slice_resolve_depth(struct brw_context *brw,
1480                                   struct intel_mipmap_tree *mt,
1481                                   uint32_t level,
1482                                   uint32_t layer)
1483 {
1484    return intel_miptree_slice_resolve(brw, mt, level, layer,
1485                                       GEN6_HIZ_OP_DEPTH_RESOLVE);
1486 }
1487
1488 static bool
1489 intel_miptree_all_slices_resolve(struct brw_context *brw,
1490                                  struct intel_mipmap_tree *mt,
1491                                  enum gen6_hiz_op need)
1492 {
1493    bool did_resolve = false;
1494    struct intel_resolve_map *i, *next;
1495
1496    for (i = mt->hiz_map.next; i; i = next) {
1497       next = i->next;
1498       if (i->need != need)
1499          continue;
1500
1501       intel_hiz_exec(brw, mt, i->level, i->layer, need);
1502       intel_resolve_map_remove(i);
1503       did_resolve = true;
1504    }
1505
1506    return did_resolve;
1507 }
1508
1509 bool
1510 intel_miptree_all_slices_resolve_hiz(struct brw_context *brw,
1511                                      struct intel_mipmap_tree *mt)
1512 {
1513    return intel_miptree_all_slices_resolve(brw, mt,
1514                                            GEN6_HIZ_OP_HIZ_RESOLVE);
1515 }
1516
1517 bool
1518 intel_miptree_all_slices_resolve_depth(struct brw_context *brw,
1519                                        struct intel_mipmap_tree *mt)
1520 {
1521    return intel_miptree_all_slices_resolve(brw, mt,
1522                                            GEN6_HIZ_OP_DEPTH_RESOLVE);
1523 }
1524
1525
1526 void
1527 intel_miptree_resolve_color(struct brw_context *brw,
1528                             struct intel_mipmap_tree *mt)
1529 {
1530    switch (mt->mcs_state) {
1531    case INTEL_MCS_STATE_NONE:
1532    case INTEL_MCS_STATE_MSAA:
1533    case INTEL_MCS_STATE_RESOLVED:
1534       /* No resolve needed */
1535       break;
1536    case INTEL_MCS_STATE_UNRESOLVED:
1537    case INTEL_MCS_STATE_CLEAR:
1538       brw_blorp_resolve_color(brw, mt);
1539       break;
1540    }
1541 }
1542
1543
1544 /**
1545  * Make it possible to share the region backing the given miptree with another
1546  * process or another miptree.
1547  *
1548  * Fast color clears are unsafe with shared buffers, so we need to resolve and
1549  * then discard the MCS buffer, if present.  We also set the mcs_state to
1550  * INTEL_MCS_STATE_NONE to ensure that no MCS buffer gets allocated in the
1551  * future.
1552  */
1553 void
1554 intel_miptree_make_shareable(struct brw_context *brw,
1555                              struct intel_mipmap_tree *mt)
1556 {
1557    /* MCS buffers are also used for multisample buffers, but we can't resolve
1558     * away a multisample MCS buffer because it's an integral part of how the
1559     * pixel data is stored.  Fortunately this code path should never be
1560     * reached for multisample buffers.
1561     */
1562    assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE);
1563
1564    if (mt->mcs_mt) {
1565       intel_miptree_resolve_color(brw, mt);
1566       intel_miptree_release(&mt->mcs_mt);
1567       mt->mcs_state = INTEL_MCS_STATE_NONE;
1568    }
1569 }
1570
1571
/**
 * \brief Get pointer offset into stencil buffer.
 *
 * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
 * must decode the tile's layout in software.
 *
 * See
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
 *     Format.
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
 *
 * \param stride    buffer stride; one row of tiles occupies 64 * stride bytes
 * \param x         column of the byte
 * \param y         row of the byte
 * \param swizzled  whether to apply the bit-6 swizzle adjustment
 *
 * Even though the returned offset is always positive, the return type is
 * signed due to
 *    commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
 *    mesa: Fix return type of  _mesa_get_format_bytes() (#37351)
 */
static intptr_t
intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
{
   /* A tile is 64 bytes wide and 64 rows high, i.e. 4096 bytes. */
   const uint32_t tile_dim = 64;
   const uint32_t tile_bytes = 4096;
   const uint32_t tile_row_bytes = tile_dim * stride;

   /* The byte's position relative to the tile's base address. */
   const uint32_t bx = x % tile_dim;
   const uint32_t by = y % tile_dim;

   /* Start of the tile that contains the byte. */
   const uint32_t tile_base = (y / tile_dim) * tile_row_bytes
                            + (x / tile_dim) * tile_bytes;

   /* Offset inside the tile: the low bits of x and y are interleaved. */
   const uint32_t intra_tile = 512 * (bx >> 3)
                             +  64 * (by >> 3)
                             +  32 * ((by >> 2) & 1)
                             +  16 * ((bx >> 2) & 1)
                             +   8 * ((by >> 1) & 1)
                             +   4 * ((bx >> 1) & 1)
                             +   2 * (by & 1)
                             +   1 * (bx & 1);

   uintptr_t offset = tile_base + intra_tile;

   /* adjust for bit6 swizzling */
   if (swizzled && ((bx >> 3) & 1) == 1) {
      if (((by >> 3) & 1) == 0)
         offset += 64;
      else
         offset -= 64;
   }

   return offset;
}
1627
1628 static void
1629 intel_miptree_updownsample(struct brw_context *brw,
1630                            struct intel_mipmap_tree *src,
1631                            struct intel_mipmap_tree *dst,
1632                            unsigned width,
1633                            unsigned height)
1634 {
1635    int src_x0 = 0;
1636    int src_y0 = 0;
1637    int dst_x0 = 0;
1638    int dst_y0 = 0;
1639
1640    brw_blorp_blit_miptrees(brw,
1641                            src, 0 /* level */, 0 /* layer */,
1642                            dst, 0 /* level */, 0 /* layer */,
1643                            src_x0, src_y0,
1644                            width, height,
1645                            dst_x0, dst_y0,
1646                            width, height,
1647                            GL_NEAREST, false, false /*mirror x, y*/);
1648
1649    if (src->stencil_mt) {
1650       brw_blorp_blit_miptrees(brw,
1651                               src->stencil_mt, 0 /* level */, 0 /* layer */,
1652                               dst->stencil_mt, 0 /* level */, 0 /* layer */,
1653                               src_x0, src_y0,
1654                               width, height,
1655                               dst_x0, dst_y0,
1656                               width, height,
1657                               GL_NEAREST, false, false /*mirror x, y*/);
1658    }
1659 }
1660
1661 static void
1662 assert_is_flat(struct intel_mipmap_tree *mt)
1663 {
1664    assert(mt->target == GL_TEXTURE_2D);
1665    assert(mt->first_level == 0);
1666    assert(mt->last_level == 0);
1667 }
1668
1669 /**
1670  * \brief Downsample from mt to mt->singlesample_mt.
1671  *
1672  * If the miptree needs no downsample, then skip.
1673  */
1674 void
1675 intel_miptree_downsample(struct brw_context *brw,
1676                          struct intel_mipmap_tree *mt)
1677 {
1678    /* Only flat, renderbuffer-like miptrees are supported. */
1679    assert_is_flat(mt);
1680
1681    if (!mt->need_downsample)
1682       return;
1683    intel_miptree_updownsample(brw,
1684                               mt, mt->singlesample_mt,
1685                               mt->logical_width0,
1686                               mt->logical_height0);
1687    mt->need_downsample = false;
1688 }
1689
1690 /**
1691  * \brief Upsample from mt->singlesample_mt to mt.
1692  *
1693  * The upsample is done unconditionally.
1694  */
1695 void
1696 intel_miptree_upsample(struct brw_context *brw,
1697                        struct intel_mipmap_tree *mt)
1698 {
1699    /* Only flat, renderbuffer-like miptrees are supported. */
1700    assert_is_flat(mt);
1701    assert(!mt->need_downsample);
1702
1703    intel_miptree_updownsample(brw,
1704                               mt->singlesample_mt, mt,
1705                               mt->logical_width0,
1706                               mt->logical_height0);
1707 }
1708
1709 void *
1710 intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt)
1711 {
1712    /* CPU accesses to color buffers don't understand fast color clears, so
1713     * resolve any pending fast color clears before we map.
1714     */
1715    intel_miptree_resolve_color(brw, mt);
1716
1717    drm_intel_bo *bo = mt->region->bo;
1718
1719    if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
1720       if (drm_intel_bo_busy(bo)) {
1721          perf_debug("Mapping a busy miptree, causing a stall on the GPU.\n");
1722       }
1723    }
1724
1725    intel_batchbuffer_flush(brw);
1726
1727    if (mt->region->tiling != I915_TILING_NONE)
1728       drm_intel_gem_bo_map_gtt(bo);
1729    else
1730       drm_intel_bo_map(bo, true);
1731
1732    return bo->virtual;
1733 }
1734
1735 void
1736 intel_miptree_unmap_raw(struct brw_context *brw,
1737                         struct intel_mipmap_tree *mt)
1738 {
1739    drm_intel_bo_unmap(mt->region->bo);
1740 }
1741
1742 static void
1743 intel_miptree_map_gtt(struct brw_context *brw,
1744                       struct intel_mipmap_tree *mt,
1745                       struct intel_miptree_map *map,
1746                       unsigned int level, unsigned int slice)
1747 {
1748    unsigned int bw, bh;
1749    void *base;
1750    unsigned int image_x, image_y;
1751    int x = map->x;
1752    int y = map->y;
1753
1754    /* For compressed formats, the stride is the number of bytes per
1755     * row of blocks.  intel_miptree_get_image_offset() already does
1756     * the divide.
1757     */
1758    _mesa_get_format_block_size(mt->format, &bw, &bh);
1759    assert(y % bh == 0);
1760    y /= bh;
1761
1762    base = intel_miptree_map_raw(brw, mt) + mt->offset;
1763
1764    if (base == NULL)
1765       map->ptr = NULL;
1766    else {
1767       /* Note that in the case of cube maps, the caller must have passed the
1768        * slice number referencing the face.
1769       */
1770       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1771       x += image_x;
1772       y += image_y;
1773
1774       map->stride = mt->region->pitch;
1775       map->ptr = base + y * map->stride + x * mt->cpp;
1776    }
1777
1778    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1779        map->x, map->y, map->w, map->h,
1780        mt, _mesa_get_format_name(mt->format),
1781        x, y, map->ptr, map->stride);
1782 }
1783
/**
 * Counterpart of intel_miptree_map_gtt(): release the raw mapping of \c mt.
 *
 * The map record, level and slice are accepted only so that the signature
 * matches the other unmap helpers; none of them is consulted.
 */
static void
intel_miptree_unmap_gtt(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
                        struct intel_miptree_map *map,
                        unsigned int level,
                        unsigned int slice)
{
   (void) map;
   (void) level;
   (void) slice;

   intel_miptree_unmap_raw(brw, mt);
}
1793
1794 static void
1795 intel_miptree_map_blit(struct brw_context *brw,
1796                        struct intel_mipmap_tree *mt,
1797                        struct intel_miptree_map *map,
1798                        unsigned int level, unsigned int slice)
1799 {
1800    map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
1801                                   0, 0,
1802                                   map->w, map->h, 1,
1803                                   false, 0,
1804                                   INTEL_MIPTREE_TILING_NONE);
1805    if (!map->mt) {
1806       fprintf(stderr, "Failed to allocate blit temporary\n");
1807       goto fail;
1808    }
1809    map->stride = map->mt->region->pitch;
1810
1811    if (!intel_miptree_blit(brw,
1812                            mt, level, slice,
1813                            map->x, map->y, false,
1814                            map->mt, 0, 0,
1815                            0, 0, false,
1816                            map->w, map->h, GL_COPY)) {
1817       fprintf(stderr, "Failed to blit\n");
1818       goto fail;
1819    }
1820
1821    map->ptr = intel_miptree_map_raw(brw, map->mt);
1822
1823    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1824        map->x, map->y, map->w, map->h,
1825        mt, _mesa_get_format_name(mt->format),
1826        level, slice, map->ptr, map->stride);
1827
1828    return;
1829
1830 fail:
1831    intel_miptree_release(&map->mt);
1832    map->ptr = NULL;
1833    map->stride = 0;
1834 }
1835
1836 static void
1837 intel_miptree_unmap_blit(struct brw_context *brw,
1838                          struct intel_mipmap_tree *mt,
1839                          struct intel_miptree_map *map,
1840                          unsigned int level,
1841                          unsigned int slice)
1842 {
1843    struct gl_context *ctx = &brw->ctx;
1844
1845    intel_miptree_unmap_raw(brw, map->mt);
1846
1847    if (map->mode & GL_MAP_WRITE_BIT) {
1848       bool ok = intel_miptree_blit(brw,
1849                                    map->mt, 0, 0,
1850                                    0, 0, false,
1851                                    mt, level, slice,
1852                                    map->x, map->y, false,
1853                                    map->w, map->h, GL_COPY);
1854       WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
1855    }
1856
1857    intel_miptree_release(&map->mt);
1858 }
1859
1860 #ifdef __SSE4_1__
1861 /**
1862  * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
1863  */
1864 static void
1865 intel_miptree_map_movntdqa(struct brw_context *brw,
1866                            struct intel_mipmap_tree *mt,
1867                            struct intel_miptree_map *map,
1868                            unsigned int level, unsigned int slice)
1869 {
1870    assert(map->mode & GL_MAP_READ_BIT);
1871    assert(!(map->mode & GL_MAP_WRITE_BIT));
1872
1873    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1874        map->x, map->y, map->w, map->h,
1875        mt, _mesa_get_format_name(mt->format),
1876        level, slice, map->ptr, map->stride);
1877
1878    /* Map the original image */
1879    uint32_t image_x;
1880    uint32_t image_y;
1881    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1882    image_x += map->x;
1883    image_y += map->y;
1884
1885    void *src = intel_miptree_map_raw(brw, mt);
1886    if (!src)
1887       return;
1888    src += image_y * mt->region->pitch;
1889    src += image_x * mt->region->cpp;
1890
1891    /* Due to the pixel offsets for the particular image being mapped, our
1892     * src pointer may not be 16-byte aligned.  However, if the pitch is
1893     * divisible by 16, then the amount by which it's misaligned will remain
1894     * consistent from row to row.
1895     */
1896    assert((mt->region->pitch % 16) == 0);
1897    const int misalignment = ((uintptr_t) src) & 15;
1898
1899    /* Create an untiled temporary buffer for the mapping. */
1900    const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w);
1901
1902    map->stride = ALIGN(misalignment + width_bytes, 16);
1903
1904    map->buffer = malloc(map->stride * map->h);
1905    /* Offset the destination so it has the same misalignment as src. */
1906    map->ptr = map->buffer + misalignment;
1907
1908    assert((((uintptr_t) map->ptr) & 15) == misalignment);
1909
1910    for (uint32_t y = 0; y < map->h; y++) {
1911       void *dst_ptr = map->ptr + y * map->stride;
1912       void *src_ptr = src + y * mt->region->pitch;
1913
1914       _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
1915    }
1916
1917    intel_miptree_unmap_raw(brw, mt);
1918 }
1919
1920 static void
1921 intel_miptree_unmap_movntdqa(struct brw_context *brw,
1922                              struct intel_mipmap_tree *mt,
1923                              struct intel_miptree_map *map,
1924                              unsigned int level,
1925                              unsigned int slice)
1926 {
1927    free(map->buffer);
1928    map->buffer = NULL;
1929    map->ptr = NULL;
1930 }
1931 #endif
1932
1933 static void
1934 intel_miptree_map_s8(struct brw_context *brw,
1935                      struct intel_mipmap_tree *mt,
1936                      struct intel_miptree_map *map,
1937                      unsigned int level, unsigned int slice)
1938 {
1939    map->stride = map->w;
1940    map->buffer = map->ptr = malloc(map->stride * map->h);
1941    if (!map->buffer)
1942       return;
1943
1944    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
1945     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
1946     * invalidate is set, since we'll be writing the whole rectangle from our
1947     * temporary buffer back out.
1948     */
1949    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
1950       uint8_t *untiled_s8_map = map->ptr;
1951       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
1952       unsigned int image_x, image_y;
1953
1954       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1955
1956       for (uint32_t y = 0; y < map->h; y++) {
1957          for (uint32_t x = 0; x < map->w; x++) {
1958             ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
1959                                                x + image_x + map->x,
1960                                                y + image_y + map->y,
1961                                                brw->has_swizzling);
1962             untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
1963          }
1964       }
1965
1966       intel_miptree_unmap_raw(brw, mt);
1967
1968       DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __FUNCTION__,
1969           map->x, map->y, map->w, map->h,
1970           mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
1971    } else {
1972       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
1973           map->x, map->y, map->w, map->h,
1974           mt, map->ptr, map->stride);
1975    }
1976 }
1977
1978 static void
1979 intel_miptree_unmap_s8(struct brw_context *brw,
1980                        struct intel_mipmap_tree *mt,
1981                        struct intel_miptree_map *map,
1982                        unsigned int level,
1983                        unsigned int slice)
1984 {
1985    if (map->mode & GL_MAP_WRITE_BIT) {
1986       unsigned int image_x, image_y;
1987       uint8_t *untiled_s8_map = map->ptr;
1988       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
1989
1990       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1991
1992       for (uint32_t y = 0; y < map->h; y++) {
1993          for (uint32_t x = 0; x < map->w; x++) {
1994             ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
1995                                                x + map->x,
1996                                                y + map->y,
1997                                                brw->has_swizzling);
1998             tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
1999          }
2000       }
2001
2002       intel_miptree_unmap_raw(brw, mt);
2003    }
2004
2005    free(map->buffer);
2006 }
2007
2008 static void
2009 intel_miptree_map_etc(struct brw_context *brw,
2010                       struct intel_mipmap_tree *mt,
2011                       struct intel_miptree_map *map,
2012                       unsigned int level,
2013                       unsigned int slice)
2014 {
2015    assert(mt->etc_format != MESA_FORMAT_NONE);
2016    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
2017       assert(mt->format == MESA_FORMAT_RGBX8888_REV);
2018    }
2019
2020    assert(map->mode & GL_MAP_WRITE_BIT);
2021    assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
2022
2023    map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
2024    map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
2025                                                 map->w, map->h, 1));
2026    map->ptr = map->buffer;
2027 }
2028
2029 static void
2030 intel_miptree_unmap_etc(struct brw_context *brw,
2031                         struct intel_mipmap_tree *mt,
2032                         struct intel_miptree_map *map,
2033                         unsigned int level,
2034                         unsigned int slice)
2035 {
2036    uint32_t image_x;
2037    uint32_t image_y;
2038    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2039
2040    image_x += map->x;
2041    image_y += map->y;
2042
2043    uint8_t *dst = intel_miptree_map_raw(brw, mt)
2044                 + image_y * mt->region->pitch
2045                 + image_x * mt->region->cpp;
2046
2047    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
2048       _mesa_etc1_unpack_rgba8888(dst, mt->region->pitch,
2049                                  map->ptr, map->stride,
2050                                  map->w, map->h);
2051    else
2052       _mesa_unpack_etc2_format(dst, mt->region->pitch,
2053                                map->ptr, map->stride,
2054                                map->w, map->h, mt->etc_format);
2055
2056    intel_miptree_unmap_raw(brw, mt);
2057    free(map->buffer);
2058 }
2059
2060 /**
2061  * Mapping function for packed depth/stencil miptrees backed by real separate
2062  * miptrees for depth and stencil.
2063  *
2064  * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
2065  * separate from the depth buffer.  Yet at the GL API level, we have to expose
2066  * packed depth/stencil textures and FBO attachments, and Mesa core expects to
2067  * be able to map that memory for texture storage and glReadPixels-type
2068  * operations.  We give Mesa core that access by mallocing a temporary and
2069  * copying the data between the actual backing store and the temporary.
2070  */
2071 static void
2072 intel_miptree_map_depthstencil(struct brw_context *brw,
2073                                struct intel_mipmap_tree *mt,
2074                                struct intel_miptree_map *map,
2075                                unsigned int level, unsigned int slice)
2076 {
2077    struct intel_mipmap_tree *z_mt = mt;
2078    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
2079    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT;
2080    int packed_bpp = map_z32f_x24s8 ? 8 : 4;
2081
2082    map->stride = map->w * packed_bpp;
2083    map->buffer = map->ptr = malloc(map->stride * map->h);
2084    if (!map->buffer)
2085       return;
2086
2087    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
2088     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
2089     * invalidate is set, since we'll be writing the whole rectangle from our
2090     * temporary buffer back out.
2091     */
2092    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
2093       uint32_t *packed_map = map->ptr;
2094       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
2095       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
2096       unsigned int s_image_x, s_image_y;
2097       unsigned int z_image_x, z_image_y;
2098
2099       intel_miptree_get_image_offset(s_mt, level, slice,
2100                                      &s_image_x, &s_image_y);
2101       intel_miptree_get_image_offset(z_mt, level, slice,
2102                                      &z_image_x, &z_image_y);
2103
2104       for (uint32_t y = 0; y < map->h; y++) {
2105          for (uint32_t x = 0; x < map->w; x++) {
2106             int map_x = map->x + x, map_y = map->y + y;
2107             ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
2108                                                  map_x + s_image_x,
2109                                                  map_y + s_image_y,
2110                                                  brw->has_swizzling);
2111             ptrdiff_t z_offset = ((map_y + z_image_y) *
2112                                   (z_mt->region->pitch / 4) +
2113                                   (map_x + z_image_x));
2114             uint8_t s = s_map[s_offset];
2115             uint32_t z = z_map[z_offset];
2116
2117             if (map_z32f_x24s8) {
2118                packed_map[(y * map->w + x) * 2 + 0] = z;
2119                packed_map[(y * map->w + x) * 2 + 1] = s;
2120             } else {
2121                packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
2122             }
2123          }
2124       }
2125
2126       intel_miptree_unmap_raw(brw, s_mt);
2127       intel_miptree_unmap_raw(brw, z_mt);
2128
2129       DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
2130           __FUNCTION__,
2131           map->x, map->y, map->w, map->h,
2132           z_mt, map->x + z_image_x, map->y + z_image_y,
2133           s_mt, map->x + s_image_x, map->y + s_image_y,
2134           map->ptr, map->stride);
2135    } else {
2136       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
2137           map->x, map->y, map->w, map->h,
2138           mt, map->ptr, map->stride);
2139    }
2140 }
2141
2142 static void
2143 intel_miptree_unmap_depthstencil(struct brw_context *brw,
2144                                  struct intel_mipmap_tree *mt,
2145                                  struct intel_miptree_map *map,
2146                                  unsigned int level,
2147                                  unsigned int slice)
2148 {
2149    struct intel_mipmap_tree *z_mt = mt;
2150    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
2151    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT;
2152
2153    if (map->mode & GL_MAP_WRITE_BIT) {
2154       uint32_t *packed_map = map->ptr;
2155       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
2156       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
2157       unsigned int s_image_x, s_image_y;
2158       unsigned int z_image_x, z_image_y;
2159
2160       intel_miptree_get_image_offset(s_mt, level, slice,
2161                                      &s_image_x, &s_image_y);
2162       intel_miptree_get_image_offset(z_mt, level, slice,
2163                                      &z_image_x, &z_image_y);
2164
2165       for (uint32_t y = 0; y < map->h; y++) {
2166          for (uint32_t x = 0; x < map->w; x++) {
2167             ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
2168                                                  x + s_image_x + map->x,
2169                                                  y + s_image_y + map->y,
2170                                                  brw->has_swizzling);
2171             ptrdiff_t z_offset = ((y + z_image_y) *
2172                                   (z_mt->region->pitch / 4) +
2173                                   (x + z_image_x));
2174
2175             if (map_z32f_x24s8) {
2176                z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
2177                s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
2178             } else {
2179                uint32_t packed = packed_map[y * map->w + x];
2180                s_map[s_offset] = packed >> 24;
2181                z_map[z_offset] = packed;
2182             }
2183          }
2184       }
2185
2186       intel_miptree_unmap_raw(brw, s_mt);
2187       intel_miptree_unmap_raw(brw, z_mt);
2188
2189       DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
2190           __FUNCTION__,
2191           map->x, map->y, map->w, map->h,
2192           z_mt, _mesa_get_format_name(z_mt->format),
2193           map->x + z_image_x, map->y + z_image_y,
2194           s_mt, map->x + s_image_x, map->y + s_image_y,
2195           map->ptr, map->stride);
2196    }
2197
2198    free(map->buffer);
2199 }
2200
2201 /**
2202  * Create and attach a map to the miptree at (level, slice). Return the
2203  * attached map.
2204  */
2205 static struct intel_miptree_map*
2206 intel_miptree_attach_map(struct intel_mipmap_tree *mt,
2207                          unsigned int level,
2208                          unsigned int slice,
2209                          unsigned int x,
2210                          unsigned int y,
2211                          unsigned int w,
2212                          unsigned int h,
2213                          GLbitfield mode)
2214 {
2215    struct intel_miptree_map *map = calloc(1, sizeof(*map));
2216
2217    if (!map)
2218       return NULL;
2219
2220    assert(mt->level[level].slice[slice].map == NULL);
2221    mt->level[level].slice[slice].map = map;
2222
2223    map->mode = mode;
2224    map->x = x;
2225    map->y = y;
2226    map->w = w;
2227    map->h = h;
2228
2229    return map;
2230 }
2231
2232 /**
2233  * Release the map at (level, slice).
2234  */
2235 static void
2236 intel_miptree_release_map(struct intel_mipmap_tree *mt,
2237                          unsigned int level,
2238                          unsigned int slice)
2239 {
2240    struct intel_miptree_map **map;
2241
2242    map = &mt->level[level].slice[slice].map;
2243    free(*map);
2244    *map = NULL;
2245 }
2246
2247 static void
2248 intel_miptree_map_singlesample(struct brw_context *brw,
2249                                struct intel_mipmap_tree *mt,
2250                                unsigned int level,
2251                                unsigned int slice,
2252                                unsigned int x,
2253                                unsigned int y,
2254                                unsigned int w,
2255                                unsigned int h,
2256                                GLbitfield mode,
2257                                void **out_ptr,
2258                                int *out_stride)
2259 {
2260    struct intel_miptree_map *map;
2261
2262    assert(mt->num_samples <= 1);
2263
2264    map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
2265    if (!map){
2266       *out_ptr = NULL;
2267       *out_stride = 0;
2268       return;
2269    }
2270
2271    intel_miptree_slice_resolve_depth(brw, mt, level, slice);
2272    if (map->mode & GL_MAP_WRITE_BIT) {
2273       intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice);
2274    }
2275
2276    if (mt->format == MESA_FORMAT_S8) {
2277       intel_miptree_map_s8(brw, mt, map, level, slice);
2278    } else if (mt->etc_format != MESA_FORMAT_NONE &&
2279               !(mode & BRW_MAP_DIRECT_BIT)) {
2280       intel_miptree_map_etc(brw, mt, map, level, slice);
2281    } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
2282       intel_miptree_map_depthstencil(brw, mt, map, level, slice);
2283    }
2284    /* See intel_miptree_blit() for details on the 32k pitch limit. */
2285    else if (brw->has_llc &&
2286             !(mode & GL_MAP_WRITE_BIT) &&
2287             !mt->compressed &&
2288             (mt->region->tiling == I915_TILING_X ||
2289              (brw->gen >= 6 && mt->region->tiling == I915_TILING_Y)) &&
2290             mt->region->pitch < 32768) {
2291       intel_miptree_map_blit(brw, mt, map, level, slice);
2292    } else if (mt->region->tiling != I915_TILING_NONE &&
2293               mt->region->bo->size >= brw->max_gtt_map_object_size) {
2294       assert(mt->region->pitch < 32768);
2295       intel_miptree_map_blit(brw, mt, map, level, slice);
2296 #ifdef __SSE4_1__
2297    } else if (!(mode & GL_MAP_WRITE_BIT) && !mt->compressed) {
2298       intel_miptree_map_movntdqa(brw, mt, map, level, slice);
2299 #endif
2300    } else {
2301       intel_miptree_map_gtt(brw, mt, map, level, slice);
2302    }
2303
2304    *out_ptr = map->ptr;
2305    *out_stride = map->stride;
2306
2307    if (map->ptr == NULL)
2308       intel_miptree_release_map(mt, level, slice);
2309 }
2310
2311 static void
2312 intel_miptree_unmap_singlesample(struct brw_context *brw,
2313                                  struct intel_mipmap_tree *mt,
2314                                  unsigned int level,
2315                                  unsigned int slice)
2316 {
2317    struct intel_miptree_map *map = mt->level[level].slice[slice].map;
2318
2319    assert(mt->num_samples <= 1);
2320
2321    if (!map)
2322       return;
2323
2324    DBG("%s: mt %p (%s) level %d slice %d\n", __FUNCTION__,
2325        mt, _mesa_get_format_name(mt->format), level, slice);
2326
2327    if (mt->format == MESA_FORMAT_S8) {
2328       intel_miptree_unmap_s8(brw, mt, map, level, slice);
2329    } else if (mt->etc_format != MESA_FORMAT_NONE &&
2330               !(map->mode & BRW_MAP_DIRECT_BIT)) {
2331       intel_miptree_unmap_etc(brw, mt, map, level, slice);
2332    } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
2333       intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
2334    } else if (map->mt) {
2335       intel_miptree_unmap_blit(brw, mt, map, level, slice);
2336 #ifdef __SSE4_1__
2337    } else if (map->buffer) {
2338       intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
2339 #endif
2340    } else {
2341       intel_miptree_unmap_gtt(brw, mt, map, level, slice);
2342    }
2343
2344    intel_miptree_release_map(mt, level, slice);
2345 }
2346
2347 static void
2348 intel_miptree_map_multisample(struct brw_context *brw,
2349                               struct intel_mipmap_tree *mt,
2350                               unsigned int level,
2351                               unsigned int slice,
2352                               unsigned int x,
2353                               unsigned int y,
2354                               unsigned int w,
2355                               unsigned int h,
2356                               GLbitfield mode,
2357                               void **out_ptr,
2358                               int *out_stride)
2359 {
2360    struct gl_context *ctx = &brw->ctx;
2361    struct intel_miptree_map *map;
2362
2363    assert(mt->num_samples > 1);
2364
2365    /* Only flat, renderbuffer-like miptrees are supported. */
2366    if (mt->target != GL_TEXTURE_2D ||
2367        mt->first_level != 0 ||
2368        mt->last_level != 0) {
2369       _mesa_problem(ctx, "attempt to map a multisample miptree for "
2370                     "which (target, first_level, last_level != "
2371                     "(GL_TEXTURE_2D, 0, 0)");
2372       goto fail;
2373    }
2374
2375    map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
2376    if (!map)
2377       goto fail;
2378
2379    if (!mt->singlesample_mt) {
2380       mt->singlesample_mt =
2381          intel_miptree_create_for_renderbuffer(brw,
2382                                                mt->format,
2383                                                mt->logical_width0,
2384                                                mt->logical_height0,
2385                                                0 /*num_samples*/);
2386       if (!mt->singlesample_mt)
2387          goto fail;
2388
2389       map->singlesample_mt_is_tmp = true;
2390       mt->need_downsample = true;
2391    }
2392
2393    intel_miptree_downsample(brw, mt);
2394    intel_miptree_map_singlesample(brw, mt->singlesample_mt,
2395                                   level, slice,
2396                                   x, y, w, h,
2397                                   mode,
2398                                   out_ptr, out_stride);
2399    return;
2400
2401 fail:
2402    intel_miptree_release_map(mt, level, slice);
2403    *out_ptr = NULL;
2404    *out_stride = 0;
2405 }
2406
2407 static void
2408 intel_miptree_unmap_multisample(struct brw_context *brw,
2409                                 struct intel_mipmap_tree *mt,
2410                                 unsigned int level,
2411                                 unsigned int slice)
2412 {
2413    struct intel_miptree_map *map = mt->level[level].slice[slice].map;
2414
2415    assert(mt->num_samples > 1);
2416
2417    if (!map)
2418       return;
2419
2420    intel_miptree_unmap_singlesample(brw, mt->singlesample_mt, level, slice);
2421
2422    mt->need_downsample = false;
2423    if (map->mode & GL_MAP_WRITE_BIT)
2424       intel_miptree_upsample(brw, mt);
2425
2426    if (map->singlesample_mt_is_tmp)
2427       intel_miptree_release(&mt->singlesample_mt);
2428
2429    intel_miptree_release_map(mt, level, slice);
2430 }
2431
2432 void
2433 intel_miptree_map(struct brw_context *brw,
2434                   struct intel_mipmap_tree *mt,
2435                   unsigned int level,
2436                   unsigned int slice,
2437                   unsigned int x,
2438                   unsigned int y,
2439                   unsigned int w,
2440                   unsigned int h,
2441                   GLbitfield mode,
2442                   void **out_ptr,
2443                   int *out_stride)
2444 {
2445    if (mt->num_samples <= 1)
2446       intel_miptree_map_singlesample(brw, mt,
2447                                      level, slice,
2448                                      x, y, w, h,
2449                                      mode,
2450                                      out_ptr, out_stride);
2451    else
2452       intel_miptree_map_multisample(brw, mt,
2453                                     level, slice,
2454                                     x, y, w, h,
2455                                     mode,
2456                                     out_ptr, out_stride);
2457 }
2458
2459 void
2460 intel_miptree_unmap(struct brw_context *brw,
2461                     struct intel_mipmap_tree *mt,
2462                     unsigned int level,
2463                     unsigned int slice)
2464 {
2465    if (mt->num_samples <= 1)
2466       intel_miptree_unmap_singlesample(brw, mt, level, slice);
2467    else
2468       intel_miptree_unmap_multisample(brw, mt, level, slice);
2469 }