src/mesa/drivers/dri/i965/intel_mipmap_tree.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2006 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 #include <GL/gl.h>
  29 #include <GL/internal/dri_interface.h>
  30
  31 #include "intel_batchbuffer.h"
  32 #include "intel_chipset.h"
  33 #include "intel_mipmap_tree.h"
  34 #include "intel_regions.h"
  35 #include "intel_resolve_map.h"
  36 #include "intel_tex.h"
  37 #include "intel_blit.h"
  38
  39 #include "brw_blorp.h"
  40 #include "brw_context.h"
  41
  42 #include "main/enums.h"
  43 #include "main/formats.h"
  44 #include "main/glformats.h"
  45 #include "main/texcompress_etc.h"
  46 #include "main/teximage.h"
  47 #include "main/streaming-load-memcpy.h"
  48
  49 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
  50
  51 static GLenum
  52 target_to_target(GLenum target)
  53 {
  54    switch (target) {
  55    case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
  56    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
  57    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
  58    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
  59    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
  60    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
  61       return GL_TEXTURE_CUBE_MAP_ARB;
  62    default:
  63       return target;
  64    }
  65 }
  66
  67
  68 /**
  69  * Determine which MSAA layout should be used by the MSAA surface being
  70  * created, based on the chip generation and the surface type.
  71  */
  72 static enum intel_msaa_layout
  73 compute_msaa_layout(struct brw_context *brw, mesa_format format, GLenum target)
  74 {
  75    /* Prior to Gen7, all MSAA surfaces used IMS layout. */
  76    if (brw->gen < 7)
  77       return INTEL_MSAA_LAYOUT_IMS;
  78
  79    /* In Gen7, IMS layout is only used for depth and stencil buffers. */
  80    switch (_mesa_get_format_base_format(format)) {
  81    case GL_DEPTH_COMPONENT:
  82    case GL_STENCIL_INDEX:
  83    case GL_DEPTH_STENCIL:
  84       return INTEL_MSAA_LAYOUT_IMS;
  85    default:
  86       /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
  87        *
  88        *   This field must be set to 0 for all SINT MSRTs when all RT channels
  89        *   are not written
  90        *
  91        * In practice this means that we have to disable MCS for all signed
  92        * integer MSAA buffers.  The alternative, to disable MCS only when one
  93        * of the render target channels is disabled, is impractical because it
  94        * would require converting between CMS and UMS MSAA layouts on the fly,
  95        * which is expensive.
  96        */
  97       if (_mesa_get_format_datatype(format) == GL_INT) {
  98          /* TODO: is this workaround needed for future chipsets? */
  99          assert(brw->gen == 7);
 100          return INTEL_MSAA_LAYOUT_UMS;
 101       } else {
 102          return INTEL_MSAA_LAYOUT_CMS;
 103       }
 104    }
 105 }
 106
 107
 108 /**
 109  * For single-sampled render targets ("non-MSRT"), the MCS buffer is a
 110  * scaled-down bitfield representation of the color buffer which is capable of
 111  * recording when blocks of the color buffer are equal to the clear value.
 112  * This function returns the block size that will be used by the MCS buffer
 113  * corresponding to a certain color miptree.
 114  *
 115  * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 116  * beneath the "Fast Color Clear" bullet (p327):
 117  *
 118  *     The following table describes the RT alignment
 119  *
 120  *                       Pixels  Lines
 121  *         TiledY RT CL
 122  *             bpp
 123  *              32          8      4
 124  *              64          4      4
 125  *             128          2      4
 126  *         TiledX RT CL
 127  *             bpp
 128  *              32         16      2
 129  *              64          8      2
 130  *             128          4      2
 131  *
 132  * This alignment has the following uses:
 133  *
 134  * - For figuring out the size of the MCS buffer.  Each 4k tile in the MCS
 135  *   buffer contains 128 blocks horizontally and 256 blocks vertically.
 136  *
 137  * - For figuring out alignment restrictions for a fast clear operation.  Fast
 138  *   clear operations must always clear aligned multiples of 16 blocks
 139  *   horizontally and 32 blocks vertically.
 140  *
 141  * - For scaling down the coordinates sent through the render pipeline during
 142  *   a fast clear.  X coordinates must be scaled down by 8 times the block
 143  *   width, and Y coordinates by 16 times the block height.
 144  *
 145  * - For scaling down the coordinates sent through the render pipeline during
 146  *   a "Render Target Resolve" operation.  X coordinates must be scaled down
 147  *   by half the block width, and Y coordinates by half the block height.
 148  */
 149 void
 150 intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
 151                                  struct intel_mipmap_tree *mt,
 152                                  unsigned *width_px, unsigned *height)
 153 {
 154    switch (mt->region->tiling) {
 155    default:
 156       assert(!"Non-MSRT MCS requires X or Y tiling");
 157       /* In release builds, fall through */
 158    case I915_TILING_Y:
 159       *width_px = 32 / mt->cpp;
 160       *height = 4;
 161       break;
 162    case I915_TILING_X:
 163       *width_px = 64 / mt->cpp;
 164       *height = 2;
 165    }
 166 }
 167
 168
 169 /**
 170  * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
 171  * can be used.
 172  *
 173  * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 174  * beneath the "Fast Color Clear" bullet (p326):
 175  *
 176  *     - Support is limited to tiled render targets.
 177  *     - Support is for non-mip-mapped and non-array surface types only.
 178  *
 179  * And then later, on p327:
 180  *
 181  *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
 182  *       64bpp, and 128bpp.
 183  */
 184 bool
 185 intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
 186                                        struct intel_mipmap_tree *mt)
 187 {
 188    /* MCS support does not exist prior to Gen7 */
 189    if (brw->gen < 7 || brw->gen >= 8)
 190       return false;
 191
 192    /* MCS is only supported for color buffers */
 193    switch (_mesa_get_format_base_format(mt->format)) {
 194    case GL_DEPTH_COMPONENT:
 195    case GL_DEPTH_STENCIL:
 196    case GL_STENCIL_INDEX:
 197       return false;
 198    }
 199
 200    if (mt->region->tiling != I915_TILING_X &&
 201        mt->region->tiling != I915_TILING_Y)
 202       return false;
 203    if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
 204       return false;
 205    if (mt->first_level != 0 || mt->last_level != 0)
 206       return false;
 207    if (mt->physical_depth0 != 1)
 208       return false;
 209
 210    /* There's no point in using an MCS buffer if the surface isn't in a
 211     * renderable format.
 212     */
 213    if (!brw->format_supported_as_render_target[mt->format])
 214       return false;
 215
 216    return true;
 217 }
 218
 219
 220 /**
 221  * @param for_bo Indicates that the caller is
 222  *        intel_miptree_create_for_bo(). If true, then do not create
 223  *        \c stencil_mt.
 224  */
 225 struct intel_mipmap_tree *
 226 intel_miptree_create_layout(struct brw_context *brw,
 227                             GLenum target,
 228                             mesa_format format,
 229                             GLuint first_level,
 230                             GLuint last_level,
 231                             GLuint width0,
 232                             GLuint height0,
 233                             GLuint depth0,
 234                             bool for_bo,
 235                             GLuint num_samples)
 236 {
 237    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
 238    if (!mt)
 239       return NULL;
 240
 241    DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__,
 242        _mesa_lookup_enum_by_nr(target),
 243        _mesa_get_format_name(format),
 244        first_level, last_level, mt);
 245
 246    mt->target = target_to_target(target);
 247    mt->format = format;
 248    mt->first_level = first_level;
 249    mt->last_level = last_level;
 250    mt->logical_width0 = width0;
 251    mt->logical_height0 = height0;
 252    mt->logical_depth0 = depth0;
 253    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
 254
 255    /* The cpp is bytes per (1, blockheight)-sized block for compressed
 256     * textures.  This is why you'll see divides by blockheight all over
 257     */
 258    unsigned bw, bh;
 259    _mesa_get_format_block_size(format, &bw, &bh);
 260    assert(_mesa_get_format_bytes(mt->format) % bw == 0);
 261    mt->cpp = _mesa_get_format_bytes(mt->format) / bw;
 262
 263    mt->num_samples = num_samples;
 264    mt->compressed = _mesa_is_format_compressed(format);
 265    mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE;
 266    mt->refcount = 1;
 267
 268    if (num_samples > 1) {
 269       /* Adjust width/height/depth for MSAA */
 270       mt->msaa_layout = compute_msaa_layout(brw, format, mt->target);
 271       if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
 272          /* In the Sandy Bridge PRM, volume 4, part 1, page 31, it says:
 273           *
 274           *     "Any of the other messages (sample*, LOD, load4) used with a
 275           *      (4x) multisampled surface will in-effect sample a surface with
 276           *      double the height and width as that indicated in the surface
 277           *      state. Each pixel position on the original-sized surface is
 278           *      replaced with a 2x2 of samples with the following arrangement:
 279           *
 280           *         sample 0 sample 2
 281           *         sample 1 sample 3"
 282           *
 283           * Thus, when sampling from a multisampled texture, it behaves as
 284           * though the layout in memory for (x,y,sample) is:
 285           *
 286           *      (0,0,0) (0,0,2)   (1,0,0) (1,0,2)
 287           *      (0,0,1) (0,0,3)   (1,0,1) (1,0,3)
 288           *
 289           *      (0,1,0) (0,1,2)   (1,1,0) (1,1,2)
 290           *      (0,1,1) (0,1,3)   (1,1,1) (1,1,3)
 291           *
 292           * However, the actual layout of multisampled data in memory is:
 293           *
 294           *      (0,0,0) (1,0,0)   (0,0,1) (1,0,1)
 295           *      (0,1,0) (1,1,0)   (0,1,1) (1,1,1)
 296           *
 297           *      (0,0,2) (1,0,2)   (0,0,3) (1,0,3)
 298           *      (0,1,2) (1,1,2)   (0,1,3) (1,1,3)
 299           *
 300           * This pattern repeats for each 2x2 pixel block.
 301           *
 302           * As a result, when calculating the size of our 4-sample buffer for
 303           * an odd width or height, we have to align before scaling up because
 304           * sample 3 is in that bottom right 2x2 block.
 305           */
 306          switch (num_samples) {
 307          case 4:
 308             width0 = ALIGN(width0, 2) * 2;
 309             height0 = ALIGN(height0, 2) * 2;
 310             break;
 311          case 8:
 312             width0 = ALIGN(width0, 2) * 4;
 313             height0 = ALIGN(height0, 2) * 2;
 314             break;
 315          default:
 316             /* num_samples should already have been quantized to 0, 1, 4, or
 317              * 8.
 318              */
 319             assert(false);
 320          }
 321       } else {
 322          /* Non-interleaved */
 323          depth0 *= num_samples;
 324       }
 325    }
 326
 327    /* array_spacing_lod0 is only used for non-IMS MSAA surfaces.  TODO: can we
 328     * use it elsewhere?
 329     */
 330    switch (mt->msaa_layout) {
 331    case INTEL_MSAA_LAYOUT_NONE:
 332    case INTEL_MSAA_LAYOUT_IMS:
 333       mt->array_spacing_lod0 = false;
 334       break;
 335    case INTEL_MSAA_LAYOUT_UMS:
 336    case INTEL_MSAA_LAYOUT_CMS:
 337       mt->array_spacing_lod0 = true;
 338       break;
 339    }
 340
 341    if (target == GL_TEXTURE_CUBE_MAP) {
 342       assert(depth0 == 1);
 343       depth0 = 6;
 344    }
 345
 346    mt->physical_width0 = width0;
 347    mt->physical_height0 = height0;
 348    mt->physical_depth0 = depth0;
 349
 350    if (!for_bo &&
 351        _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
 352        (brw->must_use_separate_stencil ||
 353         (brw->has_separate_stencil && brw_is_hiz_depth_format(brw, format)))) {
 354       mt->stencil_mt = intel_miptree_create(brw,
 355                                             mt->target,
 356                                             MESA_FORMAT_S_UINT8,
 357                                             mt->first_level,
 358                                             mt->last_level,
 359                                             mt->logical_width0,
 360                                             mt->logical_height0,
 361                                             mt->logical_depth0,
 362                                             true,
 363                                             num_samples,
 364                                             INTEL_MIPTREE_TILING_ANY);
 365       if (!mt->stencil_mt) {
 366          intel_miptree_release(&mt);
 367          return NULL;
 368       }
 369
 370       /* Fix up the Z miptree format for how we're splitting out separate
 371        * stencil.  Gen7 expects there to be no stencil bits in its depth buffer.
 372        */
 373       if (mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT) {
 374          mt->format = MESA_FORMAT_Z24_UNORM_S8_UINT;
 375       } else if (mt->format == MESA_FORMAT_Z32_FLOAT_S8X24_UINT) {
 376          mt->format = MESA_FORMAT_Z_FLOAT32;
 377          mt->cpp = 4;
 378       } else {
 379          _mesa_problem(NULL, "Unknown format %s in separate stencil mt\n",
 380                        _mesa_get_format_name(mt->format));
 381       }
 382    }
 383
 384    brw_miptree_layout(brw, mt);
 385
 386    return mt;
 387 }
 388
 389 /**
 390  * \brief Helper function for intel_miptree_create().
 391  */
 392 static uint32_t
 393 intel_miptree_choose_tiling(struct brw_context *brw,
 394                             mesa_format format,
 395                             uint32_t width0,
 396                             uint32_t num_samples,
 397                             enum intel_miptree_tiling_mode requested,
 398                             struct intel_mipmap_tree *mt)
 399 {
 400    if (format == MESA_FORMAT_S_UINT8) {
 401       /* The stencil buffer is W tiled. However, we request from the kernel a
 402        * non-tiled buffer because the GTT is incapable of W fencing.
 403        */
 404       return I915_TILING_NONE;
 405    }
 406
 407    /* Some usages may want only one type of tiling, like depth miptrees (Y
 408     * tiled), or temporary BOs for uploading data once (linear).
 409     */
 410    switch (requested) {
 411    case INTEL_MIPTREE_TILING_ANY:
 412       break;
 413    case INTEL_MIPTREE_TILING_Y:
 414       return I915_TILING_Y;
 415    case INTEL_MIPTREE_TILING_NONE:
 416       return I915_TILING_NONE;
 417    }
 418
 419    if (num_samples > 1) {
 420       /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
 421        * Surface"):
 422        *
 423        *   [DevSNB+]: For multi-sample render targets, this field must be
 424        *   1. MSRTs can only be tiled.
 425        *
 426        * Our usual reason for preferring X tiling (fast blits using the
 427        * blitting engine) doesn't apply to MSAA, since we'll generally be
 428        * downsampling or upsampling when blitting between the MSAA buffer
 429        * and another buffer, and the blitting engine doesn't support that.
 430        * So use Y tiling, since it makes better use of the cache.
 431        */
 432       return I915_TILING_Y;
 433    }
 434
 435    GLenum base_format = _mesa_get_format_base_format(format);
 436    if (base_format == GL_DEPTH_COMPONENT ||
 437        base_format == GL_DEPTH_STENCIL_EXT)
 438       return I915_TILING_Y;
 439
 440    int minimum_pitch = mt->total_width * mt->cpp;
 441
 442    /* If the width is much smaller than a tile, don't bother tiling. */
 443    if (minimum_pitch < 64)
 444       return I915_TILING_NONE;
 445
 446    if (ALIGN(minimum_pitch, 512) >= 32768 ||
 447        mt->total_width >= 32768 || mt->total_height >= 32768) {
 448       perf_debug("%dx%d miptree too large to blit, falling back to untiled",
 449                  mt->total_width, mt->total_height);
 450       return I915_TILING_NONE;
 451    }
 452
 453    /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */
 454    if (brw->gen < 6)
 455       return I915_TILING_X;
 456
 457    /* From the Sandybridge PRM, Volume 1, Part 2, page 32:
 458     * "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either TileX
 459     *  or Linear."
 460     * 128 bits per pixel translates to 16 bytes per pixel.  This is necessary
 461     * all the way back to 965, but is explicitly permitted on Gen7.
 462     */
 463    if (brw->gen != 7 && mt->cpp >= 16)
 464       return I915_TILING_X;
 465
 466    /* From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most
 467     * messages), on p64, under the heading "Surface Vertical Alignment":
 468     *
 469     *     This field must be set to VALIGN_4 for all tiled Y Render Target
 470     *     surfaces.
 471     *
 472     * So if the surface is renderable and uses a vertical alignment of 2,
 473     * force it to be X tiled.  This is somewhat conservative (it's possible
 474     * that the client won't ever render to this surface), but it's difficult
 475     * to know that ahead of time.  And besides, since we use a vertical
 476     * alignment of 4 as often as we can, this shouldn't happen very often.
 477     */
 478    if (brw->gen == 7 && mt->align_h == 2 &&
 479        brw->format_supported_as_render_target[format]) {
 480       return I915_TILING_X;
 481    }
 482
 483    return I915_TILING_Y | I915_TILING_X;
 484 }
 485
 486 struct intel_mipmap_tree *
 487 intel_miptree_create(struct brw_context *brw,
 488                      GLenum target,
 489                      mesa_format format,
 490                      GLuint first_level,
 491                      GLuint last_level,
 492                      GLuint width0,
 493                      GLuint height0,
 494                      GLuint depth0,
 495                      bool expect_accelerated_upload,
 496                      GLuint num_samples,
 497                      enum intel_miptree_tiling_mode requested_tiling)
 498 {
 499    struct intel_mipmap_tree *mt;
 500    mesa_format tex_format = format;
 501    mesa_format etc_format = MESA_FORMAT_NONE;
 502    GLuint total_width, total_height;
 503
 504    if (brw->gen < 8 && !brw->is_baytrail) {
 505       switch (format) {
 506       case MESA_FORMAT_ETC1_RGB8:
 507          format = MESA_FORMAT_R8G8B8X8_UNORM;
 508          break;
 509       case MESA_FORMAT_ETC2_RGB8:
 510          format = MESA_FORMAT_R8G8B8X8_UNORM;
 511          break;
 512       case MESA_FORMAT_ETC2_SRGB8:
 513       case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
 514       case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
 515          format = MESA_FORMAT_B8G8R8A8_SRGB;
 516          break;
 517       case MESA_FORMAT_ETC2_RGBA8_EAC:
 518       case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
 519          format = MESA_FORMAT_R8G8B8A8_UNORM;
 520          break;
 521       case MESA_FORMAT_ETC2_R11_EAC:
 522          format = MESA_FORMAT_R_UNORM16;
 523          break;
 524       case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
 525          format = MESA_FORMAT_R_SNORM16;
 526          break;
 527       case MESA_FORMAT_ETC2_RG11_EAC:
 528          format = MESA_FORMAT_R16G16_UNORM;
 529          break;
 530       case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
 531          format = MESA_FORMAT_R16G16_SNORM;
 532          break;
 533       default:
 534          /* Non ETC1 / ETC2 format */
 535          break;
 536       }
 537    }
 538
 539    etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
 540
 541    mt = intel_miptree_create_layout(brw, target, format,
 542                                       first_level, last_level, width0,
 543                                       height0, depth0,
 544                                       false, num_samples);
 545    /*
 546     * pitch == 0 || height == 0  indicates the null texture
 547     */
 548    if (!mt || !mt->total_width || !mt->total_height) {
 549       intel_miptree_release(&mt);
 550       return NULL;
 551    }
 552
 553    total_width = mt->total_width;
 554    total_height = mt->total_height;
 555
 556    if (format == MESA_FORMAT_S_UINT8) {
 557       /* Align to size of W tile, 64x64. */
 558       total_width = ALIGN(total_width, 64);
 559       total_height = ALIGN(total_height, 64);
 560    }
 561
 562    uint32_t tiling = intel_miptree_choose_tiling(brw, format, width0,
 563                                                  num_samples, requested_tiling,
 564                                                  mt);
 565    bool y_or_x = tiling == (I915_TILING_Y | I915_TILING_X);
 566
 567    mt->etc_format = etc_format;
 568    mt->region = intel_region_alloc(brw->intelScreen,
 569                                    y_or_x ? I915_TILING_Y : tiling,
 570                                    mt->cpp,
 571                                    total_width,
 572                                    total_height,
 573                                    expect_accelerated_upload);
 574
 575    /* If the region is too large to fit in the aperture, we need to use the
 576     * BLT engine to support it.  The BLT paths can't currently handle Y-tiling,
 577     * so we need to fall back to X.
 578     */
 579    if (y_or_x && mt->region->bo->size >= brw->max_gtt_map_object_size) {
 580       perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n",
 581                  mt->total_width, mt->total_height);
 582       intel_region_release(&mt->region);
 583
 584       mt->region = intel_region_alloc(brw->intelScreen,
 585                                       I915_TILING_X,
 586                                       mt->cpp,
 587                                       total_width,
 588                                       total_height,
 589                                       expect_accelerated_upload);
 590    }
 591
 592    mt->offset = 0;
 593
 594    if (!mt->region) {
 595        intel_miptree_release(&mt);
 596        return NULL;
 597    }
 598
 599
 600    if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
 601       if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) {
 602          intel_miptree_release(&mt);
 603          return NULL;
 604       }
 605    }
 606
 607    /* If this miptree is capable of supporting fast color clears, set
 608     * fast_clear_state appropriately to ensure that fast clears will occur.
 609     * Allocation of the MCS miptree will be deferred until the first fast
 610     * clear actually occurs.
 611     */
 612    if (intel_is_non_msrt_mcs_buffer_supported(brw, mt))
 613       mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 614
 615    return mt;
 616 }
 617
 618 struct intel_mipmap_tree *
 619 intel_miptree_create_for_bo(struct brw_context *brw,
 620                             drm_intel_bo *bo,
 621                             mesa_format format,
 622                             uint32_t offset,
 623                             uint32_t width,
 624                             uint32_t height,
 625                             int pitch,
 626                             uint32_t tiling)
 627 {
 628    struct intel_mipmap_tree *mt;
 629
 630    struct intel_region *region = calloc(1, sizeof(*region));
 631    if (!region)
 632       return NULL;
 633
 634    /* Nothing will be able to use this miptree with the BO if the offset isn't
 635     * aligned.
 636     */
 637    if (tiling != I915_TILING_NONE)
 638       assert(offset % 4096 == 0);
 639
 640    /* miptrees can't handle negative pitch.  If you need flipping of images,
 641     * that's outside of the scope of the mt.
 642     */
 643    assert(pitch >= 0);
 644
 645    mt = intel_miptree_create_layout(brw, GL_TEXTURE_2D, format,
 646                                     0, 0,
 647                                     width, height, 1,
 648                                     true, 0 /* num_samples */);
 649    if (!mt) {
 650       free(region);
 651       return mt;
 652    }
 653
 654    region->cpp = mt->cpp;
 655    region->width = width;
 656    region->height = height;
 657    region->pitch = pitch;
 658    region->refcount = 1;
 659    drm_intel_bo_reference(bo);
 660    region->bo = bo;
 661    region->tiling = tiling;
 662
 663    mt->region = region;
 664    mt->offset = offset;
 665
 666    return mt;
 667 }
 668
 669
 670 /**
 671  * For a singlesample DRI2 buffer, this simply wraps the given region with a miptree.
 672  *
 673  * For a multisample DRI2 buffer, this wraps the given region with
 674  * a singlesample miptree, then creates a multisample miptree into which the
 675  * singlesample miptree is embedded as a child.
 676  */
 677 struct intel_mipmap_tree*
 678 intel_miptree_create_for_dri2_buffer(struct brw_context *brw,
 679                                      unsigned dri_attachment,
 680                                      mesa_format format,
 681                                      uint32_t num_samples,
 682                                      struct intel_region *region)
 683 {
 684    struct intel_mipmap_tree *singlesample_mt = NULL;
 685    struct intel_mipmap_tree *multisample_mt = NULL;
 686
 687    /* Only the front and back buffers, which are color buffers, are shared
 688     * through DRI2.
 689     */
 690    assert(dri_attachment == __DRI_BUFFER_BACK_LEFT ||
 691           dri_attachment == __DRI_BUFFER_FRONT_LEFT ||
 692           dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT);
 693    assert(_mesa_get_format_base_format(format) == GL_RGB ||
 694           _mesa_get_format_base_format(format) == GL_RGBA);
 695
 696    singlesample_mt = intel_miptree_create_for_bo(brw,
 697                                                  region->bo,
 698                                                  format,
 699                                                  0,
 700                                                  region->width,
 701                                                  region->height,
 702                                                  region->pitch,
 703                                                  region->tiling);
 704    if (!singlesample_mt)
 705       return NULL;
 706    singlesample_mt->region->name = region->name;
 707
 708    /* If this miptree is capable of supporting fast color clears, set
 709     * fast_clear_state appropriately to ensure that fast clears will occur.
 710     * Allocation of the MCS miptree will be deferred until the first fast
 711     * clear actually occurs.
 712     */
 713    if (intel_is_non_msrt_mcs_buffer_supported(brw, singlesample_mt))
 714       singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 715
 716    if (num_samples == 0)
 717       return singlesample_mt;
 718
 719    multisample_mt = intel_miptree_create_for_renderbuffer(brw,
 720                                                           format,
 721                                                           region->width,
 722                                                           region->height,
 723                                                           num_samples);
 724    if (!multisample_mt) {
 725       intel_miptree_release(&singlesample_mt);
 726       return NULL;
 727    }
 728
 729    multisample_mt->singlesample_mt = singlesample_mt;
 730    multisample_mt->need_downsample = false;
 731
 732    if (brw->is_front_buffer_rendering &&
 733        (dri_attachment == __DRI_BUFFER_FRONT_LEFT ||
 734         dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT)) {
 735       intel_miptree_upsample(brw, multisample_mt);
 736    }
 737
 738    return multisample_mt;
 739 }
 740
 741 /**
 742  * For a singlesample image buffer, this simply wraps the given region with a miptree.
 743  *
 744  * For a multisample image buffer, this wraps the given region with
 745  * a singlesample miptree, then creates a multisample miptree into which the
 746  * singlesample miptree is embedded as a child.
 747  */
 748 struct intel_mipmap_tree*
 749 intel_miptree_create_for_image_buffer(struct brw_context *intel,
 750                                       enum __DRIimageBufferMask buffer_type,
 751                                       mesa_format format,
 752                                       uint32_t num_samples,
 753                                       struct intel_region *region)
 754 {
 755    struct intel_mipmap_tree *singlesample_mt = NULL;
 756    struct intel_mipmap_tree *multisample_mt = NULL;
 757
 758    /* Only the front and back buffers, which are color buffers, are allocated
 759     * through the image loader.
 760     */
 761    assert(_mesa_get_format_base_format(format) == GL_RGB ||
 762           _mesa_get_format_base_format(format) == GL_RGBA);
 763
 764    singlesample_mt = intel_miptree_create_for_bo(intel,
 765                                                  region->bo,
 766                                                  format,
 767                                                  0,
 768                                                  region->width,
 769                                                  region->height,
 770                                                  region->pitch,
 771                                                  region->tiling);
 772    if (!singlesample_mt)
 773       return NULL;
 774
 775    /* If this miptree is capable of supporting fast color clears, set
 776     * mcs_state appropriately to ensure that fast clears will occur.
 777     * Allocation of the MCS miptree will be deferred until the first fast
 778     * clear actually occurs.
 779     */
 780    if (intel_is_non_msrt_mcs_buffer_supported(intel, singlesample_mt))
 781       singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 782
 783    if (num_samples == 0)
 784       return singlesample_mt;
 785
 786    multisample_mt = intel_miptree_create_for_renderbuffer(intel,
 787                                                           format,
 788                                                           region->width,
 789                                                           region->height,
 790                                                           num_samples);
 791    if (!multisample_mt) {
 792       intel_miptree_release(&singlesample_mt);
 793       return NULL;
 794    }
 795
 796    multisample_mt->singlesample_mt = singlesample_mt;
 797    multisample_mt->need_downsample = false;
 798
 799    if (intel->is_front_buffer_rendering && buffer_type == __DRI_IMAGE_BUFFER_FRONT) {
 800       intel_miptree_upsample(intel, multisample_mt);
 801    }
 802
 803    return multisample_mt;
 804 }
 805
 806 struct intel_mipmap_tree*
 807 intel_miptree_create_for_renderbuffer(struct brw_context *brw,
 808                                       mesa_format format,
 809                                       uint32_t width,
 810                                       uint32_t height,
 811                                       uint32_t num_samples)
 812 {
 813    struct intel_mipmap_tree *mt;
 814    uint32_t depth = 1;
 815    bool ok;
 816
 817    mt = intel_miptree_create(brw, GL_TEXTURE_2D, format, 0, 0,
 818                              width, height, depth, true, num_samples,
 819                              INTEL_MIPTREE_TILING_ANY);
 820    if (!mt)
 821       goto fail;
 822
 823    if (brw_is_hiz_depth_format(brw, format)) {
 824       ok = intel_miptree_alloc_hiz(brw, mt);
 825       if (!ok)
 826          goto fail;
 827    }
 828
 829    return mt;
 830
 831 fail:
 832    intel_miptree_release(&mt);
 833    return NULL;
 834 }
 835
 836 void
 837 intel_miptree_reference(struct intel_mipmap_tree **dst,
 838                         struct intel_mipmap_tree *src)
 839 {
 840    if (*dst == src)
 841       return;
 842
 843    intel_miptree_release(dst);
 844
 845    if (src) {
 846       src->refcount++;
 847       DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount);
 848    }
 849
 850    *dst = src;
 851 }
 852
 853
 854 void
 855 intel_miptree_release(struct intel_mipmap_tree **mt)
 856 {
 857    if (!*mt)
 858       return;
 859
 860    DBG("%s %p refcount will be %d\n", __FUNCTION__, *mt, (*mt)->refcount - 1);
 861    if (--(*mt)->refcount <= 0) {
 862       GLuint i;
 863
 864       DBG("%s deleting %p\n", __FUNCTION__, *mt);
 865
 866       intel_region_release(&((*mt)->region));
 867       intel_miptree_release(&(*mt)->stencil_mt);
 868       intel_miptree_release(&(*mt)->hiz_mt);
 869       intel_miptree_release(&(*mt)->mcs_mt);
 870       intel_miptree_release(&(*mt)->singlesample_mt);
 871       intel_resolve_map_clear(&(*mt)->hiz_map);
 872
 873       for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
 874          free((*mt)->level[i].slice);
 875       }
 876
 877       free(*mt);
 878    }
 879    *mt = NULL;
 880 }
 881
 882 void
 883 intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
 884                                        int *width, int *height, int *depth)
 885 {
 886    switch (image->TexObject->Target) {
 887    case GL_TEXTURE_1D_ARRAY:
 888       *width = image->Width;
 889       *height = 1;
 890       *depth = image->Height;
 891       break;
 892    default:
 893       *width = image->Width;
 894       *height = image->Height;
 895       *depth = image->Depth;
 896       break;
 897    }
 898 }
 899
 900 /**
 901  * Can the image be pulled into a unified mipmap tree?  This mirrors
 902  * the completeness test in a lot of ways.
 903  *
 904  * Not sure whether I want to pass gl_texture_image here.
 905  */
 906 bool
 907 intel_miptree_match_image(struct intel_mipmap_tree *mt,
 908                           struct gl_texture_image *image)
 909 {
 910    struct intel_texture_image *intelImage = intel_texture_image(image);
 911    GLuint level = intelImage->base.Base.Level;
 912    int width, height, depth;
 913
 914    /* glTexImage* choose the texture object based on the target passed in, and
 915     * objects can't change targets over their lifetimes, so this should be
 916     * true.
 917     */
 918    assert(target_to_target(image->TexObject->Target) == mt->target);
 919
 920    mesa_format mt_format = mt->format;
 921    if (mt->format == MESA_FORMAT_Z24_UNORM_S8_UINT && mt->stencil_mt)
 922       mt_format = MESA_FORMAT_Z24_UNORM_X8_UINT;
 923    if (mt->format == MESA_FORMAT_Z_FLOAT32 && mt->stencil_mt)
 924       mt_format = MESA_FORMAT_Z32_FLOAT_S8X24_UINT;
 925    if (mt->etc_format != MESA_FORMAT_NONE)
 926       mt_format = mt->etc_format;
 927
 928    if (image->TexFormat != mt_format)
 929       return false;
 930
 931    intel_miptree_get_dimensions_for_image(image, &width, &height, &depth);
 932
 933    if (mt->target == GL_TEXTURE_CUBE_MAP)
 934       depth = 6;
 935
 936    /* Test image dimensions against the base level image adjusted for
 937     * minification.  This will also catch images not present in the
 938     * tree, changed targets, etc.
 939     */
 940    if (mt->target == GL_TEXTURE_2D_MULTISAMPLE ||
 941          mt->target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
 942       /* nonzero level here is always bogus */
 943       assert(level == 0);
 944
 945       if (width != mt->logical_width0 ||
 946             height != mt->logical_height0 ||
 947             depth != mt->logical_depth0) {
 948          return false;
 949       }
 950    }
 951    else {
 952       /* all normal textures, renderbuffers, etc */
 953       if (width != mt->level[level].width ||
 954           height != mt->level[level].height ||
 955           depth != mt->level[level].depth) {
 956          return false;
 957       }
 958    }
 959
 960    if (image->NumSamples != mt->num_samples)
 961       return false;
 962
 963    return true;
 964 }
 965
 966
 967 void
 968 intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
 969                              GLuint level,
 970                              GLuint x, GLuint y,
 971                              GLuint w, GLuint h, GLuint d)
 972 {
 973    mt->level[level].width = w;
 974    mt->level[level].height = h;
 975    mt->level[level].depth = d;
 976    mt->level[level].level_x = x;
 977    mt->level[level].level_y = y;
 978
 979    DBG("%s level %d size: %d,%d,%d offset %d,%d\n", __FUNCTION__,
 980        level, w, h, d, x, y);
 981
 982    assert(mt->level[level].slice == NULL);
 983
 984    mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice));
 985    mt->level[level].slice[0].x_offset = mt->level[level].level_x;
 986    mt->level[level].slice[0].y_offset = mt->level[level].level_y;
 987 }
 988
 989
 990 void
 991 intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
 992                                GLuint level, GLuint img,
 993                                GLuint x, GLuint y)
 994 {
 995    if (img == 0 && level == 0)
 996       assert(x == 0 && y == 0);
 997
 998    assert(img < mt->level[level].depth);
 999
1000    mt->level[level].slice[img].x_offset = mt->level[level].level_x + x;
1001    mt->level[level].slice[img].y_offset = mt->level[level].level_y + y;
1002
1003    DBG("%s level %d img %d pos %d,%d\n",
1004        __FUNCTION__, level, img,
1005        mt->level[level].slice[img].x_offset,
1006        mt->level[level].slice[img].y_offset);
1007 }
1008
1009 void
1010 intel_miptree_get_image_offset(struct intel_mipmap_tree *mt,
1011                                GLuint level, GLuint slice,
1012                                GLuint *x, GLuint *y)
1013 {
1014    assert(slice < mt->level[level].depth);
1015
1016    *x = mt->level[level].slice[slice].x_offset;
1017    *y = mt->level[level].slice[slice].y_offset;
1018 }
1019
1020 /**
1021  * Rendering with tiled buffers requires that the base address of the buffer
1022  * be aligned to a page boundary.  For renderbuffers, and sometimes with
1023  * textures, we may want the surface to point at a texture image level that
1024  * isn't at a page boundary.
1025  *
1026  * This function returns an appropriately-aligned base offset
1027  * according to the tiling restrictions, plus any required x/y offset
1028  * from there.
1029  */
1030 uint32_t
1031 intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt,
1032                                GLuint level, GLuint slice,
1033                                uint32_t *tile_x,
1034                                uint32_t *tile_y)
1035 {
1036    struct intel_region *region = mt->region;
1037    uint32_t x, y;
1038    uint32_t mask_x, mask_y;
1039
1040    intel_region_get_tile_masks(region, &mask_x, &mask_y, false);
1041    intel_miptree_get_image_offset(mt, level, slice, &x, &y);
1042
1043    *tile_x = x & mask_x;
1044    *tile_y = y & mask_y;
1045
1046    return intel_region_get_aligned_offset(region, x & ~mask_x, y & ~mask_y,
1047                                           false);
1048 }
1049
1050 static void
1051 intel_miptree_copy_slice_sw(struct brw_context *brw,
1052                             struct intel_mipmap_tree *dst_mt,
1053                             struct intel_mipmap_tree *src_mt,
1054                             int level,
1055                             int slice,
1056                             int width,
1057                             int height)
1058 {
1059    void *src, *dst;
1060    int src_stride, dst_stride;
1061    int cpp = dst_mt->cpp;
1062
1063    intel_miptree_map(brw, src_mt,
1064                      level, slice,
1065                      0, 0,
1066                      width, height,
1067                      GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
1068                      &src, &src_stride);
1069
1070    intel_miptree_map(brw, dst_mt,
1071                      level, slice,
1072                      0, 0,
1073                      width, height,
1074                      GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
1075                      BRW_MAP_DIRECT_BIT,
1076                      &dst, &dst_stride);
1077
1078    DBG("sw blit %s mt %p %p/%d -> %s mt %p %p/%d (%dx%d)\n",
1079        _mesa_get_format_name(src_mt->format),
1080        src_mt, src, src_stride,
1081        _mesa_get_format_name(dst_mt->format),
1082        dst_mt, dst, dst_stride,
1083        width, height);
1084
1085    int row_size = cpp * width;
1086    if (src_stride == row_size &&
1087        dst_stride == row_size) {
1088       memcpy(dst, src, row_size * height);
1089    } else {
1090       for (int i = 0; i < height; i++) {
1091          memcpy(dst, src, row_size);
1092          dst += dst_stride;
1093          src += src_stride;
1094       }
1095    }
1096
1097    intel_miptree_unmap(brw, dst_mt, level, slice);
1098    intel_miptree_unmap(brw, src_mt, level, slice);
1099
1100    /* Don't forget to copy the stencil data over, too.  We could have skipped
1101     * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
1102     * shuffling the two data sources in/out of temporary storage instead of
1103     * the direct mapping we get this way.
1104     */
1105    if (dst_mt->stencil_mt) {
1106       assert(src_mt->stencil_mt);
1107       intel_miptree_copy_slice_sw(brw, dst_mt->stencil_mt, src_mt->stencil_mt,
1108                                   level, slice, width, height);
1109    }
1110 }
1111
1112 static void
1113 intel_miptree_copy_slice(struct brw_context *brw,
1114                          struct intel_mipmap_tree *dst_mt,
1115                          struct intel_mipmap_tree *src_mt,
1116                          int level,
1117                          int face,
1118                          int depth)
1119
1120 {
1121    mesa_format format = src_mt->format;
1122    uint32_t width = src_mt->level[level].width;
1123    uint32_t height = src_mt->level[level].height;
1124    int slice;
1125
1126    if (face > 0)
1127       slice = face;
1128    else
1129       slice = depth;
1130
1131    assert(depth < src_mt->level[level].depth);
1132    assert(src_mt->format == dst_mt->format);
1133
1134    if (dst_mt->compressed) {
1135       height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h;
1136       width = ALIGN(width, dst_mt->align_w);
1137    }
1138
1139    /* If it's a packed depth/stencil buffer with separate stencil, the blit
1140     * below won't apply since we can't do the depth's Y tiling or the
1141     * stencil's W tiling in the blitter.
1142     */
1143    if (src_mt->stencil_mt) {
1144       intel_miptree_copy_slice_sw(brw,
1145                                   dst_mt, src_mt,
1146                                   level, slice,
1147                                   width, height);
1148       return;
1149    }
1150
1151    uint32_t dst_x, dst_y, src_x, src_y;
1152    intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y);
1153    intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y);
1154
1155    DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
1156        _mesa_get_format_name(src_mt->format),
1157        src_mt, src_x, src_y, src_mt->region->pitch,
1158        _mesa_get_format_name(dst_mt->format),
1159        dst_mt, dst_x, dst_y, dst_mt->region->pitch,
1160        width, height);
1161
1162    if (!intel_miptree_blit(brw,
1163                            src_mt, level, slice, 0, 0, false,
1164                            dst_mt, level, slice, 0, 0, false,
1165                            width, height, GL_COPY)) {
1166       perf_debug("miptree validate blit for %s failed\n",
1167                  _mesa_get_format_name(format));
1168
1169       intel_miptree_copy_slice_sw(brw, dst_mt, src_mt, level, slice,
1170                                   width, height);
1171    }
1172 }
1173
1174 /**
1175  * Copies the image's current data to the given miptree, and associates that
1176  * miptree with the image.
1177  *
1178  * If \c invalidate is true, then the actual image data does not need to be
1179  * copied, but the image still needs to be associated to the new miptree (this
1180  * is set to true if we're about to clear the image).
1181  */
1182 void
1183 intel_miptree_copy_teximage(struct brw_context *brw,
1184                             struct intel_texture_image *intelImage,
1185                             struct intel_mipmap_tree *dst_mt,
1186                             bool invalidate)
1187 {
1188    struct intel_mipmap_tree *src_mt = intelImage->mt;
1189    struct intel_texture_object *intel_obj =
1190       intel_texture_object(intelImage->base.Base.TexObject);
1191    int level = intelImage->base.Base.Level;
1192    int face = intelImage->base.Base.Face;
1193    GLuint depth = intelImage->base.Base.Depth;
1194
1195    if (!invalidate) {
1196       for (int slice = 0; slice < depth; slice++) {
1197          intel_miptree_copy_slice(brw, dst_mt, src_mt, level, face, slice);
1198       }
1199    }
1200
1201    intel_miptree_reference(&intelImage->mt, dst_mt);
1202    intel_obj->needs_validate = true;
1203 }
1204
1205 bool
1206 intel_miptree_alloc_mcs(struct brw_context *brw,
1207                         struct intel_mipmap_tree *mt,
1208                         GLuint num_samples)
1209 {
1210    assert(brw->gen >= 7); /* MCS only used on Gen7+ */
1211    assert(mt->mcs_mt == NULL);
1212
1213    /* Choose the correct format for the MCS buffer.  All that really matters
1214     * is that we allocate the right buffer size, since we'll always be
1215     * accessing this miptree using MCS-specific hardware mechanisms, which
1216     * infer the correct format based on num_samples.
1217     */
1218    mesa_format format;
1219    switch (num_samples) {
1220    case 4:
1221       /* 8 bits/pixel are required for MCS data when using 4x MSAA (2 bits for
1222        * each sample).
1223        */
1224       format = MESA_FORMAT_R_UNORM8;
1225       break;
1226    case 8:
1227       /* 32 bits/pixel are required for MCS data when using 8x MSAA (3 bits
1228        * for each sample, plus 8 padding bits).
1229        */
1230       format = MESA_FORMAT_R_UINT32;
1231       break;
1232    default:
1233       assert(!"Unrecognized sample count in intel_miptree_alloc_mcs");
1234       return false;
1235    };
1236
1237    /* From the Ivy Bridge PRM, Vol4 Part1 p76, "MCS Base Address":
1238     *
1239     *     "The MCS surface must be stored as Tile Y."
1240     */
1241    mt->mcs_mt = intel_miptree_create(brw,
1242                                      mt->target,
1243                                      format,
1244                                      mt->first_level,
1245                                      mt->last_level,
1246                                      mt->logical_width0,
1247                                      mt->logical_height0,
1248                                      mt->logical_depth0,
1249                                      true,
1250                                      0 /* num_samples */,
1251                                      INTEL_MIPTREE_TILING_Y);
1252
1253    /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
1254     *
1255     *     When MCS buffer is enabled and bound to MSRT, it is required that it
1256     *     is cleared prior to any rendering.
1257     *
1258     * Since we don't use the MCS buffer for any purpose other than rendering,
1259     * it makes sense to just clear it immediately upon allocation.
1260     *
1261     * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
1262     */
1263    void *data = intel_miptree_map_raw(brw, mt->mcs_mt);
1264    memset(data, 0xff, mt->mcs_mt->region->bo->size);
1265    intel_miptree_unmap_raw(brw, mt->mcs_mt);
1266    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
1267
1268    return mt->mcs_mt;
1269 }
1270
1271
1272 bool
1273 intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
1274                                  struct intel_mipmap_tree *mt)
1275 {
1276    assert(mt->mcs_mt == NULL);
1277
1278    /* The format of the MCS buffer is opaque to the driver; all that matters
1279     * is that we get its size and pitch right.  We'll pretend that the format
1280     * is R32.  Since an MCS tile covers 128 blocks horizontally, and a Y-tiled
1281     * R32 buffer is 32 pixels across, we'll need to scale the width down by
1282     * the block width and then a further factor of 4.  Since an MCS tile
1283     * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high,
1284     * we'll need to scale the height down by the block height and then a
1285     * further factor of 8.
1286     */
1287    const mesa_format format = MESA_FORMAT_R_UINT32;
1288    unsigned block_width_px;
1289    unsigned block_height;
1290    intel_get_non_msrt_mcs_alignment(brw, mt, &block_width_px, &block_height);
1291    unsigned width_divisor = block_width_px * 4;
1292    unsigned height_divisor = block_height * 8;
1293    unsigned mcs_width =
1294       ALIGN(mt->logical_width0, width_divisor) / width_divisor;
1295    unsigned mcs_height =
1296       ALIGN(mt->logical_height0, height_divisor) / height_divisor;
1297    assert(mt->logical_depth0 == 1);
1298    mt->mcs_mt = intel_miptree_create(brw,
1299                                      mt->target,
1300                                      format,
1301                                      mt->first_level,
1302                                      mt->last_level,
1303                                      mcs_width,
1304                                      mcs_height,
1305                                      mt->logical_depth0,
1306                                      true,
1307                                      0 /* num_samples */,
1308                                      INTEL_MIPTREE_TILING_Y);
1309
1310    return mt->mcs_mt;
1311 }
1312
1313
1314 /**
1315  * Helper for intel_miptree_alloc_hiz() that sets
1316  * \c mt->level[level].slice[layer].has_hiz. Return true if and only if
1317  * \c has_hiz was set.
1318  */
1319 static bool
1320 intel_miptree_slice_enable_hiz(struct brw_context *brw,
1321                                struct intel_mipmap_tree *mt,
1322                                uint32_t level,
1323                                uint32_t layer)
1324 {
1325    assert(mt->hiz_mt);
1326
1327    if (brw->is_haswell) {
1328       const struct intel_mipmap_level *l = &mt->level[level];
1329
1330       /* Disable HiZ for LOD > 0 unless the width is 8 aligned
1331        * and the height is 4 aligned. This allows our HiZ support
1332        * to fulfill Haswell restrictions for HiZ ops. For LOD == 0,
1333        * we can grow the width & height to allow the HiZ op to
1334        * force the proper size alignments.
1335        */
1336       if (level > 0 && ((l->width & 7) || (l->height & 3))) {
1337          return false;
1338       }
1339    }
1340
1341    mt->level[level].slice[layer].has_hiz = true;
1342    return true;
1343 }
1344
1345
1346
1347 bool
1348 intel_miptree_alloc_hiz(struct brw_context *brw,
1349                         struct intel_mipmap_tree *mt)
1350 {
1351    assert(mt->hiz_mt == NULL);
1352    mt->hiz_mt = intel_miptree_create(brw,
1353                                      mt->target,
1354                                      mt->format,
1355                                      mt->first_level,
1356                                      mt->last_level,
1357                                      mt->logical_width0,
1358                                      mt->logical_height0,
1359                                      mt->logical_depth0,
1360                                      true,
1361                                      mt->num_samples,
1362                                      INTEL_MIPTREE_TILING_ANY);
1363
1364    if (!mt->hiz_mt)
1365       return false;
1366
1367    /* Mark that all slices need a HiZ resolve. */
1368    struct intel_resolve_map *head = &mt->hiz_map;
1369    for (int level = mt->first_level; level <= mt->last_level; ++level) {
1370       for (int layer = 0; layer < mt->level[level].depth; ++layer) {
1371          if (!intel_miptree_slice_enable_hiz(brw, mt, level, layer))
1372             continue;
1373
1374          head->next = malloc(sizeof(*head->next));
1375          head->next->prev = head;
1376          head->next->next = NULL;
1377          head = head->next;
1378
1379          head->level = level;
1380          head->layer = layer;
1381          head->need = GEN6_HIZ_OP_HIZ_RESOLVE;
1382       }
1383    }
1384
1385    return true;
1386 }
1387
1388 /**
1389  * Does the miptree slice have hiz enabled?
1390  */
1391 bool
1392 intel_miptree_slice_has_hiz(struct intel_mipmap_tree *mt,
1393                             uint32_t level,
1394                             uint32_t layer)
1395 {
1396    intel_miptree_check_level_layer(mt, level, layer);
1397    return mt->level[level].slice[layer].has_hiz;
1398 }
1399
1400 void
1401 intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,
1402                                           uint32_t level,
1403                                           uint32_t layer)
1404 {
1405    if (!intel_miptree_slice_has_hiz(mt, level, layer))
1406       return;
1407
1408    intel_resolve_map_set(&mt->hiz_map,
1409                          level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
1410 }
1411
1412
1413 void
1414 intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt,
1415                                             uint32_t level,
1416                                             uint32_t layer)
1417 {
1418    if (!intel_miptree_slice_has_hiz(mt, level, layer))
1419       return;
1420
1421    intel_resolve_map_set(&mt->hiz_map,
1422                          level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
1423 }
1424
1425 void
1426 intel_miptree_set_all_slices_need_depth_resolve(struct intel_mipmap_tree *mt,
1427                                                 uint32_t level)
1428 {
1429    uint32_t layer;
1430    uint32_t end_layer = mt->level[level].depth;
1431
1432    for (layer = 0; layer < end_layer; layer++) {
1433       intel_miptree_slice_set_needs_depth_resolve(mt, level, layer);
1434    }
1435 }
1436
1437 static bool
1438 intel_miptree_slice_resolve(struct brw_context *brw,
1439                             struct intel_mipmap_tree *mt,
1440                             uint32_t level,
1441                             uint32_t layer,
1442                             enum gen6_hiz_op need)
1443 {
1444    intel_miptree_check_level_layer(mt, level, layer);
1445
1446    struct intel_resolve_map *item =
1447          intel_resolve_map_get(&mt->hiz_map, level, layer);
1448
1449    if (!item || item->need != need)
1450       return false;
1451
1452    intel_hiz_exec(brw, mt, level, layer, need);
1453    intel_resolve_map_remove(item);
1454    return true;
1455 }
1456
1457 bool
1458 intel_miptree_slice_resolve_hiz(struct brw_context *brw,
1459                                 struct intel_mipmap_tree *mt,
1460                                 uint32_t level,
1461                                 uint32_t layer)
1462 {
1463    return intel_miptree_slice_resolve(brw, mt, level, layer,
1464                                       GEN6_HIZ_OP_HIZ_RESOLVE);
1465 }
1466
1467 bool
1468 intel_miptree_slice_resolve_depth(struct brw_context *brw,
1469                                   struct intel_mipmap_tree *mt,
1470                                   uint32_t level,
1471                                   uint32_t layer)
1472 {
1473    return intel_miptree_slice_resolve(brw, mt, level, layer,
1474                                       GEN6_HIZ_OP_DEPTH_RESOLVE);
1475 }
1476
1477 static bool
1478 intel_miptree_all_slices_resolve(struct brw_context *brw,
1479                                  struct intel_mipmap_tree *mt,
1480                                  enum gen6_hiz_op need)
1481 {
1482    bool did_resolve = false;
1483    struct intel_resolve_map *i, *next;
1484
1485    for (i = mt->hiz_map.next; i; i = next) {
1486       next = i->next;
1487       if (i->need != need)
1488          continue;
1489
1490       intel_hiz_exec(brw, mt, i->level, i->layer, need);
1491       intel_resolve_map_remove(i);
1492       did_resolve = true;
1493    }
1494
1495    return did_resolve;
1496 }
1497
1498 bool
1499 intel_miptree_all_slices_resolve_hiz(struct brw_context *brw,
1500                                      struct intel_mipmap_tree *mt)
1501 {
1502    return intel_miptree_all_slices_resolve(brw, mt,
1503                                            GEN6_HIZ_OP_HIZ_RESOLVE);
1504 }
1505
1506 bool
1507 intel_miptree_all_slices_resolve_depth(struct brw_context *brw,
1508                                        struct intel_mipmap_tree *mt)
1509 {
1510    return intel_miptree_all_slices_resolve(brw, mt,
1511                                            GEN6_HIZ_OP_DEPTH_RESOLVE);
1512 }
1513
1514
1515 void
1516 intel_miptree_resolve_color(struct brw_context *brw,
1517                             struct intel_mipmap_tree *mt)
1518 {
1519    switch (mt->fast_clear_state) {
1520    case INTEL_FAST_CLEAR_STATE_NO_MCS:
1521    case INTEL_FAST_CLEAR_STATE_RESOLVED:
1522       /* No resolve needed */
1523       break;
1524    case INTEL_FAST_CLEAR_STATE_UNRESOLVED:
1525    case INTEL_FAST_CLEAR_STATE_CLEAR:
1526       /* Fast color clear resolves only make sense for non-MSAA buffers. */
1527       if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE)
1528          brw_blorp_resolve_color(brw, mt);
1529       break;
1530    }
1531 }
1532
1533
1534 /**
1535  * Make it possible to share the region backing the given miptree with another
1536  * process or another miptree.
1537  *
1538  * Fast color clears are unsafe with shared buffers, so we need to resolve and
1539  * then discard the MCS buffer, if present.  We also set the fast_clear_state
1540  * to INTEL_FAST_CLEAR_STATE_NO_MCS to ensure that no MCS buffer gets
1541  * allocated in the future.
1542  */
1543 void
1544 intel_miptree_make_shareable(struct brw_context *brw,
1545                              struct intel_mipmap_tree *mt)
1546 {
1547    /* MCS buffers are also used for multisample buffers, but we can't resolve
1548     * away a multisample MCS buffer because it's an integral part of how the
1549     * pixel data is stored.  Fortunately this code path should never be
1550     * reached for multisample buffers.
1551     */
1552    assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE);
1553
1554    if (mt->mcs_mt) {
1555       intel_miptree_resolve_color(brw, mt);
1556       intel_miptree_release(&mt->mcs_mt);
1557       mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
1558    }
1559 }
1560
1561
/**
 * \brief Get pointer offset into stencil buffer.
 *
 * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
 * must decode the tile's layout in software.
 *
 * See
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
 *     Format.
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
 *
 * \param stride    multiplied by the tile height (64) to get the byte size
 *                  of one row of tiles
 * \param x, y      coordinates of the byte to locate
 * \param swizzled  when true, apply the bit-6 swizzle adjustment
 *
 * Even though the returned offset is always positive, the return type is
 * signed due to
 *    commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
 *    mesa: Fix return type of  _mesa_get_format_bytes() (#37351)
 */
static intptr_t
intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
{
   const uint32_t tile_dim = 64;       /* tiles are 64 x 64 bytes */
   const uint32_t tile_bytes = 4096;
   const uint32_t tile_row_bytes = tile_dim * stride;

   /* The byte's position relative to its tile's base address. */
   const uint32_t bx = x % tile_dim;
   const uint32_t by = y % tile_dim;

   /* Start of the tile that contains (x, y). */
   uint32_t offset = (y / tile_dim) * tile_row_bytes
                   + (x / tile_dim) * tile_bytes;

   /* Interleave the bits of bx and by as the W-major layout prescribes. */
   offset += (bx >> 3) << 9;
   offset += (by >> 3) << 6;
   offset += ((by >> 2) & 1) << 5;
   offset += ((bx >> 2) & 1) << 4;
   offset += ((by >> 1) & 1) << 3;
   offset += ((bx >> 1) & 1) << 2;
   offset += (by & 1) << 1;
   offset += bx & 1;

   uintptr_t u = offset;

   /* adjust for bit6 swizzling */
   if (swizzled && ((bx >> 3) & 1)) {
      if ((by >> 3) & 1)
         u -= 64;
      else
         u += 64;
   }

   return u;
}
1617
1618 static void
1619 intel_miptree_updownsample(struct brw_context *brw,
1620                            struct intel_mipmap_tree *src,
1621                            struct intel_mipmap_tree *dst,
1622                            unsigned width,
1623                            unsigned height)
1624 {
1625    int src_x0 = 0;
1626    int src_y0 = 0;
1627    int dst_x0 = 0;
1628    int dst_y0 = 0;
1629
1630    brw_blorp_blit_miptrees(brw,
1631                            src, 0 /* level */, 0 /* layer */,
1632                            dst, 0 /* level */, 0 /* layer */,
1633                            src_x0, src_y0,
1634                            width, height,
1635                            dst_x0, dst_y0,
1636                            width, height,
1637                            GL_NEAREST, false, false /*mirror x, y*/);
1638
1639    if (src->stencil_mt) {
1640       brw_blorp_blit_miptrees(brw,
1641                               src->stencil_mt, 0 /* level */, 0 /* layer */,
1642                               dst->stencil_mt, 0 /* level */, 0 /* layer */,
1643                               src_x0, src_y0,
1644                               width, height,
1645                               dst_x0, dst_y0,
1646                               width, height,
1647                               GL_NEAREST, false, false /*mirror x, y*/);
1648    }
1649 }
1650
1651 static void
1652 assert_is_flat(struct intel_mipmap_tree *mt)
1653 {
1654    assert(mt->target == GL_TEXTURE_2D);
1655    assert(mt->first_level == 0);
1656    assert(mt->last_level == 0);
1657 }
1658
1659 /**
1660  * \brief Downsample from mt to mt->singlesample_mt.
1661  *
1662  * If the miptree needs no downsample, then skip.
1663  */
1664 void
1665 intel_miptree_downsample(struct brw_context *brw,
1666                          struct intel_mipmap_tree *mt)
1667 {
1668    /* Only flat, renderbuffer-like miptrees are supported. */
1669    assert_is_flat(mt);
1670
1671    if (!mt->need_downsample)
1672       return;
1673    intel_miptree_updownsample(brw,
1674                               mt, mt->singlesample_mt,
1675                               mt->logical_width0,
1676                               mt->logical_height0);
1677    mt->need_downsample = false;
1678 }
1679
1680 /**
1681  * \brief Upsample from mt->singlesample_mt to mt.
1682  *
1683  * The upsample is done unconditionally.
1684  */
1685 void
1686 intel_miptree_upsample(struct brw_context *brw,
1687                        struct intel_mipmap_tree *mt)
1688 {
1689    /* Only flat, renderbuffer-like miptrees are supported. */
1690    assert_is_flat(mt);
1691    assert(!mt->need_downsample);
1692
1693    intel_miptree_updownsample(brw,
1694                               mt->singlesample_mt, mt,
1695                               mt->logical_width0,
1696                               mt->logical_height0);
1697 }
1698
1699 void *
1700 intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt)
1701 {
1702    /* CPU accesses to color buffers don't understand fast color clears, so
1703     * resolve any pending fast color clears before we map.
1704     */
1705    intel_miptree_resolve_color(brw, mt);
1706
1707    drm_intel_bo *bo = mt->region->bo;
1708
1709    intel_batchbuffer_flush(brw);
1710
1711    if (mt->region->tiling != I915_TILING_NONE)
1712       brw_bo_map_gtt(brw, bo, "miptree");
1713    else
1714       brw_bo_map(brw, bo, true, "miptree");
1715
1716    return bo->virtual;
1717 }
1718
1719 void
1720 intel_miptree_unmap_raw(struct brw_context *brw,
1721                         struct intel_mipmap_tree *mt)
1722 {
1723    drm_intel_bo_unmap(mt->region->bo);
1724 }
1725
1726 static void
1727 intel_miptree_map_gtt(struct brw_context *brw,
1728                       struct intel_mipmap_tree *mt,
1729                       struct intel_miptree_map *map,
1730                       unsigned int level, unsigned int slice)
1731 {
1732    unsigned int bw, bh;
1733    void *base;
1734    unsigned int image_x, image_y;
1735    int x = map->x;
1736    int y = map->y;
1737
1738    /* For compressed formats, the stride is the number of bytes per
1739     * row of blocks.  intel_miptree_get_image_offset() already does
1740     * the divide.
1741     */
1742    _mesa_get_format_block_size(mt->format, &bw, &bh);
1743    assert(y % bh == 0);
1744    y /= bh;
1745
1746    base = intel_miptree_map_raw(brw, mt) + mt->offset;
1747
1748    if (base == NULL)
1749       map->ptr = NULL;
1750    else {
1751       /* Note that in the case of cube maps, the caller must have passed the
1752        * slice number referencing the face.
1753       */
1754       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1755       x += image_x;
1756       y += image_y;
1757
1758       map->stride = mt->region->pitch;
1759       map->ptr = base + y * map->stride + x * mt->cpp;
1760    }
1761
1762    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1763        map->x, map->y, map->w, map->h,
1764        mt, _mesa_get_format_name(mt->format),
1765        x, y, map->ptr, map->stride);
1766 }
1767
/**
 * Counterpart of intel_miptree_map_gtt(): the mapping pointed directly into
 * the buffer object, so all that is needed is to unmap it.
 *
 * \p map, \p level and \p slice are unused; they are present so that this
 * function has the same parameter list as the other unmap helpers.
 */
static void
intel_miptree_unmap_gtt(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
                        struct intel_miptree_map *map,
                        unsigned int level,
                        unsigned int slice)
{
   /* No temporary to copy back or free. */
   intel_miptree_unmap_raw(brw, mt);
}
1777
1778 static void
1779 intel_miptree_map_blit(struct brw_context *brw,
1780                        struct intel_mipmap_tree *mt,
1781                        struct intel_miptree_map *map,
1782                        unsigned int level, unsigned int slice)
1783 {
1784    map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
1785                                   0, 0,
1786                                   map->w, map->h, 1,
1787                                   false, 0,
1788                                   INTEL_MIPTREE_TILING_NONE);
1789    if (!map->mt) {
1790       fprintf(stderr, "Failed to allocate blit temporary\n");
1791       goto fail;
1792    }
1793    map->stride = map->mt->region->pitch;
1794
1795    if (!intel_miptree_blit(brw,
1796                            mt, level, slice,
1797                            map->x, map->y, false,
1798                            map->mt, 0, 0,
1799                            0, 0, false,
1800                            map->w, map->h, GL_COPY)) {
1801       fprintf(stderr, "Failed to blit\n");
1802       goto fail;
1803    }
1804
1805    map->ptr = intel_miptree_map_raw(brw, map->mt);
1806
1807    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1808        map->x, map->y, map->w, map->h,
1809        mt, _mesa_get_format_name(mt->format),
1810        level, slice, map->ptr, map->stride);
1811
1812    return;
1813
1814 fail:
1815    intel_miptree_release(&map->mt);
1816    map->ptr = NULL;
1817    map->stride = 0;
1818 }
1819
1820 static void
1821 intel_miptree_unmap_blit(struct brw_context *brw,
1822                          struct intel_mipmap_tree *mt,
1823                          struct intel_miptree_map *map,
1824                          unsigned int level,
1825                          unsigned int slice)
1826 {
1827    struct gl_context *ctx = &brw->ctx;
1828
1829    intel_miptree_unmap_raw(brw, map->mt);
1830
1831    if (map->mode & GL_MAP_WRITE_BIT) {
1832       bool ok = intel_miptree_blit(brw,
1833                                    map->mt, 0, 0,
1834                                    0, 0, false,
1835                                    mt, level, slice,
1836                                    map->x, map->y, false,
1837                                    map->w, map->h, GL_COPY);
1838       WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
1839    }
1840
1841    intel_miptree_release(&map->mt);
1842 }
1843
1844 #ifdef __SSE4_1__
1845 /**
1846  * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
1847  */
1848 static void
1849 intel_miptree_map_movntdqa(struct brw_context *brw,
1850                            struct intel_mipmap_tree *mt,
1851                            struct intel_miptree_map *map,
1852                            unsigned int level, unsigned int slice)
1853 {
1854    assert(map->mode & GL_MAP_READ_BIT);
1855    assert(!(map->mode & GL_MAP_WRITE_BIT));
1856
1857    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1858        map->x, map->y, map->w, map->h,
1859        mt, _mesa_get_format_name(mt->format),
1860        level, slice, map->ptr, map->stride);
1861
1862    /* Map the original image */
1863    uint32_t image_x;
1864    uint32_t image_y;
1865    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1866    image_x += map->x;
1867    image_y += map->y;
1868
1869    void *src = intel_miptree_map_raw(brw, mt);
1870    if (!src)
1871       return;
1872    src += image_y * mt->region->pitch;
1873    src += image_x * mt->region->cpp;
1874
1875    /* Due to the pixel offsets for the particular image being mapped, our
1876     * src pointer may not be 16-byte aligned.  However, if the pitch is
1877     * divisible by 16, then the amount by which it's misaligned will remain
1878     * consistent from row to row.
1879     */
1880    assert((mt->region->pitch % 16) == 0);
1881    const int misalignment = ((uintptr_t) src) & 15;
1882
1883    /* Create an untiled temporary buffer for the mapping. */
1884    const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w);
1885
1886    map->stride = ALIGN(misalignment + width_bytes, 16);
1887
1888    map->buffer = malloc(map->stride * map->h);
1889    /* Offset the destination so it has the same misalignment as src. */
1890    map->ptr = map->buffer + misalignment;
1891
1892    assert((((uintptr_t) map->ptr) & 15) == misalignment);
1893
1894    for (uint32_t y = 0; y < map->h; y++) {
1895       void *dst_ptr = map->ptr + y * map->stride;
1896       void *src_ptr = src + y * mt->region->pitch;
1897
1898       _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
1899    }
1900
1901    intel_miptree_unmap_raw(brw, mt);
1902 }
1903
1904 static void
1905 intel_miptree_unmap_movntdqa(struct brw_context *brw,
1906                              struct intel_mipmap_tree *mt,
1907                              struct intel_miptree_map *map,
1908                              unsigned int level,
1909                              unsigned int slice)
1910 {
1911    free(map->buffer);
1912    map->buffer = NULL;
1913    map->ptr = NULL;
1914 }
1915 #endif
1916
1917 static void
1918 intel_miptree_map_s8(struct brw_context *brw,
1919                      struct intel_mipmap_tree *mt,
1920                      struct intel_miptree_map *map,
1921                      unsigned int level, unsigned int slice)
1922 {
1923    map->stride = map->w;
1924    map->buffer = map->ptr = malloc(map->stride * map->h);
1925    if (!map->buffer)
1926       return;
1927
1928    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
1929     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
1930     * invalidate is set, since we'll be writing the whole rectangle from our
1931     * temporary buffer back out.
1932     */
1933    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
1934       uint8_t *untiled_s8_map = map->ptr;
1935       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
1936       unsigned int image_x, image_y;
1937
1938       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1939
1940       for (uint32_t y = 0; y < map->h; y++) {
1941          for (uint32_t x = 0; x < map->w; x++) {
1942             ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
1943                                                x + image_x + map->x,
1944                                                y + image_y + map->y,
1945                                                brw->has_swizzling);
1946             untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
1947          }
1948       }
1949
1950       intel_miptree_unmap_raw(brw, mt);
1951
1952       DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __FUNCTION__,
1953           map->x, map->y, map->w, map->h,
1954           mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
1955    } else {
1956       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
1957           map->x, map->y, map->w, map->h,
1958           mt, map->ptr, map->stride);
1959    }
1960 }
1961
1962 static void
1963 intel_miptree_unmap_s8(struct brw_context *brw,
1964                        struct intel_mipmap_tree *mt,
1965                        struct intel_miptree_map *map,
1966                        unsigned int level,
1967                        unsigned int slice)
1968 {
1969    if (map->mode & GL_MAP_WRITE_BIT) {
1970       unsigned int image_x, image_y;
1971       uint8_t *untiled_s8_map = map->ptr;
1972       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
1973
1974       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1975
1976       for (uint32_t y = 0; y < map->h; y++) {
1977          for (uint32_t x = 0; x < map->w; x++) {
1978             ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
1979                                                x + map->x,
1980                                                y + map->y,
1981                                                brw->has_swizzling);
1982             tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
1983          }
1984       }
1985
1986       intel_miptree_unmap_raw(brw, mt);
1987    }
1988
1989    free(map->buffer);
1990 }
1991
1992 static void
1993 intel_miptree_map_etc(struct brw_context *brw,
1994                       struct intel_mipmap_tree *mt,
1995                       struct intel_miptree_map *map,
1996                       unsigned int level,
1997                       unsigned int slice)
1998 {
1999    assert(mt->etc_format != MESA_FORMAT_NONE);
2000    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
2001       assert(mt->format == MESA_FORMAT_R8G8B8X8_UNORM);
2002    }
2003
2004    assert(map->mode & GL_MAP_WRITE_BIT);
2005    assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
2006
2007    map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
2008    map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
2009                                                 map->w, map->h, 1));
2010    map->ptr = map->buffer;
2011 }
2012
2013 static void
2014 intel_miptree_unmap_etc(struct brw_context *brw,
2015                         struct intel_mipmap_tree *mt,
2016                         struct intel_miptree_map *map,
2017                         unsigned int level,
2018                         unsigned int slice)
2019 {
2020    uint32_t image_x;
2021    uint32_t image_y;
2022    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2023
2024    image_x += map->x;
2025    image_y += map->y;
2026
2027    uint8_t *dst = intel_miptree_map_raw(brw, mt)
2028                 + image_y * mt->region->pitch
2029                 + image_x * mt->region->cpp;
2030
2031    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
2032       _mesa_etc1_unpack_rgba8888(dst, mt->region->pitch,
2033                                  map->ptr, map->stride,
2034                                  map->w, map->h);
2035    else
2036       _mesa_unpack_etc2_format(dst, mt->region->pitch,
2037                                map->ptr, map->stride,
2038                                map->w, map->h, mt->etc_format);
2039
2040    intel_miptree_unmap_raw(brw, mt);
2041    free(map->buffer);
2042 }
2043
2044 /**
2045  * Mapping function for packed depth/stencil miptrees backed by real separate
2046  * miptrees for depth and stencil.
2047  *
2048  * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
2049  * separate from the depth buffer.  Yet at the GL API level, we have to expose
2050  * packed depth/stencil textures and FBO attachments, and Mesa core expects to
2051  * be able to map that memory for texture storage and glReadPixels-type
2052  * operations.  We give Mesa core that access by mallocing a temporary and
2053  * copying the data between the actual backing store and the temporary.
2054  */
2055 static void
2056 intel_miptree_map_depthstencil(struct brw_context *brw,
2057                                struct intel_mipmap_tree *mt,
2058                                struct intel_miptree_map *map,
2059                                unsigned int level, unsigned int slice)
2060 {
2061    struct intel_mipmap_tree *z_mt = mt;
2062    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
2063    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
2064    int packed_bpp = map_z32f_x24s8 ? 8 : 4;
2065
2066    map->stride = map->w * packed_bpp;
2067    map->buffer = map->ptr = malloc(map->stride * map->h);
2068    if (!map->buffer)
2069       return;
2070
2071    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
2072     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
2073     * invalidate is set, since we'll be writing the whole rectangle from our
2074     * temporary buffer back out.
2075     */
2076    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
2077       uint32_t *packed_map = map->ptr;
2078       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
2079       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
2080       unsigned int s_image_x, s_image_y;
2081       unsigned int z_image_x, z_image_y;
2082
2083       intel_miptree_get_image_offset(s_mt, level, slice,
2084                                      &s_image_x, &s_image_y);
2085       intel_miptree_get_image_offset(z_mt, level, slice,
2086                                      &z_image_x, &z_image_y);
2087
2088       for (uint32_t y = 0; y < map->h; y++) {
2089          for (uint32_t x = 0; x < map->w; x++) {
2090             int map_x = map->x + x, map_y = map->y + y;
2091             ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
2092                                                  map_x + s_image_x,
2093                                                  map_y + s_image_y,
2094                                                  brw->has_swizzling);
2095             ptrdiff_t z_offset = ((map_y + z_image_y) *
2096                                   (z_mt->region->pitch / 4) +
2097                                   (map_x + z_image_x));
2098             uint8_t s = s_map[s_offset];
2099             uint32_t z = z_map[z_offset];
2100
2101             if (map_z32f_x24s8) {
2102                packed_map[(y * map->w + x) * 2 + 0] = z;
2103                packed_map[(y * map->w + x) * 2 + 1] = s;
2104             } else {
2105                packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
2106             }
2107          }
2108       }
2109
2110       intel_miptree_unmap_raw(brw, s_mt);
2111       intel_miptree_unmap_raw(brw, z_mt);
2112
2113       DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
2114           __FUNCTION__,
2115           map->x, map->y, map->w, map->h,
2116           z_mt, map->x + z_image_x, map->y + z_image_y,
2117           s_mt, map->x + s_image_x, map->y + s_image_y,
2118           map->ptr, map->stride);
2119    } else {
2120       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
2121           map->x, map->y, map->w, map->h,
2122           mt, map->ptr, map->stride);
2123    }
2124 }
2125
2126 static void
2127 intel_miptree_unmap_depthstencil(struct brw_context *brw,
2128                                  struct intel_mipmap_tree *mt,
2129                                  struct intel_miptree_map *map,
2130                                  unsigned int level,
2131                                  unsigned int slice)
2132 {
2133    struct intel_mipmap_tree *z_mt = mt;
2134    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
2135    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
2136
2137    if (map->mode & GL_MAP_WRITE_BIT) {
2138       uint32_t *packed_map = map->ptr;
2139       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
2140       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
2141       unsigned int s_image_x, s_image_y;
2142       unsigned int z_image_x, z_image_y;
2143
2144       intel_miptree_get_image_offset(s_mt, level, slice,
2145                                      &s_image_x, &s_image_y);
2146       intel_miptree_get_image_offset(z_mt, level, slice,
2147                                      &z_image_x, &z_image_y);
2148
2149       for (uint32_t y = 0; y < map->h; y++) {
2150          for (uint32_t x = 0; x < map->w; x++) {
2151             ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
2152                                                  x + s_image_x + map->x,
2153                                                  y + s_image_y + map->y,
2154                                                  brw->has_swizzling);
2155             ptrdiff_t z_offset = ((y + z_image_y) *
2156                                   (z_mt->region->pitch / 4) +
2157                                   (x + z_image_x));
2158
2159             if (map_z32f_x24s8) {
2160                z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
2161                s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
2162             } else {
2163                uint32_t packed = packed_map[y * map->w + x];
2164                s_map[s_offset] = packed >> 24;
2165                z_map[z_offset] = packed;
2166             }
2167          }
2168       }
2169
2170       intel_miptree_unmap_raw(brw, s_mt);
2171       intel_miptree_unmap_raw(brw, z_mt);
2172
2173       DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
2174           __FUNCTION__,
2175           map->x, map->y, map->w, map->h,
2176           z_mt, _mesa_get_format_name(z_mt->format),
2177           map->x + z_image_x, map->y + z_image_y,
2178           s_mt, map->x + s_image_x, map->y + s_image_y,
2179           map->ptr, map->stride);
2180    }
2181
2182    free(map->buffer);
2183 }
2184
2185 /**
2186  * Create and attach a map to the miptree at (level, slice). Return the
2187  * attached map.
2188  */
2189 static struct intel_miptree_map*
2190 intel_miptree_attach_map(struct intel_mipmap_tree *mt,
2191                          unsigned int level,
2192                          unsigned int slice,
2193                          unsigned int x,
2194                          unsigned int y,
2195                          unsigned int w,
2196                          unsigned int h,
2197                          GLbitfield mode)
2198 {
2199    struct intel_miptree_map *map = calloc(1, sizeof(*map));
2200
2201    if (!map)
2202       return NULL;
2203
2204    assert(mt->level[level].slice[slice].map == NULL);
2205    mt->level[level].slice[slice].map = map;
2206
2207    map->mode = mode;
2208    map->x = x;
2209    map->y = y;
2210    map->w = w;
2211    map->h = h;
2212
2213    return map;
2214 }
2215
2216 /**
2217  * Release the map at (level, slice).
2218  */
2219 static void
2220 intel_miptree_release_map(struct intel_mipmap_tree *mt,
2221                          unsigned int level,
2222                          unsigned int slice)
2223 {
2224    struct intel_miptree_map **map;
2225
2226    map = &mt->level[level].slice[slice].map;
2227    free(*map);
2228    *map = NULL;
2229 }
2230
2231 static bool
2232 can_blit_slice(struct intel_mipmap_tree *mt,
2233                unsigned int level, unsigned int slice)
2234 {
2235    uint32_t image_x;
2236    uint32_t image_y;
2237    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2238    if (image_x >= 32768 || image_y >= 32768)
2239       return false;
2240
2241    if (mt->region->pitch >= 32768)
2242       return false;
2243
2244    return true;
2245 }
2246
2247 static void
2248 intel_miptree_map_singlesample(struct brw_context *brw,
2249                                struct intel_mipmap_tree *mt,
2250                                unsigned int level,
2251                                unsigned int slice,
2252                                unsigned int x,
2253                                unsigned int y,
2254                                unsigned int w,
2255                                unsigned int h,
2256                                GLbitfield mode,
2257                                void **out_ptr,
2258                                int *out_stride)
2259 {
2260    struct intel_miptree_map *map;
2261
2262    assert(mt->num_samples <= 1);
2263
2264    map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
2265    if (!map){
2266       *out_ptr = NULL;
2267       *out_stride = 0;
2268       return;
2269    }
2270
2271    intel_miptree_slice_resolve_depth(brw, mt, level, slice);
2272    if (map->mode & GL_MAP_WRITE_BIT) {
2273       intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice);
2274    }
2275
2276    if (mt->format == MESA_FORMAT_S_UINT8) {
2277       intel_miptree_map_s8(brw, mt, map, level, slice);
2278    } else if (mt->etc_format != MESA_FORMAT_NONE &&
2279               !(mode & BRW_MAP_DIRECT_BIT)) {
2280       intel_miptree_map_etc(brw, mt, map, level, slice);
2281    } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
2282       intel_miptree_map_depthstencil(brw, mt, map, level, slice);
2283    }
2284    /* See intel_miptree_blit() for details on the 32k pitch limit. */
2285    else if (brw->has_llc &&
2286             !(mode & GL_MAP_WRITE_BIT) &&
2287             !mt->compressed &&
2288             (mt->region->tiling == I915_TILING_X ||
2289              (brw->gen >= 6 && mt->region->tiling == I915_TILING_Y)) &&
2290             can_blit_slice(mt, level, slice)) {
2291       intel_miptree_map_blit(brw, mt, map, level, slice);
2292    } else if (mt->region->tiling != I915_TILING_NONE &&
2293               mt->region->bo->size >= brw->max_gtt_map_object_size) {
2294       assert(can_blit_slice(mt, level, slice));
2295       intel_miptree_map_blit(brw, mt, map, level, slice);
2296 #ifdef __SSE4_1__
2297    } else if (!(mode & GL_MAP_WRITE_BIT) && !mt->compressed) {
2298       intel_miptree_map_movntdqa(brw, mt, map, level, slice);
2299 #endif
2300    } else {
2301       intel_miptree_map_gtt(brw, mt, map, level, slice);
2302    }
2303
2304    *out_ptr = map->ptr;
2305    *out_stride = map->stride;
2306
2307    if (map->ptr == NULL)
2308       intel_miptree_release_map(mt, level, slice);
2309 }
2310
2311 static void
2312 intel_miptree_unmap_singlesample(struct brw_context *brw,
2313                                  struct intel_mipmap_tree *mt,
2314                                  unsigned int level,
2315                                  unsigned int slice)
2316 {
2317    struct intel_miptree_map *map = mt->level[level].slice[slice].map;
2318
2319    assert(mt->num_samples <= 1);
2320
2321    if (!map)
2322       return;
2323
2324    DBG("%s: mt %p (%s) level %d slice %d\n", __FUNCTION__,
2325        mt, _mesa_get_format_name(mt->format), level, slice);
2326
2327    if (mt->format == MESA_FORMAT_S_UINT8) {
2328       intel_miptree_unmap_s8(brw, mt, map, level, slice);
2329    } else if (mt->etc_format != MESA_FORMAT_NONE &&
2330               !(map->mode & BRW_MAP_DIRECT_BIT)) {
2331       intel_miptree_unmap_etc(brw, mt, map, level, slice);
2332    } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
2333       intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
2334    } else if (map->mt) {
2335       intel_miptree_unmap_blit(brw, mt, map, level, slice);
2336 #ifdef __SSE4_1__
2337    } else if (map->buffer) {
2338       intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
2339 #endif
2340    } else {
2341       intel_miptree_unmap_gtt(brw, mt, map, level, slice);
2342    }
2343
2344    intel_miptree_release_map(mt, level, slice);
2345 }
2346
2347 static void
2348 intel_miptree_map_multisample(struct brw_context *brw,
2349                               struct intel_mipmap_tree *mt,
2350                               unsigned int level,
2351                               unsigned int slice,
2352                               unsigned int x,
2353                               unsigned int y,
2354                               unsigned int w,
2355                               unsigned int h,
2356                               GLbitfield mode,
2357                               void **out_ptr,
2358                               int *out_stride)
2359 {
2360    struct gl_context *ctx = &brw->ctx;
2361    struct intel_miptree_map *map;
2362
2363    assert(mt->num_samples > 1);
2364
2365    /* Only flat, renderbuffer-like miptrees are supported. */
2366    if (mt->target != GL_TEXTURE_2D ||
2367        mt->first_level != 0 ||
2368        mt->last_level != 0) {
2369       _mesa_problem(ctx, "attempt to map a multisample miptree for "
2370                     "which (target, first_level, last_level != "
2371                     "(GL_TEXTURE_2D, 0, 0)");
2372       goto fail;
2373    }
2374
2375    map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
2376    if (!map)
2377       goto fail;
2378
2379    if (!mt->singlesample_mt) {
2380       mt->singlesample_mt =
2381          intel_miptree_create_for_renderbuffer(brw,
2382                                                mt->format,
2383                                                mt->logical_width0,
2384                                                mt->logical_height0,
2385                                                0 /*num_samples*/);
2386       if (!mt->singlesample_mt)
2387          goto fail;
2388
2389       map->singlesample_mt_is_tmp = true;
2390       mt->need_downsample = true;
2391    }
2392
2393    intel_miptree_downsample(brw, mt);
2394    intel_miptree_map_singlesample(brw, mt->singlesample_mt,
2395                                   level, slice,
2396                                   x, y, w, h,
2397                                   mode,
2398                                   out_ptr, out_stride);
2399    return;
2400
2401 fail:
2402    intel_miptree_release_map(mt, level, slice);
2403    *out_ptr = NULL;
2404    *out_stride = 0;
2405 }
2406
2407 static void
2408 intel_miptree_unmap_multisample(struct brw_context *brw,
2409                                 struct intel_mipmap_tree *mt,
2410                                 unsigned int level,
2411                                 unsigned int slice)
2412 {
2413    struct intel_miptree_map *map = mt->level[level].slice[slice].map;
2414
2415    assert(mt->num_samples > 1);
2416
2417    if (!map)
2418       return;
2419
2420    intel_miptree_unmap_singlesample(brw, mt->singlesample_mt, level, slice);
2421
2422    mt->need_downsample = false;
2423    if (map->mode & GL_MAP_WRITE_BIT)
2424       intel_miptree_upsample(brw, mt);
2425
2426    if (map->singlesample_mt_is_tmp)
2427       intel_miptree_release(&mt->singlesample_mt);
2428
2429    intel_miptree_release_map(mt, level, slice);
2430 }
2431
2432 void
2433 intel_miptree_map(struct brw_context *brw,
2434                   struct intel_mipmap_tree *mt,
2435                   unsigned int level,
2436                   unsigned int slice,
2437                   unsigned int x,
2438                   unsigned int y,
2439                   unsigned int w,
2440                   unsigned int h,
2441                   GLbitfield mode,
2442                   void **out_ptr,
2443                   int *out_stride)
2444 {
2445    if (mt->num_samples <= 1)
2446       intel_miptree_map_singlesample(brw, mt,
2447                                      level, slice,
2448                                      x, y, w, h,
2449                                      mode,
2450                                      out_ptr, out_stride);
2451    else
2452       intel_miptree_map_multisample(brw, mt,
2453                                     level, slice,
2454                                     x, y, w, h,
2455                                     mode,
2456                                     out_ptr, out_stride);
2457 }
2458
2459 void
2460 intel_miptree_unmap(struct brw_context *brw,
2461                     struct intel_mipmap_tree *mt,
2462                     unsigned int level,
2463                     unsigned int slice)
2464 {
2465    if (mt->num_samples <= 1)
2466       intel_miptree_unmap_singlesample(brw, mt, level, slice);
2467    else
2468       intel_miptree_unmap_multisample(brw, mt, level, slice);
2469 }