src/mesa/drivers/dri/i965/intel_mipmap_tree.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2006 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 #include <GL/gl.h>
  29 #include <GL/internal/dri_interface.h>
  30
  31 #include "intel_batchbuffer.h"
  32 #include "intel_mipmap_tree.h"
  33 #include "intel_resolve_map.h"
  34 #include "intel_tex.h"
  35 #include "intel_blit.h"
  36 #include "intel_fbo.h"
  37
  38 #include "brw_blorp.h"
  39 #include "brw_context.h"
  40
  41 #include "main/enums.h"
  42 #include "main/fbobject.h"
  43 #include "main/formats.h"
  44 #include "main/glformats.h"
  45 #include "main/texcompress_etc.h"
  46 #include "main/teximage.h"
  47 #include "main/streaming-load-memcpy.h"
  48 #include "x86/common_x86_asm.h"
  49
  50 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
  51
  52 /**
  53  * Determine which MSAA layout should be used by the MSAA surface being
  54  * created, based on the chip generation and the surface type.
  55  */
  56 static enum intel_msaa_layout
  57 compute_msaa_layout(struct brw_context *brw, mesa_format format, GLenum target)
  58 {
  59    /* Prior to Gen7, all MSAA surfaces used IMS layout. */
  60    if (brw->gen < 7)
  61       return INTEL_MSAA_LAYOUT_IMS;
  62
  63    /* In Gen7, IMS layout is only used for depth and stencil buffers. */
  64    switch (_mesa_get_format_base_format(format)) {
  65    case GL_DEPTH_COMPONENT:
  66    case GL_STENCIL_INDEX:
  67    case GL_DEPTH_STENCIL:
  68       return INTEL_MSAA_LAYOUT_IMS;
  69    default:
  70       /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
  71        *
  72        *   This field must be set to 0 for all SINT MSRTs when all RT channels
  73        *   are not written
  74        *
  75        * In practice this means that we have to disable MCS for all signed
  76        * integer MSAA buffers.  The alternative, to disable MCS only when one
  77        * of the render target channels is disabled, is impractical because it
  78        * would require converting between CMS and UMS MSAA layouts on the fly,
  79        * which is expensive.
  80        */
  81       if (brw->gen == 7 && _mesa_get_format_datatype(format) == GL_INT) {
  82          return INTEL_MSAA_LAYOUT_UMS;
  83       } else {
  84          return INTEL_MSAA_LAYOUT_CMS;
  85       }
  86    }
  87 }
  88
  89
  90 /**
  91  * For single-sampled render targets ("non-MSRT"), the MCS buffer is a
  92  * scaled-down bitfield representation of the color buffer which is capable of
  93  * recording when blocks of the color buffer are equal to the clear value.
  94  * This function returns the block size that will be used by the MCS buffer
  95  * corresponding to a certain color miptree.
  96  *
  97  * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
  98  * beneath the "Fast Color Clear" bullet (p327):
  99  *
 100  *     The following table describes the RT alignment
 101  *
 102  *                       Pixels  Lines
 103  *         TiledY RT CL
 104  *             bpp
 105  *              32          8      4
 106  *              64          4      4
 107  *             128          2      4
 108  *         TiledX RT CL
 109  *             bpp
 110  *              32         16      2
 111  *              64          8      2
 112  *             128          4      2
 113  *
 114  * This alignment has the following uses:
 115  *
 116  * - For figuring out the size of the MCS buffer.  Each 4k tile in the MCS
 117  *   buffer contains 128 blocks horizontally and 256 blocks vertically.
 118  *
 119  * - For figuring out alignment restrictions for a fast clear operation.  Fast
 120  *   clear operations must always clear aligned multiples of 16 blocks
 121  *   horizontally and 32 blocks vertically.
 122  *
 123  * - For scaling down the coordinates sent through the render pipeline during
 124  *   a fast clear.  X coordinates must be scaled down by 8 times the block
 125  *   width, and Y coordinates by 16 times the block height.
 126  *
 127  * - For scaling down the coordinates sent through the render pipeline during
 128  *   a "Render Target Resolve" operation.  X coordinates must be scaled down
 129  *   by half the block width, and Y coordinates by half the block height.
 130  */
 131 void
 132 intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
 133                                  struct intel_mipmap_tree *mt,
 134                                  unsigned *width_px, unsigned *height)
 135 {
 136    switch (mt->tiling) {
 137    default:
 138       unreachable("Non-MSRT MCS requires X or Y tiling");
 139       /* In release builds, fall through */
 140    case I915_TILING_Y:
 141       *width_px = 32 / mt->cpp;
 142       *height = 4;
 143       break;
 144    case I915_TILING_X:
 145       *width_px = 64 / mt->cpp;
 146       *height = 2;
 147    }
 148 }
 149
 150
 151 /**
 152  * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
 153  * can be used.
 154  *
 155  * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 156  * beneath the "Fast Color Clear" bullet (p326):
 157  *
 158  *     - Support is limited to tiled render targets.
 159  *     - Support is for non-mip-mapped and non-array surface types only.
 160  *
 161  * And then later, on p327:
 162  *
 163  *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
 164  *       64bpp, and 128bpp.
 165  */
 166 bool
 167 intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
 168                                        struct intel_mipmap_tree *mt)
 169 {
 170    /* MCS support does not exist prior to Gen7 */
 171    if (brw->gen < 7)
 172       return false;
 173
 174    /* MCS is only supported for color buffers */
 175    switch (_mesa_get_format_base_format(mt->format)) {
 176    case GL_DEPTH_COMPONENT:
 177    case GL_DEPTH_STENCIL:
 178    case GL_STENCIL_INDEX:
 179       return false;
 180    }
 181
 182    if (mt->tiling != I915_TILING_X &&
 183        mt->tiling != I915_TILING_Y)
 184       return false;
 185    if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
 186       return false;
 187    if (mt->first_level != 0 || mt->last_level != 0)
 188       return false;
 189    if (mt->physical_depth0 != 1)
 190       return false;
 191
 192    /* There's no point in using an MCS buffer if the surface isn't in a
 193     * renderable format.
 194     */
 195    if (!brw->format_supported_as_render_target[mt->format])
 196       return false;
 197
 198    return true;
 199 }
 200
 201
 202 /**
 203  * Determine depth format corresponding to a depth+stencil format,
 204  * for separate stencil.
 205  */
 206 mesa_format
 207 intel_depth_format_for_depthstencil_format(mesa_format format) {
 208    switch (format) {
 209    case MESA_FORMAT_Z24_UNORM_S8_UINT:
 210       return MESA_FORMAT_Z24_UNORM_X8_UINT;
 211    case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
 212       return MESA_FORMAT_Z_FLOAT32;
 213    default:
 214       return format;
 215    }
 216 }
 217
 218
 219 /**
 220  * @param for_bo Indicates that the caller is
 221  *        intel_miptree_create_for_bo(). If true, then do not create
 222  *        \c stencil_mt.
 223  */
 224 struct intel_mipmap_tree *
 225 intel_miptree_create_layout(struct brw_context *brw,
 226                             GLenum target,
 227                             mesa_format format,
 228                             GLuint first_level,
 229                             GLuint last_level,
 230                             GLuint width0,
 231                             GLuint height0,
 232                             GLuint depth0,
 233                             bool for_bo,
 234                             GLuint num_samples,
 235                             bool force_all_slices_at_each_lod)
 236 {
 237    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
 238    if (!mt)
 239       return NULL;
 240
 241    DBG("%s target %s format %s level %d..%d slices %d <-- %p\n", __FUNCTION__,
 242        _mesa_lookup_enum_by_nr(target),
 243        _mesa_get_format_name(format),
 244        first_level, last_level, depth0, mt);
 245
 246    if (target == GL_TEXTURE_1D_ARRAY) {
 247       /* For a 1D Array texture the OpenGL API will treat the height0
 248        * parameter as the number of array slices. For Intel hardware, we treat
 249        * the 1D array as a 2D Array with a height of 1.
 250        *
 251        * So, when we first come through this path to create a 1D Array
 252        * texture, height0 stores the number of slices, and depth0 is 1. In
 253        * this case, we want to swap height0 and depth0.
 254        *
 255        * Since some miptrees will be created based on the base miptree, we may
 256        * come through this path and see height0 as 1 and depth0 being the
 257        * number of slices. In this case we don't need to do the swap.
 258        */
 259       assert(height0 == 1 || depth0 == 1);
 260       if (height0 > 1) {
 261          depth0 = height0;
 262          height0 = 1;
 263       }
 264    }
 265
 266    mt->target = target;
 267    mt->format = format;
 268    mt->first_level = first_level;
 269    mt->last_level = last_level;
 270    mt->logical_width0 = width0;
 271    mt->logical_height0 = height0;
 272    mt->logical_depth0 = depth0;
 273    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
 274    exec_list_make_empty(&mt->hiz_map);
 275
 276    /* The cpp is bytes per (1, blockheight)-sized block for compressed
 277     * textures.  This is why you'll see divides by blockheight all over
 278     */
 279    unsigned bw, bh;
 280    _mesa_get_format_block_size(format, &bw, &bh);
 281    assert(_mesa_get_format_bytes(mt->format) % bw == 0);
 282    mt->cpp = _mesa_get_format_bytes(mt->format) / bw;
 283
 284    mt->num_samples = num_samples;
 285    mt->compressed = _mesa_is_format_compressed(format);
 286    mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE;
 287    mt->refcount = 1;
 288
 289    if (num_samples > 1) {
 290       /* Adjust width/height/depth for MSAA */
 291       mt->msaa_layout = compute_msaa_layout(brw, format, mt->target);
 292       if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
 293          /* From the Ivybridge PRM, Volume 1, Part 1, page 108:
 294           * "If the surface is multisampled and it is a depth or stencil
 295           *  surface or Multisampled Surface StorageFormat in SURFACE_STATE is
 296           *  MSFMT_DEPTH_STENCIL, WL and HL must be adjusted as follows before
 297           *  proceeding:
 298           *
 299           *  +----------------------------------------------------------------+
 300           *  | Num Multisamples |        W_l =         |        H_l =         |
 301           *  +----------------------------------------------------------------+
 302           *  |         2        | ceiling(W_l / 2) * 4 | H_l (no adjustment)  |
 303           *  |         4        | ceiling(W_l / 2) * 4 | ceiling(H_l / 2) * 4 |
 304           *  |         8        | ceiling(W_l / 2) * 8 | ceiling(H_l / 2) * 4 |
 305           *  |        16        | ceiling(W_l / 2) * 8 | ceiling(H_l / 2) * 8 |
 306           *  +----------------------------------------------------------------+
 307           * "
 308           *
 309           * Note that MSFMT_DEPTH_STENCIL just means the IMS (interleaved)
 310           * format rather than UMS/CMS (array slices).  The Sandybridge PRM,
 311           * Volume 1, Part 1, Page 111 has the same formula for 4x MSAA.
 312           *
 313           * Another more complicated explanation for these adjustments comes
 314           * from the Sandybridge PRM, volume 4, part 1, page 31:
 315           *
 316           *     "Any of the other messages (sample*, LOD, load4) used with a
 317           *      (4x) multisampled surface will in-effect sample a surface with
 318           *      double the height and width as that indicated in the surface
 319           *      state. Each pixel position on the original-sized surface is
 320           *      replaced with a 2x2 of samples with the following arrangement:
 321           *
 322           *         sample 0 sample 2
 323           *         sample 1 sample 3"
 324           *
 325           * Thus, when sampling from a multisampled texture, it behaves as
 326           * though the layout in memory for (x,y,sample) is:
 327           *
 328           *      (0,0,0) (0,0,2)   (1,0,0) (1,0,2)
 329           *      (0,0,1) (0,0,3)   (1,0,1) (1,0,3)
 330           *
 331           *      (0,1,0) (0,1,2)   (1,1,0) (1,1,2)
 332           *      (0,1,1) (0,1,3)   (1,1,1) (1,1,3)
 333           *
 334           * However, the actual layout of multisampled data in memory is:
 335           *
 336           *      (0,0,0) (1,0,0)   (0,0,1) (1,0,1)
 337           *      (0,1,0) (1,1,0)   (0,1,1) (1,1,1)
 338           *
 339           *      (0,0,2) (1,0,2)   (0,0,3) (1,0,3)
 340           *      (0,1,2) (1,1,2)   (0,1,3) (1,1,3)
 341           *
 342           * This pattern repeats for each 2x2 pixel block.
 343           *
 344           * As a result, when calculating the size of our 4-sample buffer for
 345           * an odd width or height, we have to align before scaling up because
 346           * sample 3 is in that bottom right 2x2 block.
 347           */
 348          switch (num_samples) {
 349          case 2:
 350             assert(brw->gen >= 8);
 351             width0 = ALIGN(width0, 2) * 2;
 352             height0 = ALIGN(height0, 2);
 353             break;
 354          case 4:
 355             width0 = ALIGN(width0, 2) * 2;
 356             height0 = ALIGN(height0, 2) * 2;
 357             break;
 358          case 8:
 359             width0 = ALIGN(width0, 2) * 4;
 360             height0 = ALIGN(height0, 2) * 2;
 361             break;
 362          default:
 363             /* num_samples should already have been quantized to 0, 1, 2, 4, or
 364              * 8.
 365              */
 366             unreachable("not reached");
 367          }
 368       } else {
 369          /* Non-interleaved */
 370          depth0 *= num_samples;
 371       }
 372    }
 373
 374    /* Set array_layout to ALL_SLICES_AT_EACH_LOD when gen7+ array_spacing_lod0
 375     * can be used. array_spacing_lod0 is only used for non-IMS MSAA surfaces.
 376     * TODO: can we use it elsewhere?
 377     */
 378    switch (mt->msaa_layout) {
 379    case INTEL_MSAA_LAYOUT_NONE:
 380    case INTEL_MSAA_LAYOUT_IMS:
 381       mt->array_layout = ALL_LOD_IN_EACH_SLICE;
 382       break;
 383    case INTEL_MSAA_LAYOUT_UMS:
 384    case INTEL_MSAA_LAYOUT_CMS:
 385       mt->array_layout = ALL_SLICES_AT_EACH_LOD;
 386       break;
 387    }
 388
 389    if (target == GL_TEXTURE_CUBE_MAP) {
 390       assert(depth0 == 1);
 391       depth0 = 6;
 392    }
 393
 394    mt->physical_width0 = width0;
 395    mt->physical_height0 = height0;
 396    mt->physical_depth0 = depth0;
 397
 398    if (!for_bo &&
 399        _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
 400        (brw->must_use_separate_stencil ||
 401         (brw->has_separate_stencil && brw_is_hiz_depth_format(brw, format)))) {
 402       const bool force_all_slices_at_each_lod = brw->gen == 6;
 403       mt->stencil_mt = intel_miptree_create(brw,
 404                                             mt->target,
 405                                             MESA_FORMAT_S_UINT8,
 406                                             mt->first_level,
 407                                             mt->last_level,
 408                                             mt->logical_width0,
 409                                             mt->logical_height0,
 410                                             mt->logical_depth0,
 411                                             true,
 412                                             num_samples,
 413                                             INTEL_MIPTREE_TILING_ANY,
 414                                             force_all_slices_at_each_lod);
 415       if (!mt->stencil_mt) {
 416          intel_miptree_release(&mt);
 417          return NULL;
 418       }
 419
 420       /* Fix up the Z miptree format for how we're splitting out separate
 421        * stencil.  Gen7 expects there to be no stencil bits in its depth buffer.
 422        */
 423       mt->format = intel_depth_format_for_depthstencil_format(mt->format);
 424       mt->cpp = 4;
 425
 426       if (format == mt->format) {
 427          _mesa_problem(NULL, "Unknown format %s in separate stencil mt\n",
 428                        _mesa_get_format_name(mt->format));
 429       }
 430    }
 431
 432    if (force_all_slices_at_each_lod)
 433       mt->array_layout = ALL_SLICES_AT_EACH_LOD;
 434
 435    brw_miptree_layout(brw, mt);
 436
 437    return mt;
 438 }
 439
 440 /**
 441  * \brief Helper function for intel_miptree_create().
 442  */
 443 static uint32_t
 444 intel_miptree_choose_tiling(struct brw_context *brw,
 445                             mesa_format format,
 446                             uint32_t width0,
 447                             uint32_t num_samples,
 448                             enum intel_miptree_tiling_mode requested,
 449                             struct intel_mipmap_tree *mt)
 450 {
 451    if (format == MESA_FORMAT_S_UINT8) {
 452       /* The stencil buffer is W tiled. However, we request from the kernel a
 453        * non-tiled buffer because the GTT is incapable of W fencing.
 454        */
 455       return I915_TILING_NONE;
 456    }
 457
 458    /* Some usages may want only one type of tiling, like depth miptrees (Y
 459     * tiled), or temporary BOs for uploading data once (linear).
 460     */
 461    switch (requested) {
 462    case INTEL_MIPTREE_TILING_ANY:
 463       break;
 464    case INTEL_MIPTREE_TILING_Y:
 465       return I915_TILING_Y;
 466    case INTEL_MIPTREE_TILING_NONE:
 467       return I915_TILING_NONE;
 468    }
 469
 470    if (num_samples > 1) {
 471       /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
 472        * Surface"):
 473        *
 474        *   [DevSNB+]: For multi-sample render targets, this field must be
 475        *   1. MSRTs can only be tiled.
 476        *
 477        * Our usual reason for preferring X tiling (fast blits using the
 478        * blitting engine) doesn't apply to MSAA, since we'll generally be
 479        * downsampling or upsampling when blitting between the MSAA buffer
 480        * and another buffer, and the blitting engine doesn't support that.
 481        * So use Y tiling, since it makes better use of the cache.
 482        */
 483       return I915_TILING_Y;
 484    }
 485
 486    GLenum base_format = _mesa_get_format_base_format(format);
 487    if (base_format == GL_DEPTH_COMPONENT ||
 488        base_format == GL_DEPTH_STENCIL_EXT)
 489       return I915_TILING_Y;
 490
 491    int minimum_pitch = mt->total_width * mt->cpp;
 492
 493    /* If the width is much smaller than a tile, don't bother tiling. */
 494    if (minimum_pitch < 64)
 495       return I915_TILING_NONE;
 496
 497    if (ALIGN(minimum_pitch, 512) >= 32768 ||
 498        mt->total_width >= 32768 || mt->total_height >= 32768) {
 499       perf_debug("%dx%d miptree too large to blit, falling back to untiled",
 500                  mt->total_width, mt->total_height);
 501       return I915_TILING_NONE;
 502    }
 503
 504    /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */
 505    if (brw->gen < 6)
 506       return I915_TILING_X;
 507
 508    /* From the Sandybridge PRM, Volume 1, Part 2, page 32:
 509     * "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either TileX
 510     *  or Linear."
 511     * 128 bits per pixel translates to 16 bytes per pixel.  This is necessary
 512     * all the way back to 965, but is explicitly permitted on Gen7.
 513     */
 514    if (brw->gen != 7 && mt->cpp >= 16)
 515       return I915_TILING_X;
 516
 517    /* From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most
 518     * messages), on p64, under the heading "Surface Vertical Alignment":
 519     *
 520     *     This field must be set to VALIGN_4 for all tiled Y Render Target
 521     *     surfaces.
 522     *
 523     * So if the surface is renderable and uses a vertical alignment of 2,
 524     * force it to be X tiled.  This is somewhat conservative (it's possible
 525     * that the client won't ever render to this surface), but it's difficult
 526     * to know that ahead of time.  And besides, since we use a vertical
 527     * alignment of 4 as often as we can, this shouldn't happen very often.
 528     */
 529    if (brw->gen == 7 && mt->align_h == 2 &&
 530        brw->format_supported_as_render_target[format]) {
 531       return I915_TILING_X;
 532    }
 533
 534    return I915_TILING_Y | I915_TILING_X;
 535 }
 536
 537
 538 /**
 539  * Choose an appropriate uncompressed format for a requested
 540  * compressed format, if unsupported.
 541  */
 542 mesa_format
 543 intel_lower_compressed_format(struct brw_context *brw, mesa_format format)
 544 {
 545    /* No need to lower ETC formats on these platforms,
 546     * they are supported natively.
 547     */
 548    if (brw->gen >= 8 || brw->is_baytrail)
 549       return format;
 550
 551    switch (format) {
 552    case MESA_FORMAT_ETC1_RGB8:
 553       return MESA_FORMAT_R8G8B8X8_UNORM;
 554    case MESA_FORMAT_ETC2_RGB8:
 555       return MESA_FORMAT_R8G8B8X8_UNORM;
 556    case MESA_FORMAT_ETC2_SRGB8:
 557    case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
 558    case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
 559       return MESA_FORMAT_B8G8R8A8_SRGB;
 560    case MESA_FORMAT_ETC2_RGBA8_EAC:
 561    case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
 562       return MESA_FORMAT_R8G8B8A8_UNORM;
 563    case MESA_FORMAT_ETC2_R11_EAC:
 564       return MESA_FORMAT_R_UNORM16;
 565    case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
 566       return MESA_FORMAT_R_SNORM16;
 567    case MESA_FORMAT_ETC2_RG11_EAC:
 568       return MESA_FORMAT_R16G16_UNORM;
 569    case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
 570       return MESA_FORMAT_R16G16_SNORM;
 571    default:
 572       /* Non ETC1 / ETC2 format */
 573       return format;
 574    }
 575 }
 576
 577
 578 struct intel_mipmap_tree *
 579 intel_miptree_create(struct brw_context *brw,
 580                      GLenum target,
 581                      mesa_format format,
 582                      GLuint first_level,
 583                      GLuint last_level,
 584                      GLuint width0,
 585                      GLuint height0,
 586                      GLuint depth0,
 587                      bool expect_accelerated_upload,
 588                      GLuint num_samples,
 589                      enum intel_miptree_tiling_mode requested_tiling,
 590                      bool force_all_slices_at_each_lod)
 591 {
 592    struct intel_mipmap_tree *mt;
 593    mesa_format tex_format = format;
 594    mesa_format etc_format = MESA_FORMAT_NONE;
 595    GLuint total_width, total_height;
 596
 597    format = intel_lower_compressed_format(brw, format);
 598
 599    etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
 600
 601    mt = intel_miptree_create_layout(brw, target, format,
 602                                       first_level, last_level, width0,
 603                                       height0, depth0,
 604                                     false, num_samples,
 605                                     force_all_slices_at_each_lod);
 606    /*
 607     * pitch == 0 || height == 0  indicates the null texture
 608     */
 609    if (!mt || !mt->total_width || !mt->total_height) {
 610       intel_miptree_release(&mt);
 611       return NULL;
 612    }
 613
 614    total_width = mt->total_width;
 615    total_height = mt->total_height;
 616
 617    if (format == MESA_FORMAT_S_UINT8) {
 618       /* Align to size of W tile, 64x64. */
 619       total_width = ALIGN(total_width, 64);
 620       total_height = ALIGN(total_height, 64);
 621    }
 622
 623    uint32_t tiling = intel_miptree_choose_tiling(brw, format, width0,
 624                                                  num_samples, requested_tiling,
 625                                                  mt);
 626    bool y_or_x = false;
 627
 628    if (tiling == (I915_TILING_Y | I915_TILING_X)) {
 629       y_or_x = true;
 630       mt->tiling = I915_TILING_Y;
 631    } else {
 632       mt->tiling = tiling;
 633    }
 634
 635    unsigned long pitch;
 636    mt->etc_format = etc_format;
 637    mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
 638                                      total_width, total_height, mt->cpp,
 639                                      &mt->tiling, &pitch,
 640                                      (expect_accelerated_upload ?
 641                                       BO_ALLOC_FOR_RENDER : 0));
 642    mt->pitch = pitch;
 643
 644    /* If the BO is too large to fit in the aperture, we need to use the
 645     * BLT engine to support it.  The BLT paths can't currently handle Y-tiling,
 646     * so we need to fall back to X.
 647     */
 648    if (y_or_x && mt->bo->size >= brw->max_gtt_map_object_size) {
 649       perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n",
 650                  mt->total_width, mt->total_height);
 651
 652       mt->tiling = I915_TILING_X;
 653       drm_intel_bo_unreference(mt->bo);
 654       mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
 655                                         total_width, total_height, mt->cpp,
 656                                         &mt->tiling, &pitch,
 657                                         (expect_accelerated_upload ?
 658                                          BO_ALLOC_FOR_RENDER : 0));
 659       mt->pitch = pitch;
 660    }
 661
 662    mt->offset = 0;
 663
 664    if (!mt->bo) {
 665        intel_miptree_release(&mt);
 666        return NULL;
 667    }
 668
 669
 670    if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
 671       if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) {
 672          intel_miptree_release(&mt);
 673          return NULL;
 674       }
 675    }
 676
 677    /* If this miptree is capable of supporting fast color clears, set
 678     * fast_clear_state appropriately to ensure that fast clears will occur.
 679     * Allocation of the MCS miptree will be deferred until the first fast
 680     * clear actually occurs.
 681     */
 682    if (intel_is_non_msrt_mcs_buffer_supported(brw, mt))
 683       mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 684
 685    return mt;
 686 }
 687
 688 struct intel_mipmap_tree *
 689 intel_miptree_create_for_bo(struct brw_context *brw,
 690                             drm_intel_bo *bo,
 691                             mesa_format format,
 692                             uint32_t offset,
 693                             uint32_t width,
 694                             uint32_t height,
 695                             uint32_t depth,
 696                             int pitch)
 697 {
 698    struct intel_mipmap_tree *mt;
 699    uint32_t tiling, swizzle;
 700    GLenum target;
 701
 702    drm_intel_bo_get_tiling(bo, &tiling, &swizzle);
 703
 704    /* Nothing will be able to use this miptree with the BO if the offset isn't
 705     * aligned.
 706     */
 707    if (tiling != I915_TILING_NONE)
 708       assert(offset % 4096 == 0);
 709
 710    /* miptrees can't handle negative pitch.  If you need flipping of images,
 711     * that's outside of the scope of the mt.
 712     */
 713    assert(pitch >= 0);
 714
 715    target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
 716
 717    mt = intel_miptree_create_layout(brw, target, format,
 718                                     0, 0,
 719                                     width, height, depth,
 720                                     true, 0, false);
 721    if (!mt) {
 722       free(mt);
 723       return mt;
 724    }
 725
 726    drm_intel_bo_reference(bo);
 727    mt->bo = bo;
 728    mt->pitch = pitch;
 729    mt->offset = offset;
 730    mt->tiling = tiling;
 731
 732    return mt;
 733 }
 734
 735 /**
 736  * For a singlesample renderbuffer, this simply wraps the given BO with a
 737  * miptree.
 738  *
 739  * For a multisample renderbuffer, this wraps the window system's
 740  * (singlesample) BO with a singlesample miptree attached to the
 741  * intel_renderbuffer, then creates a multisample miptree attached to irb->mt
 742  * that will contain the actual rendering (which is lazily resolved to
 743  * irb->singlesample_mt).
 744  */
 745 void
 746 intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
 747                                          struct intel_renderbuffer *irb,
 748                                          drm_intel_bo *bo,
 749                                          uint32_t width, uint32_t height,
 750                                          uint32_t pitch)
 751 {
 752    struct intel_mipmap_tree *singlesample_mt = NULL;
 753    struct intel_mipmap_tree *multisample_mt = NULL;
 754    struct gl_renderbuffer *rb = &irb->Base.Base;
 755    mesa_format format = rb->Format;
 756    int num_samples = rb->NumSamples;
 757
 758    /* Only the front and back buffers, which are color buffers, are allocated
 759     * through the image loader.
 760     */
 761    assert(_mesa_get_format_base_format(format) == GL_RGB ||
 762           _mesa_get_format_base_format(format) == GL_RGBA);
 763
 764    singlesample_mt = intel_miptree_create_for_bo(intel,
 765                                                  bo,
 766                                                  format,
 767                                                  0,
 768                                                  width,
 769                                                  height,
 770                                                  1,
 771                                                  pitch);
 772    if (!singlesample_mt)
 773       goto fail;
 774
 775    /* If this miptree is capable of supporting fast color clears, set
 776     * mcs_state appropriately to ensure that fast clears will occur.
 777     * Allocation of the MCS miptree will be deferred until the first fast
 778     * clear actually occurs.
 779     */
 780    if (intel_is_non_msrt_mcs_buffer_supported(intel, singlesample_mt))
 781       singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 782
 783    if (num_samples == 0) {
 784       intel_miptree_release(&irb->mt);
 785       irb->mt = singlesample_mt;
 786
 787       assert(!irb->singlesample_mt);
 788    } else {
 789       intel_miptree_release(&irb->singlesample_mt);
 790       irb->singlesample_mt = singlesample_mt;
 791
 792       if (!irb->mt ||
 793           irb->mt->logical_width0 != width ||
 794           irb->mt->logical_height0 != height) {
 795          multisample_mt = intel_miptree_create_for_renderbuffer(intel,
 796                                                                 format,
 797                                                                 width,
 798                                                                 height,
 799                                                                 num_samples);
 800          if (!multisample_mt)
 801             goto fail;
 802
 803          irb->need_downsample = false;
 804          intel_miptree_release(&irb->mt);
 805          irb->mt = multisample_mt;
 806       }
 807    }
 808    return;
 809
 810 fail:
 811    intel_miptree_release(&irb->singlesample_mt);
 812    intel_miptree_release(&irb->mt);
 813    return;
 814 }
 815
 816 struct intel_mipmap_tree*
 817 intel_miptree_create_for_renderbuffer(struct brw_context *brw,
 818                                       mesa_format format,
 819                                       uint32_t width,
 820                                       uint32_t height,
 821                                       uint32_t num_samples)
 822 {
 823    struct intel_mipmap_tree *mt;
 824    uint32_t depth = 1;
 825    bool ok;
 826    GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
 827
 828    mt = intel_miptree_create(brw, target, format, 0, 0,
 829                              width, height, depth, true, num_samples,
 830                              INTEL_MIPTREE_TILING_ANY, false);
 831    if (!mt)
 832       goto fail;
 833
 834    if (brw_is_hiz_depth_format(brw, format)) {
 835       ok = intel_miptree_alloc_hiz(brw, mt);
 836       if (!ok)
 837          goto fail;
 838    }
 839
 840    return mt;
 841
 842 fail:
 843    intel_miptree_release(&mt);
 844    return NULL;
 845 }
 846
 847 void
 848 intel_miptree_reference(struct intel_mipmap_tree **dst,
 849                         struct intel_mipmap_tree *src)
 850 {
 851    if (*dst == src)
 852       return;
 853
 854    intel_miptree_release(dst);
 855
 856    if (src) {
 857       src->refcount++;
 858       DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount);
 859    }
 860
 861    *dst = src;
 862 }
 863
 864
 865 void
 866 intel_miptree_release(struct intel_mipmap_tree **mt)
 867 {
 868    if (!*mt)
 869       return;
 870
 871    DBG("%s %p refcount will be %d\n", __FUNCTION__, *mt, (*mt)->refcount - 1);
 872    if (--(*mt)->refcount <= 0) {
 873       GLuint i;
 874
 875       DBG("%s deleting %p\n", __FUNCTION__, *mt);
 876
 877       drm_intel_bo_unreference((*mt)->bo);
 878       intel_miptree_release(&(*mt)->stencil_mt);
 879       intel_miptree_release(&(*mt)->hiz_mt);
 880       intel_miptree_release(&(*mt)->mcs_mt);
 881       intel_resolve_map_clear(&(*mt)->hiz_map);
 882
 883       for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
 884          free((*mt)->level[i].slice);
 885       }
 886
 887       free(*mt);
 888    }
 889    *mt = NULL;
 890 }
 891
 892 void
 893 intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
 894                                        int *width, int *height, int *depth)
 895 {
 896    switch (image->TexObject->Target) {
 897    case GL_TEXTURE_1D_ARRAY:
 898       *width = image->Width;
 899       *height = 1;
 900       *depth = image->Height;
 901       break;
 902    default:
 903       *width = image->Width;
 904       *height = image->Height;
 905       *depth = image->Depth;
 906       break;
 907    }
 908 }
 909
 910 /**
 911  * Can the image be pulled into a unified mipmap tree?  This mirrors
 912  * the completeness test in a lot of ways.
 913  *
 914  * Not sure whether I want to pass gl_texture_image here.
 915  */
 916 bool
 917 intel_miptree_match_image(struct intel_mipmap_tree *mt,
 918                           struct gl_texture_image *image)
 919 {
 920    struct intel_texture_image *intelImage = intel_texture_image(image);
 921    GLuint level = intelImage->base.Base.Level;
 922    int width, height, depth;
 923
 924    /* glTexImage* choose the texture object based on the target passed in, and
 925     * objects can't change targets over their lifetimes, so this should be
 926     * true.
 927     */
 928    assert(image->TexObject->Target == mt->target);
 929
 930    mesa_format mt_format = mt->format;
 931    if (mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT && mt->stencil_mt)
 932       mt_format = MESA_FORMAT_Z24_UNORM_S8_UINT;
 933    if (mt->format == MESA_FORMAT_Z_FLOAT32 && mt->stencil_mt)
 934       mt_format = MESA_FORMAT_Z32_FLOAT_S8X24_UINT;
 935    if (mt->etc_format != MESA_FORMAT_NONE)
 936       mt_format = mt->etc_format;
 937
 938    if (image->TexFormat != mt_format)
 939       return false;
 940
 941    intel_miptree_get_dimensions_for_image(image, &width, &height, &depth);
 942
 943    if (mt->target == GL_TEXTURE_CUBE_MAP)
 944       depth = 6;
 945
 946    int level_depth = mt->level[level].depth;
 947    if (mt->num_samples > 1) {
 948       switch (mt->msaa_layout) {
 949       case INTEL_MSAA_LAYOUT_NONE:
 950       case INTEL_MSAA_LAYOUT_IMS:
 951          break;
 952       case INTEL_MSAA_LAYOUT_UMS:
 953       case INTEL_MSAA_LAYOUT_CMS:
 954          level_depth /= mt->num_samples;
 955          break;
 956       }
 957    }
 958
 959    /* Test image dimensions against the base level image adjusted for
 960     * minification.  This will also catch images not present in the
 961     * tree, changed targets, etc.
 962     */
 963    if (width != minify(mt->logical_width0, level - mt->first_level) ||
 964        height != minify(mt->logical_height0, level - mt->first_level) ||
 965        depth != level_depth) {
 966       return false;
 967    }
 968
 969    if (image->NumSamples != mt->num_samples)
 970       return false;
 971
 972    return true;
 973 }
 974
 975
 976 void
 977 intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
 978                              GLuint level,
 979                              GLuint x, GLuint y, GLuint d)
 980 {
 981    mt->level[level].depth = d;
 982    mt->level[level].level_x = x;
 983    mt->level[level].level_y = y;
 984
 985    DBG("%s level %d, depth %d, offset %d,%d\n", __FUNCTION__,
 986        level, d, x, y);
 987
 988    assert(mt->level[level].slice == NULL);
 989
 990    mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice));
 991    mt->level[level].slice[0].x_offset = mt->level[level].level_x;
 992    mt->level[level].slice[0].y_offset = mt->level[level].level_y;
 993 }
 994
 995
 996 void
 997 intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
 998                                GLuint level, GLuint img,
 999                                GLuint x, GLuint y)
1000 {
1001    if (img == 0 && level == 0)
1002       assert(x == 0 && y == 0);
1003
1004    assert(img < mt->level[level].depth);
1005
1006    mt->level[level].slice[img].x_offset = mt->level[level].level_x + x;
1007    mt->level[level].slice[img].y_offset = mt->level[level].level_y + y;
1008
1009    DBG("%s level %d img %d pos %d,%d\n",
1010        __FUNCTION__, level, img,
1011        mt->level[level].slice[img].x_offset,
1012        mt->level[level].slice[img].y_offset);
1013 }
1014
1015 void
1016 intel_miptree_get_image_offset(const struct intel_mipmap_tree *mt,
1017                                GLuint level, GLuint slice,
1018                                GLuint *x, GLuint *y)
1019 {
1020    assert(slice < mt->level[level].depth);
1021
1022    *x = mt->level[level].slice[slice].x_offset;
1023    *y = mt->level[level].slice[slice].y_offset;
1024 }
1025
1026 /**
1027  * This function computes masks that may be used to select the bits of the X
1028  * and Y coordinates that indicate the offset within a tile.  If the BO is
1029  * untiled, the masks are set to 0.
1030  */
1031 void
1032 intel_miptree_get_tile_masks(const struct intel_mipmap_tree *mt,
1033                              uint32_t *mask_x, uint32_t *mask_y,
1034                              bool map_stencil_as_y_tiled)
1035 {
1036    int cpp = mt->cpp;
1037    uint32_t tiling = mt->tiling;
1038
1039    if (map_stencil_as_y_tiled)
1040       tiling = I915_TILING_Y;
1041
1042    switch (tiling) {
1043    default:
1044       unreachable("not reached");
1045    case I915_TILING_NONE:
1046       *mask_x = *mask_y = 0;
1047       break;
1048    case I915_TILING_X:
1049       *mask_x = 512 / cpp - 1;
1050       *mask_y = 7;
1051       break;
1052    case I915_TILING_Y:
1053       *mask_x = 128 / cpp - 1;
1054       *mask_y = 31;
1055       break;
1056    }
1057 }
1058
1059 /**
1060  * Compute the offset (in bytes) from the start of the BO to the given x
1061  * and y coordinate.  For tiled BOs, caller must ensure that x and y are
1062  * multiples of the tile size.
1063  */
1064 uint32_t
1065 intel_miptree_get_aligned_offset(const struct intel_mipmap_tree *mt,
1066                                  uint32_t x, uint32_t y,
1067                                  bool map_stencil_as_y_tiled)
1068 {
1069    int cpp = mt->cpp;
1070    uint32_t pitch = mt->pitch;
1071    uint32_t tiling = mt->tiling;
1072
1073    if (map_stencil_as_y_tiled) {
1074       tiling = I915_TILING_Y;
1075
1076       /* When mapping a W-tiled stencil buffer as Y-tiled, each 64-high W-tile
1077        * gets transformed into a 32-high Y-tile.  Accordingly, the pitch of
1078        * the resulting surface is twice the pitch of the original miptree,
1079        * since each row in the Y-tiled view corresponds to two rows in the
1080        * actual W-tiled surface.  So we need to correct the pitch before
1081        * computing the offsets.
1082        */
1083       pitch *= 2;
1084    }
1085
1086    switch (tiling) {
1087    default:
1088       unreachable("not reached");
1089    case I915_TILING_NONE:
1090       return y * pitch + x * cpp;
1091    case I915_TILING_X:
1092       assert((x % (512 / cpp)) == 0);
1093       assert((y % 8) == 0);
1094       return y * pitch + x / (512 / cpp) * 4096;
1095    case I915_TILING_Y:
1096       assert((x % (128 / cpp)) == 0);
1097       assert((y % 32) == 0);
1098       return y * pitch + x / (128 / cpp) * 4096;
1099    }
1100 }
1101
1102 /**
1103  * Rendering with tiled buffers requires that the base address of the buffer
1104  * be aligned to a page boundary.  For renderbuffers, and sometimes with
1105  * textures, we may want the surface to point at a texture image level that
1106  * isn't at a page boundary.
1107  *
1108  * This function returns an appropriately-aligned base offset
1109  * according to the tiling restrictions, plus any required x/y offset
1110  * from there.
1111  */
1112 uint32_t
1113 intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt,
1114                                GLuint level, GLuint slice,
1115                                uint32_t *tile_x,
1116                                uint32_t *tile_y)
1117 {
1118    uint32_t x, y;
1119    uint32_t mask_x, mask_y;
1120
1121    intel_miptree_get_tile_masks(mt, &mask_x, &mask_y, false);
1122    intel_miptree_get_image_offset(mt, level, slice, &x, &y);
1123
1124    *tile_x = x & mask_x;
1125    *tile_y = y & mask_y;
1126
1127    return intel_miptree_get_aligned_offset(mt, x & ~mask_x, y & ~mask_y, false);
1128 }
1129
1130 static void
1131 intel_miptree_copy_slice_sw(struct brw_context *brw,
1132                             struct intel_mipmap_tree *dst_mt,
1133                             struct intel_mipmap_tree *src_mt,
1134                             int level,
1135                             int slice,
1136                             int width,
1137                             int height)
1138 {
1139    void *src, *dst;
1140    ptrdiff_t src_stride, dst_stride;
1141    int cpp = dst_mt->cpp;
1142
1143    intel_miptree_map(brw, src_mt,
1144                      level, slice,
1145                      0, 0,
1146                      width, height,
1147                      GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
1148                      &src, &src_stride);
1149
1150    intel_miptree_map(brw, dst_mt,
1151                      level, slice,
1152                      0, 0,
1153                      width, height,
1154                      GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
1155                      BRW_MAP_DIRECT_BIT,
1156                      &dst, &dst_stride);
1157
1158    DBG("sw blit %s mt %p %p/%"PRIdPTR" -> %s mt %p %p/%"PRIdPTR" (%dx%d)\n",
1159        _mesa_get_format_name(src_mt->format),
1160        src_mt, src, src_stride,
1161        _mesa_get_format_name(dst_mt->format),
1162        dst_mt, dst, dst_stride,
1163        width, height);
1164
1165    int row_size = cpp * width;
1166    if (src_stride == row_size &&
1167        dst_stride == row_size) {
1168       memcpy(dst, src, row_size * height);
1169    } else {
1170       for (int i = 0; i < height; i++) {
1171          memcpy(dst, src, row_size);
1172          dst += dst_stride;
1173          src += src_stride;
1174       }
1175    }
1176
1177    intel_miptree_unmap(brw, dst_mt, level, slice);
1178    intel_miptree_unmap(brw, src_mt, level, slice);
1179
1180    /* Don't forget to copy the stencil data over, too.  We could have skipped
1181     * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
1182     * shuffling the two data sources in/out of temporary storage instead of
1183     * the direct mapping we get this way.
1184     */
1185    if (dst_mt->stencil_mt) {
1186       assert(src_mt->stencil_mt);
1187       intel_miptree_copy_slice_sw(brw, dst_mt->stencil_mt, src_mt->stencil_mt,
1188                                   level, slice, width, height);
1189    }
1190 }
1191
1192 static void
1193 intel_miptree_copy_slice(struct brw_context *brw,
1194                          struct intel_mipmap_tree *dst_mt,
1195                          struct intel_mipmap_tree *src_mt,
1196                          int level,
1197                          int face,
1198                          int depth)
1199
1200 {
1201    mesa_format format = src_mt->format;
1202    uint32_t width = minify(src_mt->physical_width0, level - src_mt->first_level);
1203    uint32_t height = minify(src_mt->physical_height0, level - src_mt->first_level);
1204    int slice;
1205
1206    if (face > 0)
1207       slice = face;
1208    else
1209       slice = depth;
1210
1211    assert(depth < src_mt->level[level].depth);
1212    assert(src_mt->format == dst_mt->format);
1213
1214    if (dst_mt->compressed) {
1215       height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h;
1216       width = ALIGN(width, dst_mt->align_w);
1217    }
1218
1219    /* If it's a packed depth/stencil buffer with separate stencil, the blit
1220     * below won't apply since we can't do the depth's Y tiling or the
1221     * stencil's W tiling in the blitter.
1222     */
1223    if (src_mt->stencil_mt) {
1224       intel_miptree_copy_slice_sw(brw,
1225                                   dst_mt, src_mt,
1226                                   level, slice,
1227                                   width, height);
1228       return;
1229    }
1230
1231    uint32_t dst_x, dst_y, src_x, src_y;
1232    intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y);
1233    intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y);
1234
1235    DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
1236        _mesa_get_format_name(src_mt->format),
1237        src_mt, src_x, src_y, src_mt->pitch,
1238        _mesa_get_format_name(dst_mt->format),
1239        dst_mt, dst_x, dst_y, dst_mt->pitch,
1240        width, height);
1241
1242    if (!intel_miptree_blit(brw,
1243                            src_mt, level, slice, 0, 0, false,
1244                            dst_mt, level, slice, 0, 0, false,
1245                            width, height, GL_COPY)) {
1246       perf_debug("miptree validate blit for %s failed\n",
1247                  _mesa_get_format_name(format));
1248
1249       intel_miptree_copy_slice_sw(brw, dst_mt, src_mt, level, slice,
1250                                   width, height);
1251    }
1252 }
1253
1254 /**
1255  * Copies the image's current data to the given miptree, and associates that
1256  * miptree with the image.
1257  *
1258  * If \c invalidate is true, then the actual image data does not need to be
1259  * copied, but the image still needs to be associated to the new miptree (this
1260  * is set to true if we're about to clear the image).
1261  */
1262 void
1263 intel_miptree_copy_teximage(struct brw_context *brw,
1264                             struct intel_texture_image *intelImage,
1265                             struct intel_mipmap_tree *dst_mt,
1266                             bool invalidate)
1267 {
1268    struct intel_mipmap_tree *src_mt = intelImage->mt;
1269    struct intel_texture_object *intel_obj =
1270       intel_texture_object(intelImage->base.Base.TexObject);
1271    int level = intelImage->base.Base.Level;
1272    int face = intelImage->base.Base.Face;
1273
1274    GLuint depth;
1275    if (intel_obj->base.Target == GL_TEXTURE_1D_ARRAY)
1276       depth = intelImage->base.Base.Height;
1277    else
1278       depth = intelImage->base.Base.Depth;
1279
1280    if (!invalidate) {
1281       for (int slice = 0; slice < depth; slice++) {
1282          intel_miptree_copy_slice(brw, dst_mt, src_mt, level, face, slice);
1283       }
1284    }
1285
1286    intel_miptree_reference(&intelImage->mt, dst_mt);
1287    intel_obj->needs_validate = true;
1288 }
1289
1290 bool
1291 intel_miptree_alloc_mcs(struct brw_context *brw,
1292                         struct intel_mipmap_tree *mt,
1293                         GLuint num_samples)
1294 {
1295    assert(brw->gen >= 7); /* MCS only used on Gen7+ */
1296    assert(mt->mcs_mt == NULL);
1297
1298    /* Choose the correct format for the MCS buffer.  All that really matters
1299     * is that we allocate the right buffer size, since we'll always be
1300     * accessing this miptree using MCS-specific hardware mechanisms, which
1301     * infer the correct format based on num_samples.
1302     */
1303    mesa_format format;
1304    switch (num_samples) {
1305    case 2:
1306    case 4:
1307       /* 8 bits/pixel are required for MCS data when using 4x MSAA (2 bits for
1308        * each sample).
1309        */
1310       format = MESA_FORMAT_R_UNORM8;
1311       break;
1312    case 8:
1313       /* 32 bits/pixel are required for MCS data when using 8x MSAA (3 bits
1314        * for each sample, plus 8 padding bits).
1315        */
1316       format = MESA_FORMAT_R_UINT32;
1317       break;
1318    default:
1319       unreachable("Unrecognized sample count in intel_miptree_alloc_mcs");
1320    };
1321
1322    /* From the Ivy Bridge PRM, Vol4 Part1 p76, "MCS Base Address":
1323     *
1324     *     "The MCS surface must be stored as Tile Y."
1325     */
1326    mt->mcs_mt = intel_miptree_create(brw,
1327                                      mt->target,
1328                                      format,
1329                                      mt->first_level,
1330                                      mt->last_level,
1331                                      mt->logical_width0,
1332                                      mt->logical_height0,
1333                                      mt->logical_depth0,
1334                                      true,
1335                                      0 /* num_samples */,
1336                                      INTEL_MIPTREE_TILING_Y,
1337                                      false);
1338
1339    /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
1340     *
1341     *     When MCS buffer is enabled and bound to MSRT, it is required that it
1342     *     is cleared prior to any rendering.
1343     *
1344     * Since we don't use the MCS buffer for any purpose other than rendering,
1345     * it makes sense to just clear it immediately upon allocation.
1346     *
1347     * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
1348     */
1349    void *data = intel_miptree_map_raw(brw, mt->mcs_mt);
1350    memset(data, 0xff, mt->mcs_mt->total_height * mt->mcs_mt->pitch);
1351    intel_miptree_unmap_raw(brw, mt->mcs_mt);
1352    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
1353
1354    return mt->mcs_mt;
1355 }
1356
1357
1358 bool
1359 intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
1360                                  struct intel_mipmap_tree *mt)
1361 {
1362    assert(mt->mcs_mt == NULL);
1363
1364    /* The format of the MCS buffer is opaque to the driver; all that matters
1365     * is that we get its size and pitch right.  We'll pretend that the format
1366     * is R32.  Since an MCS tile covers 128 blocks horizontally, and a Y-tiled
1367     * R32 buffer is 32 pixels across, we'll need to scale the width down by
1368     * the block width and then a further factor of 4.  Since an MCS tile
1369     * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high,
1370     * we'll need to scale the height down by the block height and then a
1371     * further factor of 8.
1372     */
1373    const mesa_format format = MESA_FORMAT_R_UINT32;
1374    unsigned block_width_px;
1375    unsigned block_height;
1376    intel_get_non_msrt_mcs_alignment(brw, mt, &block_width_px, &block_height);
1377    unsigned width_divisor = block_width_px * 4;
1378    unsigned height_divisor = block_height * 8;
1379    unsigned mcs_width =
1380       ALIGN(mt->logical_width0, width_divisor) / width_divisor;
1381    unsigned mcs_height =
1382       ALIGN(mt->logical_height0, height_divisor) / height_divisor;
1383    assert(mt->logical_depth0 == 1);
1384    mt->mcs_mt = intel_miptree_create(brw,
1385                                      mt->target,
1386                                      format,
1387                                      mt->first_level,
1388                                      mt->last_level,
1389                                      mcs_width,
1390                                      mcs_height,
1391                                      mt->logical_depth0,
1392                                      true,
1393                                      0 /* num_samples */,
1394                                      INTEL_MIPTREE_TILING_Y,
1395                                      false);
1396
1397    return mt->mcs_mt;
1398 }
1399
1400
1401 /**
1402  * Helper for intel_miptree_alloc_hiz() that sets
1403  * \c mt->level[level].has_hiz. Return true if and only if
1404  * \c has_hiz was set.
1405  */
1406 static bool
1407 intel_miptree_level_enable_hiz(struct brw_context *brw,
1408                                struct intel_mipmap_tree *mt,
1409                                uint32_t level)
1410 {
1411    assert(mt->hiz_mt);
1412
1413    if (brw->gen >= 8 || brw->is_haswell) {
1414       uint32_t width = minify(mt->physical_width0, level);
1415       uint32_t height = minify(mt->physical_height0, level);
1416
1417       /* Disable HiZ for LOD > 0 unless the width is 8 aligned
1418        * and the height is 4 aligned. This allows our HiZ support
1419        * to fulfill Haswell restrictions for HiZ ops. For LOD == 0,
1420        * we can grow the width & height to allow the HiZ op to
1421        * force the proper size alignments.
1422        */
1423       if (level > 0 && ((width & 7) || (height & 3))) {
1424          DBG("mt %p level %d: HiZ DISABLED\n", mt, level);
1425          return false;
1426       }
1427    }
1428
1429    DBG("mt %p level %d: HiZ enabled\n", mt, level);
1430    mt->level[level].has_hiz = true;
1431    return true;
1432 }
1433
1434
1435
1436 bool
1437 intel_miptree_alloc_hiz(struct brw_context *brw,
1438                         struct intel_mipmap_tree *mt)
1439 {
1440    assert(mt->hiz_mt == NULL);
1441    const bool force_all_slices_at_each_lod = brw->gen == 6;
1442    mt->hiz_mt = intel_miptree_create(brw,
1443                                      mt->target,
1444                                      mt->format,
1445                                      mt->first_level,
1446                                      mt->last_level,
1447                                      mt->logical_width0,
1448                                      mt->logical_height0,
1449                                      mt->logical_depth0,
1450                                      true,
1451                                      mt->num_samples,
1452                                      INTEL_MIPTREE_TILING_ANY,
1453                                      force_all_slices_at_each_lod);
1454
1455    if (!mt->hiz_mt)
1456       return false;
1457
1458    /* Mark that all slices need a HiZ resolve. */
1459    for (int level = mt->first_level; level <= mt->last_level; ++level) {
1460       if (!intel_miptree_level_enable_hiz(brw, mt, level))
1461          continue;
1462
1463       for (int layer = 0; layer < mt->level[level].depth; ++layer) {
1464          struct intel_resolve_map *m = malloc(sizeof(struct intel_resolve_map));
1465          exec_node_init(&m->link);
1466          m->level = level;
1467          m->layer = layer;
1468          m->need = GEN6_HIZ_OP_HIZ_RESOLVE;
1469
1470          exec_list_push_tail(&mt->hiz_map, &m->link);
1471       }
1472    }
1473
1474    return true;
1475 }
1476
1477 /**
1478  * Does the miptree slice have hiz enabled?
1479  */
1480 bool
1481 intel_miptree_level_has_hiz(struct intel_mipmap_tree *mt, uint32_t level)
1482 {
1483    intel_miptree_check_level_layer(mt, level, 0);
1484    return mt->level[level].has_hiz;
1485 }
1486
1487 void
1488 intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,
1489                                           uint32_t level,
1490                                           uint32_t layer)
1491 {
1492    if (!intel_miptree_level_has_hiz(mt, level))
1493       return;
1494
1495    intel_resolve_map_set(&mt->hiz_map,
1496                          level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
1497 }
1498
1499
1500 void
1501 intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt,
1502                                             uint32_t level,
1503                                             uint32_t layer)
1504 {
1505    if (!intel_miptree_level_has_hiz(mt, level))
1506       return;
1507
1508    intel_resolve_map_set(&mt->hiz_map,
1509                          level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
1510 }
1511
1512 void
1513 intel_miptree_set_all_slices_need_depth_resolve(struct intel_mipmap_tree *mt,
1514                                                 uint32_t level)
1515 {
1516    uint32_t layer;
1517    uint32_t end_layer = mt->level[level].depth;
1518
1519    for (layer = 0; layer < end_layer; layer++) {
1520       intel_miptree_slice_set_needs_depth_resolve(mt, level, layer);
1521    }
1522 }
1523
1524 static bool
1525 intel_miptree_slice_resolve(struct brw_context *brw,
1526                             struct intel_mipmap_tree *mt,
1527                             uint32_t level,
1528                             uint32_t layer,
1529                             enum gen6_hiz_op need)
1530 {
1531    intel_miptree_check_level_layer(mt, level, layer);
1532
1533    struct intel_resolve_map *item =
1534          intel_resolve_map_get(&mt->hiz_map, level, layer);
1535
1536    if (!item || item->need != need)
1537       return false;
1538
1539    intel_hiz_exec(brw, mt, level, layer, need);
1540    intel_resolve_map_remove(item);
1541    return true;
1542 }
1543
1544 bool
1545 intel_miptree_slice_resolve_hiz(struct brw_context *brw,
1546                                 struct intel_mipmap_tree *mt,
1547                                 uint32_t level,
1548                                 uint32_t layer)
1549 {
1550    return intel_miptree_slice_resolve(brw, mt, level, layer,
1551                                       GEN6_HIZ_OP_HIZ_RESOLVE);
1552 }
1553
1554 bool
1555 intel_miptree_slice_resolve_depth(struct brw_context *brw,
1556                                   struct intel_mipmap_tree *mt,
1557                                   uint32_t level,
1558                                   uint32_t layer)
1559 {
1560    return intel_miptree_slice_resolve(brw, mt, level, layer,
1561                                       GEN6_HIZ_OP_DEPTH_RESOLVE);
1562 }
1563
1564 static bool
1565 intel_miptree_all_slices_resolve(struct brw_context *brw,
1566                                  struct intel_mipmap_tree *mt,
1567                                  enum gen6_hiz_op need)
1568 {
1569    bool did_resolve = false;
1570
1571    foreach_list_typed_safe(struct intel_resolve_map, map, link, &mt->hiz_map) {
1572       if (map->need != need)
1573          continue;
1574
1575       intel_hiz_exec(brw, mt, map->level, map->layer, need);
1576       intel_resolve_map_remove(map);
1577       did_resolve = true;
1578    }
1579
1580    return did_resolve;
1581 }
1582
1583 bool
1584 intel_miptree_all_slices_resolve_hiz(struct brw_context *brw,
1585                                      struct intel_mipmap_tree *mt)
1586 {
1587    return intel_miptree_all_slices_resolve(brw, mt,
1588                                            GEN6_HIZ_OP_HIZ_RESOLVE);
1589 }
1590
1591 bool
1592 intel_miptree_all_slices_resolve_depth(struct brw_context *brw,
1593                                        struct intel_mipmap_tree *mt)
1594 {
1595    return intel_miptree_all_slices_resolve(brw, mt,
1596                                            GEN6_HIZ_OP_DEPTH_RESOLVE);
1597 }
1598
1599
1600 void
1601 intel_miptree_resolve_color(struct brw_context *brw,
1602                             struct intel_mipmap_tree *mt)
1603 {
1604    switch (mt->fast_clear_state) {
1605    case INTEL_FAST_CLEAR_STATE_NO_MCS:
1606    case INTEL_FAST_CLEAR_STATE_RESOLVED:
1607       /* No resolve needed */
1608       break;
1609    case INTEL_FAST_CLEAR_STATE_UNRESOLVED:
1610    case INTEL_FAST_CLEAR_STATE_CLEAR:
1611       /* Fast color clear resolves only make sense for non-MSAA buffers. */
1612       if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE)
1613          brw_meta_resolve_color(brw, mt);
1614       break;
1615    }
1616 }
1617
1618
1619 /**
1620  * Make it possible to share the BO backing the given miptree with another
1621  * process or another miptree.
1622  *
1623  * Fast color clears are unsafe with shared buffers, so we need to resolve and
1624  * then discard the MCS buffer, if present.  We also set the fast_clear_state
1625  * to INTEL_FAST_CLEAR_STATE_NO_MCS to ensure that no MCS buffer gets
1626  * allocated in the future.
1627  */
1628 void
1629 intel_miptree_make_shareable(struct brw_context *brw,
1630                              struct intel_mipmap_tree *mt)
1631 {
1632    /* MCS buffers are also used for multisample buffers, but we can't resolve
1633     * away a multisample MCS buffer because it's an integral part of how the
1634     * pixel data is stored.  Fortunately this code path should never be
1635     * reached for multisample buffers.
1636     */
1637    assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE);
1638
1639    if (mt->mcs_mt) {
1640       intel_miptree_resolve_color(brw, mt);
1641       intel_miptree_release(&mt->mcs_mt);
1642       mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
1643    }
1644 }
1645
1646
/**
 * \brief Get pointer offset into stencil buffer.
 *
 * The stencil buffer is W tiled.  The GTT cannot fence W tiling, so the
 * layout of a tile has to be decoded in software.
 *
 * See
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
 *     Format.
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
 *
 * The returned offset is always positive; the return type is nevertheless
 * signed because of
 *    commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
 *    mesa: Fix return type of  _mesa_get_format_bytes() (#37351)
 *
 * \param stride    stride of the buffer; one row of tiles spans 64 * stride
 *                  bytes
 * \param x         x coordinate of the byte
 * \param y         y coordinate of the byte
 * \param swizzled  apply the bit-6 swizzle adjustment
 */
static intptr_t
intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
{
   /* A W tile is 64 bytes wide and 64 rows high, 4096 bytes in total. */
   const uint32_t tile_dim = 64;
   const uint32_t tile_bytes = 4096;
   const uint32_t tile_row_bytes = tile_dim * stride;

   /* The byte's position relative to the base of its tile. */
   const uint32_t col = x & (tile_dim - 1);
   const uint32_t row = y & (tile_dim - 1);

   /* Start of the tile that contains the byte. */
   const uint32_t tile_base = (y / tile_dim) * tile_row_bytes
                            + (x / tile_dim) * tile_bytes;

   /* Inside a tile the coordinate bits are interleaved as follows
    * (most significant first): x[5:3] y[5:3] y[2] x[2] y[1] x[1] y[0] x[0].
    */
   const uint32_t in_tile = ((col >> 3) << 9)
                          | ((row >> 3) << 6)
                          | (((row >> 2) & 1) << 5)
                          | (((col >> 2) & 1) << 4)
                          | (((row >> 1) & 1) << 3)
                          | (((col >> 1) & 1) << 2)
                          | ((row & 1) << 1)
                          | (col & 1);

   uintptr_t offset = tile_base + in_tile;

   /* Bit-6 swizzling: in odd 8-byte columns, move one 64-byte block up for
    * even 8-row groups and one block down for odd ones.
    */
   if (swizzled && ((col >> 3) & 1)) {
      if ((row >> 3) & 1)
         offset -= 64;
      else
         offset += 64;
   }

   return offset;
}
1702
1703 void
1704 intel_miptree_updownsample(struct brw_context *brw,
1705                            struct intel_mipmap_tree *src,
1706                            struct intel_mipmap_tree *dst)
1707 {
1708    if (brw->gen < 8) {
1709       brw_blorp_blit_miptrees(brw,
1710                               src, 0 /* level */, 0 /* layer */, src->format,
1711                               dst, 0 /* level */, 0 /* layer */, dst->format,
1712                               0, 0,
1713                               src->logical_width0, src->logical_height0,
1714                               0, 0,
1715                               dst->logical_width0, dst->logical_height0,
1716                               GL_NEAREST, false, false /*mirror x, y*/);
1717    } else if (src->format == MESA_FORMAT_S_UINT8) {
1718       brw_meta_stencil_updownsample(brw, src, dst);
1719    } else {
1720       brw_meta_updownsample(brw, src, dst);
1721    }
1722
1723    if (src->stencil_mt) {
1724       if (brw->gen >= 8) {
1725          brw_meta_stencil_updownsample(brw, src->stencil_mt, dst);
1726          return;
1727       }
1728
1729       brw_blorp_blit_miptrees(brw,
1730                               src->stencil_mt, 0 /* level */, 0 /* layer */,
1731                               src->stencil_mt->format,
1732                               dst->stencil_mt, 0 /* level */, 0 /* layer */,
1733                               dst->stencil_mt->format,
1734                               0, 0,
1735                               src->logical_width0, src->logical_height0,
1736                               0, 0,
1737                               dst->logical_width0, dst->logical_height0,
1738                               GL_NEAREST, false, false /*mirror x, y*/);
1739    }
1740 }
1741
1742 void *
1743 intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt)
1744 {
1745    /* CPU accesses to color buffers don't understand fast color clears, so
1746     * resolve any pending fast color clears before we map.
1747     */
1748    intel_miptree_resolve_color(brw, mt);
1749
1750    drm_intel_bo *bo = mt->bo;
1751
1752    if (drm_intel_bo_references(brw->batch.bo, bo))
1753       intel_batchbuffer_flush(brw);
1754
1755    if (mt->tiling != I915_TILING_NONE)
1756       brw_bo_map_gtt(brw, bo, "miptree");
1757    else
1758       brw_bo_map(brw, bo, true, "miptree");
1759
1760    return bo->virtual;
1761 }
1762
1763 void
1764 intel_miptree_unmap_raw(struct brw_context *brw,
1765                         struct intel_mipmap_tree *mt)
1766 {
1767    drm_intel_bo_unmap(mt->bo);
1768 }
1769
1770 static void
1771 intel_miptree_map_gtt(struct brw_context *brw,
1772                       struct intel_mipmap_tree *mt,
1773                       struct intel_miptree_map *map,
1774                       unsigned int level, unsigned int slice)
1775 {
1776    unsigned int bw, bh;
1777    void *base;
1778    unsigned int image_x, image_y;
1779    intptr_t x = map->x;
1780    intptr_t y = map->y;
1781
1782    /* For compressed formats, the stride is the number of bytes per
1783     * row of blocks.  intel_miptree_get_image_offset() already does
1784     * the divide.
1785     */
1786    _mesa_get_format_block_size(mt->format, &bw, &bh);
1787    assert(y % bh == 0);
1788    y /= bh;
1789
1790    base = intel_miptree_map_raw(brw, mt) + mt->offset;
1791
1792    if (base == NULL)
1793       map->ptr = NULL;
1794    else {
1795       /* Note that in the case of cube maps, the caller must have passed the
1796        * slice number referencing the face.
1797       */
1798       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1799       x += image_x;
1800       y += image_y;
1801
1802       map->stride = mt->pitch;
1803       map->ptr = base + y * map->stride + x * mt->cpp;
1804    }
1805
1806    DBG("%s: %d,%d %dx%d from mt %p (%s) "
1807        "%"PRIiPTR",%"PRIiPTR" = %p/%d\n", __FUNCTION__,
1808        map->x, map->y, map->w, map->h,
1809        mt, _mesa_get_format_name(mt->format),
1810        x, y, map->ptr, map->stride);
1811 }
1812
/**
 * Undo intel_miptree_map_gtt(): the mapping pointed straight into the BO, so
 * unmapping the BO is all that is needed.  \a map, \a level and \a slice are
 * not used.
 */
static void
intel_miptree_unmap_gtt(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
                        struct intel_miptree_map *map,
                        unsigned int level,
                        unsigned int slice)
{
   (void) map;
   (void) level;
   (void) slice;

   intel_miptree_unmap_raw(brw, mt);
}
1822
1823 static void
1824 intel_miptree_map_blit(struct brw_context *brw,
1825                        struct intel_mipmap_tree *mt,
1826                        struct intel_miptree_map *map,
1827                        unsigned int level, unsigned int slice)
1828 {
1829    map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
1830                                   0, 0,
1831                                   map->w, map->h, 1,
1832                                   false, 0,
1833                                   INTEL_MIPTREE_TILING_NONE,
1834                                   false);
1835    if (!map->mt) {
1836       fprintf(stderr, "Failed to allocate blit temporary\n");
1837       goto fail;
1838    }
1839    map->stride = map->mt->pitch;
1840
1841    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
1842     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
1843     * invalidate is set, since we'll be writing the whole rectangle from our
1844     * temporary buffer back out.
1845     */
1846    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
1847       if (!intel_miptree_blit(brw,
1848                               mt, level, slice,
1849                               map->x, map->y, false,
1850                               map->mt, 0, 0,
1851                               0, 0, false,
1852                               map->w, map->h, GL_COPY)) {
1853          fprintf(stderr, "Failed to blit\n");
1854          goto fail;
1855       }
1856    }
1857
1858    map->ptr = intel_miptree_map_raw(brw, map->mt);
1859
1860    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1861        map->x, map->y, map->w, map->h,
1862        mt, _mesa_get_format_name(mt->format),
1863        level, slice, map->ptr, map->stride);
1864
1865    return;
1866
1867 fail:
1868    intel_miptree_release(&map->mt);
1869    map->ptr = NULL;
1870    map->stride = 0;
1871 }
1872
1873 static void
1874 intel_miptree_unmap_blit(struct brw_context *brw,
1875                          struct intel_mipmap_tree *mt,
1876                          struct intel_miptree_map *map,
1877                          unsigned int level,
1878                          unsigned int slice)
1879 {
1880    struct gl_context *ctx = &brw->ctx;
1881
1882    intel_miptree_unmap_raw(brw, map->mt);
1883
1884    if (map->mode & GL_MAP_WRITE_BIT) {
1885       bool ok = intel_miptree_blit(brw,
1886                                    map->mt, 0, 0,
1887                                    0, 0, false,
1888                                    mt, level, slice,
1889                                    map->x, map->y, false,
1890                                    map->w, map->h, GL_COPY);
1891       WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
1892    }
1893
1894    intel_miptree_release(&map->mt);
1895 }
1896
1897 /**
1898  * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
1899  */
1900 #if defined(USE_SSE41)
1901 static void
1902 intel_miptree_map_movntdqa(struct brw_context *brw,
1903                            struct intel_mipmap_tree *mt,
1904                            struct intel_miptree_map *map,
1905                            unsigned int level, unsigned int slice)
1906 {
1907    assert(map->mode & GL_MAP_READ_BIT);
1908    assert(!(map->mode & GL_MAP_WRITE_BIT));
1909
1910    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
1911        map->x, map->y, map->w, map->h,
1912        mt, _mesa_get_format_name(mt->format),
1913        level, slice, map->ptr, map->stride);
1914
1915    /* Map the original image */
1916    uint32_t image_x;
1917    uint32_t image_y;
1918    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1919    image_x += map->x;
1920    image_y += map->y;
1921
1922    void *src = intel_miptree_map_raw(brw, mt);
1923    if (!src)
1924       return;
1925    src += image_y * mt->pitch;
1926    src += image_x * mt->cpp;
1927
1928    /* Due to the pixel offsets for the particular image being mapped, our
1929     * src pointer may not be 16-byte aligned.  However, if the pitch is
1930     * divisible by 16, then the amount by which it's misaligned will remain
1931     * consistent from row to row.
1932     */
1933    assert((mt->pitch % 16) == 0);
1934    const int misalignment = ((uintptr_t) src) & 15;
1935
1936    /* Create an untiled temporary buffer for the mapping. */
1937    const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w);
1938
1939    map->stride = ALIGN(misalignment + width_bytes, 16);
1940
1941    map->buffer = _mesa_align_malloc(map->stride * map->h, 16);
1942    /* Offset the destination so it has the same misalignment as src. */
1943    map->ptr = map->buffer + misalignment;
1944
1945    assert((((uintptr_t) map->ptr) & 15) == misalignment);
1946
1947    for (uint32_t y = 0; y < map->h; y++) {
1948       void *dst_ptr = map->ptr + y * map->stride;
1949       void *src_ptr = src + y * mt->pitch;
1950
1951       _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
1952    }
1953
1954    intel_miptree_unmap_raw(brw, mt);
1955 }
1956
1957 static void
1958 intel_miptree_unmap_movntdqa(struct brw_context *brw,
1959                              struct intel_mipmap_tree *mt,
1960                              struct intel_miptree_map *map,
1961                              unsigned int level,
1962                              unsigned int slice)
1963 {
1964    _mesa_align_free(map->buffer);
1965    map->buffer = NULL;
1966    map->ptr = NULL;
1967 }
1968 #endif
1969
1970 static void
1971 intel_miptree_map_s8(struct brw_context *brw,
1972                      struct intel_mipmap_tree *mt,
1973                      struct intel_miptree_map *map,
1974                      unsigned int level, unsigned int slice)
1975 {
1976    map->stride = map->w;
1977    map->buffer = map->ptr = malloc(map->stride * map->h);
1978    if (!map->buffer)
1979       return;
1980
1981    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
1982     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
1983     * invalidate is set, since we'll be writing the whole rectangle from our
1984     * temporary buffer back out.
1985     */
1986    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
1987       uint8_t *untiled_s8_map = map->ptr;
1988       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
1989       unsigned int image_x, image_y;
1990
1991       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
1992
1993       for (uint32_t y = 0; y < map->h; y++) {
1994          for (uint32_t x = 0; x < map->w; x++) {
1995             ptrdiff_t offset = intel_offset_S8(mt->pitch,
1996                                                x + image_x + map->x,
1997                                                y + image_y + map->y,
1998                                                brw->has_swizzling);
1999             untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
2000          }
2001       }
2002
2003       intel_miptree_unmap_raw(brw, mt);
2004
2005       DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __FUNCTION__,
2006           map->x, map->y, map->w, map->h,
2007           mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
2008    } else {
2009       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
2010           map->x, map->y, map->w, map->h,
2011           mt, map->ptr, map->stride);
2012    }
2013 }
2014
2015 static void
2016 intel_miptree_unmap_s8(struct brw_context *brw,
2017                        struct intel_mipmap_tree *mt,
2018                        struct intel_miptree_map *map,
2019                        unsigned int level,
2020                        unsigned int slice)
2021 {
2022    if (map->mode & GL_MAP_WRITE_BIT) {
2023       unsigned int image_x, image_y;
2024       uint8_t *untiled_s8_map = map->ptr;
2025       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
2026
2027       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2028
2029       for (uint32_t y = 0; y < map->h; y++) {
2030          for (uint32_t x = 0; x < map->w; x++) {
2031             ptrdiff_t offset = intel_offset_S8(mt->pitch,
2032                                                x + map->x,
2033                                                y + map->y,
2034                                                brw->has_swizzling);
2035             tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
2036          }
2037       }
2038
2039       intel_miptree_unmap_raw(brw, mt);
2040    }
2041
2042    free(map->buffer);
2043 }
2044
2045 static void
2046 intel_miptree_map_etc(struct brw_context *brw,
2047                       struct intel_mipmap_tree *mt,
2048                       struct intel_miptree_map *map,
2049                       unsigned int level,
2050                       unsigned int slice)
2051 {
2052    assert(mt->etc_format != MESA_FORMAT_NONE);
2053    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
2054       assert(mt->format == MESA_FORMAT_R8G8B8X8_UNORM);
2055    }
2056
2057    assert(map->mode & GL_MAP_WRITE_BIT);
2058    assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
2059
2060    map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
2061    map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
2062                                                 map->w, map->h, 1));
2063    map->ptr = map->buffer;
2064 }
2065
2066 static void
2067 intel_miptree_unmap_etc(struct brw_context *brw,
2068                         struct intel_mipmap_tree *mt,
2069                         struct intel_miptree_map *map,
2070                         unsigned int level,
2071                         unsigned int slice)
2072 {
2073    uint32_t image_x;
2074    uint32_t image_y;
2075    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2076
2077    image_x += map->x;
2078    image_y += map->y;
2079
2080    uint8_t *dst = intel_miptree_map_raw(brw, mt)
2081                 + image_y * mt->pitch
2082                 + image_x * mt->cpp;
2083
2084    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
2085       _mesa_etc1_unpack_rgba8888(dst, mt->pitch,
2086                                  map->ptr, map->stride,
2087                                  map->w, map->h);
2088    else
2089       _mesa_unpack_etc2_format(dst, mt->pitch,
2090                                map->ptr, map->stride,
2091                                map->w, map->h, mt->etc_format);
2092
2093    intel_miptree_unmap_raw(brw, mt);
2094    free(map->buffer);
2095 }
2096
2097 /**
2098  * Mapping function for packed depth/stencil miptrees backed by real separate
2099  * miptrees for depth and stencil.
2100  *
2101  * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
2102  * separate from the depth buffer.  Yet at the GL API level, we have to expose
2103  * packed depth/stencil textures and FBO attachments, and Mesa core expects to
2104  * be able to map that memory for texture storage and glReadPixels-type
2105  * operations.  We give Mesa core that access by mallocing a temporary and
2106  * copying the data between the actual backing store and the temporary.
2107  */
2108 static void
2109 intel_miptree_map_depthstencil(struct brw_context *brw,
2110                                struct intel_mipmap_tree *mt,
2111                                struct intel_miptree_map *map,
2112                                unsigned int level, unsigned int slice)
2113 {
2114    struct intel_mipmap_tree *z_mt = mt;
2115    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
2116    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
2117    int packed_bpp = map_z32f_x24s8 ? 8 : 4;
2118
2119    map->stride = map->w * packed_bpp;
2120    map->buffer = map->ptr = malloc(map->stride * map->h);
2121    if (!map->buffer)
2122       return;
2123
2124    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
2125     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
2126     * invalidate is set, since we'll be writing the whole rectangle from our
2127     * temporary buffer back out.
2128     */
2129    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
2130       uint32_t *packed_map = map->ptr;
2131       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
2132       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
2133       unsigned int s_image_x, s_image_y;
2134       unsigned int z_image_x, z_image_y;
2135
2136       intel_miptree_get_image_offset(s_mt, level, slice,
2137                                      &s_image_x, &s_image_y);
2138       intel_miptree_get_image_offset(z_mt, level, slice,
2139                                      &z_image_x, &z_image_y);
2140
2141       for (uint32_t y = 0; y < map->h; y++) {
2142          for (uint32_t x = 0; x < map->w; x++) {
2143             int map_x = map->x + x, map_y = map->y + y;
2144             ptrdiff_t s_offset = intel_offset_S8(s_mt->pitch,
2145                                                  map_x + s_image_x,
2146                                                  map_y + s_image_y,
2147                                                  brw->has_swizzling);
2148             ptrdiff_t z_offset = ((map_y + z_image_y) *
2149                                   (z_mt->pitch / 4) +
2150                                   (map_x + z_image_x));
2151             uint8_t s = s_map[s_offset];
2152             uint32_t z = z_map[z_offset];
2153
2154             if (map_z32f_x24s8) {
2155                packed_map[(y * map->w + x) * 2 + 0] = z;
2156                packed_map[(y * map->w + x) * 2 + 1] = s;
2157             } else {
2158                packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
2159             }
2160          }
2161       }
2162
2163       intel_miptree_unmap_raw(brw, s_mt);
2164       intel_miptree_unmap_raw(brw, z_mt);
2165
2166       DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
2167           __FUNCTION__,
2168           map->x, map->y, map->w, map->h,
2169           z_mt, map->x + z_image_x, map->y + z_image_y,
2170           s_mt, map->x + s_image_x, map->y + s_image_y,
2171           map->ptr, map->stride);
2172    } else {
2173       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
2174           map->x, map->y, map->w, map->h,
2175           mt, map->ptr, map->stride);
2176    }
2177 }
2178
2179 static void
2180 intel_miptree_unmap_depthstencil(struct brw_context *brw,
2181                                  struct intel_mipmap_tree *mt,
2182                                  struct intel_miptree_map *map,
2183                                  unsigned int level,
2184                                  unsigned int slice)
2185 {
2186    struct intel_mipmap_tree *z_mt = mt;
2187    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
2188    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
2189
2190    if (map->mode & GL_MAP_WRITE_BIT) {
2191       uint32_t *packed_map = map->ptr;
2192       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
2193       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
2194       unsigned int s_image_x, s_image_y;
2195       unsigned int z_image_x, z_image_y;
2196
2197       intel_miptree_get_image_offset(s_mt, level, slice,
2198                                      &s_image_x, &s_image_y);
2199       intel_miptree_get_image_offset(z_mt, level, slice,
2200                                      &z_image_x, &z_image_y);
2201
2202       for (uint32_t y = 0; y < map->h; y++) {
2203          for (uint32_t x = 0; x < map->w; x++) {
2204             ptrdiff_t s_offset = intel_offset_S8(s_mt->pitch,
2205                                                  x + s_image_x + map->x,
2206                                                  y + s_image_y + map->y,
2207                                                  brw->has_swizzling);
2208             ptrdiff_t z_offset = ((y + z_image_y + map->y) *
2209                                   (z_mt->pitch / 4) +
2210                                   (x + z_image_x + map->x));
2211
2212             if (map_z32f_x24s8) {
2213                z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
2214                s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
2215             } else {
2216                uint32_t packed = packed_map[y * map->w + x];
2217                s_map[s_offset] = packed >> 24;
2218                z_map[z_offset] = packed;
2219             }
2220          }
2221       }
2222
2223       intel_miptree_unmap_raw(brw, s_mt);
2224       intel_miptree_unmap_raw(brw, z_mt);
2225
2226       DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
2227           __FUNCTION__,
2228           map->x, map->y, map->w, map->h,
2229           z_mt, _mesa_get_format_name(z_mt->format),
2230           map->x + z_image_x, map->y + z_image_y,
2231           s_mt, map->x + s_image_x, map->y + s_image_y,
2232           map->ptr, map->stride);
2233    }
2234
2235    free(map->buffer);
2236 }
2237
2238 /**
2239  * Create and attach a map to the miptree at (level, slice). Return the
2240  * attached map.
2241  */
2242 static struct intel_miptree_map*
2243 intel_miptree_attach_map(struct intel_mipmap_tree *mt,
2244                          unsigned int level,
2245                          unsigned int slice,
2246                          unsigned int x,
2247                          unsigned int y,
2248                          unsigned int w,
2249                          unsigned int h,
2250                          GLbitfield mode)
2251 {
2252    struct intel_miptree_map *map = calloc(1, sizeof(*map));
2253
2254    if (!map)
2255       return NULL;
2256
2257    assert(mt->level[level].slice[slice].map == NULL);
2258    mt->level[level].slice[slice].map = map;
2259
2260    map->mode = mode;
2261    map->x = x;
2262    map->y = y;
2263    map->w = w;
2264    map->h = h;
2265
2266    return map;
2267 }
2268
2269 /**
2270  * Release the map at (level, slice).
2271  */
2272 static void
2273 intel_miptree_release_map(struct intel_mipmap_tree *mt,
2274                          unsigned int level,
2275                          unsigned int slice)
2276 {
2277    struct intel_miptree_map **map;
2278
2279    map = &mt->level[level].slice[slice].map;
2280    free(*map);
2281    *map = NULL;
2282 }
2283
2284 static bool
2285 can_blit_slice(struct intel_mipmap_tree *mt,
2286                unsigned int level, unsigned int slice)
2287 {
2288    uint32_t image_x;
2289    uint32_t image_y;
2290    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2291    if (image_x >= 32768 || image_y >= 32768)
2292       return false;
2293
2294    if (mt->pitch >= 32768)
2295       return false;
2296
2297    return true;
2298 }
2299
/**
 * Map the rectangle (x, y, w, h) of (level, slice) of \a mt for CPU access.
 *
 * A map record is attached to the slice and one of the mapping strategies
 * below is selected.  On return \a out_ptr and \a out_stride hold the mapped
 * pointer and its row stride.  If the map record cannot be allocated,
 * \a out_ptr is NULL and \a out_stride is 0.  If the chosen strategy leaves
 * map->ptr NULL, the map record is released again and \a out_ptr is NULL.
 *
 * Parameter \a out_stride has type ptrdiff_t not because the buffer stride may
 * exceed 32 bits but to diminish the likelihood of subtle bugs in pointer
 * arithmetic overflow.
 *
 * If you call this function and use \a out_stride, then you're doing pointer
 * arithmetic on \a out_ptr. The type of \a out_stride doesn't prevent all
 * bugs.  The caller must still take care to avoid 32-bit overflow errors in
 * all arithmetic expressions that contain buffer offsets and pixel sizes,
 * which usually have type uint32_t or GLuint.
 */
void
intel_miptree_map(struct brw_context *brw,
                  struct intel_mipmap_tree *mt,
                  unsigned int level,
                  unsigned int slice,
                  unsigned int x,
                  unsigned int y,
                  unsigned int w,
                  unsigned int h,
                  GLbitfield mode,
                  void **out_ptr,
                  ptrdiff_t *out_stride)
{
   struct intel_miptree_map *map;

   assert(mt->num_samples <= 1);

   map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
   if (!map){
      *out_ptr = NULL;
      *out_stride = 0;
      return;
   }

   /* Resolve depth for the slice before the CPU looks at it; a writable
    * mapping additionally flags the slice as needing a HiZ resolve.
    */
   intel_miptree_slice_resolve_depth(brw, mt, level, slice);
   if (map->mode & GL_MAP_WRITE_BIT) {
      intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice);
   }

   /* Strategy selection.  The order matters, and intel_miptree_unmap() has
    * to pick the matching unmap routine for whichever branch runs here.
    */
   if (mt->format == MESA_FORMAT_S_UINT8) {
      /* S8: detile through a malloc'ed linear temporary. */
      intel_miptree_map_s8(brw, mt, map, level, slice);
   } else if (mt->etc_format != MESA_FORMAT_NONE &&
              !(mode & BRW_MAP_DIRECT_BIT)) {
      /* ETC data is staged in a temporary and unpacked at unmap time. */
      intel_miptree_map_etc(brw, mt, map, level, slice);
   } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
      /* Separate stencil: present a packed depth/stencil temporary. */
      intel_miptree_map_depthstencil(brw, mt, map, level, slice);
   }
   /* See intel_miptree_blit() for details on the 32k pitch limit. */
   else if (brw->has_llc &&
            !(mode & GL_MAP_WRITE_BIT) &&
            !mt->compressed &&
            (mt->tiling == I915_TILING_X ||
             (brw->gen >= 6 && mt->tiling == I915_TILING_Y)) &&
            can_blit_slice(mt, level, slice)) {
      /* Read-only mapping of a tiled, uncompressed miptree on LLC hardware:
       * blit into an untiled temporary and map that.
       */
      intel_miptree_map_blit(brw, mt, map, level, slice);
   } else if (mt->tiling != I915_TILING_NONE &&
              mt->bo->size >= brw->max_gtt_map_object_size) {
      /* Tiled BO at or above max_gtt_map_object_size: blit is the only
       * path taken here, so the slice must be blittable.
       */
      assert(can_blit_slice(mt, level, slice));
      intel_miptree_map_blit(brw, mt, map, level, slice);
#if defined(USE_SSE41)
   } else if (!(mode & GL_MAP_WRITE_BIT) && !mt->compressed && cpu_has_sse4_1) {
      /* Read-only, uncompressed: copy out with streaming loads. */
      intel_miptree_map_movntdqa(brw, mt, map, level, slice);
#endif
   } else {
      /* Fallback: map the BO itself and point into it. */
      intel_miptree_map_gtt(brw, mt, map, level, slice);
   }

   *out_ptr = map->ptr;
   *out_stride = map->stride;

   /* A NULL pointer means the strategy failed; drop the map record so that
    * a later intel_miptree_unmap() on this slice is a no-op.
    */
   if (map->ptr == NULL)
      intel_miptree_release_map(mt, level, slice);
}
2374
/**
 * Unmap (level, slice) of \a mt previously mapped with intel_miptree_map().
 *
 * Does nothing if no map is attached to the slice.  Otherwise the unmap
 * routine matching the strategy used at map time is run and the map record
 * is released.
 */
void
intel_miptree_unmap(struct brw_context *brw,
                    struct intel_mipmap_tree *mt,
                    unsigned int level,
                    unsigned int slice)
{
   struct intel_miptree_map *map = mt->level[level].slice[slice].map;

   assert(mt->num_samples <= 1);

   if (!map)
      return;

   DBG("%s: mt %p (%s) level %d slice %d\n", __FUNCTION__,
       mt, _mesa_get_format_name(mt->format), level, slice);

   /* The first three tests repeat the conditions used by
    * intel_miptree_map(); the blit and movntdqa paths are recognised by the
    * state they left in the map record.
    */
   if (mt->format == MESA_FORMAT_S_UINT8) {
      intel_miptree_unmap_s8(brw, mt, map, level, slice);
   } else if (mt->etc_format != MESA_FORMAT_NONE &&
              !(map->mode & BRW_MAP_DIRECT_BIT)) {
      intel_miptree_unmap_etc(brw, mt, map, level, slice);
   } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
      intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
   } else if (map->mt) {
      /* Only the blit path sets map->mt (its temporary miptree). */
      intel_miptree_unmap_blit(brw, mt, map, level, slice);
#if defined(USE_SSE41)
   } else if (map->buffer && cpu_has_sse4_1) {
      /* Of the remaining paths, only movntdqa allocates map->buffer. */
      intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
#endif
   } else {
      intel_miptree_unmap_gtt(brw, mt, map, level, slice);
   }

   intel_miptree_release_map(mt, level, slice);
}