src/mesa/drivers/dri/i965/intel_mipmap_tree.c

   1 /*
   2  * Copyright 2006 VMware, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sublicense, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the
  14  * next paragraph) shall be included in all copies or substantial portions
  15  * of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  20  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  21  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  22  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  23  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24  */
  25
  26 #include <GL/gl.h>
  27 #include <GL/internal/dri_interface.h>
  28
  29 #include "intel_batchbuffer.h"
  30 #include "intel_mipmap_tree.h"
  31 #include "intel_resolve_map.h"
  32 #include "intel_tex.h"
  33 #include "intel_blit.h"
  34 #include "intel_fbo.h"
  35
  36 #include "brw_blorp.h"
  37 #include "brw_context.h"
  38
  39 #include "main/enums.h"
  40 #include "main/fbobject.h"
  41 #include "main/formats.h"
  42 #include "main/glformats.h"
  43 #include "main/texcompress_etc.h"
  44 #include "main/teximage.h"
  45 #include "main/streaming-load-memcpy.h"
  46 #include "x86/common_x86_asm.h"
  47
  48 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
  49
  50 static void *intel_miptree_map_raw(struct brw_context *brw,
  51                                    struct intel_mipmap_tree *mt);
  52
  53 static void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt);
  54
  55 static bool
  56 intel_miptree_alloc_mcs(struct brw_context *brw,
  57                         struct intel_mipmap_tree *mt,
  58                         GLuint num_samples);
  59
  60 /**
  61  * Determine which MSAA layout should be used by the MSAA surface being
  62  * created, based on the chip generation and the surface type.
  63  */
  64 static enum intel_msaa_layout
  65 compute_msaa_layout(struct brw_context *brw, mesa_format format,
  66                     bool disable_aux_buffers)
  67 {
  68    /* Prior to Gen7, all MSAA surfaces used IMS layout. */
  69    if (brw->gen < 7)
  70       return INTEL_MSAA_LAYOUT_IMS;
  71
  72    /* In Gen7, IMS layout is only used for depth and stencil buffers. */
  73    switch (_mesa_get_format_base_format(format)) {
  74    case GL_DEPTH_COMPONENT:
  75    case GL_STENCIL_INDEX:
  76    case GL_DEPTH_STENCIL:
  77       return INTEL_MSAA_LAYOUT_IMS;
  78    default:
  79       /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
  80        *
  81        *   This field must be set to 0 for all SINT MSRTs when all RT channels
  82        *   are not written
  83        *
  84        * In practice this means that we have to disable MCS for all signed
  85        * integer MSAA buffers.  The alternative, to disable MCS only when one
  86        * of the render target channels is disabled, is impractical because it
  87        * would require converting between CMS and UMS MSAA layouts on the fly,
  88        * which is expensive.
  89        */
  90       if (brw->gen == 7 && _mesa_get_format_datatype(format) == GL_INT) {
  91          return INTEL_MSAA_LAYOUT_UMS;
  92       } else if (disable_aux_buffers) {
  93          /* We can't use the CMS layout because it uses an aux buffer, the MCS
  94           * buffer. So fallback to UMS, which is identical to CMS without the
  95           * MCS. */
  96          return INTEL_MSAA_LAYOUT_UMS;
  97       } else {
  98          return INTEL_MSAA_LAYOUT_CMS;
  99       }
 100    }
 101 }
 102
 103
 104 /**
 105  * For single-sampled render targets ("non-MSRT"), the MCS buffer is a
 106  * scaled-down bitfield representation of the color buffer which is capable of
 107  * recording when blocks of the color buffer are equal to the clear value.
 108  * This function returns the block size that will be used by the MCS buffer
 109  * corresponding to a certain color miptree.
 110  *
 111  * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 112  * beneath the "Fast Color Clear" bullet (p327):
 113  *
 114  *     The following table describes the RT alignment
 115  *
 116  *                       Pixels  Lines
 117  *         TiledY RT CL
 118  *             bpp
 119  *              32          8      4
 120  *              64          4      4
 121  *             128          2      4
 122  *         TiledX RT CL
 123  *             bpp
 124  *              32         16      2
 125  *              64          8      2
 126  *             128          4      2
 127  *
 128  * This alignment has the following uses:
 129  *
 130  * - For figuring out the size of the MCS buffer.  Each 4k tile in the MCS
 131  *   buffer contains 128 blocks horizontally and 256 blocks vertically.
 132  *
 133  * - For figuring out alignment restrictions for a fast clear operation.  Fast
 134  *   clear operations must always clear aligned multiples of 16 blocks
 135  *   horizontally and 32 blocks vertically.
 136  *
 137  * - For scaling down the coordinates sent through the render pipeline during
 138  *   a fast clear.  X coordinates must be scaled down by 8 times the block
 139  *   width, and Y coordinates by 16 times the block height.
 140  *
 141  * - For scaling down the coordinates sent through the render pipeline during
 142  *   a "Render Target Resolve" operation.  X coordinates must be scaled down
 143  *   by half the block width, and Y coordinates by half the block height.
 144  */
 145 void
 146 intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt,
 147                                  unsigned *width_px, unsigned *height)
 148 {
 149    switch (mt->tiling) {
 150    default:
 151       unreachable("Non-MSRT MCS requires X or Y tiling");
 152       /* In release builds, fall through */
 153    case I915_TILING_Y:
 154       *width_px = 32 / mt->cpp;
 155       *height = 4;
 156       break;
 157    case I915_TILING_X:
 158       *width_px = 64 / mt->cpp;
 159       *height = 2;
 160    }
 161 }
 162
 163 bool
 164 intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling)
 165 {
 166    /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
 167     * Target(s)", beneath the "Fast Color Clear" bullet (p326):
 168     *
 169     *     - Support is limited to tiled render targets.
 170     *
 171     * Gen9 changes the restriction to Y-tile only.
 172     */
 173    if (brw->gen >= 9)
 174       return tiling == I915_TILING_Y;
 175    else if (brw->gen >= 7)
 176       return tiling != I915_TILING_NONE;
 177    else
 178       return false;
 179 }
 180
 181 /**
 182  * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
 183  * can be used. This doesn't (and should not) inspect any of the properties of
 184  * the miptree's BO.
 185  *
 186  * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 187  * beneath the "Fast Color Clear" bullet (p326):
 188  *
 189  *     - Support is for non-mip-mapped and non-array surface types only.
 190  *
 191  * And then later, on p327:
 192  *
 193  *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
 194  *       64bpp, and 128bpp.
 195  */
 196 bool
 197 intel_miptree_is_fast_clear_capable(struct brw_context *brw,
 198                                     struct intel_mipmap_tree *mt)
 199 {
 200    /* MCS support does not exist prior to Gen7 */
 201    if (brw->gen < 7)
 202       return false;
 203
 204    if (mt->disable_aux_buffers)
 205       return false;
 206
 207    /* MCS is only supported for color buffers */
 208    switch (_mesa_get_format_base_format(mt->format)) {
 209    case GL_DEPTH_COMPONENT:
 210    case GL_DEPTH_STENCIL:
 211    case GL_STENCIL_INDEX:
 212       return false;
 213    }
 214
 215    if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
 216       return false;
 217    if (mt->first_level != 0 || mt->last_level != 0) {
 218       if (brw->gen >= 8) {
 219          perf_debug("Multi-LOD fast clear - giving up (%dx%dx%d).\n",
 220                     mt->logical_width0, mt->logical_height0, mt->last_level);
 221       }
 222
 223       return false;
 224    }
 225    if (mt->physical_depth0 != 1) {
 226       if (brw->gen >= 8) {
 227          perf_debug("Layered fast clear - giving up. (%dx%d%d)\n",
 228                     mt->logical_width0, mt->logical_height0,
 229                     mt->physical_depth0);
 230       }
 231
 232       return false;
 233    }
 234
 235    /* There's no point in using an MCS buffer if the surface isn't in a
 236     * renderable format.
 237     */
 238    if (!brw->format_supported_as_render_target[mt->format])
 239       return false;
 240
 241    return true;
 242 }
 243
 244
 245 /**
 246  * Determine depth format corresponding to a depth+stencil format,
 247  * for separate stencil.
 248  */
 249 mesa_format
 250 intel_depth_format_for_depthstencil_format(mesa_format format) {
 251    switch (format) {
 252    case MESA_FORMAT_Z24_UNORM_S8_UINT:
 253       return MESA_FORMAT_Z24_UNORM_X8_UINT;
 254    case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
 255       return MESA_FORMAT_Z_FLOAT32;
 256    default:
 257       return format;
 258    }
 259 }
 260
 261
 262 /**
 263  * @param for_bo Indicates that the caller is
 264  *        intel_miptree_create_for_bo(). If true, then do not create
 265  *        \c stencil_mt.
 266  */
 267 static struct intel_mipmap_tree *
 268 intel_miptree_create_layout(struct brw_context *brw,
 269                             GLenum target,
 270                             mesa_format format,
 271                             GLuint first_level,
 272                             GLuint last_level,
 273                             GLuint width0,
 274                             GLuint height0,
 275                             GLuint depth0,
 276                             GLuint num_samples,
 277                             uint32_t layout_flags)
 278 {
 279    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
 280    if (!mt)
 281       return NULL;
 282
 283    DBG("%s target %s format %s level %d..%d slices %d <-- %p\n", __func__,
 284        _mesa_enum_to_string(target),
 285        _mesa_get_format_name(format),
 286        first_level, last_level, depth0, mt);
 287
 288    if (target == GL_TEXTURE_1D_ARRAY) {
 289       /* For a 1D Array texture the OpenGL API will treat the height0
 290        * parameter as the number of array slices. For Intel hardware, we treat
 291        * the 1D array as a 2D Array with a height of 1.
 292        *
 293        * So, when we first come through this path to create a 1D Array
 294        * texture, height0 stores the number of slices, and depth0 is 1. In
 295        * this case, we want to swap height0 and depth0.
 296        *
 297        * Since some miptrees will be created based on the base miptree, we may
 298        * come through this path and see height0 as 1 and depth0 being the
 299        * number of slices. In this case we don't need to do the swap.
 300        */
 301       assert(height0 == 1 || depth0 == 1);
 302       if (height0 > 1) {
 303          depth0 = height0;
 304          height0 = 1;
 305       }
 306    }
 307
 308    mt->target = target;
 309    mt->format = format;
 310    mt->first_level = first_level;
 311    mt->last_level = last_level;
 312    mt->logical_width0 = width0;
 313    mt->logical_height0 = height0;
 314    mt->logical_depth0 = depth0;
 315    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
 316    mt->disable_aux_buffers = (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) != 0;
 317    exec_list_make_empty(&mt->hiz_map);
 318    mt->cpp = _mesa_get_format_bytes(format);
 319    mt->num_samples = num_samples;
 320    mt->compressed = _mesa_is_format_compressed(format);
 321    mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE;
 322    mt->refcount = 1;
 323
 324    if (num_samples > 1) {
 325       /* Adjust width/height/depth for MSAA */
 326       mt->msaa_layout = compute_msaa_layout(brw, format,
 327                                             mt->disable_aux_buffers);
 328       if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
 329          /* From the Ivybridge PRM, Volume 1, Part 1, page 108:
 330           * "If the surface is multisampled and it is a depth or stencil
 331           *  surface or Multisampled Surface StorageFormat in SURFACE_STATE is
 332           *  MSFMT_DEPTH_STENCIL, WL and HL must be adjusted as follows before
 333           *  proceeding:
 334           *
 335           *  +----------------------------------------------------------------+
 336           *  | Num Multisamples |        W_l =         |        H_l =         |
 337           *  +----------------------------------------------------------------+
 338           *  |         2        | ceiling(W_l / 2) * 4 | H_l (no adjustment)  |
 339           *  |         4        | ceiling(W_l / 2) * 4 | ceiling(H_l / 2) * 4 |
 340           *  |         8        | ceiling(W_l / 2) * 8 | ceiling(H_l / 2) * 4 |
 341           *  |        16        | ceiling(W_l / 2) * 8 | ceiling(H_l / 2) * 8 |
 342           *  +----------------------------------------------------------------+
 343           * "
 344           *
 345           * Note that MSFMT_DEPTH_STENCIL just means the IMS (interleaved)
 346           * format rather than UMS/CMS (array slices).  The Sandybridge PRM,
 347           * Volume 1, Part 1, Page 111 has the same formula for 4x MSAA.
 348           *
 349           * Another more complicated explanation for these adjustments comes
 350           * from the Sandybridge PRM, volume 4, part 1, page 31:
 351           *
 352           *     "Any of the other messages (sample*, LOD, load4) used with a
 353           *      (4x) multisampled surface will in-effect sample a surface with
 354           *      double the height and width as that indicated in the surface
 355           *      state. Each pixel position on the original-sized surface is
 356           *      replaced with a 2x2 of samples with the following arrangement:
 357           *
 358           *         sample 0 sample 2
 359           *         sample 1 sample 3"
 360           *
 361           * Thus, when sampling from a multisampled texture, it behaves as
 362           * though the layout in memory for (x,y,sample) is:
 363           *
 364           *      (0,0,0) (0,0,2)   (1,0,0) (1,0,2)
 365           *      (0,0,1) (0,0,3)   (1,0,1) (1,0,3)
 366           *
 367           *      (0,1,0) (0,1,2)   (1,1,0) (1,1,2)
 368           *      (0,1,1) (0,1,3)   (1,1,1) (1,1,3)
 369           *
 370           * However, the actual layout of multisampled data in memory is:
 371           *
 372           *      (0,0,0) (1,0,0)   (0,0,1) (1,0,1)
 373           *      (0,1,0) (1,1,0)   (0,1,1) (1,1,1)
 374           *
 375           *      (0,0,2) (1,0,2)   (0,0,3) (1,0,3)
 376           *      (0,1,2) (1,1,2)   (0,1,3) (1,1,3)
 377           *
 378           * This pattern repeats for each 2x2 pixel block.
 379           *
 380           * As a result, when calculating the size of our 4-sample buffer for
 381           * an odd width or height, we have to align before scaling up because
 382           * sample 3 is in that bottom right 2x2 block.
 383           */
 384          switch (num_samples) {
 385          case 2:
 386             assert(brw->gen >= 8);
 387             width0 = ALIGN(width0, 2) * 2;
 388             height0 = ALIGN(height0, 2);
 389             break;
 390          case 4:
 391             width0 = ALIGN(width0, 2) * 2;
 392             height0 = ALIGN(height0, 2) * 2;
 393             break;
 394          case 8:
 395             width0 = ALIGN(width0, 2) * 4;
 396             height0 = ALIGN(height0, 2) * 2;
 397             break;
 398          default:
 399             /* num_samples should already have been quantized to 0, 1, 2, 4, or
 400              * 8.
 401              */
 402             unreachable("not reached");
 403          }
 404       } else {
 405          /* Non-interleaved */
 406          depth0 *= num_samples;
 407       }
 408    }
 409
 410    /* Set array_layout to ALL_SLICES_AT_EACH_LOD when array_spacing_lod0 can
 411     * be used. array_spacing_lod0 is only used for non-IMS MSAA surfaces on
 412     * Gen 7 and 8. On Gen 8 and 9 this layout is not available but it is still
 413     * used on Gen8 to make it pick a qpitch value which doesn't include space
 414     * for the mipmaps. On Gen9 this is not necessary because it will
 415     * automatically pick a packed qpitch value whenever mt->first_level ==
 416     * mt->last_level.
 417     * TODO: can we use it elsewhere?
 418     * TODO: also disable this on Gen8 and pick the qpitch value like Gen9
 419     */
 420    if (brw->gen >= 9) {
 421       mt->array_layout = ALL_LOD_IN_EACH_SLICE;
 422    } else {
 423       switch (mt->msaa_layout) {
 424       case INTEL_MSAA_LAYOUT_NONE:
 425       case INTEL_MSAA_LAYOUT_IMS:
 426          mt->array_layout = ALL_LOD_IN_EACH_SLICE;
 427          break;
 428       case INTEL_MSAA_LAYOUT_UMS:
 429       case INTEL_MSAA_LAYOUT_CMS:
 430          mt->array_layout = ALL_SLICES_AT_EACH_LOD;
 431          break;
 432       }
 433    }
 434
 435    if (target == GL_TEXTURE_CUBE_MAP) {
 436       assert(depth0 == 1);
 437       depth0 = 6;
 438    }
 439
 440    mt->physical_width0 = width0;
 441    mt->physical_height0 = height0;
 442    mt->physical_depth0 = depth0;
 443
 444    if (!(layout_flags & MIPTREE_LAYOUT_FOR_BO) &&
 445        _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
 446        (brw->must_use_separate_stencil ||
 447         (brw->has_separate_stencil &&
 448          intel_miptree_wants_hiz_buffer(brw, mt)))) {
 449       uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
 450       if (brw->gen == 6) {
 451          stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD |
 452                           MIPTREE_LAYOUT_TILING_ANY;
 453       }
 454
 455       mt->stencil_mt = intel_miptree_create(brw,
 456                                             mt->target,
 457                                             MESA_FORMAT_S_UINT8,
 458                                             mt->first_level,
 459                                             mt->last_level,
 460                                             mt->logical_width0,
 461                                             mt->logical_height0,
 462                                             mt->logical_depth0,
 463                                             num_samples,
 464                                             stencil_flags);
 465
 466       if (!mt->stencil_mt) {
 467          intel_miptree_release(&mt);
 468          return NULL;
 469       }
 470
 471       /* Fix up the Z miptree format for how we're splitting out separate
 472        * stencil.  Gen7 expects there to be no stencil bits in its depth buffer.
 473        */
 474       mt->format = intel_depth_format_for_depthstencil_format(mt->format);
 475       mt->cpp = 4;
 476
 477       if (format == mt->format) {
 478          _mesa_problem(NULL, "Unknown format %s in separate stencil mt\n",
 479                        _mesa_get_format_name(mt->format));
 480       }
 481    }
 482
 483    if (layout_flags & MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD)
 484       mt->array_layout = ALL_SLICES_AT_EACH_LOD;
 485
 486    /*
 487     * Obey HALIGN_16 constraints for Gen8 and Gen9 buffers which are
 488     * multisampled or have an AUX buffer attached to it.
 489     *
 490     * GEN  |    MSRT        | AUX_CCS_* or AUX_MCS
 491     *  -------------------------------------------
 492     *  9   |  HALIGN_16     |    HALIGN_16
 493     *  8   |  HALIGN_ANY    |    HALIGN_16
 494     *  7   |      ?         |        ?
 495     *  6   |      ?         |        ?
 496     */
 497    if (intel_miptree_is_fast_clear_capable(brw, mt)) {
 498       if (brw->gen >= 9 || (brw->gen == 8 && num_samples <= 1))
 499          layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
 500    } else if (brw->gen >= 9 && num_samples > 1) {
 501       layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
 502    } else {
 503       /* For now, nothing else has this requirement */
 504       assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
 505    }
 506
 507    brw_miptree_layout(brw, mt, layout_flags);
 508
 509    if (mt->disable_aux_buffers)
 510       assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS);
 511
 512    return mt;
 513 }
 514
 515
 516 /**
 517  * Choose an appropriate uncompressed format for a requested
 518  * compressed format, if unsupported.
 519  */
 520 mesa_format
 521 intel_lower_compressed_format(struct brw_context *brw, mesa_format format)
 522 {
 523    /* No need to lower ETC formats on these platforms,
 524     * they are supported natively.
 525     */
 526    if (brw->gen >= 8 || brw->is_baytrail)
 527       return format;
 528
 529    switch (format) {
 530    case MESA_FORMAT_ETC1_RGB8:
 531       return MESA_FORMAT_R8G8B8X8_UNORM;
 532    case MESA_FORMAT_ETC2_RGB8:
 533       return MESA_FORMAT_R8G8B8X8_UNORM;
 534    case MESA_FORMAT_ETC2_SRGB8:
 535    case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
 536    case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
 537       return MESA_FORMAT_B8G8R8A8_SRGB;
 538    case MESA_FORMAT_ETC2_RGBA8_EAC:
 539    case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
 540       return MESA_FORMAT_R8G8B8A8_UNORM;
 541    case MESA_FORMAT_ETC2_R11_EAC:
 542       return MESA_FORMAT_R_UNORM16;
 543    case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
 544       return MESA_FORMAT_R_SNORM16;
 545    case MESA_FORMAT_ETC2_RG11_EAC:
 546       return MESA_FORMAT_R16G16_UNORM;
 547    case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
 548       return MESA_FORMAT_R16G16_SNORM;
 549    default:
 550       /* Non ETC1 / ETC2 format */
 551       return format;
 552    }
 553 }
 554
 555 /* This function computes Yf/Ys tiled bo size, alignment and pitch. */
 556 static unsigned long
 557 intel_get_yf_ys_bo_size(struct intel_mipmap_tree *mt, unsigned *alignment,
 558                         unsigned long *pitch)
 559 {
 560    uint32_t tile_width, tile_height;
 561    unsigned long stride, size, aligned_y;
 562
 563    assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE);
 564    intel_get_tile_dims(mt->tiling, mt->tr_mode, mt->cpp,
 565                        &tile_width, &tile_height);
 566
 567    aligned_y = ALIGN(mt->total_height, tile_height);
 568    stride = mt->total_width * mt->cpp;
 569    stride = ALIGN(stride, tile_width);
 570    size = stride * aligned_y;
 571
 572    if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YF) {
 573       assert(size % 4096 == 0);
 574       *alignment = 4096;
 575    } else {
 576       assert(size % (64 * 1024) == 0);
 577       *alignment = 64 * 1024;
 578    }
 579    *pitch = stride;
 580    return size;
 581 }
 582
 583 struct intel_mipmap_tree *
 584 intel_miptree_create(struct brw_context *brw,
 585                      GLenum target,
 586                      mesa_format format,
 587                      GLuint first_level,
 588                      GLuint last_level,
 589                      GLuint width0,
 590                      GLuint height0,
 591                      GLuint depth0,
 592                      GLuint num_samples,
 593                      uint32_t layout_flags)
 594 {
 595    struct intel_mipmap_tree *mt;
 596    mesa_format tex_format = format;
 597    mesa_format etc_format = MESA_FORMAT_NONE;
 598    GLuint total_width, total_height;
 599    uint32_t alloc_flags = 0;
 600
 601    format = intel_lower_compressed_format(brw, format);
 602
 603    etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
 604
 605    assert((layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) == 0);
 606    assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0);
 607    mt = intel_miptree_create_layout(brw, target, format,
 608                                     first_level, last_level, width0,
 609                                     height0, depth0, num_samples,
 610                                     layout_flags);
 611    /*
 612     * pitch == 0 || height == 0  indicates the null texture
 613     */
 614    if (!mt || !mt->total_width || !mt->total_height) {
 615       intel_miptree_release(&mt);
 616       return NULL;
 617    }
 618
 619    total_width = mt->total_width;
 620    total_height = mt->total_height;
 621
 622    if (format == MESA_FORMAT_S_UINT8) {
 623       /* Align to size of W tile, 64x64. */
 624       total_width = ALIGN(total_width, 64);
 625       total_height = ALIGN(total_height, 64);
 626    }
 627
 628    bool y_or_x = false;
 629
 630    if (mt->tiling == (I915_TILING_Y | I915_TILING_X)) {
 631       y_or_x = true;
 632       mt->tiling = I915_TILING_Y;
 633    }
 634
 635    if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD)
 636       alloc_flags |= BO_ALLOC_FOR_RENDER;
 637
 638    unsigned long pitch;
 639    mt->etc_format = etc_format;
 640
 641    if (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
 642       unsigned alignment = 0;
 643       unsigned long size;
 644       size = intel_get_yf_ys_bo_size(mt, &alignment, &pitch);
 645       assert(size);
 646       mt->bo = drm_intel_bo_alloc_for_render(brw->bufmgr, "miptree",
 647                                              size, alignment);
 648    } else {
 649       mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
 650                                         total_width, total_height, mt->cpp,
 651                                         &mt->tiling, &pitch,
 652                                         alloc_flags);
 653    }
 654
 655    mt->pitch = pitch;
 656
 657    /* If the BO is too large to fit in the aperture, we need to use the
 658     * BLT engine to support it.  Prior to Sandybridge, the BLT paths can't
 659     * handle Y-tiling, so we need to fall back to X.
 660     */
 661    if (brw->gen < 6 && y_or_x && mt->bo->size >= brw->max_gtt_map_object_size) {
 662       perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n",
 663                  mt->total_width, mt->total_height);
 664
 665       mt->tiling = I915_TILING_X;
 666       drm_intel_bo_unreference(mt->bo);
 667       mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
 668                                   total_width, total_height, mt->cpp,
 669                                   &mt->tiling, &pitch, alloc_flags);
 670       mt->pitch = pitch;
 671    }
 672
 673    mt->offset = 0;
 674
 675    if (!mt->bo) {
 676        intel_miptree_release(&mt);
 677        return NULL;
 678    }
 679
 680
 681    if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
 682       assert(mt->num_samples > 1);
 683       if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) {
 684          intel_miptree_release(&mt);
 685          return NULL;
 686       }
 687    }
 688
 689    /* If this miptree is capable of supporting fast color clears, set
 690     * fast_clear_state appropriately to ensure that fast clears will occur.
 691     * Allocation of the MCS miptree will be deferred until the first fast
 692     * clear actually occurs.
 693     */
 694    if (intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) &&
 695        intel_miptree_is_fast_clear_capable(brw, mt)) {
 696       mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 697       assert(brw->gen < 8 || mt->halign == 16 || num_samples <= 1);
 698    }
 699
 700    return mt;
 701 }
 702
 703 struct intel_mipmap_tree *
 704 intel_miptree_create_for_bo(struct brw_context *brw,
 705                             drm_intel_bo *bo,
 706                             mesa_format format,
 707                             uint32_t offset,
 708                             uint32_t width,
 709                             uint32_t height,
 710                             uint32_t depth,
 711                             int pitch,
 712                             uint32_t layout_flags)
 713 {
 714    struct intel_mipmap_tree *mt;
 715    uint32_t tiling, swizzle;
 716    GLenum target;
 717
 718    drm_intel_bo_get_tiling(bo, &tiling, &swizzle);
 719
 720    /* Nothing will be able to use this miptree with the BO if the offset isn't
 721     * aligned.
 722     */
 723    if (tiling != I915_TILING_NONE)
 724       assert(offset % 4096 == 0);
 725
 726    /* miptrees can't handle negative pitch.  If you need flipping of images,
 727     * that's outside of the scope of the mt.
 728     */
 729    assert(pitch >= 0);
 730
 731    target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
 732
 733    /* The BO already has a tiling format and we shouldn't confuse the lower
 734     * layers by making it try to find a tiling format again.
 735     */
 736    assert((layout_flags & MIPTREE_LAYOUT_TILING_ANY) == 0);
 737    assert((layout_flags & MIPTREE_LAYOUT_TILING_NONE) == 0);
 738
 739    layout_flags |= MIPTREE_LAYOUT_FOR_BO;
 740    mt = intel_miptree_create_layout(brw, target, format,
 741                                     0, 0,
 742                                     width, height, depth, 0,
 743                                     layout_flags);
 744    if (!mt)
 745       return NULL;
 746
 747    drm_intel_bo_reference(bo);
 748    mt->bo = bo;
 749    mt->pitch = pitch;
 750    mt->offset = offset;
 751    mt->tiling = tiling;
 752
 753    return mt;
 754 }
 755
 756 /**
 757  * For a singlesample renderbuffer, this simply wraps the given BO with a
 758  * miptree.
 759  *
 760  * For a multisample renderbuffer, this wraps the window system's
 761  * (singlesample) BO with a singlesample miptree attached to the
 762  * intel_renderbuffer, then creates a multisample miptree attached to irb->mt
 763  * that will contain the actual rendering (which is lazily resolved to
 764  * irb->singlesample_mt).
 765  */
 766 void
 767 intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
 768                                          struct intel_renderbuffer *irb,
 769                                          drm_intel_bo *bo,
 770                                          uint32_t width, uint32_t height,
 771                                          uint32_t pitch)
 772 {
 773    struct intel_mipmap_tree *singlesample_mt = NULL;
 774    struct intel_mipmap_tree *multisample_mt = NULL;
 775    struct gl_renderbuffer *rb = &irb->Base.Base;
 776    mesa_format format = rb->Format;
 777    int num_samples = rb->NumSamples;
 778
 779    /* Only the front and back buffers, which are color buffers, are allocated
 780     * through the image loader.
 781     */
 782    assert(_mesa_get_format_base_format(format) == GL_RGB ||
 783           _mesa_get_format_base_format(format) == GL_RGBA);
 784
 785    singlesample_mt = intel_miptree_create_for_bo(intel,
 786                                                  bo,
 787                                                  format,
 788                                                  0,
 789                                                  width,
 790                                                  height,
 791                                                  1,
 792                                                  pitch,
 793                                                  0);
 794    if (!singlesample_mt)
 795       goto fail;
 796
 797    /* If this miptree is capable of supporting fast color clears, set
 798     * mcs_state appropriately to ensure that fast clears will occur.
 799     * Allocation of the MCS miptree will be deferred until the first fast
 800     * clear actually occurs.
 801     */
 802    if (intel_tiling_supports_non_msrt_mcs(intel, singlesample_mt->tiling) &&
 803        intel_miptree_is_fast_clear_capable(intel, singlesample_mt))
 804       singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 805
 806    if (num_samples == 0) {
 807       intel_miptree_release(&irb->mt);
 808       irb->mt = singlesample_mt;
 809
 810       assert(!irb->singlesample_mt);
 811    } else {
 812       intel_miptree_release(&irb->singlesample_mt);
 813       irb->singlesample_mt = singlesample_mt;
 814
 815       if (!irb->mt ||
 816           irb->mt->logical_width0 != width ||
 817           irb->mt->logical_height0 != height) {
 818          multisample_mt = intel_miptree_create_for_renderbuffer(intel,
 819                                                                 format,
 820                                                                 width,
 821                                                                 height,
 822                                                                 num_samples);
 823          if (!multisample_mt)
 824             goto fail;
 825
 826          irb->need_downsample = false;
 827          intel_miptree_release(&irb->mt);
 828          irb->mt = multisample_mt;
 829       }
 830    }
 831    return;
 832
 833 fail:
 834    intel_miptree_release(&irb->singlesample_mt);
 835    intel_miptree_release(&irb->mt);
 836    return;
 837 }
 838
 839 struct intel_mipmap_tree*
 840 intel_miptree_create_for_renderbuffer(struct brw_context *brw,
 841                                       mesa_format format,
 842                                       uint32_t width,
 843                                       uint32_t height,
 844                                       uint32_t num_samples)
 845 {
 846    struct intel_mipmap_tree *mt;
 847    uint32_t depth = 1;
 848    bool ok;
 849    GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
 850    const uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
 851                                  MIPTREE_LAYOUT_TILING_ANY;
 852
 853
 854    mt = intel_miptree_create(brw, target, format, 0, 0,
 855                              width, height, depth, num_samples,
 856                              layout_flags);
 857    if (!mt)
 858       goto fail;
 859
 860    if (intel_miptree_wants_hiz_buffer(brw, mt)) {
 861       ok = intel_miptree_alloc_hiz(brw, mt);
 862       if (!ok)
 863          goto fail;
 864    }
 865
 866    return mt;
 867
 868 fail:
 869    intel_miptree_release(&mt);
 870    return NULL;
 871 }
 872
 873 void
 874 intel_miptree_reference(struct intel_mipmap_tree **dst,
 875                         struct intel_mipmap_tree *src)
 876 {
 877    if (*dst == src)
 878       return;
 879
 880    intel_miptree_release(dst);
 881
 882    if (src) {
 883       src->refcount++;
 884       DBG("%s %p refcount now %d\n", __func__, src, src->refcount);
 885    }
 886
 887    *dst = src;
 888 }
 889
 890
 891 void
 892 intel_miptree_release(struct intel_mipmap_tree **mt)
 893 {
 894    if (!*mt)
 895       return;
 896
 897    DBG("%s %p refcount will be %d\n", __func__, *mt, (*mt)->refcount - 1);
 898    if (--(*mt)->refcount <= 0) {
 899       GLuint i;
 900
 901       DBG("%s deleting %p\n", __func__, *mt);
 902
 903       drm_intel_bo_unreference((*mt)->bo);
 904       intel_miptree_release(&(*mt)->stencil_mt);
 905       if ((*mt)->hiz_buf) {
 906          if ((*mt)->hiz_buf->mt)
 907             intel_miptree_release(&(*mt)->hiz_buf->mt);
 908          else
 909             drm_intel_bo_unreference((*mt)->hiz_buf->bo);
 910          free((*mt)->hiz_buf);
 911       }
 912       intel_miptree_release(&(*mt)->mcs_mt);
 913       intel_resolve_map_clear(&(*mt)->hiz_map);
 914
 915       for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
 916          free((*mt)->level[i].slice);
 917       }
 918
 919       free(*mt);
 920    }
 921    *mt = NULL;
 922 }
 923
 924
 925 void
 926 intel_get_image_dims(struct gl_texture_image *image,
 927                      int *width, int *height, int *depth)
 928 {
 929    switch (image->TexObject->Target) {
 930    case GL_TEXTURE_1D_ARRAY:
 931       /* For a 1D Array texture the OpenGL API will treat the image height as
 932        * the number of array slices. For Intel hardware, we treat the 1D array
 933        * as a 2D Array with a height of 1. So, here we want to swap image
 934        * height and depth.
 935        */
 936       *width = image->Width;
 937       *height = 1;
 938       *depth = image->Height;
 939       break;
 940    default:
 941       *width = image->Width;
 942       *height = image->Height;
 943       *depth = image->Depth;
 944       break;
 945    }
 946 }
 947
 948 /**
 949  * Can the image be pulled into a unified mipmap tree?  This mirrors
 950  * the completeness test in a lot of ways.
 951  *
 952  * Not sure whether I want to pass gl_texture_image here.
 953  */
 954 bool
 955 intel_miptree_match_image(struct intel_mipmap_tree *mt,
 956                           struct gl_texture_image *image)
 957 {
 958    struct intel_texture_image *intelImage = intel_texture_image(image);
 959    GLuint level = intelImage->base.Base.Level;
 960    int width, height, depth;
 961
 962    /* glTexImage* choose the texture object based on the target passed in, and
 963     * objects can't change targets over their lifetimes, so this should be
 964     * true.
 965     */
 966    assert(image->TexObject->Target == mt->target);
 967
 968    mesa_format mt_format = mt->format;
 969    if (mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT && mt->stencil_mt)
 970       mt_format = MESA_FORMAT_Z24_UNORM_S8_UINT;
 971    if (mt->format == MESA_FORMAT_Z_FLOAT32 && mt->stencil_mt)
 972       mt_format = MESA_FORMAT_Z32_FLOAT_S8X24_UINT;
 973    if (mt->etc_format != MESA_FORMAT_NONE)
 974       mt_format = mt->etc_format;
 975
 976    if (image->TexFormat != mt_format)
 977       return false;
 978
 979    intel_get_image_dims(image, &width, &height, &depth);
 980
 981    if (mt->target == GL_TEXTURE_CUBE_MAP)
 982       depth = 6;
 983
 984    int level_depth = mt->level[level].depth;
 985    if (mt->num_samples > 1) {
 986       switch (mt->msaa_layout) {
 987       case INTEL_MSAA_LAYOUT_NONE:
 988       case INTEL_MSAA_LAYOUT_IMS:
 989          break;
 990       case INTEL_MSAA_LAYOUT_UMS:
 991       case INTEL_MSAA_LAYOUT_CMS:
 992          level_depth /= mt->num_samples;
 993          break;
 994       }
 995    }
 996
 997    /* Test image dimensions against the base level image adjusted for
 998     * minification.  This will also catch images not present in the
 999     * tree, changed targets, etc.
1000     */
1001    if (width != minify(mt->logical_width0, level - mt->first_level) ||
1002        height != minify(mt->logical_height0, level - mt->first_level) ||
1003        depth != level_depth) {
1004       return false;
1005    }
1006
1007    if (image->NumSamples != mt->num_samples)
1008       return false;
1009
1010    return true;
1011 }
1012
1013
1014 void
1015 intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
1016                              GLuint level,
1017                              GLuint x, GLuint y, GLuint d)
1018 {
1019    mt->level[level].depth = d;
1020    mt->level[level].level_x = x;
1021    mt->level[level].level_y = y;
1022
1023    DBG("%s level %d, depth %d, offset %d,%d\n", __func__,
1024        level, d, x, y);
1025
1026    assert(mt->level[level].slice == NULL);
1027
1028    mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice));
1029    mt->level[level].slice[0].x_offset = mt->level[level].level_x;
1030    mt->level[level].slice[0].y_offset = mt->level[level].level_y;
1031 }
1032
1033
1034 void
1035 intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
1036                                GLuint level, GLuint img,
1037                                GLuint x, GLuint y)
1038 {
1039    if (img == 0 && level == 0)
1040       assert(x == 0 && y == 0);
1041
1042    assert(img < mt->level[level].depth);
1043
1044    mt->level[level].slice[img].x_offset = mt->level[level].level_x + x;
1045    mt->level[level].slice[img].y_offset = mt->level[level].level_y + y;
1046
1047    DBG("%s level %d img %d pos %d,%d\n",
1048        __func__, level, img,
1049        mt->level[level].slice[img].x_offset,
1050        mt->level[level].slice[img].y_offset);
1051 }
1052
1053 void
1054 intel_miptree_get_image_offset(const struct intel_mipmap_tree *mt,
1055                                GLuint level, GLuint slice,
1056                                GLuint *x, GLuint *y)
1057 {
1058    assert(slice < mt->level[level].depth);
1059
1060    *x = mt->level[level].slice[slice].x_offset;
1061    *y = mt->level[level].slice[slice].y_offset;
1062 }
1063
1064
1065 /**
1066  * This function computes the tile_w (in bytes) and tile_h (in rows) of
1067  * different tiling patterns. If the BO is untiled, tile_w is set to cpp
1068  * and tile_h is set to 1.
1069  */
1070 void
1071 intel_get_tile_dims(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
1072                     uint32_t *tile_w, uint32_t *tile_h)
1073 {
1074    if (tr_mode == INTEL_MIPTREE_TRMODE_NONE) {
1075       switch (tiling) {
1076       case I915_TILING_X:
1077          *tile_w = 512;
1078          *tile_h = 8;
1079          break;
1080       case I915_TILING_Y:
1081          *tile_w = 128;
1082          *tile_h = 32;
1083          break;
1084       case I915_TILING_NONE:
1085          *tile_w = cpp;
1086          *tile_h = 1;
1087          break;
1088       default:
1089          unreachable("not reached");
1090       }
1091    } else {
1092       uint32_t aspect_ratio = 1;
1093       assert(_mesa_is_pow_two(cpp));
1094
1095       switch (cpp) {
1096       case 1:
1097          *tile_h = 64;
1098          break;
1099       case 2:
1100       case 4:
1101          *tile_h = 32;
1102          break;
1103       case 8:
1104       case 16:
1105          *tile_h = 16;
1106          break;
1107       default:
1108          unreachable("not reached");
1109       }
1110
1111       if (cpp == 2 || cpp == 8)
1112          aspect_ratio = 2;
1113
1114       if (tr_mode == INTEL_MIPTREE_TRMODE_YS)
1115          *tile_h *= 4;
1116
1117       *tile_w = *tile_h * aspect_ratio * cpp;
1118    }
1119 }
1120
1121
1122 /**
1123  * This function computes masks that may be used to select the bits of the X
1124  * and Y coordinates that indicate the offset within a tile.  If the BO is
1125  * untiled, the masks are set to 0.
1126  */
1127 void
1128 intel_get_tile_masks(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
1129                      bool map_stencil_as_y_tiled,
1130                      uint32_t *mask_x, uint32_t *mask_y)
1131 {
1132    uint32_t tile_w_bytes, tile_h;
1133    if (map_stencil_as_y_tiled)
1134       tiling = I915_TILING_Y;
1135
1136    intel_get_tile_dims(tiling, tr_mode, cpp, &tile_w_bytes, &tile_h);
1137
1138    *mask_x = tile_w_bytes / cpp - 1;
1139    *mask_y = tile_h - 1;
1140 }
1141
1142 /**
1143  * Compute the offset (in bytes) from the start of the BO to the given x
1144  * and y coordinate.  For tiled BOs, caller must ensure that x and y are
1145  * multiples of the tile size.
1146  */
1147 uint32_t
1148 intel_miptree_get_aligned_offset(const struct intel_mipmap_tree *mt,
1149                                  uint32_t x, uint32_t y,
1150                                  bool map_stencil_as_y_tiled)
1151 {
1152    int cpp = mt->cpp;
1153    uint32_t pitch = mt->pitch;
1154    uint32_t tiling = mt->tiling;
1155
1156    if (map_stencil_as_y_tiled) {
1157       tiling = I915_TILING_Y;
1158
1159       /* When mapping a W-tiled stencil buffer as Y-tiled, each 64-high W-tile
1160        * gets transformed into a 32-high Y-tile.  Accordingly, the pitch of
1161        * the resulting surface is twice the pitch of the original miptree,
1162        * since each row in the Y-tiled view corresponds to two rows in the
1163        * actual W-tiled surface.  So we need to correct the pitch before
1164        * computing the offsets.
1165        */
1166       pitch *= 2;
1167    }
1168
1169    switch (tiling) {
1170    default:
1171       unreachable("not reached");
1172    case I915_TILING_NONE:
1173       return y * pitch + x * cpp;
1174    case I915_TILING_X:
1175       assert((x % (512 / cpp)) == 0);
1176       assert((y % 8) == 0);
1177       return y * pitch + x / (512 / cpp) * 4096;
1178    case I915_TILING_Y:
1179       assert((x % (128 / cpp)) == 0);
1180       assert((y % 32) == 0);
1181       return y * pitch + x / (128 / cpp) * 4096;
1182    }
1183 }
1184
1185 /**
1186  * Rendering with tiled buffers requires that the base address of the buffer
1187  * be aligned to a page boundary.  For renderbuffers, and sometimes with
1188  * textures, we may want the surface to point at a texture image level that
1189  * isn't at a page boundary.
1190  *
1191  * This function returns an appropriately-aligned base offset
1192  * according to the tiling restrictions, plus any required x/y offset
1193  * from there.
1194  */
1195 uint32_t
1196 intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt,
1197                                GLuint level, GLuint slice,
1198                                uint32_t *tile_x,
1199                                uint32_t *tile_y)
1200 {
1201    uint32_t x, y;
1202    uint32_t mask_x, mask_y;
1203
1204    intel_get_tile_masks(mt->tiling, mt->tr_mode, mt->cpp, false, &mask_x, &mask_y);
1205    intel_miptree_get_image_offset(mt, level, slice, &x, &y);
1206
1207    *tile_x = x & mask_x;
1208    *tile_y = y & mask_y;
1209
1210    return intel_miptree_get_aligned_offset(mt, x & ~mask_x, y & ~mask_y, false);
1211 }
1212
1213 static void
1214 intel_miptree_copy_slice_sw(struct brw_context *brw,
1215                             struct intel_mipmap_tree *dst_mt,
1216                             struct intel_mipmap_tree *src_mt,
1217                             int level,
1218                             int slice,
1219                             int width,
1220                             int height)
1221 {
1222    void *src, *dst;
1223    ptrdiff_t src_stride, dst_stride;
1224    int cpp = dst_mt->cpp;
1225
1226    intel_miptree_map(brw, src_mt,
1227                      level, slice,
1228                      0, 0,
1229                      width, height,
1230                      GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
1231                      &src, &src_stride);
1232
1233    intel_miptree_map(brw, dst_mt,
1234                      level, slice,
1235                      0, 0,
1236                      width, height,
1237                      GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
1238                      BRW_MAP_DIRECT_BIT,
1239                      &dst, &dst_stride);
1240
1241    DBG("sw blit %s mt %p %p/%"PRIdPTR" -> %s mt %p %p/%"PRIdPTR" (%dx%d)\n",
1242        _mesa_get_format_name(src_mt->format),
1243        src_mt, src, src_stride,
1244        _mesa_get_format_name(dst_mt->format),
1245        dst_mt, dst, dst_stride,
1246        width, height);
1247
1248    int row_size = cpp * width;
1249    if (src_stride == row_size &&
1250        dst_stride == row_size) {
1251       memcpy(dst, src, row_size * height);
1252    } else {
1253       for (int i = 0; i < height; i++) {
1254          memcpy(dst, src, row_size);
1255          dst += dst_stride;
1256          src += src_stride;
1257       }
1258    }
1259
1260    intel_miptree_unmap(brw, dst_mt, level, slice);
1261    intel_miptree_unmap(brw, src_mt, level, slice);
1262
1263    /* Don't forget to copy the stencil data over, too.  We could have skipped
1264     * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
1265     * shuffling the two data sources in/out of temporary storage instead of
1266     * the direct mapping we get this way.
1267     */
1268    if (dst_mt->stencil_mt) {
1269       assert(src_mt->stencil_mt);
1270       intel_miptree_copy_slice_sw(brw, dst_mt->stencil_mt, src_mt->stencil_mt,
1271                                   level, slice, width, height);
1272    }
1273 }
1274
1275 static void
1276 intel_miptree_copy_slice(struct brw_context *brw,
1277                          struct intel_mipmap_tree *dst_mt,
1278                          struct intel_mipmap_tree *src_mt,
1279                          int level,
1280                          int face,
1281                          int depth)
1282
1283 {
1284    mesa_format format = src_mt->format;
1285    uint32_t width = minify(src_mt->physical_width0, level - src_mt->first_level);
1286    uint32_t height = minify(src_mt->physical_height0, level - src_mt->first_level);
1287    int slice;
1288
1289    if (face > 0)
1290       slice = face;
1291    else
1292       slice = depth;
1293
1294    assert(depth < src_mt->level[level].depth);
1295    assert(src_mt->format == dst_mt->format);
1296
1297    if (dst_mt->compressed) {
1298       unsigned int i, j;
1299       _mesa_get_format_block_size(dst_mt->format, &i, &j);
1300       height = ALIGN_NPOT(height, j) / j;
1301       width = ALIGN_NPOT(width, i) / i;
1302    }
1303
1304    /* If it's a packed depth/stencil buffer with separate stencil, the blit
1305     * below won't apply since we can't do the depth's Y tiling or the
1306     * stencil's W tiling in the blitter.
1307     */
1308    if (src_mt->stencil_mt) {
1309       intel_miptree_copy_slice_sw(brw,
1310                                   dst_mt, src_mt,
1311                                   level, slice,
1312                                   width, height);
1313       return;
1314    }
1315
1316    uint32_t dst_x, dst_y, src_x, src_y;
1317    intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y);
1318    intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y);
1319
1320    DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
1321        _mesa_get_format_name(src_mt->format),
1322        src_mt, src_x, src_y, src_mt->pitch,
1323        _mesa_get_format_name(dst_mt->format),
1324        dst_mt, dst_x, dst_y, dst_mt->pitch,
1325        width, height);
1326
1327    if (!intel_miptree_blit(brw,
1328                            src_mt, level, slice, 0, 0, false,
1329                            dst_mt, level, slice, 0, 0, false,
1330                            width, height, GL_COPY)) {
1331       perf_debug("miptree validate blit for %s failed\n",
1332                  _mesa_get_format_name(format));
1333
1334       intel_miptree_copy_slice_sw(brw, dst_mt, src_mt, level, slice,
1335                                   width, height);
1336    }
1337 }
1338
1339 /**
1340  * Copies the image's current data to the given miptree, and associates that
1341  * miptree with the image.
1342  *
1343  * If \c invalidate is true, then the actual image data does not need to be
1344  * copied, but the image still needs to be associated to the new miptree (this
1345  * is set to true if we're about to clear the image).
1346  */
1347 void
1348 intel_miptree_copy_teximage(struct brw_context *brw,
1349                             struct intel_texture_image *intelImage,
1350                             struct intel_mipmap_tree *dst_mt,
1351                             bool invalidate)
1352 {
1353    struct intel_mipmap_tree *src_mt = intelImage->mt;
1354    struct intel_texture_object *intel_obj =
1355       intel_texture_object(intelImage->base.Base.TexObject);
1356    int level = intelImage->base.Base.Level;
1357    int face = intelImage->base.Base.Face;
1358
1359    GLuint depth;
1360    if (intel_obj->base.Target == GL_TEXTURE_1D_ARRAY)
1361       depth = intelImage->base.Base.Height;
1362    else
1363       depth = intelImage->base.Base.Depth;
1364
1365    if (!invalidate) {
1366       for (int slice = 0; slice < depth; slice++) {
1367          intel_miptree_copy_slice(brw, dst_mt, src_mt, level, face, slice);
1368       }
1369    }
1370
1371    intel_miptree_reference(&intelImage->mt, dst_mt);
1372    intel_obj->needs_validate = true;
1373 }
1374
1375 static bool
1376 intel_miptree_alloc_mcs(struct brw_context *brw,
1377                         struct intel_mipmap_tree *mt,
1378                         GLuint num_samples)
1379 {
1380    assert(brw->gen >= 7); /* MCS only used on Gen7+ */
1381    assert(mt->mcs_mt == NULL);
1382    assert(!mt->disable_aux_buffers);
1383
1384    /* Choose the correct format for the MCS buffer.  All that really matters
1385     * is that we allocate the right buffer size, since we'll always be
1386     * accessing this miptree using MCS-specific hardware mechanisms, which
1387     * infer the correct format based on num_samples.
1388     */
1389    mesa_format format;
1390    switch (num_samples) {
1391    case 2:
1392    case 4:
1393       /* 8 bits/pixel are required for MCS data when using 4x MSAA (2 bits for
1394        * each sample).
1395        */
1396       format = MESA_FORMAT_R_UNORM8;
1397       break;
1398    case 8:
1399       /* 32 bits/pixel are required for MCS data when using 8x MSAA (3 bits
1400        * for each sample, plus 8 padding bits).
1401        */
1402       format = MESA_FORMAT_R_UINT32;
1403       break;
1404    default:
1405       unreachable("Unrecognized sample count in intel_miptree_alloc_mcs");
1406    };
1407
1408    /* From the Ivy Bridge PRM, Vol4 Part1 p76, "MCS Base Address":
1409     *
1410     *     "The MCS surface must be stored as Tile Y."
1411     */
1412    const uint32_t mcs_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
1413                               MIPTREE_LAYOUT_TILING_Y;
1414    mt->mcs_mt = intel_miptree_create(brw,
1415                                      mt->target,
1416                                      format,
1417                                      mt->first_level,
1418                                      mt->last_level,
1419                                      mt->logical_width0,
1420                                      mt->logical_height0,
1421                                      mt->logical_depth0,
1422                                      0 /* num_samples */,
1423                                      mcs_flags);
1424
1425    /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
1426     *
1427     *     When MCS buffer is enabled and bound to MSRT, it is required that it
1428     *     is cleared prior to any rendering.
1429     *
1430     * Since we don't use the MCS buffer for any purpose other than rendering,
1431     * it makes sense to just clear it immediately upon allocation.
1432     *
1433     * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
1434     */
1435    void *data = intel_miptree_map_raw(brw, mt->mcs_mt);
1436    memset(data, 0xff, mt->mcs_mt->total_height * mt->mcs_mt->pitch);
1437    intel_miptree_unmap_raw(mt->mcs_mt);
1438    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
1439
1440    return mt->mcs_mt;
1441 }
1442
1443
1444 bool
1445 intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
1446                                  struct intel_mipmap_tree *mt)
1447 {
1448    assert(mt->mcs_mt == NULL);
1449    assert(!mt->disable_aux_buffers);
1450
1451    /* The format of the MCS buffer is opaque to the driver; all that matters
1452     * is that we get its size and pitch right.  We'll pretend that the format
1453     * is R32.  Since an MCS tile covers 128 blocks horizontally, and a Y-tiled
1454     * R32 buffer is 32 pixels across, we'll need to scale the width down by
1455     * the block width and then a further factor of 4.  Since an MCS tile
1456     * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high,
1457     * we'll need to scale the height down by the block height and then a
1458     * further factor of 8.
1459     */
1460    const mesa_format format = MESA_FORMAT_R_UINT32;
1461    unsigned block_width_px;
1462    unsigned block_height;
1463    intel_get_non_msrt_mcs_alignment(mt, &block_width_px, &block_height);
1464    unsigned width_divisor = block_width_px * 4;
1465    unsigned height_divisor = block_height * 8;
1466    unsigned mcs_width =
1467       ALIGN(mt->logical_width0, width_divisor) / width_divisor;
1468    unsigned mcs_height =
1469       ALIGN(mt->logical_height0, height_divisor) / height_divisor;
1470    assert(mt->logical_depth0 == 1);
1471    uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
1472                            MIPTREE_LAYOUT_TILING_Y;
1473    if (brw->gen >= 8) {
1474       layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
1475    }
1476    mt->mcs_mt = intel_miptree_create(brw,
1477                                      mt->target,
1478                                      format,
1479                                      mt->first_level,
1480                                      mt->last_level,
1481                                      mcs_width,
1482                                      mcs_height,
1483                                      mt->logical_depth0,
1484                                      0 /* num_samples */,
1485                                      layout_flags);
1486
1487    return mt->mcs_mt;
1488 }
1489
1490
1491 /**
1492  * Helper for intel_miptree_alloc_hiz() that sets
1493  * \c mt->level[level].has_hiz. Return true if and only if
1494  * \c has_hiz was set.
1495  */
1496 static bool
1497 intel_miptree_level_enable_hiz(struct brw_context *brw,
1498                                struct intel_mipmap_tree *mt,
1499                                uint32_t level)
1500 {
1501    assert(mt->hiz_buf);
1502
1503    if (brw->gen >= 8 || brw->is_haswell) {
1504       uint32_t width = minify(mt->physical_width0, level);
1505       uint32_t height = minify(mt->physical_height0, level);
1506
1507       /* Disable HiZ for LOD > 0 unless the width is 8 aligned
1508        * and the height is 4 aligned. This allows our HiZ support
1509        * to fulfill Haswell restrictions for HiZ ops. For LOD == 0,
1510        * we can grow the width & height to allow the HiZ op to
1511        * force the proper size alignments.
1512        */
1513       if (level > 0 && ((width & 7) || (height & 3))) {
1514          DBG("mt %p level %d: HiZ DISABLED\n", mt, level);
1515          return false;
1516       }
1517    }
1518
1519    DBG("mt %p level %d: HiZ enabled\n", mt, level);
1520    mt->level[level].has_hiz = true;
1521    return true;
1522 }
1523
1524
1525 /**
1526  * Helper for intel_miptree_alloc_hiz() that determines the required hiz
1527  * buffer dimensions and allocates a bo for the hiz buffer.
1528  */
1529 static struct intel_miptree_aux_buffer *
1530 intel_gen7_hiz_buf_create(struct brw_context *brw,
1531                           struct intel_mipmap_tree *mt)
1532 {
1533    unsigned z_width = mt->logical_width0;
1534    unsigned z_height = mt->logical_height0;
1535    const unsigned z_depth = MAX2(mt->logical_depth0, 1);
1536    unsigned hz_width, hz_height;
1537    struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
1538
1539    if (!buf)
1540       return NULL;
1541
1542    /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
1543     * adjustments required for Z_Height and Z_Width based on multisampling.
1544     */
1545    switch (mt->num_samples) {
1546    case 0:
1547    case 1:
1548       break;
1549    case 2:
1550    case 4:
1551       z_width *= 2;
1552       z_height *= 2;
1553       break;
1554    case 8:
1555       z_width *= 4;
1556       z_height *= 2;
1557       break;
1558    default:
1559       unreachable("unsupported sample count");
1560    }
1561
1562    const unsigned vertical_align = 8; /* 'j' in the docs */
1563    const unsigned H0 = z_height;
1564    const unsigned h0 = ALIGN(H0, vertical_align);
1565    const unsigned h1 = ALIGN(minify(H0, 1), vertical_align);
1566    const unsigned Z0 = z_depth;
1567
1568    /* HZ_Width (bytes) = ceiling(Z_Width / 16) * 16 */
1569    hz_width = ALIGN(z_width, 16);
1570
1571    if (mt->target == GL_TEXTURE_3D) {
1572       unsigned H_i = H0;
1573       unsigned Z_i = Z0;
1574       hz_height = 0;
1575       for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
1576          unsigned h_i = ALIGN(H_i, vertical_align);
1577          /* sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
1578          hz_height += h_i * Z_i;
1579          H_i = minify(H_i, 1);
1580          Z_i = minify(Z_i, 1);
1581       }
1582       /* HZ_Height =
1583        *    (1/2) * sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i)))
1584        */
1585       hz_height = DIV_ROUND_UP(hz_height, 2);
1586    } else {
1587       const unsigned hz_qpitch = h0 + h1 + (12 * vertical_align);
1588       if (mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
1589           mt->target == GL_TEXTURE_CUBE_MAP) {
1590          /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth * 6/2) /8 ) * 8 */
1591          hz_height = DIV_ROUND_UP(hz_qpitch * Z0 * 6, 2 * 8) * 8;
1592       } else {
1593          /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth/2) /8 ) * 8 */
1594          hz_height = DIV_ROUND_UP(hz_qpitch * Z0, 2 * 8) * 8;
1595       }
1596    }
1597
1598    unsigned long pitch;
1599    uint32_t tiling = I915_TILING_Y;
1600    buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "hiz",
1601                                       hz_width, hz_height, 1,
1602                                       &tiling, &pitch,
1603                                       BO_ALLOC_FOR_RENDER);
1604    if (!buf->bo) {
1605       free(buf);
1606       return NULL;
1607    } else if (tiling != I915_TILING_Y) {
1608       drm_intel_bo_unreference(buf->bo);
1609       free(buf);
1610       return NULL;
1611    }
1612
1613    buf->pitch = pitch;
1614
1615    return buf;
1616 }
1617
1618
1619 /**
1620  * Helper for intel_miptree_alloc_hiz() that determines the required hiz
1621  * buffer dimensions and allocates a bo for the hiz buffer.
1622  */
1623 static struct intel_miptree_aux_buffer *
1624 intel_gen8_hiz_buf_create(struct brw_context *brw,
1625                           struct intel_mipmap_tree *mt)
1626 {
1627    unsigned z_width = mt->logical_width0;
1628    unsigned z_height = mt->logical_height0;
1629    const unsigned z_depth = MAX2(mt->logical_depth0, 1);
1630    unsigned hz_width, hz_height;
1631    struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
1632
1633    if (!buf)
1634       return NULL;
1635
1636    /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
1637     * adjustments required for Z_Height and Z_Width based on multisampling.
1638     */
1639    if (brw->gen < 9) {
1640       switch (mt->num_samples) {
1641       case 0:
1642       case 1:
1643          break;
1644       case 2:
1645       case 4:
1646          z_width *= 2;
1647          z_height *= 2;
1648          break;
1649       case 8:
1650          z_width *= 4;
1651          z_height *= 2;
1652          break;
1653       default:
1654          unreachable("unsupported sample count");
1655       }
1656    }
1657
1658    const unsigned vertical_align = 8; /* 'j' in the docs */
1659    const unsigned H0 = z_height;
1660    const unsigned h0 = ALIGN(H0, vertical_align);
1661    const unsigned h1 = ALIGN(minify(H0, 1), vertical_align);
1662    const unsigned Z0 = z_depth;
1663
1664    /* HZ_Width (bytes) = ceiling(Z_Width / 16) * 16 */
1665    hz_width = ALIGN(z_width, 16);
1666
1667    unsigned H_i = H0;
1668    unsigned Z_i = Z0;
1669    unsigned sum_h_i = 0;
1670    unsigned hz_height_3d_sum = 0;
1671    for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
1672       unsigned i = level - mt->first_level;
1673       unsigned h_i = ALIGN(H_i, vertical_align);
1674       /* sum(i=2 to m; h_i) */
1675       if (i >= 2) {
1676          sum_h_i += h_i;
1677       }
1678       /* sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
1679       hz_height_3d_sum += h_i * Z_i;
1680       H_i = minify(H_i, 1);
1681       Z_i = minify(Z_i, 1);
1682    }
1683    /* HZ_QPitch = h0 + max(h1, sum(i=2 to m; h_i)) */
1684    buf->qpitch = h0 + MAX2(h1, sum_h_i);
1685
1686    if (mt->target == GL_TEXTURE_3D) {
1687       /* (1/2) * sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
1688       hz_height = DIV_ROUND_UP(hz_height_3d_sum, 2);
1689    } else {
1690       /* HZ_Height (rows) = ceiling( (HZ_QPitch/2)/8) *8 * Z_Depth */
1691       hz_height = DIV_ROUND_UP(buf->qpitch, 2 * 8) * 8 * Z0;
1692       if (mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
1693           mt->target == GL_TEXTURE_CUBE_MAP) {
1694          /* HZ_Height (rows) = ceiling( (HZ_QPitch/2)/8) *8 * 6 * Z_Depth
1695           *
1696           * We can can just take our hz_height calculation from above, and
1697           * multiply by 6 for the cube map and cube map array types.
1698           */
1699          hz_height *= 6;
1700       }
1701    }
1702
1703    unsigned long pitch;
1704    uint32_t tiling = I915_TILING_Y;
1705    buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "hiz",
1706                                       hz_width, hz_height, 1,
1707                                       &tiling, &pitch,
1708                                       BO_ALLOC_FOR_RENDER);
1709    if (!buf->bo) {
1710       free(buf);
1711       return NULL;
1712    } else if (tiling != I915_TILING_Y) {
1713       drm_intel_bo_unreference(buf->bo);
1714       free(buf);
1715       return NULL;
1716    }
1717
1718    buf->pitch = pitch;
1719
1720    return buf;
1721 }
1722
1723
1724 static struct intel_miptree_aux_buffer *
1725 intel_hiz_miptree_buf_create(struct brw_context *brw,
1726                              struct intel_mipmap_tree *mt)
1727 {
1728    struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
1729    uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
1730
1731    if (brw->gen == 6)
1732       layout_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD;
1733
1734    if (!buf)
1735       return NULL;
1736
1737    layout_flags |= MIPTREE_LAYOUT_TILING_ANY;
1738    buf->mt = intel_miptree_create(brw,
1739                                   mt->target,
1740                                   mt->format,
1741                                   mt->first_level,
1742                                   mt->last_level,
1743                                   mt->logical_width0,
1744                                   mt->logical_height0,
1745                                   mt->logical_depth0,
1746                                   mt->num_samples,
1747                                   layout_flags);
1748    if (!buf->mt) {
1749       free(buf);
1750       return NULL;
1751    }
1752
1753    buf->bo = buf->mt->bo;
1754    buf->pitch = buf->mt->pitch;
1755    buf->qpitch = buf->mt->qpitch;
1756
1757    return buf;
1758 }
1759
1760 bool
1761 intel_miptree_wants_hiz_buffer(struct brw_context *brw,
1762                                struct intel_mipmap_tree *mt)
1763 {
1764    if (!brw->has_hiz)
1765       return false;
1766
1767    if (mt->hiz_buf != NULL)
1768       return false;
1769
1770    if (mt->disable_aux_buffers)
1771       return false;
1772
1773    switch (mt->format) {
1774    case MESA_FORMAT_Z_FLOAT32:
1775    case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
1776    case MESA_FORMAT_Z24_UNORM_X8_UINT:
1777    case MESA_FORMAT_Z24_UNORM_S8_UINT:
1778    case MESA_FORMAT_Z_UNORM16:
1779       return true;
1780    default:
1781       return false;
1782    }
1783 }
1784
1785 bool
1786 intel_miptree_alloc_hiz(struct brw_context *brw,
1787                         struct intel_mipmap_tree *mt)
1788 {
1789    assert(mt->hiz_buf == NULL);
1790    assert(!mt->disable_aux_buffers);
1791
1792    if (brw->gen == 7) {
1793       mt->hiz_buf = intel_gen7_hiz_buf_create(brw, mt);
1794    } else if (brw->gen >= 8) {
1795       mt->hiz_buf = intel_gen8_hiz_buf_create(brw, mt);
1796    } else {
1797       mt->hiz_buf = intel_hiz_miptree_buf_create(brw, mt);
1798    }
1799
1800    if (!mt->hiz_buf)
1801       return false;
1802
1803    /* Mark that all slices need a HiZ resolve. */
1804    for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
1805       if (!intel_miptree_level_enable_hiz(brw, mt, level))
1806          continue;
1807
1808       for (unsigned layer = 0; layer < mt->level[level].depth; ++layer) {
1809          struct intel_resolve_map *m = malloc(sizeof(struct intel_resolve_map));
1810          exec_node_init(&m->link);
1811          m->level = level;
1812          m->layer = layer;
1813          m->need = GEN6_HIZ_OP_HIZ_RESOLVE;
1814
1815          exec_list_push_tail(&mt->hiz_map, &m->link);
1816       }
1817    }
1818
1819    return true;
1820 }
1821
1822 /**
1823  * Does the miptree slice have hiz enabled?
1824  */
1825 bool
1826 intel_miptree_level_has_hiz(struct intel_mipmap_tree *mt, uint32_t level)
1827 {
1828    intel_miptree_check_level_layer(mt, level, 0);
1829    return mt->level[level].has_hiz;
1830 }
1831
1832 void
1833 intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,
1834                                           uint32_t level,
1835                                           uint32_t layer)
1836 {
1837    if (!intel_miptree_level_has_hiz(mt, level))
1838       return;
1839
1840    intel_resolve_map_set(&mt->hiz_map,
1841                          level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
1842 }
1843
1844
1845 void
1846 intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt,
1847                                             uint32_t level,
1848                                             uint32_t layer)
1849 {
1850    if (!intel_miptree_level_has_hiz(mt, level))
1851       return;
1852
1853    intel_resolve_map_set(&mt->hiz_map,
1854                          level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
1855 }
1856
1857 void
1858 intel_miptree_set_all_slices_need_depth_resolve(struct intel_mipmap_tree *mt,
1859                                                 uint32_t level)
1860 {
1861    uint32_t layer;
1862    uint32_t end_layer = mt->level[level].depth;
1863
1864    for (layer = 0; layer < end_layer; layer++) {
1865       intel_miptree_slice_set_needs_depth_resolve(mt, level, layer);
1866    }
1867 }
1868
1869 static bool
1870 intel_miptree_slice_resolve(struct brw_context *brw,
1871                             struct intel_mipmap_tree *mt,
1872                             uint32_t level,
1873                             uint32_t layer,
1874                             enum gen6_hiz_op need)
1875 {
1876    intel_miptree_check_level_layer(mt, level, layer);
1877
1878    struct intel_resolve_map *item =
1879          intel_resolve_map_get(&mt->hiz_map, level, layer);
1880
1881    if (!item || item->need != need)
1882       return false;
1883
1884    intel_hiz_exec(brw, mt, level, layer, need);
1885    intel_resolve_map_remove(item);
1886    return true;
1887 }
1888
1889 bool
1890 intel_miptree_slice_resolve_hiz(struct brw_context *brw,
1891                                 struct intel_mipmap_tree *mt,
1892                                 uint32_t level,
1893                                 uint32_t layer)
1894 {
1895    return intel_miptree_slice_resolve(brw, mt, level, layer,
1896                                       GEN6_HIZ_OP_HIZ_RESOLVE);
1897 }
1898
1899 bool
1900 intel_miptree_slice_resolve_depth(struct brw_context *brw,
1901                                   struct intel_mipmap_tree *mt,
1902                                   uint32_t level,
1903                                   uint32_t layer)
1904 {
1905    return intel_miptree_slice_resolve(brw, mt, level, layer,
1906                                       GEN6_HIZ_OP_DEPTH_RESOLVE);
1907 }
1908
1909 static bool
1910 intel_miptree_all_slices_resolve(struct brw_context *brw,
1911                                  struct intel_mipmap_tree *mt,
1912                                  enum gen6_hiz_op need)
1913 {
1914    bool did_resolve = false;
1915
1916    foreach_list_typed_safe(struct intel_resolve_map, map, link, &mt->hiz_map) {
1917       if (map->need != need)
1918          continue;
1919
1920       intel_hiz_exec(brw, mt, map->level, map->layer, need);
1921       intel_resolve_map_remove(map);
1922       did_resolve = true;
1923    }
1924
1925    return did_resolve;
1926 }
1927
1928 bool
1929 intel_miptree_all_slices_resolve_hiz(struct brw_context *brw,
1930                                      struct intel_mipmap_tree *mt)
1931 {
1932    return intel_miptree_all_slices_resolve(brw, mt,
1933                                            GEN6_HIZ_OP_HIZ_RESOLVE);
1934 }
1935
1936 bool
1937 intel_miptree_all_slices_resolve_depth(struct brw_context *brw,
1938                                        struct intel_mipmap_tree *mt)
1939 {
1940    return intel_miptree_all_slices_resolve(brw, mt,
1941                                            GEN6_HIZ_OP_DEPTH_RESOLVE);
1942 }
1943
1944
1945 void
1946 intel_miptree_resolve_color(struct brw_context *brw,
1947                             struct intel_mipmap_tree *mt)
1948 {
1949    switch (mt->fast_clear_state) {
1950    case INTEL_FAST_CLEAR_STATE_NO_MCS:
1951    case INTEL_FAST_CLEAR_STATE_RESOLVED:
1952       /* No resolve needed */
1953       break;
1954    case INTEL_FAST_CLEAR_STATE_UNRESOLVED:
1955    case INTEL_FAST_CLEAR_STATE_CLEAR:
1956       /* Fast color clear resolves only make sense for non-MSAA buffers. */
1957       if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE)
1958          brw_meta_resolve_color(brw, mt);
1959       break;
1960    }
1961 }
1962
1963
1964 /**
1965  * Make it possible to share the BO backing the given miptree with another
1966  * process or another miptree.
1967  *
1968  * Fast color clears are unsafe with shared buffers, so we need to resolve and
1969  * then discard the MCS buffer, if present.  We also set the fast_clear_state
1970  * to INTEL_FAST_CLEAR_STATE_NO_MCS to ensure that no MCS buffer gets
1971  * allocated in the future.
1972  */
1973 void
1974 intel_miptree_make_shareable(struct brw_context *brw,
1975                              struct intel_mipmap_tree *mt)
1976 {
1977    /* MCS buffers are also used for multisample buffers, but we can't resolve
1978     * away a multisample MCS buffer because it's an integral part of how the
1979     * pixel data is stored.  Fortunately this code path should never be
1980     * reached for multisample buffers.
1981     */
1982    assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE);
1983
1984    if (mt->mcs_mt) {
1985       intel_miptree_resolve_color(brw, mt);
1986       intel_miptree_release(&mt->mcs_mt);
1987       mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
1988    }
1989 }
1990
1991
/**
 * \brief Get pointer offset into stencil buffer.
 *
 * The stencil buffer is W tiled. Since the GTT is incapable of W fencing,
 * the tile layout has to be decoded in software.
 *
 * See
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
 *     Format.
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
 *
 * \param stride    value the 64-row tile-row size is derived from
 *                  (64 * stride bytes per row of tiles)
 * \param x, y      byte coordinates inside the surface
 * \param swizzled  whether to apply the bit-6 swizzle adjustment
 *
 * Even though the returned offset is always positive, the return type is
 * signed due to
 *    commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
 *    mesa: Fix return type of _mesa_get_format_bytes() (#37351)
 */
static intptr_t
intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
{
   const uint32_t tile_dim = 64;       /* a W tile is 64 x 64 bytes... */
   const uint32_t tile_bytes = 4096;   /* ...which is 4096 bytes in total */
   const uint32_t tile_row_bytes = 64 * stride;

   /* Which tile the byte lives in. */
   const uint32_t tile_col = x / tile_dim;
   const uint32_t tile_row = y / tile_dim;

   /* The byte's coordinates relative to the tile's base address. */
   const uint32_t bx = x % tile_dim;
   const uint32_t by = y % tile_dim;

   /* Inside a tile the x and y bits are interleaved.  Each term occupies its
    * own bit range, so OR-ing them is the same as summing them.
    */
   const uint32_t within_tile = ((bx >> 3) << 9)
                              | ((by >> 3) << 6)
                              | (((by >> 2) & 1) << 5)
                              | (((bx >> 2) & 1) << 4)
                              | (((by >> 1) & 1) << 3)
                              | (((bx >> 1) & 1) << 2)
                              | ((by & 1) << 1)
                              | (bx & 1);

   uintptr_t offset = tile_row * tile_row_bytes
                    + tile_col * tile_bytes
                    + within_tile;

   /* adjust for bit6 swizzling */
   if (swizzled && ((bx >> 3) & 1)) {
      if ((by >> 3) & 1)
         offset -= 64;
      else
         offset += 64;
   }

   return offset;
}
2047
2048 void
2049 intel_miptree_updownsample(struct brw_context *brw,
2050                            struct intel_mipmap_tree *src,
2051                            struct intel_mipmap_tree *dst)
2052 {
2053    if (brw->gen < 8) {
2054       brw_blorp_blit_miptrees(brw,
2055                               src, 0 /* level */, 0 /* layer */, src->format,
2056                               dst, 0 /* level */, 0 /* layer */, dst->format,
2057                               0, 0,
2058                               src->logical_width0, src->logical_height0,
2059                               0, 0,
2060                               dst->logical_width0, dst->logical_height0,
2061                               GL_NEAREST, false, false /*mirror x, y*/);
2062    } else if (src->format == MESA_FORMAT_S_UINT8) {
2063       brw_meta_stencil_updownsample(brw, src, dst);
2064    } else {
2065       brw_meta_updownsample(brw, src, dst);
2066    }
2067
2068    if (src->stencil_mt) {
2069       if (brw->gen >= 8) {
2070          brw_meta_stencil_updownsample(brw, src->stencil_mt, dst);
2071          return;
2072       }
2073
2074       brw_blorp_blit_miptrees(brw,
2075                               src->stencil_mt, 0 /* level */, 0 /* layer */,
2076                               src->stencil_mt->format,
2077                               dst->stencil_mt, 0 /* level */, 0 /* layer */,
2078                               dst->stencil_mt->format,
2079                               0, 0,
2080                               src->logical_width0, src->logical_height0,
2081                               0, 0,
2082                               dst->logical_width0, dst->logical_height0,
2083                               GL_NEAREST, false, false /*mirror x, y*/);
2084    }
2085 }
2086
2087 void *
2088 intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt)
2089 {
2090    /* CPU accesses to color buffers don't understand fast color clears, so
2091     * resolve any pending fast color clears before we map.
2092     */
2093    intel_miptree_resolve_color(brw, mt);
2094
2095    drm_intel_bo *bo = mt->bo;
2096
2097    if (drm_intel_bo_references(brw->batch.bo, bo))
2098       intel_batchbuffer_flush(brw);
2099
2100    if (mt->tiling != I915_TILING_NONE)
2101       brw_bo_map_gtt(brw, bo, "miptree");
2102    else
2103       brw_bo_map(brw, bo, true, "miptree");
2104
2105    return bo->virtual;
2106 }
2107
2108 void
2109 intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
2110 {
2111    drm_intel_bo_unmap(mt->bo);
2112 }
2113
2114 static void
2115 intel_miptree_map_gtt(struct brw_context *brw,
2116                       struct intel_mipmap_tree *mt,
2117                       struct intel_miptree_map *map,
2118                       unsigned int level, unsigned int slice)
2119 {
2120    unsigned int bw, bh;
2121    void *base;
2122    unsigned int image_x, image_y;
2123    intptr_t x = map->x;
2124    intptr_t y = map->y;
2125
2126    /* For compressed formats, the stride is the number of bytes per
2127     * row of blocks.  intel_miptree_get_image_offset() already does
2128     * the divide.
2129     */
2130    _mesa_get_format_block_size(mt->format, &bw, &bh);
2131    assert(y % bh == 0);
2132    assert(x % bw == 0);
2133    y /= bh;
2134    x /= bw;
2135
2136    base = intel_miptree_map_raw(brw, mt) + mt->offset;
2137
2138    if (base == NULL)
2139       map->ptr = NULL;
2140    else {
2141       /* Note that in the case of cube maps, the caller must have passed the
2142        * slice number referencing the face.
2143       */
2144       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2145       x += image_x;
2146       y += image_y;
2147
2148       map->stride = mt->pitch;
2149       map->ptr = base + y * map->stride + x * mt->cpp;
2150    }
2151
2152    DBG("%s: %d,%d %dx%d from mt %p (%s) "
2153        "%"PRIiPTR",%"PRIiPTR" = %p/%d\n", __func__,
2154        map->x, map->y, map->w, map->h,
2155        mt, _mesa_get_format_name(mt->format),
2156        x, y, map->ptr, map->stride);
2157 }
2158
/**
 * Undo intel_miptree_map_gtt().  A direct mapping has no temporary to flush
 * or free, so dropping the raw mapping is all that is needed.
 */
static void
intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
{
   intel_miptree_unmap_raw(mt);
   return;
}
2164
2165 static void
2166 intel_miptree_map_blit(struct brw_context *brw,
2167                        struct intel_mipmap_tree *mt,
2168                        struct intel_miptree_map *map,
2169                        unsigned int level, unsigned int slice)
2170 {
2171    map->linear_mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
2172                                          /* first_level */ 0,
2173                                          /* last_level */ 0,
2174                                          map->w, map->h, 1,
2175                                          /* samples */ 0,
2176                                          MIPTREE_LAYOUT_TILING_NONE);
2177
2178    if (!map->linear_mt) {
2179       fprintf(stderr, "Failed to allocate blit temporary\n");
2180       goto fail;
2181    }
2182    map->stride = map->linear_mt->pitch;
2183
2184    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
2185     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
2186     * invalidate is set, since we'll be writing the whole rectangle from our
2187     * temporary buffer back out.
2188     */
2189    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
2190       if (!intel_miptree_blit(brw,
2191                               mt, level, slice,
2192                               map->x, map->y, false,
2193                               map->linear_mt, 0, 0,
2194                               0, 0, false,
2195                               map->w, map->h, GL_COPY)) {
2196          fprintf(stderr, "Failed to blit\n");
2197          goto fail;
2198       }
2199    }
2200
2201    map->ptr = intel_miptree_map_raw(brw, map->linear_mt);
2202
2203    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
2204        map->x, map->y, map->w, map->h,
2205        mt, _mesa_get_format_name(mt->format),
2206        level, slice, map->ptr, map->stride);
2207
2208    return;
2209
2210 fail:
2211    intel_miptree_release(&map->linear_mt);
2212    map->ptr = NULL;
2213    map->stride = 0;
2214 }
2215
2216 static void
2217 intel_miptree_unmap_blit(struct brw_context *brw,
2218                          struct intel_mipmap_tree *mt,
2219                          struct intel_miptree_map *map,
2220                          unsigned int level,
2221                          unsigned int slice)
2222 {
2223    struct gl_context *ctx = &brw->ctx;
2224
2225    intel_miptree_unmap_raw(map->linear_mt);
2226
2227    if (map->mode & GL_MAP_WRITE_BIT) {
2228       bool ok = intel_miptree_blit(brw,
2229                                    map->linear_mt, 0, 0,
2230                                    0, 0, false,
2231                                    mt, level, slice,
2232                                    map->x, map->y, false,
2233                                    map->w, map->h, GL_COPY);
2234       WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
2235    }
2236
2237    intel_miptree_release(&map->linear_mt);
2238 }
2239
2240 /**
2241  * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
2242  */
2243 #if defined(USE_SSE41)
2244 static void
2245 intel_miptree_map_movntdqa(struct brw_context *brw,
2246                            struct intel_mipmap_tree *mt,
2247                            struct intel_miptree_map *map,
2248                            unsigned int level, unsigned int slice)
2249 {
2250    assert(map->mode & GL_MAP_READ_BIT);
2251    assert(!(map->mode & GL_MAP_WRITE_BIT));
2252
2253    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
2254        map->x, map->y, map->w, map->h,
2255        mt, _mesa_get_format_name(mt->format),
2256        level, slice, map->ptr, map->stride);
2257
2258    /* Map the original image */
2259    uint32_t image_x;
2260    uint32_t image_y;
2261    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2262    image_x += map->x;
2263    image_y += map->y;
2264
2265    void *src = intel_miptree_map_raw(brw, mt);
2266    if (!src)
2267       return;
2268    src += image_y * mt->pitch;
2269    src += image_x * mt->cpp;
2270
2271    /* Due to the pixel offsets for the particular image being mapped, our
2272     * src pointer may not be 16-byte aligned.  However, if the pitch is
2273     * divisible by 16, then the amount by which it's misaligned will remain
2274     * consistent from row to row.
2275     */
2276    assert((mt->pitch % 16) == 0);
2277    const int misalignment = ((uintptr_t) src) & 15;
2278
2279    /* Create an untiled temporary buffer for the mapping. */
2280    const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w);
2281
2282    map->stride = ALIGN(misalignment + width_bytes, 16);
2283
2284    map->buffer = _mesa_align_malloc(map->stride * map->h, 16);
2285    /* Offset the destination so it has the same misalignment as src. */
2286    map->ptr = map->buffer + misalignment;
2287
2288    assert((((uintptr_t) map->ptr) & 15) == misalignment);
2289
2290    for (uint32_t y = 0; y < map->h; y++) {
2291       void *dst_ptr = map->ptr + y * map->stride;
2292       void *src_ptr = src + y * mt->pitch;
2293
2294       _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
2295    }
2296
2297    intel_miptree_unmap_raw(mt);
2298 }
2299
2300 static void
2301 intel_miptree_unmap_movntdqa(struct brw_context *brw,
2302                              struct intel_mipmap_tree *mt,
2303                              struct intel_miptree_map *map,
2304                              unsigned int level,
2305                              unsigned int slice)
2306 {
2307    _mesa_align_free(map->buffer);
2308    map->buffer = NULL;
2309    map->ptr = NULL;
2310 }
2311 #endif
2312
2313 static void
2314 intel_miptree_map_s8(struct brw_context *brw,
2315                      struct intel_mipmap_tree *mt,
2316                      struct intel_miptree_map *map,
2317                      unsigned int level, unsigned int slice)
2318 {
2319    map->stride = map->w;
2320    map->buffer = map->ptr = malloc(map->stride * map->h);
2321    if (!map->buffer)
2322       return;
2323
2324    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
2325     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
2326     * invalidate is set, since we'll be writing the whole rectangle from our
2327     * temporary buffer back out.
2328     */
2329    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
2330       uint8_t *untiled_s8_map = map->ptr;
2331       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
2332       unsigned int image_x, image_y;
2333
2334       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2335
2336       for (uint32_t y = 0; y < map->h; y++) {
2337          for (uint32_t x = 0; x < map->w; x++) {
2338             ptrdiff_t offset = intel_offset_S8(mt->pitch,
2339                                                x + image_x + map->x,
2340                                                y + image_y + map->y,
2341                                                brw->has_swizzling);
2342             untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
2343          }
2344       }
2345
2346       intel_miptree_unmap_raw(mt);
2347
2348       DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__,
2349           map->x, map->y, map->w, map->h,
2350           mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
2351    } else {
2352       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
2353           map->x, map->y, map->w, map->h,
2354           mt, map->ptr, map->stride);
2355    }
2356 }
2357
2358 static void
2359 intel_miptree_unmap_s8(struct brw_context *brw,
2360                        struct intel_mipmap_tree *mt,
2361                        struct intel_miptree_map *map,
2362                        unsigned int level,
2363                        unsigned int slice)
2364 {
2365    if (map->mode & GL_MAP_WRITE_BIT) {
2366       unsigned int image_x, image_y;
2367       uint8_t *untiled_s8_map = map->ptr;
2368       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
2369
2370       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2371
2372       for (uint32_t y = 0; y < map->h; y++) {
2373          for (uint32_t x = 0; x < map->w; x++) {
2374             ptrdiff_t offset = intel_offset_S8(mt->pitch,
2375                                                x + map->x,
2376                                                y + map->y,
2377                                                brw->has_swizzling);
2378             tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
2379          }
2380       }
2381
2382       intel_miptree_unmap_raw(mt);
2383    }
2384
2385    free(map->buffer);
2386 }
2387
2388 static void
2389 intel_miptree_map_etc(struct brw_context *brw,
2390                       struct intel_mipmap_tree *mt,
2391                       struct intel_miptree_map *map,
2392                       unsigned int level,
2393                       unsigned int slice)
2394 {
2395    assert(mt->etc_format != MESA_FORMAT_NONE);
2396    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
2397       assert(mt->format == MESA_FORMAT_R8G8B8X8_UNORM);
2398    }
2399
2400    assert(map->mode & GL_MAP_WRITE_BIT);
2401    assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
2402
2403    map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
2404    map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
2405                                                 map->w, map->h, 1));
2406    map->ptr = map->buffer;
2407 }
2408
2409 static void
2410 intel_miptree_unmap_etc(struct brw_context *brw,
2411                         struct intel_mipmap_tree *mt,
2412                         struct intel_miptree_map *map,
2413                         unsigned int level,
2414                         unsigned int slice)
2415 {
2416    uint32_t image_x;
2417    uint32_t image_y;
2418    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2419
2420    image_x += map->x;
2421    image_y += map->y;
2422
2423    uint8_t *dst = intel_miptree_map_raw(brw, mt)
2424                 + image_y * mt->pitch
2425                 + image_x * mt->cpp;
2426
2427    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
2428       _mesa_etc1_unpack_rgba8888(dst, mt->pitch,
2429                                  map->ptr, map->stride,
2430                                  map->w, map->h);
2431    else
2432       _mesa_unpack_etc2_format(dst, mt->pitch,
2433                                map->ptr, map->stride,
2434                                map->w, map->h, mt->etc_format);
2435
2436    intel_miptree_unmap_raw(mt);
2437    free(map->buffer);
2438 }
2439
2440 /**
2441  * Mapping function for packed depth/stencil miptrees backed by real separate
2442  * miptrees for depth and stencil.
2443  *
2444  * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
2445  * separate from the depth buffer.  Yet at the GL API level, we have to expose
2446  * packed depth/stencil textures and FBO attachments, and Mesa core expects to
2447  * be able to map that memory for texture storage and glReadPixels-type
2448  * operations.  We give Mesa core that access by mallocing a temporary and
2449  * copying the data between the actual backing store and the temporary.
2450  */
2451 static void
2452 intel_miptree_map_depthstencil(struct brw_context *brw,
2453                                struct intel_mipmap_tree *mt,
2454                                struct intel_miptree_map *map,
2455                                unsigned int level, unsigned int slice)
2456 {
2457    struct intel_mipmap_tree *z_mt = mt;
2458    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
2459    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
2460    int packed_bpp = map_z32f_x24s8 ? 8 : 4;
2461
2462    map->stride = map->w * packed_bpp;
2463    map->buffer = map->ptr = malloc(map->stride * map->h);
2464    if (!map->buffer)
2465       return;
2466
2467    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
2468     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
2469     * invalidate is set, since we'll be writing the whole rectangle from our
2470     * temporary buffer back out.
2471     */
2472    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
2473       uint32_t *packed_map = map->ptr;
2474       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
2475       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
2476       unsigned int s_image_x, s_image_y;
2477       unsigned int z_image_x, z_image_y;
2478
2479       intel_miptree_get_image_offset(s_mt, level, slice,
2480                                      &s_image_x, &s_image_y);
2481       intel_miptree_get_image_offset(z_mt, level, slice,
2482                                      &z_image_x, &z_image_y);
2483
2484       for (uint32_t y = 0; y < map->h; y++) {
2485          for (uint32_t x = 0; x < map->w; x++) {
2486             int map_x = map->x + x, map_y = map->y + y;
2487             ptrdiff_t s_offset = intel_offset_S8(s_mt->pitch,
2488                                                  map_x + s_image_x,
2489                                                  map_y + s_image_y,
2490                                                  brw->has_swizzling);
2491             ptrdiff_t z_offset = ((map_y + z_image_y) *
2492                                   (z_mt->pitch / 4) +
2493                                   (map_x + z_image_x));
2494             uint8_t s = s_map[s_offset];
2495             uint32_t z = z_map[z_offset];
2496
2497             if (map_z32f_x24s8) {
2498                packed_map[(y * map->w + x) * 2 + 0] = z;
2499                packed_map[(y * map->w + x) * 2 + 1] = s;
2500             } else {
2501                packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
2502             }
2503          }
2504       }
2505
2506       intel_miptree_unmap_raw(s_mt);
2507       intel_miptree_unmap_raw(z_mt);
2508
2509       DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
2510           __func__,
2511           map->x, map->y, map->w, map->h,
2512           z_mt, map->x + z_image_x, map->y + z_image_y,
2513           s_mt, map->x + s_image_x, map->y + s_image_y,
2514           map->ptr, map->stride);
2515    } else {
2516       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
2517           map->x, map->y, map->w, map->h,
2518           mt, map->ptr, map->stride);
2519    }
2520 }
2521
2522 static void
2523 intel_miptree_unmap_depthstencil(struct brw_context *brw,
2524                                  struct intel_mipmap_tree *mt,
2525                                  struct intel_miptree_map *map,
2526                                  unsigned int level,
2527                                  unsigned int slice)
2528 {
2529    struct intel_mipmap_tree *z_mt = mt;
2530    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
2531    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
2532
2533    if (map->mode & GL_MAP_WRITE_BIT) {
2534       uint32_t *packed_map = map->ptr;
2535       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
2536       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
2537       unsigned int s_image_x, s_image_y;
2538       unsigned int z_image_x, z_image_y;
2539
2540       intel_miptree_get_image_offset(s_mt, level, slice,
2541                                      &s_image_x, &s_image_y);
2542       intel_miptree_get_image_offset(z_mt, level, slice,
2543                                      &z_image_x, &z_image_y);
2544
2545       for (uint32_t y = 0; y < map->h; y++) {
2546          for (uint32_t x = 0; x < map->w; x++) {
2547             ptrdiff_t s_offset = intel_offset_S8(s_mt->pitch,
2548                                                  x + s_image_x + map->x,
2549                                                  y + s_image_y + map->y,
2550                                                  brw->has_swizzling);
2551             ptrdiff_t z_offset = ((y + z_image_y + map->y) *
2552                                   (z_mt->pitch / 4) +
2553                                   (x + z_image_x + map->x));
2554
2555             if (map_z32f_x24s8) {
2556                z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
2557                s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
2558             } else {
2559                uint32_t packed = packed_map[y * map->w + x];
2560                s_map[s_offset] = packed >> 24;
2561                z_map[z_offset] = packed;
2562             }
2563          }
2564       }
2565
2566       intel_miptree_unmap_raw(s_mt);
2567       intel_miptree_unmap_raw(z_mt);
2568
2569       DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
2570           __func__,
2571           map->x, map->y, map->w, map->h,
2572           z_mt, _mesa_get_format_name(z_mt->format),
2573           map->x + z_image_x, map->y + z_image_y,
2574           s_mt, map->x + s_image_x, map->y + s_image_y,
2575           map->ptr, map->stride);
2576    }
2577
2578    free(map->buffer);
2579 }
2580
2581 /**
2582  * Create and attach a map to the miptree at (level, slice). Return the
2583  * attached map.
2584  */
2585 static struct intel_miptree_map*
2586 intel_miptree_attach_map(struct intel_mipmap_tree *mt,
2587                          unsigned int level,
2588                          unsigned int slice,
2589                          unsigned int x,
2590                          unsigned int y,
2591                          unsigned int w,
2592                          unsigned int h,
2593                          GLbitfield mode)
2594 {
2595    struct intel_miptree_map *map = calloc(1, sizeof(*map));
2596
2597    if (!map)
2598       return NULL;
2599
2600    assert(mt->level[level].slice[slice].map == NULL);
2601    mt->level[level].slice[slice].map = map;
2602
2603    map->mode = mode;
2604    map->x = x;
2605    map->y = y;
2606    map->w = w;
2607    map->h = h;
2608
2609    return map;
2610 }
2611
2612 /**
2613  * Release the map at (level, slice).
2614  */
2615 static void
2616 intel_miptree_release_map(struct intel_mipmap_tree *mt,
2617                          unsigned int level,
2618                          unsigned int slice)
2619 {
2620    struct intel_miptree_map **map;
2621
2622    map = &mt->level[level].slice[slice].map;
2623    free(*map);
2624    *map = NULL;
2625 }
2626
2627 static bool
2628 can_blit_slice(struct intel_mipmap_tree *mt,
2629                unsigned int level, unsigned int slice)
2630 {
2631    uint32_t image_x;
2632    uint32_t image_y;
2633    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2634    if (image_x >= 32768 || image_y >= 32768)
2635       return false;
2636
2637    /* See intel_miptree_blit() for details on the 32k pitch limit. */
2638    if (mt->pitch >= 32768)
2639       return false;
2640
2641    return true;
2642 }
2643
2644 static bool
2645 use_intel_mipree_map_blit(struct brw_context *brw,
2646                           struct intel_mipmap_tree *mt,
2647                           GLbitfield mode,
2648                           unsigned int level,
2649                           unsigned int slice)
2650 {
2651    if (brw->has_llc &&
2652       /* It's probably not worth swapping to the blit ring because of
2653        * all the overhead involved.
2654        */
2655        !(mode & GL_MAP_WRITE_BIT) &&
2656        !mt->compressed &&
2657        (mt->tiling == I915_TILING_X ||
2658         /* Prior to Sandybridge, the blitter can't handle Y tiling */
2659         (brw->gen >= 6 && mt->tiling == I915_TILING_Y)) &&
2660        can_blit_slice(mt, level, slice))
2661       return true;
2662
2663    if (mt->tiling != I915_TILING_NONE &&
2664        mt->bo->size >= brw->max_gtt_map_object_size) {
2665       assert(can_blit_slice(mt, level, slice));
2666       return true;
2667    }
2668
2669    return false;
2670 }
2671
2672 /**
2673  * Parameter \a out_stride has type ptrdiff_t not because the buffer stride may
2674  * exceed 32 bits but to diminish the likelihood subtle bugs in pointer
2675  * arithmetic overflow.
2676  *
2677  * If you call this function and use \a out_stride, then you're doing pointer
2678  * arithmetic on \a out_ptr. The type of \a out_stride doesn't prevent all
2679  * bugs.  The caller must still take care to avoid 32-bit overflow errors in
2680  * all arithmetic expressions that contain buffer offsets and pixel sizes,
2681  * which usually have type uint32_t or GLuint.
2682  */
2683 void
2684 intel_miptree_map(struct brw_context *brw,
2685                   struct intel_mipmap_tree *mt,
2686                   unsigned int level,
2687                   unsigned int slice,
2688                   unsigned int x,
2689                   unsigned int y,
2690                   unsigned int w,
2691                   unsigned int h,
2692                   GLbitfield mode,
2693                   void **out_ptr,
2694                   ptrdiff_t *out_stride)
2695 {
2696    struct intel_miptree_map *map;
2697
2698    assert(mt->num_samples <= 1);
2699
2700    map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
2701    if (!map){
2702       *out_ptr = NULL;
2703       *out_stride = 0;
2704       return;
2705    }
2706
2707    intel_miptree_slice_resolve_depth(brw, mt, level, slice);
2708    if (map->mode & GL_MAP_WRITE_BIT) {
2709       intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice);
2710    }
2711
2712    if (mt->format == MESA_FORMAT_S_UINT8) {
2713       intel_miptree_map_s8(brw, mt, map, level, slice);
2714    } else if (mt->etc_format != MESA_FORMAT_NONE &&
2715               !(mode & BRW_MAP_DIRECT_BIT)) {
2716       intel_miptree_map_etc(brw, mt, map, level, slice);
2717    } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
2718       intel_miptree_map_depthstencil(brw, mt, map, level, slice);
2719    } else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) {
2720       intel_miptree_map_blit(brw, mt, map, level, slice);
2721 #if defined(USE_SSE41)
2722    } else if (!(mode & GL_MAP_WRITE_BIT) &&
2723               !mt->compressed && cpu_has_sse4_1 &&
2724               (mt->pitch % 16 == 0)) {
2725       intel_miptree_map_movntdqa(brw, mt, map, level, slice);
2726 #endif
2727    } else {
2728       intel_miptree_map_gtt(brw, mt, map, level, slice);
2729    }
2730
2731    *out_ptr = map->ptr;
2732    *out_stride = map->stride;
2733
2734    if (map->ptr == NULL)
2735       intel_miptree_release_map(mt, level, slice);
2736 }
2737
2738 void
2739 intel_miptree_unmap(struct brw_context *brw,
2740                     struct intel_mipmap_tree *mt,
2741                     unsigned int level,
2742                     unsigned int slice)
2743 {
2744    struct intel_miptree_map *map = mt->level[level].slice[slice].map;
2745
2746    assert(mt->num_samples <= 1);
2747
2748    if (!map)
2749       return;
2750
2751    DBG("%s: mt %p (%s) level %d slice %d\n", __func__,
2752        mt, _mesa_get_format_name(mt->format), level, slice);
2753
2754    if (mt->format == MESA_FORMAT_S_UINT8) {
2755       intel_miptree_unmap_s8(brw, mt, map, level, slice);
2756    } else if (mt->etc_format != MESA_FORMAT_NONE &&
2757               !(map->mode & BRW_MAP_DIRECT_BIT)) {
2758       intel_miptree_unmap_etc(brw, mt, map, level, slice);
2759    } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
2760       intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
2761    } else if (map->linear_mt) {
2762       intel_miptree_unmap_blit(brw, mt, map, level, slice);
2763 #if defined(USE_SSE41)
2764    } else if (map->buffer && cpu_has_sse4_1) {
2765       intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
2766 #endif
2767    } else {
2768       intel_miptree_unmap_gtt(mt);
2769    }
2770
2771    intel_miptree_release_map(mt, level, slice);
2772 }