src/mesa/drivers/dri/i965/intel_mipmap_tree.c

   1 /*
   2  * Copyright 2006 VMware, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sublicense, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the
  14  * next paragraph) shall be included in all copies or substantial portions
  15  * of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  20  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  21  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  22  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  23  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24  */
  25
  26 #include <GL/gl.h>
  27 #include <GL/internal/dri_interface.h>
  28
  29 #include "intel_batchbuffer.h"
  30 #include "intel_image.h"
  31 #include "intel_mipmap_tree.h"
  32 #include "intel_tex.h"
  33 #include "intel_blit.h"
  34 #include "intel_fbo.h"
  35
  36 #include "brw_blorp.h"
  37 #include "brw_context.h"
  38 #include "brw_state.h"
  39
  40 #include "main/enums.h"
  41 #include "main/fbobject.h"
  42 #include "main/formats.h"
  43 #include "main/glformats.h"
  44 #include "main/texcompress_etc.h"
  45 #include "main/teximage.h"
  46 #include "main/streaming-load-memcpy.h"
  47 #include "x86/common_x86_asm.h"
  48
  49 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
  50
  51 static void *intel_miptree_map_raw(struct brw_context *brw,
  52                                    struct intel_mipmap_tree *mt,
  53                                    GLbitfield mode);
  54
  55 static void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt);
  56
  57 static bool
  58 intel_miptree_alloc_aux(struct brw_context *brw,
  59                         struct intel_mipmap_tree *mt);
  60
  61 /**
  62  * Determine which MSAA layout should be used by the MSAA surface being
  63  * created, based on the chip generation and the surface type.
  64  */
  65 static enum intel_msaa_layout
  66 compute_msaa_layout(struct brw_context *brw, mesa_format format,
  67                     uint32_t layout_flags)
  68 {
  69    /* Prior to Gen7, all MSAA surfaces used IMS layout. */
  70    if (brw->gen < 7)
  71       return INTEL_MSAA_LAYOUT_IMS;
  72
  73    /* In Gen7, IMS layout is only used for depth and stencil buffers. */
  74    switch (_mesa_get_format_base_format(format)) {
  75    case GL_DEPTH_COMPONENT:
  76    case GL_STENCIL_INDEX:
  77    case GL_DEPTH_STENCIL:
  78       return INTEL_MSAA_LAYOUT_IMS;
  79    default:
  80       /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
  81        *
  82        *   This field must be set to 0 for all SINT MSRTs when all RT channels
  83        *   are not written
  84        *
  85        * In practice this means that we have to disable MCS for all signed
  86        * integer MSAA buffers.  The alternative, to disable MCS only when one
  87        * of the render target channels is disabled, is impractical because it
  88        * would require converting between CMS and UMS MSAA layouts on the fly,
  89        * which is expensive.
  90        */
  91       if (brw->gen == 7 && _mesa_get_format_datatype(format) == GL_INT) {
  92          return INTEL_MSAA_LAYOUT_UMS;
  93       } else if (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) {
  94          /* We can't use the CMS layout because it uses an aux buffer, the MCS
  95           * buffer. So fallback to UMS, which is identical to CMS without the
  96           * MCS. */
  97          return INTEL_MSAA_LAYOUT_UMS;
  98       } else {
  99          return INTEL_MSAA_LAYOUT_CMS;
 100       }
 101    }
 102 }
 103
 104 static bool
 105 intel_tiling_supports_ccs(const struct brw_context *brw, unsigned tiling)
 106 {
 107    /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
 108     * Target(s)", beneath the "Fast Color Clear" bullet (p326):
 109     *
 110     *     - Support is limited to tiled render targets.
 111     *
 112     * Gen9 changes the restriction to Y-tile only.
 113     */
 114    if (brw->gen >= 9)
 115       return tiling == I915_TILING_Y;
 116    else if (brw->gen >= 7)
 117       return tiling != I915_TILING_NONE;
 118    else
 119       return false;
 120 }
 121
 122 /**
 123  * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
 124  * can be used. This doesn't (and should not) inspect any of the properties of
 125  * the miptree's BO.
 126  *
 127  * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 128  * beneath the "Fast Color Clear" bullet (p326):
 129  *
 130  *     - Support is for non-mip-mapped and non-array surface types only.
 131  *
 132  * And then later, on p327:
 133  *
 134  *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
 135  *       64bpp, and 128bpp.
 136  *
 137  * From the Skylake documentation, it is made clear that X-tiling is no longer
 138  * supported:
 139  *
 140  *     - MCS and Lossless compression is supported for TiledY/TileYs/TileYf
 141  *     non-MSRTs only.
 142  */
 143 static bool
 144 intel_miptree_supports_ccs(struct brw_context *brw,
 145                            const struct intel_mipmap_tree *mt)
 146 {
 147    /* MCS support does not exist prior to Gen7 */
 148    if (brw->gen < 7)
 149       return false;
 150
 151    /* This function applies only to non-multisampled render targets. */
 152    if (mt->num_samples > 1)
 153       return false;
 154
 155    /* MCS is only supported for color buffers */
 156    switch (_mesa_get_format_base_format(mt->format)) {
 157    case GL_DEPTH_COMPONENT:
 158    case GL_DEPTH_STENCIL:
 159    case GL_STENCIL_INDEX:
 160       return false;
 161    }
 162
 163    if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
 164       return false;
 165
 166    const bool mip_mapped = mt->first_level != 0 || mt->last_level != 0;
 167    const bool arrayed = mt->physical_depth0 != 1;
 168
 169    if (arrayed) {
 170        /* Multisample surfaces with the CMS layout are not layered surfaces,
 171         * yet still have physical_depth0 > 1. Assert that we don't
 172         * accidentally reject a multisampled surface here. We should have
 173         * rejected it earlier by explicitly checking the sample count.
 174         */
 175       assert(mt->num_samples <= 1);
 176    }
 177
 178    /* Handle the hardware restrictions...
 179     *
 180     * All GENs have the following restriction: "MCS buffer for non-MSRT is
 181     * supported only for RT formats 32bpp, 64bpp, and 128bpp."
 182     *
 183     * From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652: (Color Clear of
 184     * Non-MultiSampler Render Target Restrictions) Support is for
 185     * non-mip-mapped and non-array surface types only.
 186     *
 187     * From the BDW PRM Volume 7: 3D-Media-GPGPU, page 649: (Color Clear of
 188     * Non-MultiSampler Render Target Restriction). Mip-mapped and arrayed
 189     * surfaces are supported with MCS buffer layout with these alignments in
 190     * the RT space: Horizontal Alignment = 256 and Vertical Alignment = 128.
 191     *
 192     * From the SKL PRM Volume 7: 3D-Media-GPGPU, page 632: (Color Clear of
 193     * Non-MultiSampler Render Target Restriction). Mip-mapped and arrayed
 194     * surfaces are supported with MCS buffer layout with these alignments in
 195     * the RT space: Horizontal Alignment = 128 and Vertical Alignment = 64.
 196     */
 197    if (brw->gen < 8 && (mip_mapped || arrayed))
 198       return false;
 199
 200    /* There's no point in using an MCS buffer if the surface isn't in a
 201     * renderable format.
 202     */
 203    if (!brw->mesa_format_supports_render[mt->format])
 204       return false;
 205
 206    if (brw->gen >= 9) {
 207       mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
 208       const enum isl_format isl_format =
 209          brw_isl_format_for_mesa_format(linear_format);
 210       return isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format);
 211    } else
 212       return true;
 213 }
 214
 215 static bool
 216 intel_tiling_supports_hiz(const struct brw_context *brw, unsigned tiling)
 217 {
 218    if (brw->gen < 6)
 219       return false;
 220
 221    return tiling == I915_TILING_Y;
 222 }
 223
 224 static bool
 225 intel_miptree_supports_hiz(struct brw_context *brw,
 226                            struct intel_mipmap_tree *mt)
 227 {
 228    if (!brw->has_hiz)
 229       return false;
 230
 231    switch (mt->format) {
 232    case MESA_FORMAT_Z_FLOAT32:
 233    case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
 234    case MESA_FORMAT_Z24_UNORM_X8_UINT:
 235    case MESA_FORMAT_Z24_UNORM_S8_UINT:
 236    case MESA_FORMAT_Z_UNORM16:
 237       return true;
 238    default:
 239       return false;
 240    }
 241 }
 242
 243
 244 /* On Gen9 support for color buffer compression was extended to single
 245  * sampled surfaces. This is a helper considering both auxiliary buffer
 246  * type and number of samples telling if the given miptree represents
 247  * the new single sampled case - also called lossless compression.
 248  */
 249 bool
 250 intel_miptree_is_lossless_compressed(const struct brw_context *brw,
 251                                      const struct intel_mipmap_tree *mt)
 252 {
 253    /* Only available from Gen9 onwards. */
 254    if (brw->gen < 9)
 255       return false;
 256
 257    /* Compression always requires auxiliary buffer. */
 258    if (!mt->mcs_buf)
 259       return false;
 260
 261    /* Single sample compression is represented re-using msaa compression
 262     * layout type: "Compressed Multisampled Surfaces".
 263     */
 264    if (mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS)
 265       return false;
 266
 267    /* And finally distinguish between msaa and single sample case. */
 268    return mt->num_samples <= 1;
 269 }
 270
 271 static bool
 272 intel_miptree_supports_ccs_e(struct brw_context *brw,
 273                              const struct intel_mipmap_tree *mt)
 274 {
 275    /* For now compression is only enabled for integer formats even though
 276     * there exist supported floating point formats also. This is a heuristic
 277     * decision based on current public benchmarks. In none of the cases these
 278     * formats provided any improvement but a few cases were seen to regress.
 279     * Hence these are left to to be enabled in the future when they are known
 280     * to improve things.
 281     */
 282    if (_mesa_get_format_datatype(mt->format) == GL_FLOAT)
 283       return false;
 284
 285    if (!intel_miptree_supports_ccs(brw, mt))
 286       return false;
 287
 288    /* Fast clear can be also used to clear srgb surfaces by using equivalent
 289     * linear format. This trick, however, can't be extended to be used with
 290     * lossless compression and therefore a check is needed to see if the format
 291     * really is linear.
 292     */
 293    return _mesa_get_srgb_format_linear(mt->format) == mt->format;
 294 }
 295
 296 /**
 297  * Determine depth format corresponding to a depth+stencil format,
 298  * for separate stencil.
 299  */
 300 mesa_format
 301 intel_depth_format_for_depthstencil_format(mesa_format format) {
 302    switch (format) {
 303    case MESA_FORMAT_Z24_UNORM_S8_UINT:
 304       return MESA_FORMAT_Z24_UNORM_X8_UINT;
 305    case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
 306       return MESA_FORMAT_Z_FLOAT32;
 307    default:
 308       return format;
 309    }
 310 }
 311
 312 static bool
 313 create_mapping_table(GLenum target, unsigned first_level, unsigned last_level,
 314                      unsigned depth0, struct intel_mipmap_level *table)
 315 {
 316    for (unsigned level = first_level; level <= last_level; level++) {
 317       const unsigned d =
 318          target == GL_TEXTURE_3D ? minify(depth0, level) : depth0;
 319
 320       table[level].slice = calloc(d, sizeof(*table[0].slice));
 321       if (!table[level].slice)
 322          goto unwind;
 323    }
 324
 325    return true;
 326
 327 unwind:
 328    for (unsigned level = first_level; level <= last_level; level++)
 329       free(table[level].slice);
 330
 331    return false;
 332 }
 333
 334 /**
 335  * @param for_bo Indicates that the caller is
 336  *        intel_miptree_create_for_bo(). If true, then do not create
 337  *        \c stencil_mt.
 338  */
 339 static struct intel_mipmap_tree *
 340 intel_miptree_create_layout(struct brw_context *brw,
 341                             GLenum target,
 342                             mesa_format format,
 343                             GLuint first_level,
 344                             GLuint last_level,
 345                             GLuint width0,
 346                             GLuint height0,
 347                             GLuint depth0,
 348                             GLuint num_samples,
 349                             uint32_t layout_flags)
 350 {
 351    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
 352    if (!mt)
 353       return NULL;
 354
 355    DBG("%s target %s format %s level %d..%d slices %d <-- %p\n", __func__,
 356        _mesa_enum_to_string(target),
 357        _mesa_get_format_name(format),
 358        first_level, last_level, depth0, mt);
 359
 360    if (target == GL_TEXTURE_1D_ARRAY)
 361       assert(height0 == 1);
 362
 363    mt->target = target;
 364    mt->format = format;
 365    mt->first_level = first_level;
 366    mt->last_level = last_level;
 367    mt->logical_width0 = width0;
 368    mt->logical_height0 = height0;
 369    mt->logical_depth0 = depth0;
 370    mt->is_scanout = (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT) != 0;
 371    mt->aux_usage = ISL_AUX_USAGE_NONE;
 372    mt->supports_fast_clear = false;
 373    mt->aux_state = NULL;
 374    mt->cpp = _mesa_get_format_bytes(format);
 375    mt->num_samples = num_samples;
 376    mt->compressed = _mesa_is_format_compressed(format);
 377    mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE;
 378    mt->refcount = 1;
 379
 380    if (brw->gen == 6 && format == MESA_FORMAT_S_UINT8)
 381       layout_flags |= MIPTREE_LAYOUT_GEN6_HIZ_STENCIL;
 382
 383    int depth_multiply = 1;
 384    if (num_samples > 1) {
 385       /* Adjust width/height/depth for MSAA */
 386       mt->msaa_layout = compute_msaa_layout(brw, format, layout_flags);
 387       if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
 388          /* From the Ivybridge PRM, Volume 1, Part 1, page 108:
 389           * "If the surface is multisampled and it is a depth or stencil
 390           *  surface or Multisampled Surface StorageFormat in SURFACE_STATE is
 391           *  MSFMT_DEPTH_STENCIL, WL and HL must be adjusted as follows before
 392           *  proceeding:
 393           *
 394           *  +----------------------------------------------------------------+
 395           *  | Num Multisamples |        W_l =         |        H_l =         |
 396           *  +----------------------------------------------------------------+
 397           *  |         2        | ceiling(W_l / 2) * 4 | H_l (no adjustment)  |
 398           *  |         4        | ceiling(W_l / 2) * 4 | ceiling(H_l / 2) * 4 |
 399           *  |         8        | ceiling(W_l / 2) * 8 | ceiling(H_l / 2) * 4 |
 400           *  |        16        | ceiling(W_l / 2) * 8 | ceiling(H_l / 2) * 8 |
 401           *  +----------------------------------------------------------------+
 402           * "
 403           *
 404           * Note that MSFMT_DEPTH_STENCIL just means the IMS (interleaved)
 405           * format rather than UMS/CMS (array slices).  The Sandybridge PRM,
 406           * Volume 1, Part 1, Page 111 has the same formula for 4x MSAA.
 407           *
 408           * Another more complicated explanation for these adjustments comes
 409           * from the Sandybridge PRM, volume 4, part 1, page 31:
 410           *
 411           *     "Any of the other messages (sample*, LOD, load4) used with a
 412           *      (4x) multisampled surface will in-effect sample a surface with
 413           *      double the height and width as that indicated in the surface
 414           *      state. Each pixel position on the original-sized surface is
 415           *      replaced with a 2x2 of samples with the following arrangement:
 416           *
 417           *         sample 0 sample 2
 418           *         sample 1 sample 3"
 419           *
 420           * Thus, when sampling from a multisampled texture, it behaves as
 421           * though the layout in memory for (x,y,sample) is:
 422           *
 423           *      (0,0,0) (0,0,2)   (1,0,0) (1,0,2)
 424           *      (0,0,1) (0,0,3)   (1,0,1) (1,0,3)
 425           *
 426           *      (0,1,0) (0,1,2)   (1,1,0) (1,1,2)
 427           *      (0,1,1) (0,1,3)   (1,1,1) (1,1,3)
 428           *
 429           * However, the actual layout of multisampled data in memory is:
 430           *
 431           *      (0,0,0) (1,0,0)   (0,0,1) (1,0,1)
 432           *      (0,1,0) (1,1,0)   (0,1,1) (1,1,1)
 433           *
 434           *      (0,0,2) (1,0,2)   (0,0,3) (1,0,3)
 435           *      (0,1,2) (1,1,2)   (0,1,3) (1,1,3)
 436           *
 437           * This pattern repeats for each 2x2 pixel block.
 438           *
 439           * As a result, when calculating the size of our 4-sample buffer for
 440           * an odd width or height, we have to align before scaling up because
 441           * sample 3 is in that bottom right 2x2 block.
 442           */
 443          switch (num_samples) {
 444          case 2:
 445             assert(brw->gen >= 8);
 446             width0 = ALIGN(width0, 2) * 2;
 447             height0 = ALIGN(height0, 2);
 448             break;
 449          case 4:
 450             width0 = ALIGN(width0, 2) * 2;
 451             height0 = ALIGN(height0, 2) * 2;
 452             break;
 453          case 8:
 454             width0 = ALIGN(width0, 2) * 4;
 455             height0 = ALIGN(height0, 2) * 2;
 456             break;
 457          case 16:
 458             width0 = ALIGN(width0, 2) * 4;
 459             height0 = ALIGN(height0, 2) * 4;
 460             break;
 461          default:
 462             /* num_samples should already have been quantized to 0, 1, 2, 4, 8
 463              * or 16.
 464              */
 465             unreachable("not reached");
 466          }
 467       } else {
 468          /* Non-interleaved */
 469          depth_multiply = num_samples;
 470          depth0 *= depth_multiply;
 471       }
 472    }
 473
 474    if (!create_mapping_table(target, first_level, last_level, depth0,
 475                              mt->level)) {
 476       free(mt);
 477       return NULL;
 478    }
 479
 480    /* Set array_layout to ALL_SLICES_AT_EACH_LOD when array_spacing_lod0 can
 481     * be used. array_spacing_lod0 is only used for non-IMS MSAA surfaces on
 482     * Gen 7 and 8. On Gen 8 and 9 this layout is not available but it is still
 483     * used on Gen8 to make it pick a qpitch value which doesn't include space
 484     * for the mipmaps. On Gen9 this is not necessary because it will
 485     * automatically pick a packed qpitch value whenever mt->first_level ==
 486     * mt->last_level.
 487     * TODO: can we use it elsewhere?
 488     * TODO: also disable this on Gen8 and pick the qpitch value like Gen9
 489     */
 490    if (brw->gen >= 9) {
 491       mt->array_layout = ALL_LOD_IN_EACH_SLICE;
 492    } else {
 493       switch (mt->msaa_layout) {
 494       case INTEL_MSAA_LAYOUT_NONE:
 495       case INTEL_MSAA_LAYOUT_IMS:
 496          mt->array_layout = ALL_LOD_IN_EACH_SLICE;
 497          break;
 498       case INTEL_MSAA_LAYOUT_UMS:
 499       case INTEL_MSAA_LAYOUT_CMS:
 500          mt->array_layout = ALL_SLICES_AT_EACH_LOD;
 501          break;
 502       }
 503    }
 504
 505    if (target == GL_TEXTURE_CUBE_MAP)
 506       assert(depth0 == 6 * depth_multiply);
 507
 508    mt->physical_width0 = width0;
 509    mt->physical_height0 = height0;
 510    mt->physical_depth0 = depth0;
 511
 512    if (!(layout_flags & MIPTREE_LAYOUT_FOR_BO) &&
 513        _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
 514        (brw->must_use_separate_stencil ||
 515         (brw->has_separate_stencil && intel_miptree_supports_hiz(brw, mt)))) {
 516       uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
 517       if (brw->gen == 6) {
 518          stencil_flags |= MIPTREE_LAYOUT_TILING_ANY;
 519       }
 520
 521       mt->stencil_mt = intel_miptree_create(brw,
 522                                             mt->target,
 523                                             MESA_FORMAT_S_UINT8,
 524                                             mt->first_level,
 525                                             mt->last_level,
 526                                             mt->logical_width0,
 527                                             mt->logical_height0,
 528                                             mt->logical_depth0,
 529                                             num_samples,
 530                                             stencil_flags);
 531
 532       if (!mt->stencil_mt) {
 533          intel_miptree_release(&mt);
 534          return NULL;
 535       }
 536       mt->stencil_mt->r8stencil_needs_update = true;
 537
 538       /* Fix up the Z miptree format for how we're splitting out separate
 539        * stencil.  Gen7 expects there to be no stencil bits in its depth buffer.
 540        */
 541       mt->format = intel_depth_format_for_depthstencil_format(mt->format);
 542       mt->cpp = 4;
 543
 544       if (format == mt->format) {
 545          _mesa_problem(NULL, "Unknown format %s in separate stencil mt\n",
 546                        _mesa_get_format_name(mt->format));
 547       }
 548    }
 549
 550    if (layout_flags & MIPTREE_LAYOUT_GEN6_HIZ_STENCIL)
 551       mt->array_layout = GEN6_HIZ_STENCIL;
 552
 553    /*
 554     * Obey HALIGN_16 constraints for Gen8 and Gen9 buffers which are
 555     * multisampled or have an AUX buffer attached to it.
 556     *
 557     * GEN  |    MSRT        | AUX_CCS_* or AUX_MCS
 558     *  -------------------------------------------
 559     *  9   |  HALIGN_16     |    HALIGN_16
 560     *  8   |  HALIGN_ANY    |    HALIGN_16
 561     *  7   |      ?         |        ?
 562     *  6   |      ?         |        ?
 563     */
 564    if (intel_miptree_supports_ccs(brw, mt)) {
 565       if (brw->gen >= 9 || (brw->gen == 8 && num_samples <= 1))
 566          layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
 567    } else if (brw->gen >= 9 && num_samples > 1) {
 568       layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
 569    } else {
 570       const UNUSED bool is_lossless_compressed_aux =
 571          brw->gen >= 9 && num_samples == 1 &&
 572          mt->format == MESA_FORMAT_R_UINT32;
 573
 574       /* For now, nothing else has this requirement */
 575       assert(is_lossless_compressed_aux ||
 576              (layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
 577    }
 578
 579    if (!brw_miptree_layout(brw, mt, layout_flags)) {
 580       intel_miptree_release(&mt);
 581       return NULL;
 582    }
 583
 584    return mt;
 585 }
 586
 587
 588 /**
 589  * Choose the aux usage for this miptree.  This function must be called fairly
 590  * late in the miptree create process after we have a tiling.
 591  */
 592 static void
 593 intel_miptree_choose_aux_usage(struct brw_context *brw,
 594                                struct intel_mipmap_tree *mt)
 595 {
 596    assert(mt->aux_usage == ISL_AUX_USAGE_NONE);
 597
 598    if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
 599       mt->aux_usage = ISL_AUX_USAGE_MCS;
 600    } else if (intel_tiling_supports_ccs(brw, mt->tiling) &&
 601               intel_miptree_supports_ccs(brw, mt)) {
 602       if (!unlikely(INTEL_DEBUG & DEBUG_NO_RBC) &&
 603           brw->gen >= 9 && !mt->is_scanout &&
 604           intel_miptree_supports_ccs_e(brw, mt)) {
 605          mt->aux_usage = ISL_AUX_USAGE_CCS_E;
 606       } else {
 607          mt->aux_usage = ISL_AUX_USAGE_CCS_D;
 608       }
 609    } else if (intel_tiling_supports_hiz(brw, mt->tiling) &&
 610               intel_miptree_supports_hiz(brw, mt)) {
 611       mt->aux_usage = ISL_AUX_USAGE_HIZ;
 612    }
 613
 614    /* We can do fast-clear on all auxiliary surface types that are
 615     * allocated through the normal texture creation paths.
 616     */
 617    if (mt->aux_usage != ISL_AUX_USAGE_NONE)
 618       mt->supports_fast_clear = true;
 619 }
 620
 621
 622 /**
 623  * Choose an appropriate uncompressed format for a requested
 624  * compressed format, if unsupported.
 625  */
 626 mesa_format
 627 intel_lower_compressed_format(struct brw_context *brw, mesa_format format)
 628 {
 629    /* No need to lower ETC formats on these platforms,
 630     * they are supported natively.
 631     */
 632    if (brw->gen >= 8 || brw->is_baytrail)
 633       return format;
 634
 635    switch (format) {
 636    case MESA_FORMAT_ETC1_RGB8:
 637       return MESA_FORMAT_R8G8B8X8_UNORM;
 638    case MESA_FORMAT_ETC2_RGB8:
 639       return MESA_FORMAT_R8G8B8X8_UNORM;
 640    case MESA_FORMAT_ETC2_SRGB8:
 641    case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
 642    case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
 643       return MESA_FORMAT_B8G8R8A8_SRGB;
 644    case MESA_FORMAT_ETC2_RGBA8_EAC:
 645    case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
 646       return MESA_FORMAT_R8G8B8A8_UNORM;
 647    case MESA_FORMAT_ETC2_R11_EAC:
 648       return MESA_FORMAT_R_UNORM16;
 649    case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
 650       return MESA_FORMAT_R_SNORM16;
 651    case MESA_FORMAT_ETC2_RG11_EAC:
 652       return MESA_FORMAT_R16G16_UNORM;
 653    case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
 654       return MESA_FORMAT_R16G16_SNORM;
 655    default:
 656       /* Non ETC1 / ETC2 format */
 657       return format;
 658    }
 659 }
 660
 661 /** \brief Assert that the level and layer are valid for the miptree. */
 662 void
 663 intel_miptree_check_level_layer(const struct intel_mipmap_tree *mt,
 664                                 uint32_t level,
 665                                 uint32_t layer)
 666 {
 667    (void) mt;
 668    (void) level;
 669    (void) layer;
 670
 671    assert(level >= mt->first_level);
 672    assert(level <= mt->last_level);
 673
 674    if (mt->surf.size > 0)
 675       assert(layer < (mt->surf.dim == ISL_SURF_DIM_3D ?
 676                          minify(mt->surf.phys_level0_sa.depth, level) :
 677                          mt->surf.phys_level0_sa.array_len));
 678    else
 679       assert(layer < mt->level[level].depth);
 680 }
 681
 682 static enum isl_aux_state **
 683 create_aux_state_map(struct intel_mipmap_tree *mt,
 684                      enum isl_aux_state initial)
 685 {
 686    const uint32_t levels = mt->last_level + 1;
 687
 688    uint32_t total_slices = 0;
 689    for (uint32_t level = 0; level < levels; level++)
 690       total_slices += mt->level[level].depth;
 691
 692    const size_t per_level_array_size = levels * sizeof(enum isl_aux_state *);
 693
 694    /* We're going to allocate a single chunk of data for both the per-level
 695     * reference array and the arrays of aux_state.  This makes cleanup
 696     * significantly easier.
 697     */
 698    const size_t total_size = per_level_array_size +
 699                              total_slices * sizeof(enum isl_aux_state);
 700    void *data = malloc(total_size);
 701    if (data == NULL)
 702       return NULL;
 703
 704    enum isl_aux_state **per_level_arr = data;
 705    enum isl_aux_state *s = data + per_level_array_size;
 706    for (uint32_t level = 0; level < levels; level++) {
 707       per_level_arr[level] = s;
 708       for (uint32_t a = 0; a < mt->level[level].depth; a++)
 709          *(s++) = initial;
 710    }
 711    assert((void *)s == data + total_size);
 712
 713    return per_level_arr;
 714 }
 715
 716 static void
 717 free_aux_state_map(enum isl_aux_state **state)
 718 {
 719    free(state);
 720 }
 721
 722 static struct intel_mipmap_tree *
 723 make_surface(struct brw_context *brw, GLenum target, mesa_format format,
 724              unsigned first_level, unsigned last_level,
 725              unsigned width0, unsigned height0, unsigned depth0,
 726              unsigned num_samples, enum isl_tiling isl_tiling,
 727              isl_surf_usage_flags_t isl_usage_flags, uint32_t alloc_flags,
 728              struct brw_bo *bo)
 729 {
 730    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
 731    if (!mt)
 732       return NULL;
 733
 734    if (!create_mapping_table(target, first_level, last_level, depth0,
 735                              mt->level)) {
 736       free(mt);
 737       return NULL;
 738    }
 739
 740    if (target == GL_TEXTURE_CUBE_MAP ||
 741        target == GL_TEXTURE_CUBE_MAP_ARRAY)
 742       isl_usage_flags |= ISL_SURF_USAGE_CUBE_BIT;
 743
 744    DBG("%s: %s %s %ux %u:%u:%u %d..%d <-- %p\n",
 745         __func__,
 746        _mesa_enum_to_string(target),
 747        _mesa_get_format_name(format),
 748        num_samples, width0, height0, depth0,
 749        first_level, last_level, mt);
 750
 751    struct isl_surf_init_info init_info = {
 752       .dim = get_isl_surf_dim(target),
 753       .format = translate_tex_format(brw, format, false),
 754       .width = width0,
 755       .height = height0,
 756       .depth = target == GL_TEXTURE_3D ? depth0 : 1,
 757       .levels = last_level - first_level + 1,
 758       .array_len = target == GL_TEXTURE_3D ? 1 : depth0,
 759       .samples = MAX2(num_samples, 1),
 760       .usage = isl_usage_flags,
 761       .tiling_flags = 1u << isl_tiling
 762    };
 763
 764    if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
 765       goto fail;
 766
 767    assert(mt->surf.size % mt->surf.row_pitch == 0);
 768
 769    if (!bo) {
 770       mt->bo = brw_bo_alloc_tiled(brw->bufmgr, "isl-miptree",
 771                                   mt->surf.size,
 772                                   isl_tiling_to_bufmgr_tiling(isl_tiling),
 773                                   mt->surf.row_pitch, alloc_flags);
 774       if (!mt->bo)
 775          goto fail;
 776    } else {
 777       mt->bo = bo;
 778    }
 779
 780    mt->first_level = first_level;
 781    mt->last_level = last_level;
 782    mt->target = target;
 783    mt->format = format;
 784    mt->refcount = 1;
 785    mt->aux_state = NULL;
 786
 787    return mt;
 788
 789 fail:
 790    intel_miptree_release(&mt);
 791    return NULL;
 792 }
 793
 794 static struct intel_mipmap_tree *
 795 miptree_create(struct brw_context *brw,
 796                GLenum target,
 797                mesa_format format,
 798                GLuint first_level,
 799                GLuint last_level,
 800                GLuint width0,
 801                GLuint height0,
 802                GLuint depth0,
 803                GLuint num_samples,
 804                uint32_t layout_flags)
 805 {
 806    if (brw->gen == 6 && format == MESA_FORMAT_S_UINT8)
 807       return make_surface(brw, target, format, first_level, last_level,
 808                           width0, height0, depth0, num_samples, ISL_TILING_W,
 809                           ISL_SURF_USAGE_STENCIL_BIT |
 810                           ISL_SURF_USAGE_TEXTURE_BIT,
 811                           BO_ALLOC_FOR_RENDER, NULL);
 812
 813    struct intel_mipmap_tree *mt;
 814    mesa_format tex_format = format;
 815    mesa_format etc_format = MESA_FORMAT_NONE;
 816    uint32_t alloc_flags = 0;
 817
 818    format = intel_lower_compressed_format(brw, format);
 819
 820    etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
 821
 822    assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0);
 823    mt = intel_miptree_create_layout(brw, target, format,
 824                                     first_level, last_level, width0,
 825                                     height0, depth0, num_samples,
 826                                     layout_flags);
 827    if (!mt)
 828       return NULL;
 829
 830    if (mt->tiling == (I915_TILING_Y | I915_TILING_X))
 831       mt->tiling = I915_TILING_Y;
 832
 833    if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD)
 834       alloc_flags |= BO_ALLOC_FOR_RENDER;
 835
 836    mt->etc_format = etc_format;
 837
 838    if (format == MESA_FORMAT_S_UINT8) {
 839       /* Align to size of W tile, 64x64. */
 840       mt->bo = brw_bo_alloc_tiled_2d(brw->bufmgr, "miptree",
 841                                      ALIGN(mt->total_width, 64),
 842                                      ALIGN(mt->total_height, 64),
 843                                      mt->cpp, mt->tiling, &mt->pitch,
 844                                      alloc_flags);
 845    } else {
 846       mt->bo = brw_bo_alloc_tiled_2d(brw->bufmgr, "miptree",
 847                                      mt->total_width, mt->total_height,
 848                                      mt->cpp, mt->tiling, &mt->pitch,
 849                                      alloc_flags);
 850    }
 851
 852    if (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT)
 853       mt->bo->cache_coherent = false;
 854
 855    if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
 856       intel_miptree_choose_aux_usage(brw, mt);
 857
 858    return mt;
 859 }
 860
 861 struct intel_mipmap_tree *
 862 intel_miptree_create(struct brw_context *brw,
 863                      GLenum target,
 864                      mesa_format format,
 865                      GLuint first_level,
 866                      GLuint last_level,
 867                      GLuint width0,
 868                      GLuint height0,
 869                      GLuint depth0,
 870                      GLuint num_samples,
 871                      uint32_t layout_flags)
 872 {
 873    struct intel_mipmap_tree *mt = miptree_create(
 874                                      brw, target, format,
 875                                      first_level, last_level,
 876                                      width0, height0, depth0, num_samples,
 877                                      layout_flags);
 878
 879    /* If the BO is too large to fit in the aperture, we need to use the
 880     * BLT engine to support it.  Prior to Sandybridge, the BLT paths can't
 881     * handle Y-tiling, so we need to fall back to X.
 882     */
 883    if (brw->gen < 6 && mt->bo->size >= brw->max_gtt_map_object_size &&
 884        mt->tiling == I915_TILING_Y) {
 885       const uint32_t alloc_flags =
 886          (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD) ?
 887          BO_ALLOC_FOR_RENDER : 0;
 888       perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n",
 889                  mt->total_width, mt->total_height);
 890
 891       mt->tiling = I915_TILING_X;
 892       brw_bo_unreference(mt->bo);
 893       mt->bo = brw_bo_alloc_tiled_2d(brw->bufmgr, "miptree",
 894                                      mt->total_width, mt->total_height, mt->cpp,
 895                                      mt->tiling, &mt->pitch, alloc_flags);
 896    }
 897
 898    mt->offset = 0;
 899
 900    if (!mt->bo) {
 901        intel_miptree_release(&mt);
 902        return NULL;
 903    }
 904
 905    if (!intel_miptree_alloc_aux(brw, mt)) {
 906       intel_miptree_release(&mt);
 907       return NULL;
 908    }
 909
 910    return mt;
 911 }
 912
 913 struct intel_mipmap_tree *
 914 intel_miptree_create_for_bo(struct brw_context *brw,
 915                             struct brw_bo *bo,
 916                             mesa_format format,
 917                             uint32_t offset,
 918                             uint32_t width,
 919                             uint32_t height,
 920                             uint32_t depth,
 921                             int pitch,
 922                             uint32_t layout_flags)
 923 {
 924    struct intel_mipmap_tree *mt;
 925    uint32_t tiling, swizzle;
 926    const GLenum target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
 927
 928    if (brw->gen == 6 && format == MESA_FORMAT_S_UINT8) {
 929       mt = make_surface(brw, target, MESA_FORMAT_S_UINT8,
 930                         0, 0, width, height, depth, 1, ISL_TILING_W,
 931                         ISL_SURF_USAGE_STENCIL_BIT |
 932                         ISL_SURF_USAGE_TEXTURE_BIT,
 933                         BO_ALLOC_FOR_RENDER, bo);
 934       if (!mt)
 935          return NULL;
 936
 937       assert(bo->size >= mt->surf.size);
 938
 939       brw_bo_reference(bo);
 940       return mt;
 941    }
 942
 943    brw_bo_get_tiling(bo, &tiling, &swizzle);
 944
 945    /* Nothing will be able to use this miptree with the BO if the offset isn't
 946     * aligned.
 947     */
 948    if (tiling != I915_TILING_NONE)
 949       assert(offset % 4096 == 0);
 950
 951    /* miptrees can't handle negative pitch.  If you need flipping of images,
 952     * that's outside of the scope of the mt.
 953     */
 954    assert(pitch >= 0);
 955
 956    /* The BO already has a tiling format and we shouldn't confuse the lower
 957     * layers by making it try to find a tiling format again.
 958     */
 959    assert((layout_flags & MIPTREE_LAYOUT_TILING_ANY) == 0);
 960    assert((layout_flags & MIPTREE_LAYOUT_TILING_NONE) == 0);
 961
 962    layout_flags |= MIPTREE_LAYOUT_FOR_BO;
 963    mt = intel_miptree_create_layout(brw, target, format,
 964                                     0, 0,
 965                                     width, height, depth, 0,
 966                                     layout_flags);
 967    if (!mt)
 968       return NULL;
 969
 970    brw_bo_reference(bo);
 971    mt->bo = bo;
 972    mt->pitch = pitch;
 973    mt->offset = offset;
 974    mt->tiling = tiling;
 975
 976    if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
 977       intel_miptree_choose_aux_usage(brw, mt);
 978
 979    return mt;
 980 }
 981
 982 static struct intel_mipmap_tree *
 983 miptree_create_for_planar_image(struct brw_context *brw,
 984                                 __DRIimage *image, GLenum target)
 985 {
 986    struct intel_image_format *f = image->planar_format;
 987    struct intel_mipmap_tree *planar_mt = NULL;
 988
 989    for (int i = 0; i < f->nplanes; i++) {
 990       const int index = f->planes[i].buffer_index;
 991       const uint32_t dri_format = f->planes[i].dri_format;
 992       const mesa_format format = driImageFormatToGLFormat(dri_format);
 993       const uint32_t width = image->width >> f->planes[i].width_shift;
 994       const uint32_t height = image->height >> f->planes[i].height_shift;
 995
 996       /* Disable creation of the texture's aux buffers because the driver
 997        * exposes no EGL API to manage them. That is, there is no API for
 998        * resolving the aux buffer's content to the main buffer nor for
 999        * invalidating the aux buffer's content.
1000        */
1001       struct intel_mipmap_tree *mt =
1002          intel_miptree_create_for_bo(brw, image->bo, format,
1003                                      image->offsets[index],
1004                                      width, height, 1,
1005                                      image->strides[index],
1006                                      MIPTREE_LAYOUT_DISABLE_AUX);
1007       if (mt == NULL)
1008          return NULL;
1009
1010       mt->target = target;
1011       mt->total_width = width;
1012       mt->total_height = height;
1013
1014       if (i == 0)
1015          planar_mt = mt;
1016       else
1017          planar_mt->plane[i - 1] = mt;
1018    }
1019
1020    return planar_mt;
1021 }
1022
1023 struct intel_mipmap_tree *
1024 intel_miptree_create_for_dri_image(struct brw_context *brw,
1025                                    __DRIimage *image, GLenum target,
1026                                    enum isl_colorspace colorspace,
1027                                    bool is_winsys_image)
1028 {
1029    if (image->planar_format && image->planar_format->nplanes > 0) {
1030       assert(colorspace == ISL_COLORSPACE_NONE ||
1031              colorspace == ISL_COLORSPACE_YUV);
1032       return miptree_create_for_planar_image(brw, image, target);
1033    }
1034
1035    mesa_format format = image->format;
1036    switch (colorspace) {
1037    case ISL_COLORSPACE_NONE:
1038       /* Keep the image format unmodified */
1039       break;
1040
1041    case ISL_COLORSPACE_LINEAR:
1042       format =_mesa_get_srgb_format_linear(format);
1043       break;
1044
1045    case ISL_COLORSPACE_SRGB:
1046       format =_mesa_get_linear_format_srgb(format);
1047       break;
1048
1049    default:
1050       unreachable("Inalid colorspace for non-planar image");
1051    }
1052
1053    if (!brw->ctx.TextureFormatSupported[format]) {
1054       /* The texture storage paths in core Mesa detect if the driver does not
1055        * support the user-requested format, and then searches for a
1056        * fallback format. The DRIimage code bypasses core Mesa, though. So we
1057        * do the fallbacks here for important formats.
1058        *
1059        * We must support DRM_FOURCC_XBGR8888 textures because the Android
1060        * framework produces HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces, which
1061        * the Chrome OS compositor consumes as dma_buf EGLImages.
1062        */
1063       format = _mesa_format_fallback_rgbx_to_rgba(format);
1064    }
1065
1066    if (!brw->ctx.TextureFormatSupported[format])
1067       return NULL;
1068
1069    /* If this image comes in from a window system, we have different
1070     * requirements than if it comes in via an EGL import operation.  Window
1071     * system images can use any form of auxiliary compression we wish because
1072     * they get "flushed" before being handed off to the window system and we
1073     * have the opportunity to do resolves.  Window system buffers also may be
1074     * used for scanout so we need to flag that appropriately.
1075     */
1076    const uint32_t mt_layout_flags =
1077       is_winsys_image ? MIPTREE_LAYOUT_FOR_SCANOUT : MIPTREE_LAYOUT_DISABLE_AUX;
1078
1079    /* Disable creation of the texture's aux buffers because the driver exposes
1080     * no EGL API to manage them. That is, there is no API for resolving the aux
1081     * buffer's content to the main buffer nor for invalidating the aux buffer's
1082     * content.
1083     */
1084    struct intel_mipmap_tree *mt =
1085       intel_miptree_create_for_bo(brw, image->bo, format,
1086                                   image->offset, image->width, image->height, 1,
1087                                   image->pitch, mt_layout_flags);
1088    if (mt == NULL)
1089       return NULL;
1090
1091    mt->target = target;
1092    mt->level[0].level_x = image->tile_x;
1093    mt->level[0].level_y = image->tile_y;
1094    mt->level[0].slice[0].x_offset = image->tile_x;
1095    mt->level[0].slice[0].y_offset = image->tile_y;
1096    mt->total_width += image->tile_x;
1097    mt->total_height += image->tile_y;
1098
1099    /* From "OES_EGL_image" error reporting. We report GL_INVALID_OPERATION
1100     * for EGL images from non-tile aligned sufaces in gen4 hw and earlier which has
1101     * trouble resolving back to destination image due to alignment issues.
1102     */
1103    if (!brw->has_surface_tile_offset) {
1104       uint32_t draw_x, draw_y;
1105       intel_miptree_get_tile_offsets(mt, 0, 0, &draw_x, &draw_y);
1106
1107       if (draw_x != 0 || draw_y != 0) {
1108          _mesa_error(&brw->ctx, GL_INVALID_OPERATION, __func__);
1109          intel_miptree_release(&mt);
1110          return NULL;
1111       }
1112    }
1113
1114    if (!intel_miptree_alloc_aux(brw, mt)) {
1115       intel_miptree_release(&mt);
1116       return NULL;
1117    }
1118
1119    return mt;
1120 }
1121
1122 /**
1123  * For a singlesample renderbuffer, this simply wraps the given BO with a
1124  * miptree.
1125  *
1126  * For a multisample renderbuffer, this wraps the window system's
1127  * (singlesample) BO with a singlesample miptree attached to the
1128  * intel_renderbuffer, then creates a multisample miptree attached to irb->mt
1129  * that will contain the actual rendering (which is lazily resolved to
1130  * irb->singlesample_mt).
1131  */
1132 bool
1133 intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
1134                                          struct intel_renderbuffer *irb,
1135                                          struct intel_mipmap_tree *singlesample_mt,
1136                                          uint32_t width, uint32_t height,
1137                                          uint32_t pitch)
1138 {
1139    struct intel_mipmap_tree *multisample_mt = NULL;
1140    struct gl_renderbuffer *rb = &irb->Base.Base;
1141    mesa_format format = rb->Format;
1142    int num_samples = rb->NumSamples;
1143
1144    /* Only the front and back buffers, which are color buffers, are allocated
1145     * through the image loader.
1146     */
1147    assert(_mesa_get_format_base_format(format) == GL_RGB ||
1148           _mesa_get_format_base_format(format) == GL_RGBA);
1149
1150    assert(singlesample_mt);
1151
1152    if (num_samples == 0) {
1153       intel_miptree_release(&irb->mt);
1154       irb->mt = singlesample_mt;
1155
1156       assert(!irb->singlesample_mt);
1157    } else {
1158       intel_miptree_release(&irb->singlesample_mt);
1159       irb->singlesample_mt = singlesample_mt;
1160
1161       if (!irb->mt ||
1162           irb->mt->logical_width0 != width ||
1163           irb->mt->logical_height0 != height) {
1164          multisample_mt = intel_miptree_create_for_renderbuffer(intel,
1165                                                                 format,
1166                                                                 width,
1167                                                                 height,
1168                                                                 num_samples);
1169          if (!multisample_mt)
1170             goto fail;
1171
1172          irb->need_downsample = false;
1173          intel_miptree_release(&irb->mt);
1174          irb->mt = multisample_mt;
1175       }
1176    }
1177    return true;
1178
1179 fail:
1180    intel_miptree_release(&irb->mt);
1181    return false;
1182 }
1183
1184 struct intel_mipmap_tree*
1185 intel_miptree_create_for_renderbuffer(struct brw_context *brw,
1186                                       mesa_format format,
1187                                       uint32_t width,
1188                                       uint32_t height,
1189                                       uint32_t num_samples)
1190 {
1191    struct intel_mipmap_tree *mt;
1192    uint32_t depth = 1;
1193    bool ok;
1194    GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
1195    const uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
1196                                  MIPTREE_LAYOUT_TILING_ANY |
1197                                  MIPTREE_LAYOUT_FOR_SCANOUT;
1198
1199    mt = intel_miptree_create(brw, target, format, 0, 0,
1200                              width, height, depth, num_samples,
1201                              layout_flags);
1202    if (!mt)
1203       goto fail;
1204
1205    if (mt->aux_usage == ISL_AUX_USAGE_HIZ) {
1206       ok = intel_miptree_alloc_hiz(brw, mt);
1207       if (!ok)
1208          goto fail;
1209    }
1210
1211    return mt;
1212
1213 fail:
1214    intel_miptree_release(&mt);
1215    return NULL;
1216 }
1217
1218 void
1219 intel_miptree_reference(struct intel_mipmap_tree **dst,
1220                         struct intel_mipmap_tree *src)
1221 {
1222    if (*dst == src)
1223       return;
1224
1225    intel_miptree_release(dst);
1226
1227    if (src) {
1228       src->refcount++;
1229       DBG("%s %p refcount now %d\n", __func__, src, src->refcount);
1230    }
1231
1232    *dst = src;
1233 }
1234
1235 static void
1236 intel_miptree_aux_buffer_free(struct intel_miptree_aux_buffer *aux_buf)
1237 {
1238    if (aux_buf == NULL)
1239       return;
1240
1241    brw_bo_unreference(aux_buf->bo);
1242
1243    free(aux_buf);
1244 }
1245
1246 void
1247 intel_miptree_release(struct intel_mipmap_tree **mt)
1248 {
1249    if (!*mt)
1250       return;
1251
1252    DBG("%s %p refcount will be %d\n", __func__, *mt, (*mt)->refcount - 1);
1253    if (--(*mt)->refcount <= 0) {
1254       GLuint i;
1255
1256       DBG("%s deleting %p\n", __func__, *mt);
1257
1258       brw_bo_unreference((*mt)->bo);
1259       intel_miptree_release(&(*mt)->stencil_mt);
1260       intel_miptree_release(&(*mt)->r8stencil_mt);
1261       intel_miptree_aux_buffer_free((*mt)->hiz_buf);
1262       intel_miptree_aux_buffer_free((*mt)->mcs_buf);
1263       free_aux_state_map((*mt)->aux_state);
1264
1265       intel_miptree_release(&(*mt)->plane[0]);
1266       intel_miptree_release(&(*mt)->plane[1]);
1267
1268       for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
1269          free((*mt)->level[i].slice);
1270       }
1271
1272       free(*mt);
1273    }
1274    *mt = NULL;
1275 }
1276
1277
1278 void
1279 intel_get_image_dims(struct gl_texture_image *image,
1280                      int *width, int *height, int *depth)
1281 {
1282    switch (image->TexObject->Target) {
1283    case GL_TEXTURE_1D_ARRAY:
1284       /* For a 1D Array texture the OpenGL API will treat the image height as
1285        * the number of array slices. For Intel hardware, we treat the 1D array
1286        * as a 2D Array with a height of 1. So, here we want to swap image
1287        * height and depth.
1288        */
1289       assert(image->Depth == 1);
1290       *width = image->Width;
1291       *height = 1;
1292       *depth = image->Height;
1293       break;
1294    case GL_TEXTURE_CUBE_MAP:
1295       /* For Cube maps, the mesa/main api layer gives us a depth of 1 even
1296        * though we really have 6 slices.
1297        */
1298       assert(image->Depth == 1);
1299       *width = image->Width;
1300       *height = image->Height;
1301       *depth = 6;
1302       break;
1303    default:
1304       *width = image->Width;
1305       *height = image->Height;
1306       *depth = image->Depth;
1307       break;
1308    }
1309 }
1310
1311 /**
1312  * Can the image be pulled into a unified mipmap tree?  This mirrors
1313  * the completeness test in a lot of ways.
1314  *
1315  * Not sure whether I want to pass gl_texture_image here.
1316  */
1317 bool
1318 intel_miptree_match_image(struct intel_mipmap_tree *mt,
1319                           struct gl_texture_image *image)
1320 {
1321    struct intel_texture_image *intelImage = intel_texture_image(image);
1322    GLuint level = intelImage->base.Base.Level;
1323    int width, height, depth;
1324
1325    /* glTexImage* choose the texture object based on the target passed in, and
1326     * objects can't change targets over their lifetimes, so this should be
1327     * true.
1328     */
1329    assert(image->TexObject->Target == mt->target);
1330
1331    mesa_format mt_format = mt->format;
1332    if (mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT && mt->stencil_mt)
1333       mt_format = MESA_FORMAT_Z24_UNORM_S8_UINT;
1334    if (mt->format == MESA_FORMAT_Z_FLOAT32 && mt->stencil_mt)
1335       mt_format = MESA_FORMAT_Z32_FLOAT_S8X24_UINT;
1336    if (mt->etc_format != MESA_FORMAT_NONE)
1337       mt_format = mt->etc_format;
1338
1339    if (image->TexFormat != mt_format)
1340       return false;
1341
1342    intel_get_image_dims(image, &width, &height, &depth);
1343
1344    if (mt->target == GL_TEXTURE_CUBE_MAP)
1345       depth = 6;
1346
1347    if (mt->surf.size > 0) {
1348       if (level >= mt->surf.levels)
1349          return false;
1350
1351       const unsigned level_depth =
1352          mt->surf.dim == ISL_SURF_DIM_3D ?
1353             minify(mt->surf.logical_level0_px.depth, level) :
1354             mt->surf.logical_level0_px.array_len;
1355
1356       return width == minify(mt->surf.logical_level0_px.width, level) &&
1357              height == minify(mt->surf.logical_level0_px.height, level) &&
1358              depth == level_depth &&
1359              MAX2(image->NumSamples, 1) == mt->surf.samples;
1360    }
1361
1362    int level_depth = mt->level[level].depth;
1363    if (mt->num_samples > 1) {
1364       switch (mt->msaa_layout) {
1365       case INTEL_MSAA_LAYOUT_NONE:
1366       case INTEL_MSAA_LAYOUT_IMS:
1367          break;
1368       case INTEL_MSAA_LAYOUT_UMS:
1369       case INTEL_MSAA_LAYOUT_CMS:
1370          level_depth /= mt->num_samples;
1371          break;
1372       }
1373    }
1374
1375    /* Test image dimensions against the base level image adjusted for
1376     * minification.  This will also catch images not present in the
1377     * tree, changed targets, etc.
1378     */
1379    if (width != minify(mt->logical_width0, level - mt->first_level) ||
1380        height != minify(mt->logical_height0, level - mt->first_level) ||
1381        depth != level_depth) {
1382       return false;
1383    }
1384
1385    if (image->NumSamples != mt->num_samples)
1386       return false;
1387
1388    return true;
1389 }
1390
1391
1392 void
1393 intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
1394                              GLuint level,
1395                              GLuint x, GLuint y, GLuint d)
1396 {
1397    mt->level[level].depth = d;
1398    mt->level[level].level_x = x;
1399    mt->level[level].level_y = y;
1400
1401    DBG("%s level %d, depth %d, offset %d,%d\n", __func__,
1402        level, d, x, y);
1403
1404    assert(mt->level[level].slice);
1405
1406    mt->level[level].slice[0].x_offset = mt->level[level].level_x;
1407    mt->level[level].slice[0].y_offset = mt->level[level].level_y;
1408 }
1409
1410
1411 void
1412 intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
1413                                GLuint level, GLuint img,
1414                                GLuint x, GLuint y)
1415 {
1416    if (img == 0 && level == 0)
1417       assert(x == 0 && y == 0);
1418
1419    assert(img < mt->level[level].depth);
1420
1421    mt->level[level].slice[img].x_offset = mt->level[level].level_x + x;
1422    mt->level[level].slice[img].y_offset = mt->level[level].level_y + y;
1423
1424    DBG("%s level %d img %d pos %d,%d\n",
1425        __func__, level, img,
1426        mt->level[level].slice[img].x_offset,
1427        mt->level[level].slice[img].y_offset);
1428 }
1429
1430 void
1431 intel_miptree_get_image_offset(const struct intel_mipmap_tree *mt,
1432                                GLuint level, GLuint slice,
1433                                GLuint *x, GLuint *y)
1434 {
1435    if (mt->surf.size > 0) {
1436       uint32_t x_offset_sa, y_offset_sa;
1437
1438       /* Given level is relative to level zero while the miptree may be
1439        * represent just a subset of all levels starting from 'first_level'.
1440        */
1441       assert(level >= mt->first_level);
1442       level -= mt->first_level;
1443
1444       const unsigned z = mt->surf.dim == ISL_SURF_DIM_3D ? slice : 0;
1445       slice = mt->surf.dim == ISL_SURF_DIM_3D ? 0 : slice;
1446       isl_surf_get_image_offset_sa(&mt->surf, level, slice, z,
1447                                    &x_offset_sa, &y_offset_sa);
1448
1449       *x = x_offset_sa;
1450       *y = y_offset_sa;
1451       return;
1452    }
1453
1454    assert(slice < mt->level[level].depth);
1455
1456    *x = mt->level[level].slice[slice].x_offset;
1457    *y = mt->level[level].slice[slice].y_offset;
1458 }
1459
1460
1461 /**
1462  * This function computes the tile_w (in bytes) and tile_h (in rows) of
1463  * different tiling patterns. If the BO is untiled, tile_w is set to cpp
1464  * and tile_h is set to 1.
1465  */
1466 void
1467 intel_get_tile_dims(uint32_t tiling, uint32_t cpp,
1468                     uint32_t *tile_w, uint32_t *tile_h)
1469 {
1470    switch (tiling) {
1471    case I915_TILING_X:
1472       *tile_w = 512;
1473       *tile_h = 8;
1474       break;
1475    case I915_TILING_Y:
1476       *tile_w = 128;
1477       *tile_h = 32;
1478       break;
1479    case I915_TILING_NONE:
1480       *tile_w = cpp;
1481       *tile_h = 1;
1482       break;
1483    default:
1484       unreachable("not reached");
1485    }
1486 }
1487
1488
/**
 * Compute masks that select the bits of an X and Y coordinate giving the
 * position inside a tile.  The X mask is derived from the tile width divided
 * by \c cpp.  For an untiled BO both masks come out as 0.
 */
void
intel_get_tile_masks(uint32_t tiling, uint32_t cpp,
                     uint32_t *mask_x, uint32_t *mask_y)
{
   uint32_t tile_w_bytes, tile_h;

   intel_get_tile_dims(tiling, cpp, &tile_w_bytes, &tile_h);

   /* Tile width expressed in cpp-sized units rather than bytes. */
   const uint32_t tile_w_units = tile_w_bytes / cpp;

   *mask_x = tile_w_units - 1;
   *mask_y = tile_h - 1;
}
1505
1506 /**
1507  * Compute the offset (in bytes) from the start of the BO to the given x
1508  * and y coordinate.  For tiled BOs, caller must ensure that x and y are
1509  * multiples of the tile size.
1510  */
1511 uint32_t
1512 intel_miptree_get_aligned_offset(const struct intel_mipmap_tree *mt,
1513                                  uint32_t x, uint32_t y)
1514 {
1515    int cpp = mt->cpp;
1516    uint32_t pitch = mt->pitch;
1517    uint32_t tiling = mt->tiling;
1518
1519    switch (tiling) {
1520    default:
1521       unreachable("not reached");
1522    case I915_TILING_NONE:
1523       return y * pitch + x * cpp;
1524    case I915_TILING_X:
1525       assert((x % (512 / cpp)) == 0);
1526       assert((y % 8) == 0);
1527       return y * pitch + x / (512 / cpp) * 4096;
1528    case I915_TILING_Y:
1529       assert((x % (128 / cpp)) == 0);
1530       assert((y % 32) == 0);
1531       return y * pitch + x / (128 / cpp) * 4096;
1532    }
1533 }
1534
1535 /**
1536  * Rendering with tiled buffers requires that the base address of the buffer
1537  * be aligned to a page boundary.  For renderbuffers, and sometimes with
1538  * textures, we may want the surface to point at a texture image level that
1539  * isn't at a page boundary.
1540  *
1541  * This function returns an appropriately-aligned base offset
1542  * according to the tiling restrictions, plus any required x/y offset
1543  * from there.
1544  */
1545 uint32_t
1546 intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt,
1547                                GLuint level, GLuint slice,
1548                                uint32_t *tile_x,
1549                                uint32_t *tile_y)
1550 {
1551    uint32_t x, y;
1552    uint32_t mask_x, mask_y;
1553
1554    intel_get_tile_masks(mt->tiling, mt->cpp, &mask_x, &mask_y);
1555    intel_miptree_get_image_offset(mt, level, slice, &x, &y);
1556
1557    *tile_x = x & mask_x;
1558    *tile_y = y & mask_y;
1559
1560    return intel_miptree_get_aligned_offset(mt, x & ~mask_x, y & ~mask_y);
1561 }
1562
1563 static void
1564 intel_miptree_copy_slice_sw(struct brw_context *brw,
1565                             struct intel_mipmap_tree *src_mt,
1566                             unsigned src_level, unsigned src_layer,
1567                             struct intel_mipmap_tree *dst_mt,
1568                             unsigned dst_level, unsigned dst_layer,
1569                             unsigned width, unsigned height)
1570 {
1571    void *src, *dst;
1572    ptrdiff_t src_stride, dst_stride;
1573    const unsigned cpp = dst_mt->surf.size > 0 ?
1574       (isl_format_get_layout(dst_mt->surf.format)->bpb / 8) : dst_mt->cpp;
1575
1576    intel_miptree_map(brw, src_mt,
1577                      src_level, src_layer,
1578                      0, 0,
1579                      width, height,
1580                      GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
1581                      &src, &src_stride);
1582
1583    intel_miptree_map(brw, dst_mt,
1584                      dst_level, dst_layer,
1585                      0, 0,
1586                      width, height,
1587                      GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
1588                      BRW_MAP_DIRECT_BIT,
1589                      &dst, &dst_stride);
1590
1591    DBG("sw blit %s mt %p %p/%"PRIdPTR" -> %s mt %p %p/%"PRIdPTR" (%dx%d)\n",
1592        _mesa_get_format_name(src_mt->format),
1593        src_mt, src, src_stride,
1594        _mesa_get_format_name(dst_mt->format),
1595        dst_mt, dst, dst_stride,
1596        width, height);
1597
1598    int row_size = cpp * width;
1599    if (src_stride == row_size &&
1600        dst_stride == row_size) {
1601       memcpy(dst, src, row_size * height);
1602    } else {
1603       for (int i = 0; i < height; i++) {
1604          memcpy(dst, src, row_size);
1605          dst += dst_stride;
1606          src += src_stride;
1607       }
1608    }
1609
1610    intel_miptree_unmap(brw, dst_mt, dst_level, dst_layer);
1611    intel_miptree_unmap(brw, src_mt, src_level, src_layer);
1612
1613    /* Don't forget to copy the stencil data over, too.  We could have skipped
1614     * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
1615     * shuffling the two data sources in/out of temporary storage instead of
1616     * the direct mapping we get this way.
1617     */
1618    if (dst_mt->stencil_mt) {
1619       assert(src_mt->stencil_mt);
1620       intel_miptree_copy_slice_sw(brw,
1621                                   src_mt->stencil_mt, src_level, src_layer,
1622                                   dst_mt->stencil_mt, dst_level, dst_layer,
1623                                   width, height);
1624    }
1625 }
1626
1627 void
1628 intel_miptree_copy_slice(struct brw_context *brw,
1629                          struct intel_mipmap_tree *src_mt,
1630                          unsigned src_level, unsigned src_layer,
1631                          struct intel_mipmap_tree *dst_mt,
1632                          unsigned dst_level, unsigned dst_layer)
1633
1634 {
1635    mesa_format format = src_mt->format;
1636    uint32_t width, height;
1637
1638    if (src_mt->surf.size > 0) {
1639       width = minify(src_mt->surf.phys_level0_sa.width,
1640                      src_level - src_mt->first_level);
1641       height = minify(src_mt->surf.phys_level0_sa.height,
1642                       src_level - src_mt->first_level);
1643
1644       if (src_mt->surf.dim == ISL_SURF_DIM_3D)
1645          assert(src_layer < minify(src_mt->surf.phys_level0_sa.depth,
1646                                    src_level - src_mt->first_level));
1647       else
1648          assert(src_layer < src_mt->surf.phys_level0_sa.array_len);
1649    } else {
1650       width = minify(src_mt->physical_width0,
1651                      src_level - src_mt->first_level);
1652       height = minify(src_mt->physical_height0,
1653                       src_level - src_mt->first_level);
1654       assert(src_layer < src_mt->level[src_level].depth);
1655    }
1656
1657    assert(src_mt->format == dst_mt->format);
1658
1659    if (dst_mt->compressed) {
1660       unsigned int i, j;
1661       _mesa_get_format_block_size(dst_mt->format, &i, &j);
1662       height = ALIGN_NPOT(height, j) / j;
1663       width = ALIGN_NPOT(width, i) / i;
1664    }
1665
1666    /* If it's a packed depth/stencil buffer with separate stencil, the blit
1667     * below won't apply since we can't do the depth's Y tiling or the
1668     * stencil's W tiling in the blitter.
1669     */
1670    if (src_mt->stencil_mt) {
1671       intel_miptree_copy_slice_sw(brw,
1672                                   src_mt, src_level, src_layer,
1673                                   dst_mt, dst_level, dst_layer,
1674                                   width, height);
1675       return;
1676    }
1677
1678    uint32_t dst_x, dst_y, src_x, src_y;
1679    intel_miptree_get_image_offset(dst_mt, dst_level, dst_layer,
1680                                   &dst_x, &dst_y);
1681    intel_miptree_get_image_offset(src_mt, src_level, src_layer,
1682                                   &src_x, &src_y);
1683
1684    DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
1685        _mesa_get_format_name(src_mt->format),
1686        src_mt, src_x, src_y, src_mt->pitch,
1687        _mesa_get_format_name(dst_mt->format),
1688        dst_mt, dst_x, dst_y, dst_mt->pitch,
1689        width, height);
1690
1691    if (!intel_miptree_blit(brw,
1692                            src_mt, src_level, src_layer, 0, 0, false,
1693                            dst_mt, dst_level, dst_layer, 0, 0, false,
1694                            width, height, GL_COPY)) {
1695       perf_debug("miptree validate blit for %s failed\n",
1696                  _mesa_get_format_name(format));
1697
1698       intel_miptree_copy_slice_sw(brw,
1699                                   src_mt, src_level, src_layer,
1700                                   dst_mt, dst_level, dst_layer,
1701                                   width, height);
1702    }
1703 }
1704
1705 /**
1706  * Copies the image's current data to the given miptree, and associates that
1707  * miptree with the image.
1708  *
1709  * If \c invalidate is true, then the actual image data does not need to be
1710  * copied, but the image still needs to be associated to the new miptree (this
1711  * is set to true if we're about to clear the image).
1712  */
1713 void
1714 intel_miptree_copy_teximage(struct brw_context *brw,
1715                             struct intel_texture_image *intelImage,
1716                             struct intel_mipmap_tree *dst_mt,
1717                             bool invalidate)
1718 {
1719    struct intel_mipmap_tree *src_mt = intelImage->mt;
1720    struct intel_texture_object *intel_obj =
1721       intel_texture_object(intelImage->base.Base.TexObject);
1722    int level = intelImage->base.Base.Level;
1723    const unsigned face = intelImage->base.Base.Face;
1724    unsigned start_layer, end_layer;
1725
1726    if (intel_obj->base.Target == GL_TEXTURE_1D_ARRAY) {
1727       assert(face == 0);
1728       assert(intelImage->base.Base.Height);
1729       start_layer = 0;
1730       end_layer = intelImage->base.Base.Height - 1;
1731    } else if (face > 0) {
1732       start_layer = face;
1733       end_layer = face;
1734    } else {
1735       assert(intelImage->base.Base.Depth);
1736       start_layer = 0;
1737       end_layer = intelImage->base.Base.Depth - 1;
1738    }
1739
1740    if (!invalidate) {
1741       for (unsigned i = start_layer; i <= end_layer; i++) {
1742          intel_miptree_copy_slice(brw,
1743                                   src_mt, level, i,
1744                                   dst_mt, level, i);
1745       }
1746    }
1747
1748    intel_miptree_reference(&intelImage->mt, dst_mt);
1749    intel_obj->needs_validate = true;
1750 }
1751
1752 static void
1753 intel_miptree_init_mcs(struct brw_context *brw,
1754                        struct intel_mipmap_tree *mt,
1755                        int init_value)
1756 {
1757    assert(mt->mcs_buf != NULL);
1758
1759    /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
1760     *
1761     *     When MCS buffer is enabled and bound to MSRT, it is required that it
1762     *     is cleared prior to any rendering.
1763     *
1764     * Since we don't use the MCS buffer for any purpose other than rendering,
1765     * it makes sense to just clear it immediately upon allocation.
1766     *
1767     * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
1768     */
1769    void *map = brw_bo_map(brw, mt->mcs_buf->bo, MAP_WRITE);
1770    if (unlikely(map == NULL)) {
1771       fprintf(stderr, "Failed to map mcs buffer into GTT\n");
1772       brw_bo_unreference(mt->mcs_buf->bo);
1773       free(mt->mcs_buf);
1774       return;
1775    }
1776    void *data = map;
1777    memset(data, init_value, mt->mcs_buf->size);
1778    brw_bo_unmap(mt->mcs_buf->bo);
1779 }
1780
1781 static struct intel_miptree_aux_buffer *
1782 intel_alloc_aux_buffer(struct brw_context *brw,
1783                        const char *name,
1784                        const struct isl_surf *aux_surf,
1785                        uint32_t alloc_flags,
1786                        struct intel_mipmap_tree *mt)
1787 {
1788    struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
1789    if (!buf)
1790       return false;
1791
1792    buf->size = aux_surf->size;
1793    buf->pitch = aux_surf->row_pitch;
1794    buf->qpitch = isl_surf_get_array_pitch_sa_rows(aux_surf);
1795
1796    /* ISL has stricter set of alignment rules then the drm allocator.
1797     * Therefore one can pass the ISL dimensions in terms of bytes instead of
1798     * trying to recalculate based on different format block sizes.
1799     */
1800    buf->bo = brw_bo_alloc_tiled(brw->bufmgr, name, buf->size,
1801                                 I915_TILING_Y, buf->pitch, alloc_flags);
1802    if (!buf->bo) {
1803       free(buf);
1804       return NULL;
1805    }
1806
1807    buf->surf = *aux_surf;
1808
1809    return buf;
1810 }
1811
1812 static bool
1813 intel_miptree_alloc_mcs(struct brw_context *brw,
1814                         struct intel_mipmap_tree *mt,
1815                         GLuint num_samples)
1816 {
1817    assert(brw->gen >= 7); /* MCS only used on Gen7+ */
1818    assert(mt->mcs_buf == NULL);
1819    assert(mt->aux_usage == ISL_AUX_USAGE_MCS);
1820
1821    /* Multisampled miptrees are only supported for single level. */
1822    assert(mt->first_level == 0);
1823    enum isl_aux_state **aux_state =
1824       create_aux_state_map(mt, ISL_AUX_STATE_CLEAR);
1825    if (!aux_state)
1826       return false;
1827
1828    struct isl_surf temp_main_surf;
1829    struct isl_surf temp_mcs_surf;
1830
1831    /* Create first an ISL presentation for the main color surface and let ISL
1832     * calculate equivalent MCS surface against it.
1833     */
1834    intel_miptree_get_isl_surf(brw, mt, &temp_main_surf);
1835    MAYBE_UNUSED bool ok =
1836       isl_surf_get_mcs_surf(&brw->isl_dev, &temp_main_surf, &temp_mcs_surf);
1837    assert(ok);
1838
1839    /* Buffer needs to be initialised requiring the buffer to be immediately
1840     * mapped to cpu space for writing. Therefore do not use the gpu access
1841     * flag which can cause an unnecessary delay if the backing pages happened
1842     * to be just used by the GPU.
1843     */
1844    const uint32_t alloc_flags = 0;
1845    mt->mcs_buf = intel_alloc_aux_buffer(brw, "mcs-miptree",
1846                                         &temp_mcs_surf, alloc_flags, mt);
1847    if (!mt->mcs_buf) {
1848       free(aux_state);
1849       return false;
1850    }
1851
1852    mt->aux_state = aux_state;
1853
1854    intel_miptree_init_mcs(brw, mt, 0xFF);
1855
1856    return true;
1857 }
1858
1859 bool
1860 intel_miptree_alloc_ccs(struct brw_context *brw,
1861                         struct intel_mipmap_tree *mt)
1862 {
1863    assert(mt->mcs_buf == NULL);
1864    assert(mt->aux_usage == ISL_AUX_USAGE_CCS_E ||
1865           mt->aux_usage == ISL_AUX_USAGE_CCS_D);
1866
1867    struct isl_surf temp_main_surf;
1868    struct isl_surf temp_ccs_surf;
1869
1870    /* Create first an ISL presentation for the main color surface and let ISL
1871     * calculate equivalent CCS surface against it.
1872     */
1873    intel_miptree_get_isl_surf(brw, mt, &temp_main_surf);
1874    if (!isl_surf_get_ccs_surf(&brw->isl_dev, &temp_main_surf, &temp_ccs_surf))
1875       return false;
1876
1877    assert(temp_ccs_surf.size &&
1878           (temp_ccs_surf.size % temp_ccs_surf.row_pitch == 0));
1879
1880    enum isl_aux_state **aux_state =
1881       create_aux_state_map(mt, ISL_AUX_STATE_PASS_THROUGH);
1882    if (!aux_state)
1883       return false;
1884
1885    /* In case of compression mcs buffer needs to be initialised requiring the
1886     * buffer to be immediately mapped to cpu space for writing. Therefore do
1887     * not use the gpu access flag which can cause an unnecessary delay if the
1888     * backing pages happened to be just used by the GPU.
1889     */
1890    const uint32_t alloc_flags =
1891       mt->aux_usage == ISL_AUX_USAGE_CCS_E ? 0 : BO_ALLOC_FOR_RENDER;
1892    mt->mcs_buf = intel_alloc_aux_buffer(brw, "ccs-miptree",
1893                                         &temp_ccs_surf, alloc_flags, mt);
1894    if (!mt->mcs_buf) {
1895       free(aux_state);
1896       return false;
1897    }
1898
1899    mt->aux_state = aux_state;
1900
1901    /* From Gen9 onwards single-sampled (non-msrt) auxiliary buffers are
1902     * used for lossless compression which requires similar initialisation
1903     * as multi-sample compression.
1904     */
1905    if (mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
1906       /* Hardware sets the auxiliary buffer to all zeroes when it does full
1907        * resolve. Initialize it accordingly in case the first renderer is
1908        * cpu (or other none compression aware party).
1909        *
1910        * This is also explicitly stated in the spec (MCS Buffer for Render
1911        * Target(s)):
1912        *   "If Software wants to enable Color Compression without Fast clear,
1913        *    Software needs to initialize MCS with zeros."
1914        */
1915       intel_miptree_init_mcs(brw, mt, 0);
1916       mt->msaa_layout = INTEL_MSAA_LAYOUT_CMS;
1917    }
1918
1919    return true;
1920 }
1921
1922 /**
1923  * Helper for intel_miptree_alloc_hiz() that sets
1924  * \c mt->level[level].has_hiz. Return true if and only if
1925  * \c has_hiz was set.
1926  */
1927 static bool
1928 intel_miptree_level_enable_hiz(struct brw_context *brw,
1929                                struct intel_mipmap_tree *mt,
1930                                uint32_t level)
1931 {
1932    assert(mt->hiz_buf);
1933
1934    if (brw->gen >= 8 || brw->is_haswell) {
1935       uint32_t width = minify(mt->physical_width0, level);
1936       uint32_t height = minify(mt->physical_height0, level);
1937
1938       /* Disable HiZ for LOD > 0 unless the width is 8 aligned
1939        * and the height is 4 aligned. This allows our HiZ support
1940        * to fulfill Haswell restrictions for HiZ ops. For LOD == 0,
1941        * we can grow the width & height to allow the HiZ op to
1942        * force the proper size alignments.
1943        */
1944       if (level > 0 && ((width & 7) || (height & 3))) {
1945          DBG("mt %p level %d: HiZ DISABLED\n", mt, level);
1946          return false;
1947       }
1948    }
1949
1950    DBG("mt %p level %d: HiZ enabled\n", mt, level);
1951    mt->level[level].has_hiz = true;
1952    return true;
1953 }
1954
1955 bool
1956 intel_miptree_alloc_hiz(struct brw_context *brw,
1957                         struct intel_mipmap_tree *mt)
1958 {
1959    assert(mt->hiz_buf == NULL);
1960    assert(mt->aux_usage == ISL_AUX_USAGE_HIZ);
1961
1962    enum isl_aux_state **aux_state =
1963       create_aux_state_map(mt, ISL_AUX_STATE_AUX_INVALID);
1964    if (!aux_state)
1965       return false;
1966
1967    struct isl_surf temp_main_surf;
1968    struct isl_surf temp_hiz_surf;
1969
1970    intel_miptree_get_isl_surf(brw, mt, &temp_main_surf);
1971    MAYBE_UNUSED bool ok =
1972       isl_surf_get_hiz_surf(&brw->isl_dev, &temp_main_surf, &temp_hiz_surf);
1973    assert(ok);
1974
1975    const uint32_t alloc_flags = BO_ALLOC_FOR_RENDER;
1976    mt->hiz_buf = intel_alloc_aux_buffer(brw, "hiz-miptree",
1977                                         &temp_hiz_surf, alloc_flags, mt);
1978
1979    if (!mt->hiz_buf) {
1980       free(aux_state);
1981       return false;
1982    }
1983
1984    for (unsigned level = mt->first_level; level <= mt->last_level; ++level)
1985       intel_miptree_level_enable_hiz(brw, mt, level);
1986
1987    mt->aux_state = aux_state;
1988
1989    return true;
1990 }
1991
1992
1993 /**
1994  * Allocate the initial aux surface for a miptree based on mt->aux_usage
1995  *
1996  * Since MCS and CCS_E can compress more than just clear color, we create the
1997  * auxiliary surfaces up-front.  CCS_D, on the other hand, can only compress
1998  * clear color so we wait until an actual fast-clear to allocate it.
1999  */
2000 static bool
2001 intel_miptree_alloc_aux(struct brw_context *brw,
2002                         struct intel_mipmap_tree *mt)
2003 {
2004    switch (mt->aux_usage) {
2005    case ISL_AUX_USAGE_NONE:
2006       return true;
2007
2008    case ISL_AUX_USAGE_HIZ:
2009       /* HiZ gets allocated elsewhere for no good reason. */
2010       return true;
2011
2012    case ISL_AUX_USAGE_MCS:
2013       assert(_mesa_is_format_color_format(mt->format));
2014       assert(mt->num_samples > 1);
2015       if (!intel_miptree_alloc_mcs(brw, mt, mt->num_samples))
2016          return false;
2017       return true;
2018
2019    case ISL_AUX_USAGE_CCS_D:
2020       /* Since CCS_D can only compress clear color so we wait until an actual
2021        * fast-clear to allocate it.
2022        */
2023       return true;
2024
2025    case ISL_AUX_USAGE_CCS_E:
2026       assert(_mesa_is_format_color_format(mt->format));
2027       assert(mt->num_samples <= 1);
2028       if (!intel_miptree_alloc_ccs(brw, mt))
2029          return false;
2030       return true;
2031    }
2032
2033    unreachable("Invalid aux usage");
2034 }
2035
2036
2037 /**
2038  * Can the miptree sample using the hiz buffer?
2039  */
2040 bool
2041 intel_miptree_sample_with_hiz(struct brw_context *brw,
2042                               struct intel_mipmap_tree *mt)
2043 {
2044    /* It's unclear how well supported sampling from the hiz buffer is on GEN8,
2045     * so keep things conservative for now and never enable it unless we're SKL+.
2046     */
2047    if (brw->gen < 9) {
2048       return false;
2049    }
2050
2051    if (!mt->hiz_buf) {
2052       return false;
2053    }
2054
2055    /* It seems the hardware won't fallback to the depth buffer if some of the
2056     * mipmap levels aren't available in the HiZ buffer. So we need all levels
2057     * of the texture to be HiZ enabled.
2058     */
2059    for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
2060       if (!intel_miptree_level_has_hiz(mt, level))
2061          return false;
2062    }
2063
2064    /* If compressed multisampling is enabled, then we use it for the auxiliary
2065     * buffer instead.
2066     *
2067     * From the BDW PRM (Volume 2d: Command Reference: Structures
2068     *                   RENDER_SURFACE_STATE.AuxiliarySurfaceMode):
2069     *
2070     *  "If this field is set to AUX_HIZ, Number of Multisamples must be
2071     *   MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D.
2072     *
2073     * There is no such blurb for 1D textures, but there is sufficient evidence
2074     * that this is broken on SKL+.
2075     */
2076    return (mt->num_samples <= 1 &&
2077            mt->target != GL_TEXTURE_3D &&
2078            mt->target != GL_TEXTURE_1D /* gen9+ restriction */);
2079 }
2080
2081 /**
2082  * Does the miptree slice have hiz enabled?
2083  */
2084 bool
2085 intel_miptree_level_has_hiz(const struct intel_mipmap_tree *mt, uint32_t level)
2086 {
2087    intel_miptree_check_level_layer(mt, level, 0);
2088    return mt->level[level].has_hiz;
2089 }
2090
2091 bool
2092 intel_miptree_has_color_unresolved(const struct intel_mipmap_tree *mt,
2093                                    unsigned start_level, unsigned num_levels,
2094                                    unsigned start_layer, unsigned num_layers)
2095 {
2096    assert(_mesa_is_format_color_format(mt->format));
2097
2098    if (!mt->mcs_buf)
2099       return false;
2100
2101    /* Clamp the level range to fit the miptree */
2102    assert(start_level + num_levels >= start_level);
2103    const uint32_t last_level =
2104       MIN2(mt->last_level, start_level + num_levels - 1);
2105    start_level = MAX2(mt->first_level, start_level);
2106    num_levels = last_level - start_level + 1;
2107
2108    for (uint32_t level = start_level; level <= last_level; level++) {
2109       const uint32_t level_layers = MIN2(num_layers, mt->level[level].depth);
2110       for (unsigned a = 0; a < level_layers; a++) {
2111          enum isl_aux_state aux_state =
2112             intel_miptree_get_aux_state(mt, level, start_layer + a);
2113          assert(aux_state != ISL_AUX_STATE_AUX_INVALID);
2114          if (aux_state != ISL_AUX_STATE_PASS_THROUGH)
2115             return true;
2116       }
2117    }
2118
2119    return false;
2120 }
2121
2122 static void
2123 intel_miptree_check_color_resolve(const struct brw_context *brw,
2124                                   const struct intel_mipmap_tree *mt,
2125                                   unsigned level, unsigned layer)
2126 {
2127
2128    if (!mt->mcs_buf)
2129       return;
2130
2131    /* Fast color clear is supported for mipmapped surfaces only on Gen8+. */
2132    assert(brw->gen >= 8 ||
2133           (level == 0 && mt->first_level == 0 && mt->last_level == 0));
2134
2135    /* Compression of arrayed msaa surfaces is supported. */
2136    if (mt->num_samples > 1)
2137       return;
2138
2139    /* Fast color clear is supported for non-msaa arrays only on Gen8+. */
2140    assert(brw->gen >= 8 || (layer == 0 && mt->logical_depth0 == 1));
2141
2142    (void)level;
2143    (void)layer;
2144 }
2145
2146 static enum blorp_fast_clear_op
2147 get_ccs_d_resolve_op(enum isl_aux_state aux_state,
2148                      bool ccs_supported, bool fast_clear_supported)
2149 {
2150    assert(ccs_supported == fast_clear_supported);
2151
2152    switch (aux_state) {
2153    case ISL_AUX_STATE_CLEAR:
2154    case ISL_AUX_STATE_COMPRESSED_CLEAR:
2155       if (!ccs_supported)
2156          return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
2157       else
2158          return BLORP_FAST_CLEAR_OP_NONE;
2159
2160    case ISL_AUX_STATE_PASS_THROUGH:
2161       return BLORP_FAST_CLEAR_OP_NONE;
2162
2163    case ISL_AUX_STATE_RESOLVED:
2164    case ISL_AUX_STATE_AUX_INVALID:
2165    case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2166       break;
2167    }
2168
2169    unreachable("Invalid aux state for CCS_D");
2170 }
2171
2172 static enum blorp_fast_clear_op
2173 get_ccs_e_resolve_op(enum isl_aux_state aux_state,
2174                      bool ccs_supported, bool fast_clear_supported)
2175 {
2176    switch (aux_state) {
2177    case ISL_AUX_STATE_CLEAR:
2178    case ISL_AUX_STATE_COMPRESSED_CLEAR:
2179       if (!ccs_supported)
2180          return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
2181       else if (!fast_clear_supported)
2182          return BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
2183       else
2184          return BLORP_FAST_CLEAR_OP_NONE;
2185
2186    case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2187       if (!ccs_supported)
2188          return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
2189       else
2190          return BLORP_FAST_CLEAR_OP_NONE;
2191
2192    case ISL_AUX_STATE_PASS_THROUGH:
2193       return BLORP_FAST_CLEAR_OP_NONE;
2194
2195    case ISL_AUX_STATE_RESOLVED:
2196    case ISL_AUX_STATE_AUX_INVALID:
2197       break;
2198    }
2199
2200    unreachable("Invalid aux state for CCS_E");
2201 }
2202
2203 static void
2204 intel_miptree_prepare_ccs_access(struct brw_context *brw,
2205                                  struct intel_mipmap_tree *mt,
2206                                  uint32_t level, uint32_t layer,
2207                                  bool aux_supported,
2208                                  bool fast_clear_supported)
2209 {
2210    enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, layer);
2211
2212    enum blorp_fast_clear_op resolve_op;
2213    if (intel_miptree_is_lossless_compressed(brw, mt)) {
2214       resolve_op = get_ccs_e_resolve_op(aux_state, aux_supported,
2215                                         fast_clear_supported);
2216    } else {
2217       resolve_op = get_ccs_d_resolve_op(aux_state, aux_supported,
2218                                         fast_clear_supported);
2219    }
2220
2221    if (resolve_op != BLORP_FAST_CLEAR_OP_NONE) {
2222       intel_miptree_check_color_resolve(brw, mt, level, layer);
2223       brw_blorp_resolve_color(brw, mt, level, layer, resolve_op);
2224
2225       switch (resolve_op) {
2226       case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
2227          /* The CCS full resolve operation destroys the CCS and sets it to the
2228           * pass-through state.  (You can also think of this as being both a
2229           * resolve and an ambiguate in one operation.)
2230           */
2231          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2232                                      ISL_AUX_STATE_PASS_THROUGH);
2233          break;
2234
2235       case BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL:
2236          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2237                                      ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2238          break;
2239
2240       default:
2241          unreachable("Invalid resolve op");
2242       }
2243    }
2244 }
2245
2246 static void
2247 intel_miptree_finish_ccs_write(struct brw_context *brw,
2248                                struct intel_mipmap_tree *mt,
2249                                uint32_t level, uint32_t layer,
2250                                bool written_with_ccs)
2251 {
2252    enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, layer);
2253
2254    if (intel_miptree_is_lossless_compressed(brw, mt)) {
2255       switch (aux_state) {
2256       case ISL_AUX_STATE_CLEAR:
2257          assert(written_with_ccs);
2258          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2259                                      ISL_AUX_STATE_COMPRESSED_CLEAR);
2260          break;
2261
2262       case ISL_AUX_STATE_COMPRESSED_CLEAR:
2263       case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2264          assert(written_with_ccs);
2265          break; /* Nothing to do */
2266
2267       case ISL_AUX_STATE_PASS_THROUGH:
2268          if (written_with_ccs) {
2269             intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2270                                         ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2271          } else {
2272             /* Nothing to do */
2273          }
2274          break;
2275
2276       case ISL_AUX_STATE_RESOLVED:
2277       case ISL_AUX_STATE_AUX_INVALID:
2278          unreachable("Invalid aux state for CCS_E");
2279       }
2280    } else {
2281       /* CCS_D is a bit simpler */
2282       switch (aux_state) {
2283       case ISL_AUX_STATE_CLEAR:
2284          assert(written_with_ccs);
2285          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2286                                      ISL_AUX_STATE_COMPRESSED_CLEAR);
2287          break;
2288
2289       case ISL_AUX_STATE_COMPRESSED_CLEAR:
2290          assert(written_with_ccs);
2291          break; /* Nothing to do */
2292
2293       case ISL_AUX_STATE_PASS_THROUGH:
2294          /* Nothing to do */
2295          break;
2296
2297       case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2298       case ISL_AUX_STATE_RESOLVED:
2299       case ISL_AUX_STATE_AUX_INVALID:
2300          unreachable("Invalid aux state for CCS_D");
2301       }
2302    }
2303 }
2304
2305 static void
2306 intel_miptree_finish_mcs_write(struct brw_context *brw,
2307                                struct intel_mipmap_tree *mt,
2308                                uint32_t level, uint32_t layer,
2309                                bool written_with_aux)
2310 {
2311    switch (intel_miptree_get_aux_state(mt, level, layer)) {
2312    case ISL_AUX_STATE_CLEAR:
2313       assert(written_with_aux);
2314       intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2315                                   ISL_AUX_STATE_COMPRESSED_CLEAR);
2316       break;
2317
2318    case ISL_AUX_STATE_COMPRESSED_CLEAR:
2319       assert(written_with_aux);
2320       break; /* Nothing to do */
2321
2322    case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2323    case ISL_AUX_STATE_RESOLVED:
2324    case ISL_AUX_STATE_PASS_THROUGH:
2325    case ISL_AUX_STATE_AUX_INVALID:
2326       unreachable("Invalid aux state for MCS");
2327    }
2328 }
2329
2330 static void
2331 intel_miptree_prepare_hiz_access(struct brw_context *brw,
2332                                  struct intel_mipmap_tree *mt,
2333                                  uint32_t level, uint32_t layer,
2334                                  bool hiz_supported, bool fast_clear_supported)
2335 {
2336    enum blorp_hiz_op hiz_op = BLORP_HIZ_OP_NONE;
2337    switch (intel_miptree_get_aux_state(mt, level, layer)) {
2338    case ISL_AUX_STATE_CLEAR:
2339    case ISL_AUX_STATE_COMPRESSED_CLEAR:
2340       if (!hiz_supported || !fast_clear_supported)
2341          hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
2342       break;
2343
2344    case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2345       if (!hiz_supported)
2346          hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
2347       break;
2348
2349    case ISL_AUX_STATE_PASS_THROUGH:
2350    case ISL_AUX_STATE_RESOLVED:
2351       break;
2352
2353    case ISL_AUX_STATE_AUX_INVALID:
2354       if (hiz_supported)
2355          hiz_op = BLORP_HIZ_OP_HIZ_RESOLVE;
2356       break;
2357    }
2358
2359    if (hiz_op != BLORP_HIZ_OP_NONE) {
2360       intel_hiz_exec(brw, mt, level, layer, 1, hiz_op);
2361
2362       switch (hiz_op) {
2363       case BLORP_HIZ_OP_DEPTH_RESOLVE:
2364          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2365                                      ISL_AUX_STATE_RESOLVED);
2366          break;
2367
2368       case BLORP_HIZ_OP_HIZ_RESOLVE:
2369          /* The HiZ resolve operation is actually an ambiguate */
2370          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2371                                      ISL_AUX_STATE_PASS_THROUGH);
2372          break;
2373
2374       default:
2375          unreachable("Invalid HiZ op");
2376       }
2377    }
2378 }
2379
2380 static void
2381 intel_miptree_finish_hiz_write(struct brw_context *brw,
2382                                struct intel_mipmap_tree *mt,
2383                                uint32_t level, uint32_t layer,
2384                                bool written_with_hiz)
2385 {
2386    switch (intel_miptree_get_aux_state(mt, level, layer)) {
2387    case ISL_AUX_STATE_CLEAR:
2388       assert(written_with_hiz);
2389       intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2390                                   ISL_AUX_STATE_COMPRESSED_CLEAR);
2391       break;
2392
2393    case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2394    case ISL_AUX_STATE_COMPRESSED_CLEAR:
2395       assert(written_with_hiz);
2396       break; /* Nothing to do */
2397
2398    case ISL_AUX_STATE_RESOLVED:
2399       if (written_with_hiz) {
2400          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2401                                      ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2402       } else {
2403          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2404                                      ISL_AUX_STATE_AUX_INVALID);
2405       }
2406       break;
2407
2408    case ISL_AUX_STATE_PASS_THROUGH:
2409       if (written_with_hiz) {
2410          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2411                                      ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2412       }
2413       break;
2414
2415    case ISL_AUX_STATE_AUX_INVALID:
2416       assert(!written_with_hiz);
2417       break;
2418    }
2419 }
2420
2421 static inline uint32_t
2422 miptree_level_range_length(const struct intel_mipmap_tree *mt,
2423                            uint32_t start_level, uint32_t num_levels)
2424 {
2425    assert(start_level >= mt->first_level);
2426    assert(start_level <= mt->last_level);
2427
2428    if (num_levels == INTEL_REMAINING_LAYERS)
2429       num_levels = mt->last_level - start_level + 1;
2430    /* Check for overflow */
2431    assert(start_level + num_levels >= start_level);
2432    assert(start_level + num_levels <= mt->last_level + 1);
2433
2434    return num_levels;
2435 }
2436
2437 static inline uint32_t
2438 miptree_layer_range_length(const struct intel_mipmap_tree *mt, uint32_t level,
2439                            uint32_t start_layer, uint32_t num_layers)
2440 {
2441    assert(level <= mt->last_level);
2442    uint32_t total_num_layers;
2443
2444    if (mt->surf.size > 0)
2445       total_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ?
2446          minify(mt->surf.phys_level0_sa.depth, level) :
2447          mt->surf.phys_level0_sa.array_len;
2448    else
2449       total_num_layers = mt->level[level].depth;
2450
2451    assert(start_layer < total_num_layers);
2452    if (num_layers == INTEL_REMAINING_LAYERS)
2453       num_layers = total_num_layers - start_layer;
2454    /* Check for overflow */
2455    assert(start_layer + num_layers >= start_layer);
2456    assert(start_layer + num_layers <= total_num_layers);
2457
2458    return num_layers;
2459 }
2460
2461 void
2462 intel_miptree_prepare_access(struct brw_context *brw,
2463                              struct intel_mipmap_tree *mt,
2464                              uint32_t start_level, uint32_t num_levels,
2465                              uint32_t start_layer, uint32_t num_layers,
2466                              bool aux_supported, bool fast_clear_supported)
2467 {
2468    num_levels = miptree_level_range_length(mt, start_level, num_levels);
2469
2470    if (_mesa_is_format_color_format(mt->format)) {
2471       if (!mt->mcs_buf)
2472          return;
2473
2474       if (mt->num_samples > 1) {
2475          /* Nothing to do for MSAA */
2476          assert(aux_supported && fast_clear_supported);
2477       } else {
2478          for (uint32_t l = 0; l < num_levels; l++) {
2479             const uint32_t level = start_level + l;
2480             const uint32_t level_layers =
2481                miptree_layer_range_length(mt, level, start_layer, num_layers);
2482             for (uint32_t a = 0; a < level_layers; a++) {
2483                intel_miptree_prepare_ccs_access(brw, mt, level,
2484                                                 start_layer + a, aux_supported,
2485                                                 fast_clear_supported);
2486             }
2487          }
2488       }
2489    } else if (mt->format == MESA_FORMAT_S_UINT8) {
2490       /* Nothing to do for stencil */
2491    } else {
2492       if (!mt->hiz_buf)
2493          return;
2494
2495       for (uint32_t l = 0; l < num_levels; l++) {
2496          const uint32_t level = start_level + l;
2497          if (!intel_miptree_level_has_hiz(mt, level))
2498             continue;
2499
2500          const uint32_t level_layers =
2501             miptree_layer_range_length(mt, level, start_layer, num_layers);
2502          for (uint32_t a = 0; a < level_layers; a++) {
2503             intel_miptree_prepare_hiz_access(brw, mt, level, start_layer + a,
2504                                              aux_supported,
2505                                              fast_clear_supported);
2506          }
2507       }
2508    }
2509 }
2510
2511 void
2512 intel_miptree_finish_write(struct brw_context *brw,
2513                            struct intel_mipmap_tree *mt, uint32_t level,
2514                            uint32_t start_layer, uint32_t num_layers,
2515                            bool written_with_aux)
2516 {
2517    num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers);
2518
2519    if (_mesa_is_format_color_format(mt->format)) {
2520       if (!mt->mcs_buf)
2521          return;
2522
2523       if (mt->num_samples > 1) {
2524          for (uint32_t a = 0; a < num_layers; a++) {
2525             intel_miptree_finish_mcs_write(brw, mt, level, start_layer + a,
2526                                            written_with_aux);
2527          }
2528       } else {
2529          for (uint32_t a = 0; a < num_layers; a++) {
2530             intel_miptree_finish_ccs_write(brw, mt, level, start_layer + a,
2531                                            written_with_aux);
2532          }
2533       }
2534    } else if (mt->format == MESA_FORMAT_S_UINT8) {
2535       /* Nothing to do for stencil */
2536    } else {
2537       if (!intel_miptree_level_has_hiz(mt, level))
2538          return;
2539
2540       for (uint32_t a = 0; a < num_layers; a++) {
2541          intel_miptree_finish_hiz_write(brw, mt, level, start_layer + a,
2542                                         written_with_aux);
2543       }
2544    }
2545 }
2546
2547 enum isl_aux_state
2548 intel_miptree_get_aux_state(const struct intel_mipmap_tree *mt,
2549                             uint32_t level, uint32_t layer)
2550 {
2551    intel_miptree_check_level_layer(mt, level, layer);
2552
2553    if (_mesa_is_format_color_format(mt->format)) {
2554       assert(mt->mcs_buf != NULL);
2555       assert(mt->num_samples <= 1 || mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS);
2556    } else if (mt->format == MESA_FORMAT_S_UINT8) {
2557       unreachable("Cannot get aux state for stencil");
2558    } else {
2559       assert(intel_miptree_level_has_hiz(mt, level));
2560    }
2561
2562    return mt->aux_state[level][layer];
2563 }
2564
2565 void
2566 intel_miptree_set_aux_state(struct brw_context *brw,
2567                             struct intel_mipmap_tree *mt, uint32_t level,
2568                             uint32_t start_layer, uint32_t num_layers,
2569                             enum isl_aux_state aux_state)
2570 {
2571    num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers);
2572
2573    if (_mesa_is_format_color_format(mt->format)) {
2574       assert(mt->mcs_buf != NULL);
2575       assert(mt->num_samples <= 1 || mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS);
2576    } else if (mt->format == MESA_FORMAT_S_UINT8) {
2577       unreachable("Cannot get aux state for stencil");
2578    } else {
2579       assert(intel_miptree_level_has_hiz(mt, level));
2580    }
2581
2582    for (unsigned a = 0; a < num_layers; a++)
2583       mt->aux_state[level][start_layer + a] = aux_state;
2584 }
2585
2586 /* On Gen9 color buffers may be compressed by the hardware (lossless
2587  * compression). There are, however, format restrictions and care needs to be
2588  * taken that the sampler engine is capable for re-interpreting a buffer with
2589  * format different the buffer was originally written with.
2590  *
2591  * For example, SRGB formats are not compressible and the sampler engine isn't
2592  * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying
2593  * color buffer needs to be resolved so that the sampling surface can be
2594  * sampled as non-compressed (i.e., without the auxiliary MCS buffer being
2595  * set).
2596  */
2597 static bool
2598 can_texture_with_ccs(struct brw_context *brw,
2599                      struct intel_mipmap_tree *mt,
2600                      mesa_format view_format)
2601 {
2602    if (!intel_miptree_is_lossless_compressed(brw, mt))
2603       return false;
2604
2605    enum isl_format isl_mt_format = brw_isl_format_for_mesa_format(mt->format);
2606    enum isl_format isl_view_format = brw_isl_format_for_mesa_format(view_format);
2607
2608    if (!isl_formats_are_ccs_e_compatible(&brw->screen->devinfo,
2609                                          isl_mt_format, isl_view_format)) {
2610       perf_debug("Incompatible sampling format (%s) for rbc (%s)\n",
2611                  _mesa_get_format_name(view_format),
2612                  _mesa_get_format_name(mt->format));
2613       return false;
2614    }
2615
2616    return true;
2617 }
2618
2619 static void
2620 intel_miptree_prepare_texture_slices(struct brw_context *brw,
2621                                      struct intel_mipmap_tree *mt,
2622                                      mesa_format view_format,
2623                                      uint32_t start_level, uint32_t num_levels,
2624                                      uint32_t start_layer, uint32_t num_layers,
2625                                      bool *aux_supported_out)
2626 {
2627    bool aux_supported, clear_supported;
2628    if (_mesa_is_format_color_format(mt->format)) {
2629       if (mt->num_samples > 1) {
2630          aux_supported = clear_supported = true;
2631       } else {
2632          aux_supported = can_texture_with_ccs(brw, mt, view_format);
2633
2634          /* Clear color is specified as ints or floats and the conversion is
2635           * done by the sampler.  If we have a texture view, we would have to
2636           * perform the clear color conversion manually.  Just disable clear
2637           * color.
2638           */
2639          clear_supported = aux_supported && (mt->format == view_format);
2640       }
2641    } else if (mt->format == MESA_FORMAT_S_UINT8) {
2642       aux_supported = clear_supported = false;
2643    } else {
2644       aux_supported = clear_supported = intel_miptree_sample_with_hiz(brw, mt);
2645    }
2646
2647    intel_miptree_prepare_access(brw, mt, start_level, num_levels,
2648                                 start_layer, num_layers,
2649                                 aux_supported, clear_supported);
2650    if (aux_supported_out)
2651       *aux_supported_out = aux_supported;
2652 }
2653
2654 void
2655 intel_miptree_prepare_texture(struct brw_context *brw,
2656                               struct intel_mipmap_tree *mt,
2657                               mesa_format view_format,
2658                               bool *aux_supported_out)
2659 {
2660    intel_miptree_prepare_texture_slices(brw, mt, view_format,
2661                                         0, INTEL_REMAINING_LEVELS,
2662                                         0, INTEL_REMAINING_LAYERS,
2663                                         aux_supported_out);
2664 }
2665
2666 void
2667 intel_miptree_prepare_image(struct brw_context *brw,
2668                             struct intel_mipmap_tree *mt)
2669 {
2670    /* The data port doesn't understand any compression */
2671    intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
2672                                 0, INTEL_REMAINING_LAYERS, false, false);
2673 }
2674
2675 void
2676 intel_miptree_prepare_fb_fetch(struct brw_context *brw,
2677                                struct intel_mipmap_tree *mt, uint32_t level,
2678                                uint32_t start_layer, uint32_t num_layers)
2679 {
2680    intel_miptree_prepare_texture_slices(brw, mt, mt->format, level, 1,
2681                                         start_layer, num_layers, NULL);
2682 }
2683
2684 void
2685 intel_miptree_prepare_render(struct brw_context *brw,
2686                              struct intel_mipmap_tree *mt, uint32_t level,
2687                              uint32_t start_layer, uint32_t layer_count,
2688                              bool srgb_enabled)
2689 {
2690    /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of
2691     * the single-sampled color renderbuffers because the CCS buffer isn't
2692     * supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is
2693     * enabled because otherwise the surface state will be programmed with
2694     * the linear equivalent format anyway.
2695     */
2696    if (brw->gen == 9 && srgb_enabled && mt->num_samples <= 1 &&
2697        _mesa_get_srgb_format_linear(mt->format) != mt->format) {
2698
2699       /* Lossless compression is not supported for SRGB formats, it
2700        * should be impossible to get here with such surfaces.
2701        */
2702       assert(!intel_miptree_is_lossless_compressed(brw, mt));
2703       intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
2704                                    false, false);
2705    }
2706 }
2707
2708 void
2709 intel_miptree_finish_render(struct brw_context *brw,
2710                             struct intel_mipmap_tree *mt, uint32_t level,
2711                             uint32_t start_layer, uint32_t layer_count)
2712 {
2713    assert(_mesa_is_format_color_format(mt->format));
2714    intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
2715                               mt->mcs_buf != NULL);
2716 }
2717
2718 void
2719 intel_miptree_prepare_depth(struct brw_context *brw,
2720                             struct intel_mipmap_tree *mt, uint32_t level,
2721                             uint32_t start_layer, uint32_t layer_count)
2722 {
2723    intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
2724                                 mt->hiz_buf != NULL, mt->hiz_buf != NULL);
2725 }
2726
2727 void
2728 intel_miptree_finish_depth(struct brw_context *brw,
2729                            struct intel_mipmap_tree *mt, uint32_t level,
2730                            uint32_t start_layer, uint32_t layer_count,
2731                            bool depth_written)
2732 {
2733    if (depth_written) {
2734       intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
2735                                  mt->hiz_buf != NULL);
2736    }
2737 }
2738
2739 /**
2740  * Make it possible to share the BO backing the given miptree with another
2741  * process or another miptree.
2742  *
2743  * Fast color clears are unsafe with shared buffers, so we need to resolve and
2744  * then discard the MCS buffer, if present.  We also set the no_ccs flag to
2745  * ensure that no MCS buffer gets allocated in the future.
2746  *
2747  * HiZ is similarly unsafe with shared buffers.
2748  */
2749 void
2750 intel_miptree_make_shareable(struct brw_context *brw,
2751                              struct intel_mipmap_tree *mt)
2752 {
2753    /* MCS buffers are also used for multisample buffers, but we can't resolve
2754     * away a multisample MCS buffer because it's an integral part of how the
2755     * pixel data is stored.  Fortunately this code path should never be
2756     * reached for multisample buffers.
2757     */
2758    assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE || mt->num_samples <= 1);
2759
2760    intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
2761                                 0, INTEL_REMAINING_LAYERS, false, false);
2762
2763    if (mt->mcs_buf) {
2764       brw_bo_unreference(mt->mcs_buf->bo);
2765       free(mt->mcs_buf);
2766       mt->mcs_buf = NULL;
2767
2768       /* Any pending MCS/CCS operations are no longer needed. Trying to
2769        * execute any will likely crash due to the missing aux buffer. So let's
2770        * delete all pending ops.
2771        */
2772       free(mt->aux_state);
2773       mt->aux_state = NULL;
2774    }
2775
2776    if (mt->hiz_buf) {
2777       intel_miptree_aux_buffer_free(mt->hiz_buf);
2778       mt->hiz_buf = NULL;
2779
2780       for (uint32_t l = mt->first_level; l <= mt->last_level; ++l) {
2781          mt->level[l].has_hiz = false;
2782       }
2783
2784       /* Any pending HiZ operations are no longer needed. Trying to execute
2785        * any will likely crash due to the missing aux buffer. So let's delete
2786        * all pending ops.
2787        */
2788       free(mt->aux_state);
2789       mt->aux_state = NULL;
2790    }
2791
2792    mt->aux_usage = ISL_AUX_USAGE_NONE;
2793 }
2794
2795
/**
 * \brief Get pointer offset into stencil buffer.
 *
 * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
 * must decode the tile's layout in software.
 *
 * See
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
 *     Format.
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
 *
 * Even though the returned offset is always positive, the return type is
 * signed due to
 *    commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
 *    mesa: Fix return type of  _mesa_get_format_bytes() (#37351)
 *
 * \param stride    pitch value; one row of tiles occupies 64 * stride bytes.
 * \param x         X coordinate of the byte within the surface.
 * \param y         Y coordinate of the byte within the surface.
 * \param swizzled  apply the bit-6 address swizzle adjustment.
 * \return byte offset of (x, y) from the start of the tiled surface.
 */
static intptr_t
intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
{
   const uintptr_t tile_size = 4096;
   const uint32_t tile_width = 64;
   const uint32_t tile_height = 64;

   /* Computed in pointer width so that a large stride cannot wrap the
    * intermediate 32-bit products before they are widened.
    */
   const uintptr_t row_size = (uintptr_t) 64 * stride;

   const uintptr_t tile_x = x / tile_width;
   const uintptr_t tile_y = y / tile_height;

   /* The byte's address relative to the tile's base address. */
   const uint32_t byte_x = x % tile_width;
   const uint32_t byte_y = y % tile_height;

   uintptr_t u = tile_y * row_size
               + tile_x * tile_size
               + 512 * (byte_x / 8)
               +  64 * (byte_y / 8)
               +  32 * ((byte_y / 4) % 2)
               +  16 * ((byte_x / 4) % 2)
               +   8 * ((byte_y / 2) % 2)
               +   4 * ((byte_x / 2) % 2)
               +   2 * (byte_y % 2)
               +   1 * (byte_x % 2);

   if (swizzled) {
      /* adjust for bit6 swizzling */
      if (((byte_x / 8) % 2) == 1) {
         if (((byte_y / 8) % 2) == 0) {
            u += 64;
         } else {
            u -= 64;
         }
      }
   }

   return (intptr_t) u;
}
2851
2852 void
2853 intel_miptree_updownsample(struct brw_context *brw,
2854                            struct intel_mipmap_tree *src,
2855                            struct intel_mipmap_tree *dst)
2856 {
2857    unsigned src_w, src_h, dst_w, dst_h;
2858
2859    if (src->surf.size > 0) {
2860       src_w = src->surf.logical_level0_px.width;
2861       src_h = src->surf.logical_level0_px.height;
2862    } else {
2863       src_w = src->logical_width0;
2864       src_h = src->logical_height0;
2865    }
2866
2867    if (dst->surf.size > 0) {
2868       dst_w = dst->surf.logical_level0_px.width;
2869       dst_h = dst->surf.logical_level0_px.height;
2870    } else {
2871       dst_w = dst->logical_width0;
2872       dst_h = dst->logical_height0;
2873    }
2874
2875    brw_blorp_blit_miptrees(brw,
2876                            src, 0 /* level */, 0 /* layer */,
2877                            src->format, SWIZZLE_XYZW,
2878                            dst, 0 /* level */, 0 /* layer */, dst->format,
2879                            0, 0, src_w, src_h,
2880                            0, 0, dst_w, dst_h,
2881                            GL_NEAREST, false, false /*mirror x, y*/,
2882                            false, false);
2883
2884    if (src->stencil_mt) {
2885       if (src->stencil_mt->surf.size > 0) {
2886          src_w = src->stencil_mt->surf.logical_level0_px.width;
2887          src_h = src->stencil_mt->surf.logical_level0_px.height;
2888       } else {
2889          src_w = src->stencil_mt->logical_width0;
2890          src_h = src->stencil_mt->logical_height0;
2891       }
2892
2893       if (dst->stencil_mt->surf.size > 0) {
2894          dst_w = dst->stencil_mt->surf.logical_level0_px.width;
2895          dst_h = dst->stencil_mt->surf.logical_level0_px.height;
2896       } else {
2897          dst_w = dst->stencil_mt->logical_width0;
2898          dst_h = dst->stencil_mt->logical_height0;
2899       }
2900
2901       brw_blorp_blit_miptrees(brw,
2902                               src->stencil_mt, 0 /* level */, 0 /* layer */,
2903                               src->stencil_mt->format, SWIZZLE_XYZW,
2904                               dst->stencil_mt, 0 /* level */, 0 /* layer */,
2905                               dst->stencil_mt->format,
2906                               0, 0, src_w, src_h,
2907                               0, 0, dst_w, dst_h,
2908                               GL_NEAREST, false, false /*mirror x, y*/,
2909                               false, false /* decode/encode srgb */);
2910    }
2911 }
2912
2913 void
2914 intel_update_r8stencil(struct brw_context *brw,
2915                        struct intel_mipmap_tree *mt)
2916 {
2917    assert(brw->gen >= 7);
2918    struct intel_mipmap_tree *src =
2919       mt->format == MESA_FORMAT_S_UINT8 ? mt : mt->stencil_mt;
2920    if (!src || brw->gen >= 8 || !src->r8stencil_needs_update)
2921       return;
2922
2923    if (!mt->r8stencil_mt) {
2924       const uint32_t r8stencil_flags =
2925          MIPTREE_LAYOUT_ACCELERATED_UPLOAD | MIPTREE_LAYOUT_TILING_Y |
2926          MIPTREE_LAYOUT_DISABLE_AUX;
2927       assert(brw->gen > 6); /* Handle MIPTREE_LAYOUT_GEN6_HIZ_STENCIL */
2928       mt->r8stencil_mt = intel_miptree_create(brw,
2929                                               src->target,
2930                                               MESA_FORMAT_R_UINT8,
2931                                               src->first_level,
2932                                               src->last_level,
2933                                               src->logical_width0,
2934                                               src->logical_height0,
2935                                               src->logical_depth0,
2936                                               src->num_samples,
2937                                               r8stencil_flags);
2938       assert(mt->r8stencil_mt);
2939    }
2940
2941    struct intel_mipmap_tree *dst = mt->r8stencil_mt;
2942
2943    for (int level = src->first_level; level <= src->last_level; level++) {
2944       const unsigned depth = src->level[level].depth;
2945
2946       for (unsigned layer = 0; layer < depth; layer++) {
2947          brw_blorp_copy_miptrees(brw,
2948                                  src, level, layer,
2949                                  dst, level, layer,
2950                                  0, 0, 0, 0,
2951                                  minify(src->logical_width0, level),
2952                                  minify(src->logical_height0, level));
2953       }
2954    }
2955
2956    brw_render_cache_set_check_flush(brw, dst->bo);
2957    src->r8stencil_needs_update = false;
2958 }
2959
2960 static void *
2961 intel_miptree_map_raw(struct brw_context *brw,
2962                       struct intel_mipmap_tree *mt,
2963                       GLbitfield mode)
2964 {
2965    struct brw_bo *bo = mt->bo;
2966
2967    if (brw_batch_references(&brw->batch, bo))
2968       intel_batchbuffer_flush(brw);
2969
2970    return brw_bo_map(brw, bo, mode);
2971 }
2972
2973 static void
2974 intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
2975 {
2976    brw_bo_unmap(mt->bo);
2977 }
2978
2979 static void
2980 intel_miptree_map_gtt(struct brw_context *brw,
2981                       struct intel_mipmap_tree *mt,
2982                       struct intel_miptree_map *map,
2983                       unsigned int level, unsigned int slice)
2984 {
2985    unsigned int bw, bh;
2986    void *base;
2987    unsigned int image_x, image_y;
2988    intptr_t x = map->x;
2989    intptr_t y = map->y;
2990
2991    /* For compressed formats, the stride is the number of bytes per
2992     * row of blocks.  intel_miptree_get_image_offset() already does
2993     * the divide.
2994     */
2995    _mesa_get_format_block_size(mt->format, &bw, &bh);
2996    assert(y % bh == 0);
2997    assert(x % bw == 0);
2998    y /= bh;
2999    x /= bw;
3000
3001    base = intel_miptree_map_raw(brw, mt, map->mode);
3002
3003    if (base == NULL)
3004       map->ptr = NULL;
3005    else {
3006       base += mt->offset;
3007
3008       /* Note that in the case of cube maps, the caller must have passed the
3009        * slice number referencing the face.
3010       */
3011       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3012       x += image_x;
3013       y += image_y;
3014
3015       map->stride = mt->pitch;
3016       map->ptr = base + y * map->stride + x * mt->cpp;
3017    }
3018
3019    DBG("%s: %d,%d %dx%d from mt %p (%s) "
3020        "%"PRIiPTR",%"PRIiPTR" = %p/%d\n", __func__,
3021        map->x, map->y, map->w, map->h,
3022        mt, _mesa_get_format_name(mt->format),
3023        x, y, map->ptr, map->stride);
3024 }
3025
/**
 * Undo intel_miptree_map_gtt().  The GTT path hands out a pointer straight
 * into the BO mapping, so unmapping the BO is all that is required.
 */
static void
intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
{
   intel_miptree_unmap_raw(mt);
}
3031
3032 static void
3033 intel_miptree_map_blit(struct brw_context *brw,
3034                        struct intel_mipmap_tree *mt,
3035                        struct intel_miptree_map *map,
3036                        unsigned int level, unsigned int slice)
3037 {
3038    map->linear_mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
3039                                          /* first_level */ 0,
3040                                          /* last_level */ 0,
3041                                          map->w, map->h, 1,
3042                                          /* samples */ 0,
3043                                          MIPTREE_LAYOUT_TILING_NONE);
3044
3045    if (!map->linear_mt) {
3046       fprintf(stderr, "Failed to allocate blit temporary\n");
3047       goto fail;
3048    }
3049    map->stride = map->linear_mt->pitch;
3050
3051    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
3052     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
3053     * invalidate is set, since we'll be writing the whole rectangle from our
3054     * temporary buffer back out.
3055     */
3056    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
3057       if (!intel_miptree_copy(brw,
3058                               mt, level, slice, map->x, map->y,
3059                               map->linear_mt, 0, 0, 0, 0,
3060                               map->w, map->h)) {
3061          fprintf(stderr, "Failed to blit\n");
3062          goto fail;
3063       }
3064    }
3065
3066    map->ptr = intel_miptree_map_raw(brw, map->linear_mt, map->mode);
3067
3068    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
3069        map->x, map->y, map->w, map->h,
3070        mt, _mesa_get_format_name(mt->format),
3071        level, slice, map->ptr, map->stride);
3072
3073    return;
3074
3075 fail:
3076    intel_miptree_release(&map->linear_mt);
3077    map->ptr = NULL;
3078    map->stride = 0;
3079 }
3080
3081 static void
3082 intel_miptree_unmap_blit(struct brw_context *brw,
3083                          struct intel_mipmap_tree *mt,
3084                          struct intel_miptree_map *map,
3085                          unsigned int level,
3086                          unsigned int slice)
3087 {
3088    struct gl_context *ctx = &brw->ctx;
3089
3090    intel_miptree_unmap_raw(map->linear_mt);
3091
3092    if (map->mode & GL_MAP_WRITE_BIT) {
3093       bool ok = intel_miptree_copy(brw,
3094                                    map->linear_mt, 0, 0, 0, 0,
3095                                    mt, level, slice, map->x, map->y,
3096                                    map->w, map->h);
3097       WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
3098    }
3099
3100    intel_miptree_release(&map->linear_mt);
3101 }
3102
3103 /**
3104  * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
3105  */
3106 #if defined(USE_SSE41)
3107 static void
3108 intel_miptree_map_movntdqa(struct brw_context *brw,
3109                            struct intel_mipmap_tree *mt,
3110                            struct intel_miptree_map *map,
3111                            unsigned int level, unsigned int slice)
3112 {
3113    assert(map->mode & GL_MAP_READ_BIT);
3114    assert(!(map->mode & GL_MAP_WRITE_BIT));
3115
3116    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
3117        map->x, map->y, map->w, map->h,
3118        mt, _mesa_get_format_name(mt->format),
3119        level, slice, map->ptr, map->stride);
3120
3121    /* Map the original image */
3122    uint32_t image_x;
3123    uint32_t image_y;
3124    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3125    image_x += map->x;
3126    image_y += map->y;
3127
3128    void *src = intel_miptree_map_raw(brw, mt, map->mode);
3129    if (!src)
3130       return;
3131
3132    src += mt->offset;
3133
3134    src += image_y * mt->pitch;
3135    src += image_x * mt->cpp;
3136
3137    /* Due to the pixel offsets for the particular image being mapped, our
3138     * src pointer may not be 16-byte aligned.  However, if the pitch is
3139     * divisible by 16, then the amount by which it's misaligned will remain
3140     * consistent from row to row.
3141     */
3142    assert((mt->pitch % 16) == 0);
3143    const int misalignment = ((uintptr_t) src) & 15;
3144
3145    /* Create an untiled temporary buffer for the mapping. */
3146    const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w);
3147
3148    map->stride = ALIGN(misalignment + width_bytes, 16);
3149
3150    map->buffer = _mesa_align_malloc(map->stride * map->h, 16);
3151    /* Offset the destination so it has the same misalignment as src. */
3152    map->ptr = map->buffer + misalignment;
3153
3154    assert((((uintptr_t) map->ptr) & 15) == misalignment);
3155
3156    for (uint32_t y = 0; y < map->h; y++) {
3157       void *dst_ptr = map->ptr + y * map->stride;
3158       void *src_ptr = src + y * mt->pitch;
3159
3160       _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
3161    }
3162
3163    intel_miptree_unmap_raw(mt);
3164 }
3165
3166 static void
3167 intel_miptree_unmap_movntdqa(struct brw_context *brw,
3168                              struct intel_mipmap_tree *mt,
3169                              struct intel_miptree_map *map,
3170                              unsigned int level,
3171                              unsigned int slice)
3172 {
3173    _mesa_align_free(map->buffer);
3174    map->buffer = NULL;
3175    map->ptr = NULL;
3176 }
3177 #endif
3178
3179 static void
3180 intel_miptree_map_s8(struct brw_context *brw,
3181                      struct intel_mipmap_tree *mt,
3182                      struct intel_miptree_map *map,
3183                      unsigned int level, unsigned int slice)
3184 {
3185    map->stride = map->w;
3186    map->buffer = map->ptr = malloc(map->stride * map->h);
3187    if (!map->buffer)
3188       return;
3189
3190    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
3191     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
3192     * invalidate is set, since we'll be writing the whole rectangle from our
3193     * temporary buffer back out.
3194     */
3195    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
3196       /* ISL uses a stencil pitch value that is expected by hardware whereas
3197        * traditional miptree uses half of that. Below the value gets supplied
3198        * to intel_offset_S8() which expects the legacy interpretation.
3199        */
3200       const unsigned pitch = mt->surf.size > 0 ?
3201                              mt->surf.row_pitch / 2 : mt->pitch;
3202       uint8_t *untiled_s8_map = map->ptr;
3203       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_READ_BIT);
3204       unsigned int image_x, image_y;
3205
3206       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3207
3208       for (uint32_t y = 0; y < map->h; y++) {
3209          for (uint32_t x = 0; x < map->w; x++) {
3210             ptrdiff_t offset = intel_offset_S8(pitch,
3211                                                x + image_x + map->x,
3212                                                y + image_y + map->y,
3213                                                brw->has_swizzling);
3214             untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
3215          }
3216       }
3217
3218       intel_miptree_unmap_raw(mt);
3219
3220       DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__,
3221           map->x, map->y, map->w, map->h,
3222           mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
3223    } else {
3224       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
3225           map->x, map->y, map->w, map->h,
3226           mt, map->ptr, map->stride);
3227    }
3228 }
3229
3230 static void
3231 intel_miptree_unmap_s8(struct brw_context *brw,
3232                        struct intel_mipmap_tree *mt,
3233                        struct intel_miptree_map *map,
3234                        unsigned int level,
3235                        unsigned int slice)
3236 {
3237    if (map->mode & GL_MAP_WRITE_BIT) {
3238       /* ISL uses a stencil pitch value that is expected by hardware whereas
3239        * traditional miptree uses half of that. Below the value gets supplied
3240        * to intel_offset_S8() which expects the legacy interpretation.
3241        */
3242       const unsigned pitch = mt->surf.size > 0 ?
3243                              mt->surf.row_pitch / 2: mt->pitch;
3244       unsigned int image_x, image_y;
3245       uint8_t *untiled_s8_map = map->ptr;
3246       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT);
3247
3248       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3249
3250       for (uint32_t y = 0; y < map->h; y++) {
3251          for (uint32_t x = 0; x < map->w; x++) {
3252             ptrdiff_t offset = intel_offset_S8(pitch,
3253                                                image_x + x + map->x,
3254                                                image_y + y + map->y,
3255                                                brw->has_swizzling);
3256             tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
3257          }
3258       }
3259
3260       intel_miptree_unmap_raw(mt);
3261    }
3262
3263    free(map->buffer);
3264 }
3265
3266 static void
3267 intel_miptree_map_etc(struct brw_context *brw,
3268                       struct intel_mipmap_tree *mt,
3269                       struct intel_miptree_map *map,
3270                       unsigned int level,
3271                       unsigned int slice)
3272 {
3273    assert(mt->etc_format != MESA_FORMAT_NONE);
3274    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
3275       assert(mt->format == MESA_FORMAT_R8G8B8X8_UNORM);
3276    }
3277
3278    assert(map->mode & GL_MAP_WRITE_BIT);
3279    assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
3280
3281    map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
3282    map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
3283                                                 map->w, map->h, 1));
3284    map->ptr = map->buffer;
3285 }
3286
3287 static void
3288 intel_miptree_unmap_etc(struct brw_context *brw,
3289                         struct intel_mipmap_tree *mt,
3290                         struct intel_miptree_map *map,
3291                         unsigned int level,
3292                         unsigned int slice)
3293 {
3294    uint32_t image_x;
3295    uint32_t image_y;
3296    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3297
3298    image_x += map->x;
3299    image_y += map->y;
3300
3301    uint8_t *dst = intel_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT)
3302                 + image_y * mt->pitch
3303                 + image_x * mt->cpp;
3304
3305    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
3306       _mesa_etc1_unpack_rgba8888(dst, mt->pitch,
3307                                  map->ptr, map->stride,
3308                                  map->w, map->h);
3309    else
3310       _mesa_unpack_etc2_format(dst, mt->pitch,
3311                                map->ptr, map->stride,
3312                                map->w, map->h, mt->etc_format);
3313
3314    intel_miptree_unmap_raw(mt);
3315    free(map->buffer);
3316 }
3317
3318 /**
3319  * Mapping function for packed depth/stencil miptrees backed by real separate
3320  * miptrees for depth and stencil.
3321  *
3322  * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
3323  * separate from the depth buffer.  Yet at the GL API level, we have to expose
3324  * packed depth/stencil textures and FBO attachments, and Mesa core expects to
3325  * be able to map that memory for texture storage and glReadPixels-type
3326  * operations.  We give Mesa core that access by mallocing a temporary and
3327  * copying the data between the actual backing store and the temporary.
3328  */
3329 static void
3330 intel_miptree_map_depthstencil(struct brw_context *brw,
3331                                struct intel_mipmap_tree *mt,
3332                                struct intel_miptree_map *map,
3333                                unsigned int level, unsigned int slice)
3334 {
3335    struct intel_mipmap_tree *z_mt = mt;
3336    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
3337    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
3338    int packed_bpp = map_z32f_x24s8 ? 8 : 4;
3339
3340    map->stride = map->w * packed_bpp;
3341    map->buffer = map->ptr = malloc(map->stride * map->h);
3342    if (!map->buffer)
3343       return;
3344
3345    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
3346     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
3347     * invalidate is set, since we'll be writing the whole rectangle from our
3348     * temporary buffer back out.
3349     */
3350    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
3351       /* ISL uses a stencil pitch value that is expected by hardware whereas
3352        * traditional miptree uses half of that. Below the value gets supplied
3353        * to intel_offset_S8() which expects the legacy interpretation.
3354        */
3355       const unsigned s_pitch = s_mt->surf.size > 0 ?
3356                                s_mt->surf.row_pitch / 2 : s_mt->pitch;
3357       uint32_t *packed_map = map->ptr;
3358       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_READ_BIT);
3359       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_READ_BIT);
3360       unsigned int s_image_x, s_image_y;
3361       unsigned int z_image_x, z_image_y;
3362
3363       intel_miptree_get_image_offset(s_mt, level, slice,
3364                                      &s_image_x, &s_image_y);
3365       intel_miptree_get_image_offset(z_mt, level, slice,
3366                                      &z_image_x, &z_image_y);
3367
3368       for (uint32_t y = 0; y < map->h; y++) {
3369          for (uint32_t x = 0; x < map->w; x++) {
3370             int map_x = map->x + x, map_y = map->y + y;
3371             ptrdiff_t s_offset = intel_offset_S8(s_pitch,
3372                                                  map_x + s_image_x,
3373                                                  map_y + s_image_y,
3374                                                  brw->has_swizzling);
3375             ptrdiff_t z_offset = ((map_y + z_image_y) *
3376                                   (z_mt->pitch / 4) +
3377                                   (map_x + z_image_x));
3378             uint8_t s = s_map[s_offset];
3379             uint32_t z = z_map[z_offset];
3380
3381             if (map_z32f_x24s8) {
3382                packed_map[(y * map->w + x) * 2 + 0] = z;
3383                packed_map[(y * map->w + x) * 2 + 1] = s;
3384             } else {
3385                packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
3386             }
3387          }
3388       }
3389
3390       intel_miptree_unmap_raw(s_mt);
3391       intel_miptree_unmap_raw(z_mt);
3392
3393       DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
3394           __func__,
3395           map->x, map->y, map->w, map->h,
3396           z_mt, map->x + z_image_x, map->y + z_image_y,
3397           s_mt, map->x + s_image_x, map->y + s_image_y,
3398           map->ptr, map->stride);
3399    } else {
3400       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
3401           map->x, map->y, map->w, map->h,
3402           mt, map->ptr, map->stride);
3403    }
3404 }
3405
3406 static void
3407 intel_miptree_unmap_depthstencil(struct brw_context *brw,
3408                                  struct intel_mipmap_tree *mt,
3409                                  struct intel_miptree_map *map,
3410                                  unsigned int level,
3411                                  unsigned int slice)
3412 {
3413    struct intel_mipmap_tree *z_mt = mt;
3414    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
3415    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
3416
3417    if (map->mode & GL_MAP_WRITE_BIT) {
3418       /* ISL uses a stencil pitch value that is expected by hardware whereas
3419        * traditional miptree uses half of that. Below the value gets supplied
3420        * to intel_offset_S8() which expects the legacy interpretation.
3421        */
3422       const unsigned s_pitch = s_mt->surf.size > 0 ?
3423                                s_mt->surf.row_pitch / 2 : s_mt->pitch;
3424       uint32_t *packed_map = map->ptr;
3425       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_WRITE_BIT);
3426       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_WRITE_BIT);
3427       unsigned int s_image_x, s_image_y;
3428       unsigned int z_image_x, z_image_y;
3429
3430       intel_miptree_get_image_offset(s_mt, level, slice,
3431                                      &s_image_x, &s_image_y);
3432       intel_miptree_get_image_offset(z_mt, level, slice,
3433                                      &z_image_x, &z_image_y);
3434
3435       for (uint32_t y = 0; y < map->h; y++) {
3436          for (uint32_t x = 0; x < map->w; x++) {
3437             ptrdiff_t s_offset = intel_offset_S8(s_pitch,
3438                                                  x + s_image_x + map->x,
3439                                                  y + s_image_y + map->y,
3440                                                  brw->has_swizzling);
3441             ptrdiff_t z_offset = ((y + z_image_y + map->y) *
3442                                   (z_mt->pitch / 4) +
3443                                   (x + z_image_x + map->x));
3444
3445             if (map_z32f_x24s8) {
3446                z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
3447                s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
3448             } else {
3449                uint32_t packed = packed_map[y * map->w + x];
3450                s_map[s_offset] = packed >> 24;
3451                z_map[z_offset] = packed;
3452             }
3453          }
3454       }
3455
3456       intel_miptree_unmap_raw(s_mt);
3457       intel_miptree_unmap_raw(z_mt);
3458
3459       DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
3460           __func__,
3461           map->x, map->y, map->w, map->h,
3462           z_mt, _mesa_get_format_name(z_mt->format),
3463           map->x + z_image_x, map->y + z_image_y,
3464           s_mt, map->x + s_image_x, map->y + s_image_y,
3465           map->ptr, map->stride);
3466    }
3467
3468    free(map->buffer);
3469 }
3470
3471 /**
3472  * Create and attach a map to the miptree at (level, slice). Return the
3473  * attached map.
3474  */
3475 static struct intel_miptree_map*
3476 intel_miptree_attach_map(struct intel_mipmap_tree *mt,
3477                          unsigned int level,
3478                          unsigned int slice,
3479                          unsigned int x,
3480                          unsigned int y,
3481                          unsigned int w,
3482                          unsigned int h,
3483                          GLbitfield mode)
3484 {
3485    struct intel_miptree_map *map = calloc(1, sizeof(*map));
3486
3487    if (!map)
3488       return NULL;
3489
3490    assert(mt->level[level].slice[slice].map == NULL);
3491    mt->level[level].slice[slice].map = map;
3492
3493    map->mode = mode;
3494    map->x = x;
3495    map->y = y;
3496    map->w = w;
3497    map->h = h;
3498
3499    return map;
3500 }
3501
3502 /**
3503  * Release the map at (level, slice).
3504  */
3505 static void
3506 intel_miptree_release_map(struct intel_mipmap_tree *mt,
3507                          unsigned int level,
3508                          unsigned int slice)
3509 {
3510    struct intel_miptree_map **map;
3511
3512    map = &mt->level[level].slice[slice].map;
3513    free(*map);
3514    *map = NULL;
3515 }
3516
3517 static bool
3518 can_blit_slice(struct intel_mipmap_tree *mt,
3519                unsigned int level, unsigned int slice)
3520 {
3521    /* See intel_miptree_blit() for details on the 32k pitch limit. */
3522    if (mt->pitch >= 32768)
3523       return false;
3524
3525    return true;
3526 }
3527
3528 static bool
3529 use_intel_mipree_map_blit(struct brw_context *brw,
3530                           struct intel_mipmap_tree *mt,
3531                           GLbitfield mode,
3532                           unsigned int level,
3533                           unsigned int slice)
3534 {
3535    if (brw->has_llc &&
3536       /* It's probably not worth swapping to the blit ring because of
3537        * all the overhead involved.
3538        */
3539        !(mode & GL_MAP_WRITE_BIT) &&
3540        !mt->compressed &&
3541        (mt->tiling == I915_TILING_X ||
3542         /* Prior to Sandybridge, the blitter can't handle Y tiling */
3543         (brw->gen >= 6 && mt->tiling == I915_TILING_Y) ||
3544         /* Fast copy blit on skl+ supports all tiling formats. */
3545         brw->gen >= 9) &&
3546        can_blit_slice(mt, level, slice))
3547       return true;
3548
3549    if (mt->tiling != I915_TILING_NONE &&
3550        mt->bo->size >= brw->max_gtt_map_object_size) {
3551       assert(can_blit_slice(mt, level, slice));
3552       return true;
3553    }
3554
3555    return false;
3556 }
3557
3558 /**
3559  * Parameter \a out_stride has type ptrdiff_t not because the buffer stride may
3560  * exceed 32 bits but to diminish the likelihood subtle bugs in pointer
3561  * arithmetic overflow.
3562  *
3563  * If you call this function and use \a out_stride, then you're doing pointer
3564  * arithmetic on \a out_ptr. The type of \a out_stride doesn't prevent all
3565  * bugs.  The caller must still take care to avoid 32-bit overflow errors in
3566  * all arithmetic expressions that contain buffer offsets and pixel sizes,
3567  * which usually have type uint32_t or GLuint.
3568  */
3569 void
3570 intel_miptree_map(struct brw_context *brw,
3571                   struct intel_mipmap_tree *mt,
3572                   unsigned int level,
3573                   unsigned int slice,
3574                   unsigned int x,
3575                   unsigned int y,
3576                   unsigned int w,
3577                   unsigned int h,
3578                   GLbitfield mode,
3579                   void **out_ptr,
3580                   ptrdiff_t *out_stride)
3581 {
3582    struct intel_miptree_map *map;
3583
3584    assert(mt->num_samples <= 1);
3585
3586    map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
3587    if (!map){
3588       *out_ptr = NULL;
3589       *out_stride = 0;
3590       return;
3591    }
3592
3593    intel_miptree_access_raw(brw, mt, level, slice,
3594                             map->mode & GL_MAP_WRITE_BIT);
3595
3596    if (mt->format == MESA_FORMAT_S_UINT8) {
3597       intel_miptree_map_s8(brw, mt, map, level, slice);
3598    } else if (mt->etc_format != MESA_FORMAT_NONE &&
3599               !(mode & BRW_MAP_DIRECT_BIT)) {
3600       intel_miptree_map_etc(brw, mt, map, level, slice);
3601    } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
3602       intel_miptree_map_depthstencil(brw, mt, map, level, slice);
3603    } else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) {
3604       intel_miptree_map_blit(brw, mt, map, level, slice);
3605 #if defined(USE_SSE41)
3606    } else if (!(mode & GL_MAP_WRITE_BIT) &&
3607               !mt->compressed && cpu_has_sse4_1 &&
3608               (mt->pitch % 16 == 0)) {
3609       intel_miptree_map_movntdqa(brw, mt, map, level, slice);
3610 #endif
3611    } else {
3612       intel_miptree_map_gtt(brw, mt, map, level, slice);
3613    }
3614
3615    *out_ptr = map->ptr;
3616    *out_stride = map->stride;
3617
3618    if (map->ptr == NULL)
3619       intel_miptree_release_map(mt, level, slice);
3620 }
3621
3622 void
3623 intel_miptree_unmap(struct brw_context *brw,
3624                     struct intel_mipmap_tree *mt,
3625                     unsigned int level,
3626                     unsigned int slice)
3627 {
3628    struct intel_miptree_map *map = mt->level[level].slice[slice].map;
3629
3630    assert(mt->num_samples <= 1);
3631
3632    if (!map)
3633       return;
3634
3635    DBG("%s: mt %p (%s) level %d slice %d\n", __func__,
3636        mt, _mesa_get_format_name(mt->format), level, slice);
3637
3638    if (mt->format == MESA_FORMAT_S_UINT8) {
3639       intel_miptree_unmap_s8(brw, mt, map, level, slice);
3640    } else if (mt->etc_format != MESA_FORMAT_NONE &&
3641               !(map->mode & BRW_MAP_DIRECT_BIT)) {
3642       intel_miptree_unmap_etc(brw, mt, map, level, slice);
3643    } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
3644       intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
3645    } else if (map->linear_mt) {
3646       intel_miptree_unmap_blit(brw, mt, map, level, slice);
3647 #if defined(USE_SSE41)
3648    } else if (map->buffer && cpu_has_sse4_1) {
3649       intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
3650 #endif
3651    } else {
3652       intel_miptree_unmap_gtt(mt);
3653    }
3654
3655    intel_miptree_release_map(mt, level, slice);
3656 }
3657
3658 enum isl_surf_dim
3659 get_isl_surf_dim(GLenum target)
3660 {
3661    switch (target) {
3662    case GL_TEXTURE_1D:
3663    case GL_TEXTURE_1D_ARRAY:
3664       return ISL_SURF_DIM_1D;
3665
3666    case GL_TEXTURE_2D:
3667    case GL_TEXTURE_2D_ARRAY:
3668    case GL_TEXTURE_RECTANGLE:
3669    case GL_TEXTURE_CUBE_MAP:
3670    case GL_TEXTURE_CUBE_MAP_ARRAY:
3671    case GL_TEXTURE_2D_MULTISAMPLE:
3672    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
3673    case GL_TEXTURE_EXTERNAL_OES:
3674       return ISL_SURF_DIM_2D;
3675
3676    case GL_TEXTURE_3D:
3677       return ISL_SURF_DIM_3D;
3678    }
3679
3680    unreachable("Invalid texture target");
3681 }
3682
3683 enum isl_dim_layout
3684 get_isl_dim_layout(const struct gen_device_info *devinfo, uint32_t tiling,
3685                    GLenum target, enum miptree_array_layout array_layout)
3686 {
3687    if (array_layout == GEN6_HIZ_STENCIL)
3688       return ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ;
3689
3690    switch (target) {
3691    case GL_TEXTURE_1D:
3692    case GL_TEXTURE_1D_ARRAY:
3693       return (devinfo->gen >= 9 && tiling == I915_TILING_NONE ?
3694               ISL_DIM_LAYOUT_GEN9_1D : ISL_DIM_LAYOUT_GEN4_2D);
3695
3696    case GL_TEXTURE_2D:
3697    case GL_TEXTURE_2D_ARRAY:
3698    case GL_TEXTURE_RECTANGLE:
3699    case GL_TEXTURE_2D_MULTISAMPLE:
3700    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
3701    case GL_TEXTURE_EXTERNAL_OES:
3702       return ISL_DIM_LAYOUT_GEN4_2D;
3703
3704    case GL_TEXTURE_CUBE_MAP:
3705    case GL_TEXTURE_CUBE_MAP_ARRAY:
3706       return (devinfo->gen == 4 ? ISL_DIM_LAYOUT_GEN4_3D :
3707               ISL_DIM_LAYOUT_GEN4_2D);
3708
3709    case GL_TEXTURE_3D:
3710       return (devinfo->gen >= 9 ?
3711               ISL_DIM_LAYOUT_GEN4_2D : ISL_DIM_LAYOUT_GEN4_3D);
3712    }
3713
3714    unreachable("Invalid texture target");
3715 }
3716
3717 enum isl_tiling
3718 intel_miptree_get_isl_tiling(const struct intel_mipmap_tree *mt)
3719 {
3720    if (mt->format == MESA_FORMAT_S_UINT8) {
3721       return ISL_TILING_W;
3722    } else {
3723       switch (mt->tiling) {
3724       case I915_TILING_NONE:
3725          return ISL_TILING_LINEAR;
3726       case I915_TILING_X:
3727          return ISL_TILING_X;
3728       case I915_TILING_Y:
3729             return ISL_TILING_Y0;
3730       default:
3731          unreachable("Invalid tiling mode");
3732       }
3733    }
3734 }
3735
3736 void
3737 intel_miptree_get_isl_surf(struct brw_context *brw,
3738                            const struct intel_mipmap_tree *mt,
3739                            struct isl_surf *surf)
3740 {
3741    surf->dim = get_isl_surf_dim(mt->target);
3742    surf->dim_layout = get_isl_dim_layout(&brw->screen->devinfo,
3743                                          mt->tiling, mt->target,
3744                                          mt->array_layout);
3745
3746    if (mt->num_samples > 1) {
3747       switch (mt->msaa_layout) {
3748       case INTEL_MSAA_LAYOUT_IMS:
3749          surf->msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED;
3750          break;
3751       case INTEL_MSAA_LAYOUT_UMS:
3752       case INTEL_MSAA_LAYOUT_CMS:
3753          surf->msaa_layout = ISL_MSAA_LAYOUT_ARRAY;
3754          break;
3755       default:
3756          unreachable("Invalid MSAA layout");
3757       }
3758    } else {
3759       surf->msaa_layout = ISL_MSAA_LAYOUT_NONE;
3760    }
3761
3762    surf->tiling = intel_miptree_get_isl_tiling(mt);
3763
3764    if (mt->format == MESA_FORMAT_S_UINT8) {
3765       /* The ISL definition of row_pitch matches the surface state pitch field
3766        * a bit better than intel_mipmap_tree.  In particular, ISL incorporates
3767        * the factor of 2 for W-tiling in row_pitch.
3768        */
3769       surf->row_pitch = 2 * mt->pitch;
3770    } else {
3771       surf->row_pitch = mt->pitch;
3772    }
3773
3774    surf->format = translate_tex_format(brw, mt->format, false);
3775
3776    if (brw->gen >= 9) {
3777       if (surf->dim == ISL_SURF_DIM_1D && surf->tiling == ISL_TILING_LINEAR) {
3778          /* For gen9 1-D surfaces, intel_mipmap_tree has a bogus alignment. */
3779          surf->image_alignment_el = isl_extent3d(64, 1, 1);
3780       } else {
3781          /* On gen9+, intel_mipmap_tree stores the horizontal and vertical
3782           * alignment in terms of surface elements like we want.
3783           */
3784          surf->image_alignment_el = isl_extent3d(mt->halign, mt->valign, 1);
3785       }
3786    } else {
3787       /* On earlier gens it's stored in pixels. */
3788       unsigned bw, bh;
3789       _mesa_get_format_block_size(mt->format, &bw, &bh);
3790       surf->image_alignment_el =
3791          isl_extent3d(mt->halign / bw, mt->valign / bh, 1);
3792    }
3793
3794    surf->logical_level0_px.width = mt->logical_width0;
3795    surf->logical_level0_px.height = mt->logical_height0;
3796    if (surf->dim == ISL_SURF_DIM_3D) {
3797       surf->logical_level0_px.depth = mt->logical_depth0;
3798       surf->logical_level0_px.array_len = 1;
3799    } else {
3800       surf->logical_level0_px.depth = 1;
3801       surf->logical_level0_px.array_len = mt->logical_depth0;
3802    }
3803
3804    surf->phys_level0_sa.width = mt->physical_width0;
3805    surf->phys_level0_sa.height = mt->physical_height0;
3806    if (surf->dim == ISL_SURF_DIM_3D) {
3807       surf->phys_level0_sa.depth = mt->physical_depth0;
3808       surf->phys_level0_sa.array_len = 1;
3809    } else {
3810       surf->phys_level0_sa.depth = 1;
3811       surf->phys_level0_sa.array_len = mt->physical_depth0;
3812    }
3813
3814    surf->levels = mt->last_level - mt->first_level + 1;
3815    surf->samples = MAX2(mt->num_samples, 1);
3816
3817    surf->size = 0; /* TODO */
3818    surf->alignment = 0; /* TODO */
3819
3820    switch (surf->dim_layout) {
3821    case ISL_DIM_LAYOUT_GEN4_2D:
3822    case ISL_DIM_LAYOUT_GEN4_3D:
3823    case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
3824       if (brw->gen >= 9) {
3825          surf->array_pitch_el_rows = mt->qpitch;
3826       } else {
3827          unsigned bw, bh;
3828          _mesa_get_format_block_size(mt->format, &bw, &bh);
3829          assert(mt->qpitch % bh == 0);
3830          surf->array_pitch_el_rows = mt->qpitch / bh;
3831       }
3832       break;
3833    case ISL_DIM_LAYOUT_GEN9_1D:
3834       surf->array_pitch_el_rows = 1;
3835       break;
3836    }
3837
3838    switch (mt->array_layout) {
3839    case ALL_LOD_IN_EACH_SLICE:
3840       surf->array_pitch_span = ISL_ARRAY_PITCH_SPAN_FULL;
3841       break;
3842    case ALL_SLICES_AT_EACH_LOD:
3843    case GEN6_HIZ_STENCIL:
3844       surf->array_pitch_span = ISL_ARRAY_PITCH_SPAN_COMPACT;
3845       break;
3846    default:
3847       unreachable("Invalid array layout");
3848    }
3849
3850    GLenum base_format = _mesa_get_format_base_format(mt->format);
3851    switch (base_format) {
3852    case GL_DEPTH_COMPONENT:
3853       surf->usage = ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT;
3854       break;
3855    case GL_STENCIL_INDEX:
3856       surf->usage = ISL_SURF_USAGE_STENCIL_BIT;
3857       if (brw->gen >= 8)
3858          surf->usage |= ISL_SURF_USAGE_TEXTURE_BIT;
3859       break;
3860    case GL_DEPTH_STENCIL:
3861       /* In this case we only texture from the depth part */
3862       surf->usage = ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT |
3863                     ISL_SURF_USAGE_TEXTURE_BIT;
3864       break;
3865    default:
3866       surf->usage = ISL_SURF_USAGE_TEXTURE_BIT;
3867       if (brw->mesa_format_supports_render[mt->format])
3868          surf->usage = ISL_SURF_USAGE_RENDER_TARGET_BIT;
3869       break;
3870    }
3871
3872    if (_mesa_is_cube_map_texture(mt->target))
3873       surf->usage |= ISL_SURF_USAGE_CUBE_BIT;
3874 }
3875
3876 enum isl_aux_usage
3877 intel_miptree_get_aux_isl_usage(const struct brw_context *brw,
3878                                 const struct intel_mipmap_tree *mt)
3879 {
3880    if (mt->hiz_buf)
3881       return ISL_AUX_USAGE_HIZ;
3882
3883    if (!mt->mcs_buf)
3884       return ISL_AUX_USAGE_NONE;
3885
3886    return mt->aux_usage;
3887 }