src/mesa/drivers/dri/i965/intel_mipmap_tree.c

   1 /*
   2  * Copyright 2006 VMware, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sublicense, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the
  14  * next paragraph) shall be included in all copies or substantial portions
  15  * of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  20  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  21  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  22  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  23  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24  */
  25
  26 #include <GL/gl.h>
  27 #include <GL/internal/dri_interface.h>
  28
  29 #include "intel_batchbuffer.h"
  30 #include "intel_mipmap_tree.h"
  31 #include "intel_tex.h"
  32 #include "intel_blit.h"
  33 #include "intel_fbo.h"
  34
  35 #include "brw_blorp.h"
  36 #include "brw_context.h"
  37 #include "brw_state.h"
  38
  39 #include "main/enums.h"
  40 #include "main/fbobject.h"
  41 #include "main/formats.h"
  42 #include "main/glformats.h"
  43 #include "main/texcompress_etc.h"
  44 #include "main/teximage.h"
  45 #include "main/streaming-load-memcpy.h"
  46 #include "x86/common_x86_asm.h"
  47
  48 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
  49
  50 static void *intel_miptree_map_raw(struct brw_context *brw,
  51                                    struct intel_mipmap_tree *mt,
  52                                    GLbitfield mode);
  53
  54 static void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt);
  55
  56 static bool
  57 intel_miptree_alloc_mcs(struct brw_context *brw,
  58                         struct intel_mipmap_tree *mt,
  59                         GLuint num_samples);
  60
  61 /**
  62  * Determine which MSAA layout should be used by the MSAA surface being
  63  * created, based on the chip generation and the surface type.
  64  */
  65 static enum intel_msaa_layout
  66 compute_msaa_layout(struct brw_context *brw, mesa_format format,
  67                     enum intel_aux_disable aux_disable)
  68 {
  69    /* Prior to Gen7, all MSAA surfaces used IMS layout. */
  70    if (brw->gen < 7)
  71       return INTEL_MSAA_LAYOUT_IMS;
  72
  73    /* In Gen7, IMS layout is only used for depth and stencil buffers. */
  74    switch (_mesa_get_format_base_format(format)) {
  75    case GL_DEPTH_COMPONENT:
  76    case GL_STENCIL_INDEX:
  77    case GL_DEPTH_STENCIL:
  78       return INTEL_MSAA_LAYOUT_IMS;
  79    default:
  80       /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
  81        *
  82        *   This field must be set to 0 for all SINT MSRTs when all RT channels
  83        *   are not written
  84        *
  85        * In practice this means that we have to disable MCS for all signed
  86        * integer MSAA buffers.  The alternative, to disable MCS only when one
  87        * of the render target channels is disabled, is impractical because it
  88        * would require converting between CMS and UMS MSAA layouts on the fly,
  89        * which is expensive.
  90        */
  91       if (brw->gen == 7 && _mesa_get_format_datatype(format) == GL_INT) {
  92          return INTEL_MSAA_LAYOUT_UMS;
  93       } else if (aux_disable & INTEL_AUX_DISABLE_MCS) {
  94          /* We can't use the CMS layout because it uses an aux buffer, the MCS
  95           * buffer. So fallback to UMS, which is identical to CMS without the
  96           * MCS. */
  97          return INTEL_MSAA_LAYOUT_UMS;
  98       } else {
  99          return INTEL_MSAA_LAYOUT_CMS;
 100       }
 101    }
 102 }
 103
 104 bool
 105 intel_tiling_supports_non_msrt_mcs(const struct brw_context *brw,
 106                                    unsigned tiling)
 107 {
 108    /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
 109     * Target(s)", beneath the "Fast Color Clear" bullet (p326):
 110     *
 111     *     - Support is limited to tiled render targets.
 112     *
 113     * Gen9 changes the restriction to Y-tile only.
 114     */
 115    if (brw->gen >= 9)
 116       return tiling == I915_TILING_Y;
 117    else if (brw->gen >= 7)
 118       return tiling != I915_TILING_NONE;
 119    else
 120       return false;
 121 }
 122
 123 /**
 124  * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
 125  * can be used. This doesn't (and should not) inspect any of the properties of
 126  * the miptree's BO.
 127  *
 128  * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 129  * beneath the "Fast Color Clear" bullet (p326):
 130  *
 131  *     - Support is for non-mip-mapped and non-array surface types only.
 132  *
 133  * And then later, on p327:
 134  *
 135  *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
 136  *       64bpp, and 128bpp.
 137  *
 138  * From the Skylake documentation, it is made clear that X-tiling is no longer
 139  * supported:
 140  *
 141  *     - MCS and Lossless compression is supported for TiledY/TileYs/TileYf
 142  *     non-MSRTs only.
 143  */
 144 bool
 145 intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
 146                                            const struct intel_mipmap_tree *mt)
 147 {
 148    /* MCS support does not exist prior to Gen7 */
 149    if (brw->gen < 7)
 150       return false;
 151
 152    if (mt->aux_disable & INTEL_AUX_DISABLE_MCS)
 153       return false;
 154
 155    /* This function applies only to non-multisampled render targets. */
 156    if (mt->num_samples > 1)
 157       return false;
 158
 159    /* MCS is only supported for color buffers */
 160    switch (_mesa_get_format_base_format(mt->format)) {
 161    case GL_DEPTH_COMPONENT:
 162    case GL_DEPTH_STENCIL:
 163    case GL_STENCIL_INDEX:
 164       return false;
 165    }
 166
 167    if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
 168       return false;
 169
 170    const bool mip_mapped = mt->first_level != 0 || mt->last_level != 0;
 171    const bool arrayed = mt->physical_depth0 != 1;
 172
 173    if (arrayed) {
 174        /* Multisample surfaces with the CMS layout are not layered surfaces,
 175         * yet still have physical_depth0 > 1. Assert that we don't
 176         * accidentally reject a multisampled surface here. We should have
 177         * rejected it earlier by explicitly checking the sample count.
 178         */
 179       assert(mt->num_samples <= 1);
 180    }
 181
 182    /* Handle the hardware restrictions...
 183     *
 184     * All GENs have the following restriction: "MCS buffer for non-MSRT is
 185     * supported only for RT formats 32bpp, 64bpp, and 128bpp."
 186     *
 187     * From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652: (Color Clear of
 188     * Non-MultiSampler Render Target Restrictions) Support is for
 189     * non-mip-mapped and non-array surface types only.
 190     *
 191     * From the BDW PRM Volume 7: 3D-Media-GPGPU, page 649: (Color Clear of
 192     * Non-MultiSampler Render Target Restriction). Mip-mapped and arrayed
 193     * surfaces are supported with MCS buffer layout with these alignments in
 194     * the RT space: Horizontal Alignment = 256 and Vertical Alignment = 128.
 195     *
 196     * From the SKL PRM Volume 7: 3D-Media-GPGPU, page 632: (Color Clear of
 197     * Non-MultiSampler Render Target Restriction). Mip-mapped and arrayed
 198     * surfaces are supported with MCS buffer layout with these alignments in
 199     * the RT space: Horizontal Alignment = 128 and Vertical Alignment = 64.
 200     */
 201    if (brw->gen < 8 && (mip_mapped || arrayed))
 202       return false;
 203
 204    /* There's no point in using an MCS buffer if the surface isn't in a
 205     * renderable format.
 206     */
 207    if (!brw->mesa_format_supports_render[mt->format])
 208       return false;
 209
 210    if (brw->gen >= 9) {
 211       mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
 212       const enum isl_format isl_format =
 213          brw_isl_format_for_mesa_format(linear_format);
 214       return isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format);
 215    } else
 216       return true;
 217 }
 218
 219 /* On Gen9 support for color buffer compression was extended to single
 220  * sampled surfaces. This is a helper considering both auxiliary buffer
 221  * type and number of samples telling if the given miptree represents
 222  * the new single sampled case - also called lossless compression.
 223  */
 224 bool
 225 intel_miptree_is_lossless_compressed(const struct brw_context *brw,
 226                                      const struct intel_mipmap_tree *mt)
 227 {
 228    /* Only available from Gen9 onwards. */
 229    if (brw->gen < 9)
 230       return false;
 231
 232    /* Compression always requires auxiliary buffer. */
 233    if (!mt->mcs_buf)
 234       return false;
 235
 236    /* Single sample compression is represented re-using msaa compression
 237     * layout type: "Compressed Multisampled Surfaces".
 238     */
 239    if (mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS)
 240       return false;
 241
 242    /* And finally distinguish between msaa and single sample case. */
 243    return mt->num_samples <= 1;
 244 }
 245
 246 bool
 247 intel_miptree_supports_lossless_compressed(struct brw_context *brw,
 248                                            const struct intel_mipmap_tree *mt)
 249 {
 250    /* For now compression is only enabled for integer formats even though
 251     * there exist supported floating point formats also. This is a heuristic
 252     * decision based on current public benchmarks. In none of the cases these
 253     * formats provided any improvement but a few cases were seen to regress.
 254     * Hence these are left to to be enabled in the future when they are known
 255     * to improve things.
 256     */
 257    if (_mesa_get_format_datatype(mt->format) == GL_FLOAT)
 258       return false;
 259
 260    /* Fast clear mechanism and lossless compression go hand in hand. */
 261    if (!intel_miptree_supports_non_msrt_fast_clear(brw, mt))
 262       return false;
 263
 264    /* Fast clear can be also used to clear srgb surfaces by using equivalent
 265     * linear format. This trick, however, can't be extended to be used with
 266     * lossless compression and therefore a check is needed to see if the format
 267     * really is linear.
 268     */
 269    return _mesa_get_srgb_format_linear(mt->format) == mt->format;
 270 }
 271
 272 /**
 273  * Determine depth format corresponding to a depth+stencil format,
 274  * for separate stencil.
 275  */
 276 mesa_format
 277 intel_depth_format_for_depthstencil_format(mesa_format format) {
 278    switch (format) {
 279    case MESA_FORMAT_Z24_UNORM_S8_UINT:
 280       return MESA_FORMAT_Z24_UNORM_X8_UINT;
 281    case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
 282       return MESA_FORMAT_Z_FLOAT32;
 283    default:
 284       return format;
 285    }
 286 }
 287
 288 static bool
 289 create_mapping_table(GLenum target, unsigned first_level, unsigned last_level,
 290                      unsigned depth0, struct intel_mipmap_level *table)
 291 {
 292    for (unsigned level = first_level; level <= last_level; level++) {
 293       const unsigned d =
 294          target == GL_TEXTURE_3D ? minify(depth0, level) : depth0;
 295
 296       table[level].slice = calloc(d, sizeof(*table[0].slice));
 297       if (!table[level].slice)
 298          goto unwind;
 299    }
 300
 301    return true;
 302
 303 unwind:
 304    for (unsigned level = first_level; level <= last_level; level++)
 305       free(table[level].slice);
 306
 307    return false;
 308 }
 309
 310 /**
 311  * @param for_bo Indicates that the caller is
 312  *        intel_miptree_create_for_bo(). If true, then do not create
 313  *        \c stencil_mt.
 314  */
 315 static struct intel_mipmap_tree *
 316 intel_miptree_create_layout(struct brw_context *brw,
 317                             GLenum target,
 318                             mesa_format format,
 319                             GLuint first_level,
 320                             GLuint last_level,
 321                             GLuint width0,
 322                             GLuint height0,
 323                             GLuint depth0,
 324                             GLuint num_samples,
 325                             uint32_t layout_flags)
 326 {
 327    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
 328    if (!mt)
 329       return NULL;
 330
 331    DBG("%s target %s format %s level %d..%d slices %d <-- %p\n", __func__,
 332        _mesa_enum_to_string(target),
 333        _mesa_get_format_name(format),
 334        first_level, last_level, depth0, mt);
 335
 336    if (target == GL_TEXTURE_1D_ARRAY)
 337       assert(height0 == 1);
 338
 339    mt->target = target;
 340    mt->format = format;
 341    mt->first_level = first_level;
 342    mt->last_level = last_level;
 343    mt->logical_width0 = width0;
 344    mt->logical_height0 = height0;
 345    mt->logical_depth0 = depth0;
 346    mt->aux_disable = (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) != 0 ?
 347       INTEL_AUX_DISABLE_ALL : INTEL_AUX_DISABLE_NONE;
 348    mt->aux_disable |= INTEL_AUX_DISABLE_CCS;
 349    mt->is_scanout = (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT) != 0;
 350    mt->aux_state = NULL;
 351    mt->cpp = _mesa_get_format_bytes(format);
 352    mt->num_samples = num_samples;
 353    mt->compressed = _mesa_is_format_compressed(format);
 354    mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE;
 355    mt->refcount = 1;
 356
 357    if (brw->gen == 6 && format == MESA_FORMAT_S_UINT8)
 358       layout_flags |= MIPTREE_LAYOUT_GEN6_HIZ_STENCIL;
 359
 360    int depth_multiply = 1;
 361    if (num_samples > 1) {
 362       /* Adjust width/height/depth for MSAA */
 363       mt->msaa_layout = compute_msaa_layout(brw, format, mt->aux_disable);
 364       if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
 365          /* From the Ivybridge PRM, Volume 1, Part 1, page 108:
 366           * "If the surface is multisampled and it is a depth or stencil
 367           *  surface or Multisampled Surface StorageFormat in SURFACE_STATE is
 368           *  MSFMT_DEPTH_STENCIL, WL and HL must be adjusted as follows before
 369           *  proceeding:
 370           *
 371           *  +----------------------------------------------------------------+
 372           *  | Num Multisamples |        W_l =         |        H_l =         |
 373           *  +----------------------------------------------------------------+
 374           *  |         2        | ceiling(W_l / 2) * 4 | H_l (no adjustment)  |
 375           *  |         4        | ceiling(W_l / 2) * 4 | ceiling(H_l / 2) * 4 |
 376           *  |         8        | ceiling(W_l / 2) * 8 | ceiling(H_l / 2) * 4 |
 377           *  |        16        | ceiling(W_l / 2) * 8 | ceiling(H_l / 2) * 8 |
 378           *  +----------------------------------------------------------------+
 379           * "
 380           *
 381           * Note that MSFMT_DEPTH_STENCIL just means the IMS (interleaved)
 382           * format rather than UMS/CMS (array slices).  The Sandybridge PRM,
 383           * Volume 1, Part 1, Page 111 has the same formula for 4x MSAA.
 384           *
 385           * Another more complicated explanation for these adjustments comes
 386           * from the Sandybridge PRM, volume 4, part 1, page 31:
 387           *
 388           *     "Any of the other messages (sample*, LOD, load4) used with a
 389           *      (4x) multisampled surface will in-effect sample a surface with
 390           *      double the height and width as that indicated in the surface
 391           *      state. Each pixel position on the original-sized surface is
 392           *      replaced with a 2x2 of samples with the following arrangement:
 393           *
 394           *         sample 0 sample 2
 395           *         sample 1 sample 3"
 396           *
 397           * Thus, when sampling from a multisampled texture, it behaves as
 398           * though the layout in memory for (x,y,sample) is:
 399           *
 400           *      (0,0,0) (0,0,2)   (1,0,0) (1,0,2)
 401           *      (0,0,1) (0,0,3)   (1,0,1) (1,0,3)
 402           *
 403           *      (0,1,0) (0,1,2)   (1,1,0) (1,1,2)
 404           *      (0,1,1) (0,1,3)   (1,1,1) (1,1,3)
 405           *
 406           * However, the actual layout of multisampled data in memory is:
 407           *
 408           *      (0,0,0) (1,0,0)   (0,0,1) (1,0,1)
 409           *      (0,1,0) (1,1,0)   (0,1,1) (1,1,1)
 410           *
 411           *      (0,0,2) (1,0,2)   (0,0,3) (1,0,3)
 412           *      (0,1,2) (1,1,2)   (0,1,3) (1,1,3)
 413           *
 414           * This pattern repeats for each 2x2 pixel block.
 415           *
 416           * As a result, when calculating the size of our 4-sample buffer for
 417           * an odd width or height, we have to align before scaling up because
 418           * sample 3 is in that bottom right 2x2 block.
 419           */
 420          switch (num_samples) {
 421          case 2:
 422             assert(brw->gen >= 8);
 423             width0 = ALIGN(width0, 2) * 2;
 424             height0 = ALIGN(height0, 2);
 425             break;
 426          case 4:
 427             width0 = ALIGN(width0, 2) * 2;
 428             height0 = ALIGN(height0, 2) * 2;
 429             break;
 430          case 8:
 431             width0 = ALIGN(width0, 2) * 4;
 432             height0 = ALIGN(height0, 2) * 2;
 433             break;
 434          case 16:
 435             width0 = ALIGN(width0, 2) * 4;
 436             height0 = ALIGN(height0, 2) * 4;
 437             break;
 438          default:
 439             /* num_samples should already have been quantized to 0, 1, 2, 4, 8
 440              * or 16.
 441              */
 442             unreachable("not reached");
 443          }
 444       } else {
 445          /* Non-interleaved */
 446          depth_multiply = num_samples;
 447          depth0 *= depth_multiply;
 448       }
 449    }
 450
 451    if (!create_mapping_table(target, first_level, last_level, depth0,
 452                              mt->level)) {
 453       free(mt);
 454       return NULL;
 455    }
 456
 457    /* Set array_layout to ALL_SLICES_AT_EACH_LOD when array_spacing_lod0 can
 458     * be used. array_spacing_lod0 is only used for non-IMS MSAA surfaces on
 459     * Gen 7 and 8. On Gen 8 and 9 this layout is not available but it is still
 460     * used on Gen8 to make it pick a qpitch value which doesn't include space
 461     * for the mipmaps. On Gen9 this is not necessary because it will
 462     * automatically pick a packed qpitch value whenever mt->first_level ==
 463     * mt->last_level.
 464     * TODO: can we use it elsewhere?
 465     * TODO: also disable this on Gen8 and pick the qpitch value like Gen9
 466     */
 467    if (brw->gen >= 9) {
 468       mt->array_layout = ALL_LOD_IN_EACH_SLICE;
 469    } else {
 470       switch (mt->msaa_layout) {
 471       case INTEL_MSAA_LAYOUT_NONE:
 472       case INTEL_MSAA_LAYOUT_IMS:
 473          mt->array_layout = ALL_LOD_IN_EACH_SLICE;
 474          break;
 475       case INTEL_MSAA_LAYOUT_UMS:
 476       case INTEL_MSAA_LAYOUT_CMS:
 477          mt->array_layout = ALL_SLICES_AT_EACH_LOD;
 478          break;
 479       }
 480    }
 481
 482    if (target == GL_TEXTURE_CUBE_MAP)
 483       assert(depth0 == 6 * depth_multiply);
 484
 485    mt->physical_width0 = width0;
 486    mt->physical_height0 = height0;
 487    mt->physical_depth0 = depth0;
 488
 489    if (!(layout_flags & MIPTREE_LAYOUT_FOR_BO) &&
 490        _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
 491        (brw->must_use_separate_stencil ||
 492         (brw->has_separate_stencil &&
 493          intel_miptree_wants_hiz_buffer(brw, mt)))) {
 494       uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
 495       if (brw->gen == 6) {
 496          stencil_flags |= MIPTREE_LAYOUT_TILING_ANY;
 497       }
 498
 499       mt->stencil_mt = intel_miptree_create(brw,
 500                                             mt->target,
 501                                             MESA_FORMAT_S_UINT8,
 502                                             mt->first_level,
 503                                             mt->last_level,
 504                                             mt->logical_width0,
 505                                             mt->logical_height0,
 506                                             mt->logical_depth0,
 507                                             num_samples,
 508                                             stencil_flags);
 509
 510       if (!mt->stencil_mt) {
 511          intel_miptree_release(&mt);
 512          return NULL;
 513       }
 514       mt->stencil_mt->r8stencil_needs_update = true;
 515
 516       /* Fix up the Z miptree format for how we're splitting out separate
 517        * stencil.  Gen7 expects there to be no stencil bits in its depth buffer.
 518        */
 519       mt->format = intel_depth_format_for_depthstencil_format(mt->format);
 520       mt->cpp = 4;
 521
 522       if (format == mt->format) {
 523          _mesa_problem(NULL, "Unknown format %s in separate stencil mt\n",
 524                        _mesa_get_format_name(mt->format));
 525       }
 526    }
 527
 528    if (layout_flags & MIPTREE_LAYOUT_GEN6_HIZ_STENCIL)
 529       mt->array_layout = GEN6_HIZ_STENCIL;
 530
 531    /*
 532     * Obey HALIGN_16 constraints for Gen8 and Gen9 buffers which are
 533     * multisampled or have an AUX buffer attached to it.
 534     *
 535     * GEN  |    MSRT        | AUX_CCS_* or AUX_MCS
 536     *  -------------------------------------------
 537     *  9   |  HALIGN_16     |    HALIGN_16
 538     *  8   |  HALIGN_ANY    |    HALIGN_16
 539     *  7   |      ?         |        ?
 540     *  6   |      ?         |        ?
 541     */
 542    if (intel_miptree_supports_non_msrt_fast_clear(brw, mt)) {
 543       if (brw->gen >= 9 || (brw->gen == 8 && num_samples <= 1))
 544          layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
 545    } else if (brw->gen >= 9 && num_samples > 1) {
 546       layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
 547    } else {
 548       const UNUSED bool is_lossless_compressed_aux =
 549          brw->gen >= 9 && num_samples == 1 &&
 550          mt->format == MESA_FORMAT_R_UINT32;
 551
 552       /* For now, nothing else has this requirement */
 553       assert(is_lossless_compressed_aux ||
 554              (layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
 555    }
 556
 557    if (!brw_miptree_layout(brw, mt, layout_flags)) {
 558       intel_miptree_release(&mt);
 559       return NULL;
 560    }
 561
 562    if (mt->aux_disable & INTEL_AUX_DISABLE_MCS)
 563       assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS);
 564
 565    return mt;
 566 }
 567
 568
 569 /**
 570  * Choose an appropriate uncompressed format for a requested
 571  * compressed format, if unsupported.
 572  */
 573 mesa_format
 574 intel_lower_compressed_format(struct brw_context *brw, mesa_format format)
 575 {
 576    /* No need to lower ETC formats on these platforms,
 577     * they are supported natively.
 578     */
 579    if (brw->gen >= 8 || brw->is_baytrail)
 580       return format;
 581
 582    switch (format) {
 583    case MESA_FORMAT_ETC1_RGB8:
 584       return MESA_FORMAT_R8G8B8X8_UNORM;
 585    case MESA_FORMAT_ETC2_RGB8:
 586       return MESA_FORMAT_R8G8B8X8_UNORM;
 587    case MESA_FORMAT_ETC2_SRGB8:
 588    case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
 589    case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
 590       return MESA_FORMAT_B8G8R8A8_SRGB;
 591    case MESA_FORMAT_ETC2_RGBA8_EAC:
 592    case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
 593       return MESA_FORMAT_R8G8B8A8_UNORM;
 594    case MESA_FORMAT_ETC2_R11_EAC:
 595       return MESA_FORMAT_R_UNORM16;
 596    case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
 597       return MESA_FORMAT_R_SNORM16;
 598    case MESA_FORMAT_ETC2_RG11_EAC:
 599       return MESA_FORMAT_R16G16_UNORM;
 600    case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
 601       return MESA_FORMAT_R16G16_SNORM;
 602    default:
 603       /* Non ETC1 / ETC2 format */
 604       return format;
 605    }
 606 }
 607
 608 /** \brief Assert that the level and layer are valid for the miptree. */
 609 void
 610 intel_miptree_check_level_layer(const struct intel_mipmap_tree *mt,
 611                                 uint32_t level,
 612                                 uint32_t layer)
 613 {
 614    (void) mt;
 615    (void) level;
 616    (void) layer;
 617
 618    assert(level >= mt->first_level);
 619    assert(level <= mt->last_level);
 620
 621    if (mt->surf.size > 0)
 622       assert(layer < (mt->surf.dim == ISL_SURF_DIM_3D ?
 623                          minify(mt->surf.phys_level0_sa.depth, level) :
 624                          mt->surf.phys_level0_sa.array_len));
 625    else
 626       assert(layer < mt->level[level].depth);
 627 }
 628
 629 static enum isl_aux_state **
 630 create_aux_state_map(struct intel_mipmap_tree *mt,
 631                      enum isl_aux_state initial)
 632 {
 633    const uint32_t levels = mt->last_level + 1;
 634
 635    uint32_t total_slices = 0;
 636    for (uint32_t level = 0; level < levels; level++)
 637       total_slices += mt->level[level].depth;
 638
 639    const size_t per_level_array_size = levels * sizeof(enum isl_aux_state *);
 640
 641    /* We're going to allocate a single chunk of data for both the per-level
 642     * reference array and the arrays of aux_state.  This makes cleanup
 643     * significantly easier.
 644     */
 645    const size_t total_size = per_level_array_size +
 646                              total_slices * sizeof(enum isl_aux_state);
 647    void *data = malloc(total_size);
 648    if (data == NULL)
 649       return NULL;
 650
 651    enum isl_aux_state **per_level_arr = data;
 652    enum isl_aux_state *s = data + per_level_array_size;
 653    for (uint32_t level = 0; level < levels; level++) {
 654       per_level_arr[level] = s;
 655       for (uint32_t a = 0; a < mt->level[level].depth; a++)
 656          *(s++) = initial;
 657    }
 658    assert((void *)s == data + total_size);
 659
 660    return per_level_arr;
 661 }
 662
 663 static void
 664 free_aux_state_map(enum isl_aux_state **state)
 665 {
 666    free(state);
 667 }
 668
 669 static struct intel_mipmap_tree *
 670 make_surface(struct brw_context *brw, GLenum target, mesa_format format,
 671              unsigned first_level, unsigned last_level,
 672              unsigned width0, unsigned height0, unsigned depth0,
 673              unsigned num_samples, enum isl_tiling isl_tiling,
 674              isl_surf_usage_flags_t isl_usage_flags, uint32_t alloc_flags,
 675              struct brw_bo *bo)
 676 {
 677    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
 678    if (!mt)
 679       return NULL;
 680
 681    if (!create_mapping_table(target, first_level, last_level, depth0,
 682                              mt->level)) {
 683       free(mt);
 684       return NULL;
 685    }
 686
 687    if (target == GL_TEXTURE_CUBE_MAP ||
 688        target == GL_TEXTURE_CUBE_MAP_ARRAY)
 689       isl_usage_flags |= ISL_SURF_USAGE_CUBE_BIT;
 690
 691    DBG("%s: %s %s %ux %u:%u:%u %d..%d <-- %p\n",
 692         __func__,
 693        _mesa_enum_to_string(target),
 694        _mesa_get_format_name(format),
 695        num_samples, width0, height0, depth0,
 696        first_level, last_level, mt);
 697
 698    struct isl_surf_init_info init_info = {
 699       .dim = get_isl_surf_dim(target),
 700       .format = translate_tex_format(brw, format, false),
 701       .width = width0,
 702       .height = height0,
 703       .depth = target == GL_TEXTURE_3D ? depth0 : 1,
 704       .levels = last_level - first_level + 1,
 705       .array_len = target == GL_TEXTURE_3D ? 1 : depth0,
 706       .samples = MAX2(num_samples, 1),
 707       .usage = isl_usage_flags,
 708       .tiling_flags = 1u << isl_tiling
 709    };
 710
 711    if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
 712       goto fail;
 713
 714    assert(mt->surf.size % mt->surf.row_pitch == 0);
 715
 716    if (!bo) {
 717       mt->bo = brw_bo_alloc_tiled(brw->bufmgr, "isl-miptree",
 718                                   mt->surf.size,
 719                                   isl_tiling_to_bufmgr_tiling(isl_tiling),
 720                                   mt->surf.row_pitch, alloc_flags);
 721       if (!mt->bo)
 722          goto fail;
 723    } else {
 724       mt->bo = bo;
 725    }
 726
 727    mt->first_level = first_level;
 728    mt->last_level = last_level;
 729    mt->target = target;
 730    mt->format = format;
 731    mt->refcount = 1;
 732    mt->aux_state = NULL;
 733
 734    return mt;
 735
 736 fail:
 737    intel_miptree_release(&mt);
 738    return NULL;
 739 }
 740
 741 static struct intel_mipmap_tree *
 742 miptree_create(struct brw_context *brw,
 743                GLenum target,
 744                mesa_format format,
 745                GLuint first_level,
 746                GLuint last_level,
 747                GLuint width0,
 748                GLuint height0,
 749                GLuint depth0,
 750                GLuint num_samples,
 751                uint32_t layout_flags)
 752 {
 753    if (brw->gen == 6 && format == MESA_FORMAT_S_UINT8)
 754       return make_surface(brw, target, format, first_level, last_level,
 755                           width0, height0, depth0, num_samples, ISL_TILING_W,
 756                           ISL_SURF_USAGE_STENCIL_BIT |
 757                           ISL_SURF_USAGE_TEXTURE_BIT,
 758                           BO_ALLOC_FOR_RENDER, NULL);
 759
 760    struct intel_mipmap_tree *mt;
 761    mesa_format tex_format = format;
 762    mesa_format etc_format = MESA_FORMAT_NONE;
 763    uint32_t alloc_flags = 0;
 764
 765    format = intel_lower_compressed_format(brw, format);
 766
 767    etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
 768
 769    assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0);
 770    mt = intel_miptree_create_layout(brw, target, format,
 771                                     first_level, last_level, width0,
 772                                     height0, depth0, num_samples,
 773                                     layout_flags);
 774    if (!mt)
 775       return NULL;
 776
 777    if (mt->tiling == (I915_TILING_Y | I915_TILING_X))
 778       mt->tiling = I915_TILING_Y;
 779
 780    if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD)
 781       alloc_flags |= BO_ALLOC_FOR_RENDER;
 782
 783    mt->etc_format = etc_format;
 784
 785    if (format == MESA_FORMAT_S_UINT8) {
 786       /* Align to size of W tile, 64x64. */
 787       mt->bo = brw_bo_alloc_tiled_2d(brw->bufmgr, "miptree",
 788                                      ALIGN(mt->total_width, 64),
 789                                      ALIGN(mt->total_height, 64),
 790                                      mt->cpp, mt->tiling, &mt->pitch,
 791                                      alloc_flags);
 792    } else {
 793       mt->bo = brw_bo_alloc_tiled_2d(brw->bufmgr, "miptree",
 794                                      mt->total_width, mt->total_height,
 795                                      mt->cpp, mt->tiling, &mt->pitch,
 796                                      alloc_flags);
 797    }
 798
 799    if (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT)
 800       mt->bo->cache_coherent = false;
 801
 802    return mt;
 803 }
 804
 805 struct intel_mipmap_tree *
 806 intel_miptree_create(struct brw_context *brw,
 807                      GLenum target,
 808                      mesa_format format,
 809                      GLuint first_level,
 810                      GLuint last_level,
 811                      GLuint width0,
 812                      GLuint height0,
 813                      GLuint depth0,
 814                      GLuint num_samples,
 815                      uint32_t layout_flags)
 816 {
 817    struct intel_mipmap_tree *mt = miptree_create(
 818                                      brw, target, format,
 819                                      first_level, last_level,
 820                                      width0, height0, depth0, num_samples,
 821                                      layout_flags);
 822
 823    /* If the BO is too large to fit in the aperture, we need to use the
 824     * BLT engine to support it.  Prior to Sandybridge, the BLT paths can't
 825     * handle Y-tiling, so we need to fall back to X.
 826     */
 827    if (brw->gen < 6 && mt->bo->size >= brw->max_gtt_map_object_size &&
 828        mt->tiling == I915_TILING_Y) {
 829       const uint32_t alloc_flags =
 830          (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD) ?
 831          BO_ALLOC_FOR_RENDER : 0;
 832       perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n",
 833                  mt->total_width, mt->total_height);
 834
 835       mt->tiling = I915_TILING_X;
 836       brw_bo_unreference(mt->bo);
 837       mt->bo = brw_bo_alloc_tiled_2d(brw->bufmgr, "miptree",
 838                                      mt->total_width, mt->total_height, mt->cpp,
 839                                      mt->tiling, &mt->pitch, alloc_flags);
 840    }
 841
 842    mt->offset = 0;
 843
 844    if (!mt->bo) {
 845        intel_miptree_release(&mt);
 846        return NULL;
 847    }
 848
 849
 850    if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
 851       assert(mt->num_samples > 1);
 852       if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) {
 853          intel_miptree_release(&mt);
 854          return NULL;
 855       }
 856    }
 857
 858    /* If this miptree is capable of supporting fast color clears, set
 859     * fast_clear_state appropriately to ensure that fast clears will occur.
 860     * Allocation of the MCS miptree will be deferred until the first fast
 861     * clear actually occurs or when compressed single sampled buffer is
 862     * written by the GPU for the first time.
 863     */
 864    if (intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) &&
 865        intel_miptree_supports_non_msrt_fast_clear(brw, mt)) {
 866       mt->aux_disable &= ~INTEL_AUX_DISABLE_CCS;
 867       assert(brw->gen < 8 || mt->halign == 16 || num_samples <= 1);
 868
 869       /* On Gen9+ clients are not currently capable of consuming compressed
 870        * single-sampled buffers. Disabling compression allows us to skip
 871        * resolves.
 872        */
 873       const bool lossless_compression_disabled = INTEL_DEBUG & DEBUG_NO_RBC;
 874       const bool is_lossless_compressed =
 875          unlikely(!lossless_compression_disabled) &&
 876          brw->gen >= 9 && !mt->is_scanout &&
 877          intel_miptree_supports_lossless_compressed(brw, mt);
 878
 879       if (is_lossless_compressed) {
 880          intel_miptree_alloc_non_msrt_mcs(brw, mt, is_lossless_compressed);
 881       }
 882    }
 883
 884    return mt;
 885 }
 886
 887 struct intel_mipmap_tree *
 888 intel_miptree_create_for_bo(struct brw_context *brw,
 889                             struct brw_bo *bo,
 890                             mesa_format format,
 891                             uint32_t offset,
 892                             uint32_t width,
 893                             uint32_t height,
 894                             uint32_t depth,
 895                             int pitch,
 896                             uint32_t layout_flags)
 897 {
 898    struct intel_mipmap_tree *mt;
 899    uint32_t tiling, swizzle;
 900    const GLenum target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
 901
 902    if (brw->gen == 6 && format == MESA_FORMAT_S_UINT8) {
 903       mt = make_surface(brw, target, MESA_FORMAT_S_UINT8,
 904                         0, 0, width, height, depth, 1, ISL_TILING_W,
 905                         ISL_SURF_USAGE_STENCIL_BIT |
 906                         ISL_SURF_USAGE_TEXTURE_BIT,
 907                         BO_ALLOC_FOR_RENDER, bo);
 908       if (!mt)
 909          return NULL;
 910
 911       assert(bo->size >= mt->surf.size);
 912
 913       brw_bo_reference(bo);
 914       return mt;
 915    }
 916
 917    brw_bo_get_tiling(bo, &tiling, &swizzle);
 918
 919    /* Nothing will be able to use this miptree with the BO if the offset isn't
 920     * aligned.
 921     */
 922    if (tiling != I915_TILING_NONE)
 923       assert(offset % 4096 == 0);
 924
 925    /* miptrees can't handle negative pitch.  If you need flipping of images,
 926     * that's outside of the scope of the mt.
 927     */
 928    assert(pitch >= 0);
 929
 930    /* The BO already has a tiling format and we shouldn't confuse the lower
 931     * layers by making it try to find a tiling format again.
 932     */
 933    assert((layout_flags & MIPTREE_LAYOUT_TILING_ANY) == 0);
 934    assert((layout_flags & MIPTREE_LAYOUT_TILING_NONE) == 0);
 935
 936    layout_flags |= MIPTREE_LAYOUT_FOR_BO;
 937    mt = intel_miptree_create_layout(brw, target, format,
 938                                     0, 0,
 939                                     width, height, depth, 0,
 940                                     layout_flags);
 941    if (!mt)
 942       return NULL;
 943
 944    brw_bo_reference(bo);
 945    mt->bo = bo;
 946    mt->pitch = pitch;
 947    mt->offset = offset;
 948    mt->tiling = tiling;
 949
 950    return mt;
 951 }
 952
 953 /**
 954  * For a singlesample renderbuffer, this simply wraps the given BO with a
 955  * miptree.
 956  *
 957  * For a multisample renderbuffer, this wraps the window system's
 958  * (singlesample) BO with a singlesample miptree attached to the
 959  * intel_renderbuffer, then creates a multisample miptree attached to irb->mt
 960  * that will contain the actual rendering (which is lazily resolved to
 961  * irb->singlesample_mt).
 962  */
 963 void
 964 intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
 965                                          struct intel_renderbuffer *irb,
 966                                          struct brw_bo *bo,
 967                                          uint32_t width, uint32_t height,
 968                                          uint32_t pitch)
 969 {
 970    struct intel_mipmap_tree *singlesample_mt = NULL;
 971    struct intel_mipmap_tree *multisample_mt = NULL;
 972    struct gl_renderbuffer *rb = &irb->Base.Base;
 973    mesa_format format = rb->Format;
 974    int num_samples = rb->NumSamples;
 975
 976    /* Only the front and back buffers, which are color buffers, are allocated
 977     * through the image loader.
 978     */
 979    assert(_mesa_get_format_base_format(format) == GL_RGB ||
 980           _mesa_get_format_base_format(format) == GL_RGBA);
 981
 982    singlesample_mt = intel_miptree_create_for_bo(intel,
 983                                                  bo,
 984                                                  format,
 985                                                  0,
 986                                                  width,
 987                                                  height,
 988                                                  1,
 989                                                  pitch,
 990                                                  MIPTREE_LAYOUT_FOR_SCANOUT);
 991    if (!singlesample_mt)
 992       goto fail;
 993
 994    /* If this miptree is capable of supporting fast color clears, set
 995     * mcs_state appropriately to ensure that fast clears will occur.
 996     * Allocation of the MCS miptree will be deferred until the first fast
 997     * clear actually occurs.
 998     */
 999    if (intel_tiling_supports_non_msrt_mcs(intel, singlesample_mt->tiling) &&
1000        intel_miptree_supports_non_msrt_fast_clear(intel, singlesample_mt)) {
1001       singlesample_mt->aux_disable &= ~INTEL_AUX_DISABLE_CCS;
1002    }
1003
1004    if (num_samples == 0) {
1005       intel_miptree_release(&irb->mt);
1006       irb->mt = singlesample_mt;
1007
1008       assert(!irb->singlesample_mt);
1009    } else {
1010       intel_miptree_release(&irb->singlesample_mt);
1011       irb->singlesample_mt = singlesample_mt;
1012
1013       if (!irb->mt ||
1014           irb->mt->logical_width0 != width ||
1015           irb->mt->logical_height0 != height) {
1016          multisample_mt = intel_miptree_create_for_renderbuffer(intel,
1017                                                                 format,
1018                                                                 width,
1019                                                                 height,
1020                                                                 num_samples);
1021          if (!multisample_mt)
1022             goto fail;
1023
1024          irb->need_downsample = false;
1025          intel_miptree_release(&irb->mt);
1026          irb->mt = multisample_mt;
1027       }
1028    }
1029    return;
1030
1031 fail:
1032    intel_miptree_release(&irb->singlesample_mt);
1033    intel_miptree_release(&irb->mt);
1034    return;
1035 }
1036
1037 struct intel_mipmap_tree*
1038 intel_miptree_create_for_renderbuffer(struct brw_context *brw,
1039                                       mesa_format format,
1040                                       uint32_t width,
1041                                       uint32_t height,
1042                                       uint32_t num_samples)
1043 {
1044    struct intel_mipmap_tree *mt;
1045    uint32_t depth = 1;
1046    bool ok;
1047    GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
1048    const uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
1049                                  MIPTREE_LAYOUT_TILING_ANY |
1050                                  MIPTREE_LAYOUT_FOR_SCANOUT;
1051
1052    mt = intel_miptree_create(brw, target, format, 0, 0,
1053                              width, height, depth, num_samples,
1054                              layout_flags);
1055    if (!mt)
1056       goto fail;
1057
1058    if (intel_miptree_wants_hiz_buffer(brw, mt)) {
1059       ok = intel_miptree_alloc_hiz(brw, mt);
1060       if (!ok)
1061          goto fail;
1062    }
1063
1064    return mt;
1065
1066 fail:
1067    intel_miptree_release(&mt);
1068    return NULL;
1069 }
1070
1071 void
1072 intel_miptree_reference(struct intel_mipmap_tree **dst,
1073                         struct intel_mipmap_tree *src)
1074 {
1075    if (*dst == src)
1076       return;
1077
1078    intel_miptree_release(dst);
1079
1080    if (src) {
1081       src->refcount++;
1082       DBG("%s %p refcount now %d\n", __func__, src, src->refcount);
1083    }
1084
1085    *dst = src;
1086 }
1087
1088 static void
1089 intel_miptree_aux_buffer_free(struct intel_miptree_aux_buffer *aux_buf)
1090 {
1091    if (aux_buf == NULL)
1092       return;
1093
1094    brw_bo_unreference(aux_buf->bo);
1095
1096    free(aux_buf);
1097 }
1098
1099 void
1100 intel_miptree_release(struct intel_mipmap_tree **mt)
1101 {
1102    if (!*mt)
1103       return;
1104
1105    DBG("%s %p refcount will be %d\n", __func__, *mt, (*mt)->refcount - 1);
1106    if (--(*mt)->refcount <= 0) {
1107       GLuint i;
1108
1109       DBG("%s deleting %p\n", __func__, *mt);
1110
1111       brw_bo_unreference((*mt)->bo);
1112       intel_miptree_release(&(*mt)->stencil_mt);
1113       intel_miptree_release(&(*mt)->r8stencil_mt);
1114       intel_miptree_aux_buffer_free((*mt)->hiz_buf);
1115       intel_miptree_aux_buffer_free((*mt)->mcs_buf);
1116       free_aux_state_map((*mt)->aux_state);
1117
1118       intel_miptree_release(&(*mt)->plane[0]);
1119       intel_miptree_release(&(*mt)->plane[1]);
1120
1121       for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
1122          free((*mt)->level[i].slice);
1123       }
1124
1125       free(*mt);
1126    }
1127    *mt = NULL;
1128 }
1129
1130
1131 void
1132 intel_get_image_dims(struct gl_texture_image *image,
1133                      int *width, int *height, int *depth)
1134 {
1135    switch (image->TexObject->Target) {
1136    case GL_TEXTURE_1D_ARRAY:
1137       /* For a 1D Array texture the OpenGL API will treat the image height as
1138        * the number of array slices. For Intel hardware, we treat the 1D array
1139        * as a 2D Array with a height of 1. So, here we want to swap image
1140        * height and depth.
1141        */
1142       assert(image->Depth == 1);
1143       *width = image->Width;
1144       *height = 1;
1145       *depth = image->Height;
1146       break;
1147    case GL_TEXTURE_CUBE_MAP:
1148       /* For Cube maps, the mesa/main api layer gives us a depth of 1 even
1149        * though we really have 6 slices.
1150        */
1151       assert(image->Depth == 1);
1152       *width = image->Width;
1153       *height = image->Height;
1154       *depth = 6;
1155       break;
1156    default:
1157       *width = image->Width;
1158       *height = image->Height;
1159       *depth = image->Depth;
1160       break;
1161    }
1162 }
1163
1164 /**
1165  * Can the image be pulled into a unified mipmap tree?  This mirrors
1166  * the completeness test in a lot of ways.
1167  *
1168  * Not sure whether I want to pass gl_texture_image here.
1169  */
1170 bool
1171 intel_miptree_match_image(struct intel_mipmap_tree *mt,
1172                           struct gl_texture_image *image)
1173 {
1174    struct intel_texture_image *intelImage = intel_texture_image(image);
1175    GLuint level = intelImage->base.Base.Level;
1176    int width, height, depth;
1177
1178    /* glTexImage* choose the texture object based on the target passed in, and
1179     * objects can't change targets over their lifetimes, so this should be
1180     * true.
1181     */
1182    assert(image->TexObject->Target == mt->target);
1183
1184    mesa_format mt_format = mt->format;
1185    if (mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT && mt->stencil_mt)
1186       mt_format = MESA_FORMAT_Z24_UNORM_S8_UINT;
1187    if (mt->format == MESA_FORMAT_Z_FLOAT32 && mt->stencil_mt)
1188       mt_format = MESA_FORMAT_Z32_FLOAT_S8X24_UINT;
1189    if (mt->etc_format != MESA_FORMAT_NONE)
1190       mt_format = mt->etc_format;
1191
1192    if (image->TexFormat != mt_format)
1193       return false;
1194
1195    intel_get_image_dims(image, &width, &height, &depth);
1196
1197    if (mt->target == GL_TEXTURE_CUBE_MAP)
1198       depth = 6;
1199
1200    if (mt->surf.size > 0) {
1201       if (level >= mt->surf.levels)
1202          return false;
1203
1204       const unsigned level_depth =
1205          mt->surf.dim == ISL_SURF_DIM_3D ?
1206             minify(mt->surf.logical_level0_px.depth, level) :
1207             mt->surf.logical_level0_px.array_len;
1208
1209       return width == minify(mt->surf.logical_level0_px.width, level) &&
1210              height == minify(mt->surf.logical_level0_px.height, level) &&
1211              depth == level_depth &&
1212              MAX2(image->NumSamples, 1) == mt->surf.samples;
1213    }
1214
1215    int level_depth = mt->level[level].depth;
1216    if (mt->num_samples > 1) {
1217       switch (mt->msaa_layout) {
1218       case INTEL_MSAA_LAYOUT_NONE:
1219       case INTEL_MSAA_LAYOUT_IMS:
1220          break;
1221       case INTEL_MSAA_LAYOUT_UMS:
1222       case INTEL_MSAA_LAYOUT_CMS:
1223          level_depth /= mt->num_samples;
1224          break;
1225       }
1226    }
1227
1228    /* Test image dimensions against the base level image adjusted for
1229     * minification.  This will also catch images not present in the
1230     * tree, changed targets, etc.
1231     */
1232    if (width != minify(mt->logical_width0, level - mt->first_level) ||
1233        height != minify(mt->logical_height0, level - mt->first_level) ||
1234        depth != level_depth) {
1235       return false;
1236    }
1237
1238    if (image->NumSamples != mt->num_samples)
1239       return false;
1240
1241    return true;
1242 }
1243
1244
1245 void
1246 intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
1247                              GLuint level,
1248                              GLuint x, GLuint y, GLuint d)
1249 {
1250    mt->level[level].depth = d;
1251    mt->level[level].level_x = x;
1252    mt->level[level].level_y = y;
1253
1254    DBG("%s level %d, depth %d, offset %d,%d\n", __func__,
1255        level, d, x, y);
1256
1257    assert(mt->level[level].slice);
1258
1259    mt->level[level].slice[0].x_offset = mt->level[level].level_x;
1260    mt->level[level].slice[0].y_offset = mt->level[level].level_y;
1261 }
1262
1263
1264 void
1265 intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
1266                                GLuint level, GLuint img,
1267                                GLuint x, GLuint y)
1268 {
1269    if (img == 0 && level == 0)
1270       assert(x == 0 && y == 0);
1271
1272    assert(img < mt->level[level].depth);
1273
1274    mt->level[level].slice[img].x_offset = mt->level[level].level_x + x;
1275    mt->level[level].slice[img].y_offset = mt->level[level].level_y + y;
1276
1277    DBG("%s level %d img %d pos %d,%d\n",
1278        __func__, level, img,
1279        mt->level[level].slice[img].x_offset,
1280        mt->level[level].slice[img].y_offset);
1281 }
1282
1283 void
1284 intel_miptree_get_image_offset(const struct intel_mipmap_tree *mt,
1285                                GLuint level, GLuint slice,
1286                                GLuint *x, GLuint *y)
1287 {
1288    if (mt->surf.size > 0) {
1289       uint32_t x_offset_sa, y_offset_sa;
1290
1291       /* Given level is relative to level zero while the miptree may be
1292        * represent just a subset of all levels starting from 'first_level'.
1293        */
1294       assert(level >= mt->first_level);
1295       level -= mt->first_level;
1296
1297       const unsigned z = mt->surf.dim == ISL_SURF_DIM_3D ? slice : 0;
1298       slice = mt->surf.dim == ISL_SURF_DIM_3D ? 0 : slice;
1299       isl_surf_get_image_offset_sa(&mt->surf, level, slice, z,
1300                                    &x_offset_sa, &y_offset_sa);
1301
1302       *x = x_offset_sa;
1303       *y = y_offset_sa;
1304       return;
1305    }
1306
1307    assert(slice < mt->level[level].depth);
1308
1309    *x = mt->level[level].slice[slice].x_offset;
1310    *y = mt->level[level].slice[slice].y_offset;
1311 }
1312
1313
1314 /**
1315  * This function computes the tile_w (in bytes) and tile_h (in rows) of
1316  * different tiling patterns. If the BO is untiled, tile_w is set to cpp
1317  * and tile_h is set to 1.
1318  */
1319 void
1320 intel_get_tile_dims(uint32_t tiling, uint32_t cpp,
1321                     uint32_t *tile_w, uint32_t *tile_h)
1322 {
1323    switch (tiling) {
1324    case I915_TILING_X:
1325       *tile_w = 512;
1326       *tile_h = 8;
1327       break;
1328    case I915_TILING_Y:
1329       *tile_w = 128;
1330       *tile_h = 32;
1331       break;
1332    case I915_TILING_NONE:
1333       *tile_w = cpp;
1334       *tile_h = 1;
1335       break;
1336    default:
1337       unreachable("not reached");
1338    }
1339 }
1340
1341
/**
 * Compute the masks selecting the bits of an X coordinate (in pixels) and a
 * Y coordinate (in rows) that give the position inside a tile.  Both masks
 * come out as 0 for an untiled BO.
 */
void
intel_get_tile_masks(uint32_t tiling, uint32_t cpp,
                     uint32_t *mask_x, uint32_t *mask_y)
{
   uint32_t tile_width_bytes, tile_rows;

   intel_get_tile_dims(tiling, cpp, &tile_width_bytes, &tile_rows);

   /* Convert the tile width from bytes to pixels before building the mask. */
   const uint32_t tile_width_px = tile_width_bytes / cpp;

   *mask_x = tile_width_px - 1;
   *mask_y = tile_rows - 1;
}
1358
1359 /**
1360  * Compute the offset (in bytes) from the start of the BO to the given x
1361  * and y coordinate.  For tiled BOs, caller must ensure that x and y are
1362  * multiples of the tile size.
1363  */
1364 uint32_t
1365 intel_miptree_get_aligned_offset(const struct intel_mipmap_tree *mt,
1366                                  uint32_t x, uint32_t y)
1367 {
1368    int cpp = mt->cpp;
1369    uint32_t pitch = mt->pitch;
1370    uint32_t tiling = mt->tiling;
1371
1372    switch (tiling) {
1373    default:
1374       unreachable("not reached");
1375    case I915_TILING_NONE:
1376       return y * pitch + x * cpp;
1377    case I915_TILING_X:
1378       assert((x % (512 / cpp)) == 0);
1379       assert((y % 8) == 0);
1380       return y * pitch + x / (512 / cpp) * 4096;
1381    case I915_TILING_Y:
1382       assert((x % (128 / cpp)) == 0);
1383       assert((y % 32) == 0);
1384       return y * pitch + x / (128 / cpp) * 4096;
1385    }
1386 }
1387
1388 /**
1389  * Rendering with tiled buffers requires that the base address of the buffer
1390  * be aligned to a page boundary.  For renderbuffers, and sometimes with
1391  * textures, we may want the surface to point at a texture image level that
1392  * isn't at a page boundary.
1393  *
1394  * This function returns an appropriately-aligned base offset
1395  * according to the tiling restrictions, plus any required x/y offset
1396  * from there.
1397  */
1398 uint32_t
1399 intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt,
1400                                GLuint level, GLuint slice,
1401                                uint32_t *tile_x,
1402                                uint32_t *tile_y)
1403 {
1404    uint32_t x, y;
1405    uint32_t mask_x, mask_y;
1406
1407    intel_get_tile_masks(mt->tiling, mt->cpp, &mask_x, &mask_y);
1408    intel_miptree_get_image_offset(mt, level, slice, &x, &y);
1409
1410    *tile_x = x & mask_x;
1411    *tile_y = y & mask_y;
1412
1413    return intel_miptree_get_aligned_offset(mt, x & ~mask_x, y & ~mask_y);
1414 }
1415
1416 static void
1417 intel_miptree_copy_slice_sw(struct brw_context *brw,
1418                             struct intel_mipmap_tree *src_mt,
1419                             unsigned src_level, unsigned src_layer,
1420                             struct intel_mipmap_tree *dst_mt,
1421                             unsigned dst_level, unsigned dst_layer,
1422                             unsigned width, unsigned height)
1423 {
1424    void *src, *dst;
1425    ptrdiff_t src_stride, dst_stride;
1426    const unsigned cpp = dst_mt->surf.size > 0 ?
1427       (isl_format_get_layout(dst_mt->surf.format)->bpb / 8) : dst_mt->cpp;
1428
1429    intel_miptree_map(brw, src_mt,
1430                      src_level, src_layer,
1431                      0, 0,
1432                      width, height,
1433                      GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
1434                      &src, &src_stride);
1435
1436    intel_miptree_map(brw, dst_mt,
1437                      dst_level, dst_layer,
1438                      0, 0,
1439                      width, height,
1440                      GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
1441                      BRW_MAP_DIRECT_BIT,
1442                      &dst, &dst_stride);
1443
1444    DBG("sw blit %s mt %p %p/%"PRIdPTR" -> %s mt %p %p/%"PRIdPTR" (%dx%d)\n",
1445        _mesa_get_format_name(src_mt->format),
1446        src_mt, src, src_stride,
1447        _mesa_get_format_name(dst_mt->format),
1448        dst_mt, dst, dst_stride,
1449        width, height);
1450
1451    int row_size = cpp * width;
1452    if (src_stride == row_size &&
1453        dst_stride == row_size) {
1454       memcpy(dst, src, row_size * height);
1455    } else {
1456       for (int i = 0; i < height; i++) {
1457          memcpy(dst, src, row_size);
1458          dst += dst_stride;
1459          src += src_stride;
1460       }
1461    }
1462
1463    intel_miptree_unmap(brw, dst_mt, dst_level, dst_layer);
1464    intel_miptree_unmap(brw, src_mt, src_level, src_layer);
1465
1466    /* Don't forget to copy the stencil data over, too.  We could have skipped
1467     * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
1468     * shuffling the two data sources in/out of temporary storage instead of
1469     * the direct mapping we get this way.
1470     */
1471    if (dst_mt->stencil_mt) {
1472       assert(src_mt->stencil_mt);
1473       intel_miptree_copy_slice_sw(brw,
1474                                   src_mt->stencil_mt, src_level, src_layer,
1475                                   dst_mt->stencil_mt, dst_level, dst_layer,
1476                                   width, height);
1477    }
1478 }
1479
1480 void
1481 intel_miptree_copy_slice(struct brw_context *brw,
1482                          struct intel_mipmap_tree *src_mt,
1483                          unsigned src_level, unsigned src_layer,
1484                          struct intel_mipmap_tree *dst_mt,
1485                          unsigned dst_level, unsigned dst_layer)
1486
1487 {
1488    mesa_format format = src_mt->format;
1489    uint32_t width, height;
1490
1491    if (src_mt->surf.size > 0) {
1492       width = minify(src_mt->surf.phys_level0_sa.width,
1493                      src_level - src_mt->first_level);
1494       height = minify(src_mt->surf.phys_level0_sa.height,
1495                       src_level - src_mt->first_level);
1496
1497       if (src_mt->surf.dim == ISL_SURF_DIM_3D)
1498          assert(src_layer < minify(src_mt->surf.phys_level0_sa.depth,
1499                                    src_level - src_mt->first_level));
1500       else
1501          assert(src_layer < src_mt->surf.phys_level0_sa.array_len);
1502    } else {
1503       width = minify(src_mt->physical_width0,
1504                      src_level - src_mt->first_level);
1505       height = minify(src_mt->physical_height0,
1506                       src_level - src_mt->first_level);
1507       assert(src_layer < src_mt->level[src_level].depth);
1508    }
1509
1510    assert(src_mt->format == dst_mt->format);
1511
1512    if (dst_mt->compressed) {
1513       unsigned int i, j;
1514       _mesa_get_format_block_size(dst_mt->format, &i, &j);
1515       height = ALIGN_NPOT(height, j) / j;
1516       width = ALIGN_NPOT(width, i) / i;
1517    }
1518
1519    /* If it's a packed depth/stencil buffer with separate stencil, the blit
1520     * below won't apply since we can't do the depth's Y tiling or the
1521     * stencil's W tiling in the blitter.
1522     */
1523    if (src_mt->stencil_mt) {
1524       intel_miptree_copy_slice_sw(brw,
1525                                   src_mt, src_level, src_layer,
1526                                   dst_mt, dst_level, dst_layer,
1527                                   width, height);
1528       return;
1529    }
1530
1531    uint32_t dst_x, dst_y, src_x, src_y;
1532    intel_miptree_get_image_offset(dst_mt, dst_level, dst_layer,
1533                                   &dst_x, &dst_y);
1534    intel_miptree_get_image_offset(src_mt, src_level, src_layer,
1535                                   &src_x, &src_y);
1536
1537    DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
1538        _mesa_get_format_name(src_mt->format),
1539        src_mt, src_x, src_y, src_mt->pitch,
1540        _mesa_get_format_name(dst_mt->format),
1541        dst_mt, dst_x, dst_y, dst_mt->pitch,
1542        width, height);
1543
1544    if (!intel_miptree_blit(brw,
1545                            src_mt, src_level, src_layer, 0, 0, false,
1546                            dst_mt, dst_level, dst_layer, 0, 0, false,
1547                            width, height, GL_COPY)) {
1548       perf_debug("miptree validate blit for %s failed\n",
1549                  _mesa_get_format_name(format));
1550
1551       intel_miptree_copy_slice_sw(brw,
1552                                   src_mt, src_level, src_layer,
1553                                   dst_mt, dst_level, dst_layer,
1554                                   width, height);
1555    }
1556 }
1557
1558 /**
1559  * Copies the image's current data to the given miptree, and associates that
1560  * miptree with the image.
1561  *
1562  * If \c invalidate is true, then the actual image data does not need to be
1563  * copied, but the image still needs to be associated to the new miptree (this
1564  * is set to true if we're about to clear the image).
1565  */
1566 void
1567 intel_miptree_copy_teximage(struct brw_context *brw,
1568                             struct intel_texture_image *intelImage,
1569                             struct intel_mipmap_tree *dst_mt,
1570                             bool invalidate)
1571 {
1572    struct intel_mipmap_tree *src_mt = intelImage->mt;
1573    struct intel_texture_object *intel_obj =
1574       intel_texture_object(intelImage->base.Base.TexObject);
1575    int level = intelImage->base.Base.Level;
1576    const unsigned face = intelImage->base.Base.Face;
1577    unsigned start_layer, end_layer;
1578
1579    if (intel_obj->base.Target == GL_TEXTURE_1D_ARRAY) {
1580       assert(face == 0);
1581       assert(intelImage->base.Base.Height);
1582       start_layer = 0;
1583       end_layer = intelImage->base.Base.Height - 1;
1584    } else if (face > 0) {
1585       start_layer = face;
1586       end_layer = face;
1587    } else {
1588       assert(intelImage->base.Base.Depth);
1589       start_layer = 0;
1590       end_layer = intelImage->base.Base.Depth - 1;
1591    }
1592
1593    if (!invalidate) {
1594       for (unsigned i = start_layer; i <= end_layer; i++) {
1595          intel_miptree_copy_slice(brw,
1596                                   src_mt, level, i,
1597                                   dst_mt, level, i);
1598       }
1599    }
1600
1601    intel_miptree_reference(&intelImage->mt, dst_mt);
1602    intel_obj->needs_validate = true;
1603 }
1604
1605 static void
1606 intel_miptree_init_mcs(struct brw_context *brw,
1607                        struct intel_mipmap_tree *mt,
1608                        int init_value)
1609 {
1610    assert(mt->mcs_buf != NULL);
1611
1612    /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
1613     *
1614     *     When MCS buffer is enabled and bound to MSRT, it is required that it
1615     *     is cleared prior to any rendering.
1616     *
1617     * Since we don't use the MCS buffer for any purpose other than rendering,
1618     * it makes sense to just clear it immediately upon allocation.
1619     *
1620     * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
1621     */
1622    void *map = brw_bo_map(brw, mt->mcs_buf->bo, MAP_WRITE);
1623    if (unlikely(map == NULL)) {
1624       fprintf(stderr, "Failed to map mcs buffer into GTT\n");
1625       brw_bo_unreference(mt->mcs_buf->bo);
1626       free(mt->mcs_buf);
1627       return;
1628    }
1629    void *data = map;
1630    memset(data, init_value, mt->mcs_buf->size);
1631    brw_bo_unmap(mt->mcs_buf->bo);
1632 }
1633
1634 static struct intel_miptree_aux_buffer *
1635 intel_alloc_aux_buffer(struct brw_context *brw,
1636                        const char *name,
1637                        const struct isl_surf *aux_surf,
1638                        uint32_t alloc_flags,
1639                        struct intel_mipmap_tree *mt)
1640 {
1641    struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
1642    if (!buf)
1643       return false;
1644
1645    buf->size = aux_surf->size;
1646    buf->pitch = aux_surf->row_pitch;
1647    buf->qpitch = isl_surf_get_array_pitch_sa_rows(aux_surf);
1648
1649    /* ISL has stricter set of alignment rules then the drm allocator.
1650     * Therefore one can pass the ISL dimensions in terms of bytes instead of
1651     * trying to recalculate based on different format block sizes.
1652     */
1653    buf->bo = brw_bo_alloc_tiled(brw->bufmgr, name, buf->size,
1654                                 I915_TILING_Y, buf->pitch, alloc_flags);
1655    if (!buf->bo) {
1656       free(buf);
1657       return NULL;
1658    }
1659
1660    buf->surf = *aux_surf;
1661
1662    return buf;
1663 }
1664
1665 static bool
1666 intel_miptree_alloc_mcs(struct brw_context *brw,
1667                         struct intel_mipmap_tree *mt,
1668                         GLuint num_samples)
1669 {
1670    assert(brw->gen >= 7); /* MCS only used on Gen7+ */
1671    assert(mt->mcs_buf == NULL);
1672    assert((mt->aux_disable & INTEL_AUX_DISABLE_MCS) == 0);
1673
1674    /* Multisampled miptrees are only supported for single level. */
1675    assert(mt->first_level == 0);
1676    enum isl_aux_state **aux_state =
1677       create_aux_state_map(mt, ISL_AUX_STATE_CLEAR);
1678    if (!aux_state)
1679       return false;
1680
1681    struct isl_surf temp_main_surf;
1682    struct isl_surf temp_mcs_surf;
1683
1684    /* Create first an ISL presentation for the main color surface and let ISL
1685     * calculate equivalent MCS surface against it.
1686     */
1687    intel_miptree_get_isl_surf(brw, mt, &temp_main_surf);
1688    MAYBE_UNUSED bool ok =
1689       isl_surf_get_mcs_surf(&brw->isl_dev, &temp_main_surf, &temp_mcs_surf);
1690    assert(ok);
1691
1692    /* Buffer needs to be initialised requiring the buffer to be immediately
1693     * mapped to cpu space for writing. Therefore do not use the gpu access
1694     * flag which can cause an unnecessary delay if the backing pages happened
1695     * to be just used by the GPU.
1696     */
1697    const uint32_t alloc_flags = 0;
1698    mt->mcs_buf = intel_alloc_aux_buffer(brw, "mcs-miptree",
1699                                         &temp_mcs_surf, alloc_flags, mt);
1700    if (!mt->mcs_buf) {
1701       free(aux_state);
1702       return false;
1703    }
1704
1705    mt->aux_state = aux_state;
1706
1707    intel_miptree_init_mcs(brw, mt, 0xFF);
1708
1709    return true;
1710 }
1711
1712 bool
1713 intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
1714                                  struct intel_mipmap_tree *mt,
1715                                  bool is_lossless_compressed)
1716 {
1717    assert(mt->mcs_buf == NULL);
1718    assert(!(mt->aux_disable & (INTEL_AUX_DISABLE_MCS | INTEL_AUX_DISABLE_CCS)));
1719
1720    struct isl_surf temp_main_surf;
1721    struct isl_surf temp_ccs_surf;
1722
1723    /* Create first an ISL presentation for the main color surface and let ISL
1724     * calculate equivalent CCS surface against it.
1725     */
1726    intel_miptree_get_isl_surf(brw, mt, &temp_main_surf);
1727    if (!isl_surf_get_ccs_surf(&brw->isl_dev, &temp_main_surf, &temp_ccs_surf))
1728       return false;
1729
1730    assert(temp_ccs_surf.size &&
1731           (temp_ccs_surf.size % temp_ccs_surf.row_pitch == 0));
1732
1733    enum isl_aux_state **aux_state =
1734       create_aux_state_map(mt, ISL_AUX_STATE_PASS_THROUGH);
1735    if (!aux_state)
1736       return false;
1737
1738    /* In case of compression mcs buffer needs to be initialised requiring the
1739     * buffer to be immediately mapped to cpu space for writing. Therefore do
1740     * not use the gpu access flag which can cause an unnecessary delay if the
1741     * backing pages happened to be just used by the GPU.
1742     */
1743    const uint32_t alloc_flags =
1744       is_lossless_compressed ? 0 : BO_ALLOC_FOR_RENDER;
1745    mt->mcs_buf = intel_alloc_aux_buffer(brw, "ccs-miptree",
1746                                         &temp_ccs_surf, alloc_flags, mt);
1747    if (!mt->mcs_buf) {
1748       free(aux_state);
1749       return false;
1750    }
1751
1752    mt->aux_state = aux_state;
1753
1754    /* From Gen9 onwards single-sampled (non-msrt) auxiliary buffers are
1755     * used for lossless compression which requires similar initialisation
1756     * as multi-sample compression.
1757     */
1758    if (is_lossless_compressed) {
1759       /* Hardware sets the auxiliary buffer to all zeroes when it does full
1760        * resolve. Initialize it accordingly in case the first renderer is
1761        * cpu (or other none compression aware party).
1762        *
1763        * This is also explicitly stated in the spec (MCS Buffer for Render
1764        * Target(s)):
1765        *   "If Software wants to enable Color Compression without Fast clear,
1766        *    Software needs to initialize MCS with zeros."
1767        */
1768       intel_miptree_init_mcs(brw, mt, 0);
1769       mt->msaa_layout = INTEL_MSAA_LAYOUT_CMS;
1770    }
1771
1772    return true;
1773 }
1774
1775 /**
1776  * Helper for intel_miptree_alloc_hiz() that sets
1777  * \c mt->level[level].has_hiz. Return true if and only if
1778  * \c has_hiz was set.
1779  */
1780 static bool
1781 intel_miptree_level_enable_hiz(struct brw_context *brw,
1782                                struct intel_mipmap_tree *mt,
1783                                uint32_t level)
1784 {
1785    assert(mt->hiz_buf);
1786
1787    if (brw->gen >= 8 || brw->is_haswell) {
1788       uint32_t width = minify(mt->physical_width0, level);
1789       uint32_t height = minify(mt->physical_height0, level);
1790
1791       /* Disable HiZ for LOD > 0 unless the width is 8 aligned
1792        * and the height is 4 aligned. This allows our HiZ support
1793        * to fulfill Haswell restrictions for HiZ ops. For LOD == 0,
1794        * we can grow the width & height to allow the HiZ op to
1795        * force the proper size alignments.
1796        */
1797       if (level > 0 && ((width & 7) || (height & 3))) {
1798          DBG("mt %p level %d: HiZ DISABLED\n", mt, level);
1799          return false;
1800       }
1801    }
1802
1803    DBG("mt %p level %d: HiZ enabled\n", mt, level);
1804    mt->level[level].has_hiz = true;
1805    return true;
1806 }
1807
1808 bool
1809 intel_miptree_wants_hiz_buffer(struct brw_context *brw,
1810                                struct intel_mipmap_tree *mt)
1811 {
1812    if (!brw->has_hiz)
1813       return false;
1814
1815    if (mt->hiz_buf != NULL)
1816       return false;
1817
1818    if (mt->aux_disable & INTEL_AUX_DISABLE_HIZ)
1819       return false;
1820
1821    switch (mt->format) {
1822    case MESA_FORMAT_Z_FLOAT32:
1823    case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
1824    case MESA_FORMAT_Z24_UNORM_X8_UINT:
1825    case MESA_FORMAT_Z24_UNORM_S8_UINT:
1826    case MESA_FORMAT_Z_UNORM16:
1827       return true;
1828    default:
1829       return false;
1830    }
1831 }
1832
1833 bool
1834 intel_miptree_alloc_hiz(struct brw_context *brw,
1835                         struct intel_mipmap_tree *mt)
1836 {
1837    assert(mt->hiz_buf == NULL);
1838    assert((mt->aux_disable & INTEL_AUX_DISABLE_HIZ) == 0);
1839
1840    enum isl_aux_state **aux_state =
1841       create_aux_state_map(mt, ISL_AUX_STATE_AUX_INVALID);
1842    if (!aux_state)
1843       return false;
1844
1845    struct isl_surf temp_main_surf;
1846    struct isl_surf temp_hiz_surf;
1847
1848    intel_miptree_get_isl_surf(brw, mt, &temp_main_surf);
1849    MAYBE_UNUSED bool ok =
1850       isl_surf_get_hiz_surf(&brw->isl_dev, &temp_main_surf, &temp_hiz_surf);
1851    assert(ok);
1852
1853    const uint32_t alloc_flags = BO_ALLOC_FOR_RENDER;
1854    mt->hiz_buf = intel_alloc_aux_buffer(brw, "hiz-miptree",
1855                                         &temp_hiz_surf, alloc_flags, mt);
1856
1857    if (!mt->hiz_buf) {
1858       free(aux_state);
1859       return false;
1860    }
1861
1862    for (unsigned level = mt->first_level; level <= mt->last_level; ++level)
1863       intel_miptree_level_enable_hiz(brw, mt, level);
1864
1865    mt->aux_state = aux_state;
1866
1867    return true;
1868 }
1869
1870 /**
1871  * Can the miptree sample using the hiz buffer?
1872  */
1873 bool
1874 intel_miptree_sample_with_hiz(struct brw_context *brw,
1875                               struct intel_mipmap_tree *mt)
1876 {
1877    /* It's unclear how well supported sampling from the hiz buffer is on GEN8,
1878     * so keep things conservative for now and never enable it unless we're SKL+.
1879     */
1880    if (brw->gen < 9) {
1881       return false;
1882    }
1883
1884    if (!mt->hiz_buf) {
1885       return false;
1886    }
1887
1888    /* It seems the hardware won't fallback to the depth buffer if some of the
1889     * mipmap levels aren't available in the HiZ buffer. So we need all levels
1890     * of the texture to be HiZ enabled.
1891     */
1892    for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
1893       if (!intel_miptree_level_has_hiz(mt, level))
1894          return false;
1895    }
1896
1897    /* If compressed multisampling is enabled, then we use it for the auxiliary
1898     * buffer instead.
1899     *
1900     * From the BDW PRM (Volume 2d: Command Reference: Structures
1901     *                   RENDER_SURFACE_STATE.AuxiliarySurfaceMode):
1902     *
1903     *  "If this field is set to AUX_HIZ, Number of Multisamples must be
1904     *   MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D.
1905     *
1906     * There is no such blurb for 1D textures, but there is sufficient evidence
1907     * that this is broken on SKL+.
1908     */
1909    return (mt->num_samples <= 1 &&
1910            mt->target != GL_TEXTURE_3D &&
1911            mt->target != GL_TEXTURE_1D /* gen9+ restriction */);
1912 }
1913
1914 /**
1915  * Does the miptree slice have hiz enabled?
1916  */
1917 bool
1918 intel_miptree_level_has_hiz(const struct intel_mipmap_tree *mt, uint32_t level)
1919 {
1920    intel_miptree_check_level_layer(mt, level, 0);
1921    return mt->level[level].has_hiz;
1922 }
1923
1924 bool
1925 intel_miptree_has_color_unresolved(const struct intel_mipmap_tree *mt,
1926                                    unsigned start_level, unsigned num_levels,
1927                                    unsigned start_layer, unsigned num_layers)
1928 {
1929    assert(_mesa_is_format_color_format(mt->format));
1930
1931    if (!mt->mcs_buf)
1932       return false;
1933
1934    /* Clamp the level range to fit the miptree */
1935    assert(start_level + num_levels >= start_level);
1936    const uint32_t last_level =
1937       MIN2(mt->last_level, start_level + num_levels - 1);
1938    start_level = MAX2(mt->first_level, start_level);
1939    num_levels = last_level - start_level + 1;
1940
1941    for (uint32_t level = start_level; level <= last_level; level++) {
1942       const uint32_t level_layers = MIN2(num_layers, mt->level[level].depth);
1943       for (unsigned a = 0; a < level_layers; a++) {
1944          enum isl_aux_state aux_state =
1945             intel_miptree_get_aux_state(mt, level, start_layer + a);
1946          assert(aux_state != ISL_AUX_STATE_AUX_INVALID);
1947          if (aux_state != ISL_AUX_STATE_PASS_THROUGH)
1948             return true;
1949       }
1950    }
1951
1952    return false;
1953 }
1954
1955 static void
1956 intel_miptree_check_color_resolve(const struct brw_context *brw,
1957                                   const struct intel_mipmap_tree *mt,
1958                                   unsigned level, unsigned layer)
1959 {
1960
1961    if ((mt->aux_disable & INTEL_AUX_DISABLE_CCS) || !mt->mcs_buf)
1962       return;
1963
1964    /* Fast color clear is supported for mipmapped surfaces only on Gen8+. */
1965    assert(brw->gen >= 8 ||
1966           (level == 0 && mt->first_level == 0 && mt->last_level == 0));
1967
1968    /* Compression of arrayed msaa surfaces is supported. */
1969    if (mt->num_samples > 1)
1970       return;
1971
1972    /* Fast color clear is supported for non-msaa arrays only on Gen8+. */
1973    assert(brw->gen >= 8 || (layer == 0 && mt->logical_depth0 == 1));
1974
1975    (void)level;
1976    (void)layer;
1977 }
1978
1979 static enum blorp_fast_clear_op
1980 get_ccs_d_resolve_op(enum isl_aux_state aux_state,
1981                      bool ccs_supported, bool fast_clear_supported)
1982 {
1983    assert(ccs_supported == fast_clear_supported);
1984
1985    switch (aux_state) {
1986    case ISL_AUX_STATE_CLEAR:
1987    case ISL_AUX_STATE_COMPRESSED_CLEAR:
1988       if (!ccs_supported)
1989          return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
1990       else
1991          return BLORP_FAST_CLEAR_OP_NONE;
1992
1993    case ISL_AUX_STATE_PASS_THROUGH:
1994       return BLORP_FAST_CLEAR_OP_NONE;
1995
1996    case ISL_AUX_STATE_RESOLVED:
1997    case ISL_AUX_STATE_AUX_INVALID:
1998    case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
1999       break;
2000    }
2001
2002    unreachable("Invalid aux state for CCS_D");
2003 }
2004
2005 static enum blorp_fast_clear_op
2006 get_ccs_e_resolve_op(enum isl_aux_state aux_state,
2007                      bool ccs_supported, bool fast_clear_supported)
2008 {
2009    switch (aux_state) {
2010    case ISL_AUX_STATE_CLEAR:
2011    case ISL_AUX_STATE_COMPRESSED_CLEAR:
2012       if (!ccs_supported)
2013          return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
2014       else if (!fast_clear_supported)
2015          return BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
2016       else
2017          return BLORP_FAST_CLEAR_OP_NONE;
2018
2019    case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2020       if (!ccs_supported)
2021          return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
2022       else
2023          return BLORP_FAST_CLEAR_OP_NONE;
2024
2025    case ISL_AUX_STATE_PASS_THROUGH:
2026       return BLORP_FAST_CLEAR_OP_NONE;
2027
2028    case ISL_AUX_STATE_RESOLVED:
2029    case ISL_AUX_STATE_AUX_INVALID:
2030       break;
2031    }
2032
2033    unreachable("Invalid aux state for CCS_E");
2034 }
2035
2036 static void
2037 intel_miptree_prepare_ccs_access(struct brw_context *brw,
2038                                  struct intel_mipmap_tree *mt,
2039                                  uint32_t level, uint32_t layer,
2040                                  bool aux_supported,
2041                                  bool fast_clear_supported)
2042 {
2043    enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, layer);
2044
2045    enum blorp_fast_clear_op resolve_op;
2046    if (intel_miptree_is_lossless_compressed(brw, mt)) {
2047       resolve_op = get_ccs_e_resolve_op(aux_state, aux_supported,
2048                                         fast_clear_supported);
2049    } else {
2050       resolve_op = get_ccs_d_resolve_op(aux_state, aux_supported,
2051                                         fast_clear_supported);
2052    }
2053
2054    if (resolve_op != BLORP_FAST_CLEAR_OP_NONE) {
2055       intel_miptree_check_color_resolve(brw, mt, level, layer);
2056       brw_blorp_resolve_color(brw, mt, level, layer, resolve_op);
2057
2058       switch (resolve_op) {
2059       case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
2060          /* The CCS full resolve operation destroys the CCS and sets it to the
2061           * pass-through state.  (You can also think of this as being both a
2062           * resolve and an ambiguate in one operation.)
2063           */
2064          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2065                                      ISL_AUX_STATE_PASS_THROUGH);
2066          break;
2067
2068       case BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL:
2069          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2070                                      ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2071          break;
2072
2073       default:
2074          unreachable("Invalid resolve op");
2075       }
2076    }
2077 }
2078
2079 static void
2080 intel_miptree_finish_ccs_write(struct brw_context *brw,
2081                                struct intel_mipmap_tree *mt,
2082                                uint32_t level, uint32_t layer,
2083                                bool written_with_ccs)
2084 {
2085    enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, layer);
2086
2087    if (intel_miptree_is_lossless_compressed(brw, mt)) {
2088       switch (aux_state) {
2089       case ISL_AUX_STATE_CLEAR:
2090          assert(written_with_ccs);
2091          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2092                                      ISL_AUX_STATE_COMPRESSED_CLEAR);
2093          break;
2094
2095       case ISL_AUX_STATE_COMPRESSED_CLEAR:
2096       case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2097          assert(written_with_ccs);
2098          break; /* Nothing to do */
2099
2100       case ISL_AUX_STATE_PASS_THROUGH:
2101          if (written_with_ccs) {
2102             intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2103                                         ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2104          } else {
2105             /* Nothing to do */
2106          }
2107          break;
2108
2109       case ISL_AUX_STATE_RESOLVED:
2110       case ISL_AUX_STATE_AUX_INVALID:
2111          unreachable("Invalid aux state for CCS_E");
2112       }
2113    } else {
2114       /* CCS_D is a bit simpler */
2115       switch (aux_state) {
2116       case ISL_AUX_STATE_CLEAR:
2117          assert(written_with_ccs);
2118          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2119                                      ISL_AUX_STATE_COMPRESSED_CLEAR);
2120          break;
2121
2122       case ISL_AUX_STATE_COMPRESSED_CLEAR:
2123          assert(written_with_ccs);
2124          break; /* Nothing to do */
2125
2126       case ISL_AUX_STATE_PASS_THROUGH:
2127          /* Nothing to do */
2128          break;
2129
2130       case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2131       case ISL_AUX_STATE_RESOLVED:
2132       case ISL_AUX_STATE_AUX_INVALID:
2133          unreachable("Invalid aux state for CCS_D");
2134       }
2135    }
2136 }
2137
2138 static void
2139 intel_miptree_finish_mcs_write(struct brw_context *brw,
2140                                struct intel_mipmap_tree *mt,
2141                                uint32_t level, uint32_t layer,
2142                                bool written_with_aux)
2143 {
2144    switch (intel_miptree_get_aux_state(mt, level, layer)) {
2145    case ISL_AUX_STATE_CLEAR:
2146       assert(written_with_aux);
2147       intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2148                                   ISL_AUX_STATE_COMPRESSED_CLEAR);
2149       break;
2150
2151    case ISL_AUX_STATE_COMPRESSED_CLEAR:
2152       assert(written_with_aux);
2153       break; /* Nothing to do */
2154
2155    case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2156    case ISL_AUX_STATE_RESOLVED:
2157    case ISL_AUX_STATE_PASS_THROUGH:
2158    case ISL_AUX_STATE_AUX_INVALID:
2159       unreachable("Invalid aux state for MCS");
2160    }
2161 }
2162
2163 static void
2164 intel_miptree_prepare_hiz_access(struct brw_context *brw,
2165                                  struct intel_mipmap_tree *mt,
2166                                  uint32_t level, uint32_t layer,
2167                                  bool hiz_supported, bool fast_clear_supported)
2168 {
2169    enum blorp_hiz_op hiz_op = BLORP_HIZ_OP_NONE;
2170    switch (intel_miptree_get_aux_state(mt, level, layer)) {
2171    case ISL_AUX_STATE_CLEAR:
2172    case ISL_AUX_STATE_COMPRESSED_CLEAR:
2173       if (!hiz_supported || !fast_clear_supported)
2174          hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
2175       break;
2176
2177    case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2178       if (!hiz_supported)
2179          hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
2180       break;
2181
2182    case ISL_AUX_STATE_PASS_THROUGH:
2183    case ISL_AUX_STATE_RESOLVED:
2184       break;
2185
2186    case ISL_AUX_STATE_AUX_INVALID:
2187       if (hiz_supported)
2188          hiz_op = BLORP_HIZ_OP_HIZ_RESOLVE;
2189       break;
2190    }
2191
2192    if (hiz_op != BLORP_HIZ_OP_NONE) {
2193       intel_hiz_exec(brw, mt, level, layer, 1, hiz_op);
2194
2195       switch (hiz_op) {
2196       case BLORP_HIZ_OP_DEPTH_RESOLVE:
2197          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2198                                      ISL_AUX_STATE_RESOLVED);
2199          break;
2200
2201       case BLORP_HIZ_OP_HIZ_RESOLVE:
2202          /* The HiZ resolve operation is actually an ambiguate */
2203          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2204                                      ISL_AUX_STATE_PASS_THROUGH);
2205          break;
2206
2207       default:
2208          unreachable("Invalid HiZ op");
2209       }
2210    }
2211 }
2212
2213 static void
2214 intel_miptree_finish_hiz_write(struct brw_context *brw,
2215                                struct intel_mipmap_tree *mt,
2216                                uint32_t level, uint32_t layer,
2217                                bool written_with_hiz)
2218 {
2219    switch (intel_miptree_get_aux_state(mt, level, layer)) {
2220    case ISL_AUX_STATE_CLEAR:
2221       assert(written_with_hiz);
2222       intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2223                                   ISL_AUX_STATE_COMPRESSED_CLEAR);
2224       break;
2225
2226    case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
2227    case ISL_AUX_STATE_COMPRESSED_CLEAR:
2228       assert(written_with_hiz);
2229       break; /* Nothing to do */
2230
2231    case ISL_AUX_STATE_RESOLVED:
2232       if (written_with_hiz) {
2233          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2234                                      ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2235       } else {
2236          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2237                                      ISL_AUX_STATE_AUX_INVALID);
2238       }
2239       break;
2240
2241    case ISL_AUX_STATE_PASS_THROUGH:
2242       if (written_with_hiz) {
2243          intel_miptree_set_aux_state(brw, mt, level, layer, 1,
2244                                      ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
2245       }
2246       break;
2247
2248    case ISL_AUX_STATE_AUX_INVALID:
2249       assert(!written_with_hiz);
2250       break;
2251    }
2252 }
2253
2254 static inline uint32_t
2255 miptree_level_range_length(const struct intel_mipmap_tree *mt,
2256                            uint32_t start_level, uint32_t num_levels)
2257 {
2258    assert(start_level >= mt->first_level);
2259    assert(start_level <= mt->last_level);
2260
2261    if (num_levels == INTEL_REMAINING_LAYERS)
2262       num_levels = mt->last_level - start_level + 1;
2263    /* Check for overflow */
2264    assert(start_level + num_levels >= start_level);
2265    assert(start_level + num_levels <= mt->last_level + 1);
2266
2267    return num_levels;
2268 }
2269
2270 static inline uint32_t
2271 miptree_layer_range_length(const struct intel_mipmap_tree *mt, uint32_t level,
2272                            uint32_t start_layer, uint32_t num_layers)
2273 {
2274    assert(level <= mt->last_level);
2275    uint32_t total_num_layers;
2276
2277    if (mt->surf.size > 0)
2278       total_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ?
2279          minify(mt->surf.phys_level0_sa.depth, level) :
2280          mt->surf.phys_level0_sa.array_len;
2281    else
2282       total_num_layers = mt->level[level].depth;
2283
2284    assert(start_layer < total_num_layers);
2285    if (num_layers == INTEL_REMAINING_LAYERS)
2286       num_layers = total_num_layers - start_layer;
2287    /* Check for overflow */
2288    assert(start_layer + num_layers >= start_layer);
2289    assert(start_layer + num_layers <= total_num_layers);
2290
2291    return num_layers;
2292 }
2293
2294 void
2295 intel_miptree_prepare_access(struct brw_context *brw,
2296                              struct intel_mipmap_tree *mt,
2297                              uint32_t start_level, uint32_t num_levels,
2298                              uint32_t start_layer, uint32_t num_layers,
2299                              bool aux_supported, bool fast_clear_supported)
2300 {
2301    num_levels = miptree_level_range_length(mt, start_level, num_levels);
2302
2303    if (_mesa_is_format_color_format(mt->format)) {
2304       if (!mt->mcs_buf)
2305          return;
2306
2307       if (mt->num_samples > 1) {
2308          /* Nothing to do for MSAA */
2309          assert(aux_supported && fast_clear_supported);
2310       } else {
2311          for (uint32_t l = 0; l < num_levels; l++) {
2312             const uint32_t level = start_level + l;
2313             const uint32_t level_layers =
2314                miptree_layer_range_length(mt, level, start_layer, num_layers);
2315             for (uint32_t a = 0; a < level_layers; a++) {
2316                intel_miptree_prepare_ccs_access(brw, mt, level,
2317                                                 start_layer + a, aux_supported,
2318                                                 fast_clear_supported);
2319             }
2320          }
2321       }
2322    } else if (mt->format == MESA_FORMAT_S_UINT8) {
2323       /* Nothing to do for stencil */
2324    } else {
2325       if (!mt->hiz_buf)
2326          return;
2327
2328       for (uint32_t l = 0; l < num_levels; l++) {
2329          const uint32_t level = start_level + l;
2330          if (!intel_miptree_level_has_hiz(mt, level))
2331             continue;
2332
2333          const uint32_t level_layers =
2334             miptree_layer_range_length(mt, level, start_layer, num_layers);
2335          for (uint32_t a = 0; a < level_layers; a++) {
2336             intel_miptree_prepare_hiz_access(brw, mt, level, start_layer + a,
2337                                              aux_supported,
2338                                              fast_clear_supported);
2339          }
2340       }
2341    }
2342 }
2343
2344 void
2345 intel_miptree_finish_write(struct brw_context *brw,
2346                            struct intel_mipmap_tree *mt, uint32_t level,
2347                            uint32_t start_layer, uint32_t num_layers,
2348                            bool written_with_aux)
2349 {
2350    num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers);
2351
2352    if (_mesa_is_format_color_format(mt->format)) {
2353       if (!mt->mcs_buf)
2354          return;
2355
2356       if (mt->num_samples > 1) {
2357          for (uint32_t a = 0; a < num_layers; a++) {
2358             intel_miptree_finish_mcs_write(brw, mt, level, start_layer + a,
2359                                            written_with_aux);
2360          }
2361       } else {
2362          for (uint32_t a = 0; a < num_layers; a++) {
2363             intel_miptree_finish_ccs_write(brw, mt, level, start_layer + a,
2364                                            written_with_aux);
2365          }
2366       }
2367    } else if (mt->format == MESA_FORMAT_S_UINT8) {
2368       /* Nothing to do for stencil */
2369    } else {
2370       if (!intel_miptree_level_has_hiz(mt, level))
2371          return;
2372
2373       for (uint32_t a = 0; a < num_layers; a++) {
2374          intel_miptree_finish_hiz_write(brw, mt, level, start_layer + a,
2375                                         written_with_aux);
2376       }
2377    }
2378 }
2379
2380 enum isl_aux_state
2381 intel_miptree_get_aux_state(const struct intel_mipmap_tree *mt,
2382                             uint32_t level, uint32_t layer)
2383 {
2384    intel_miptree_check_level_layer(mt, level, layer);
2385
2386    if (_mesa_is_format_color_format(mt->format)) {
2387       assert(mt->mcs_buf != NULL);
2388       assert(mt->num_samples <= 1 || mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS);
2389    } else if (mt->format == MESA_FORMAT_S_UINT8) {
2390       unreachable("Cannot get aux state for stencil");
2391    } else {
2392       assert(intel_miptree_level_has_hiz(mt, level));
2393    }
2394
2395    return mt->aux_state[level][layer];
2396 }
2397
2398 void
2399 intel_miptree_set_aux_state(struct brw_context *brw,
2400                             struct intel_mipmap_tree *mt, uint32_t level,
2401                             uint32_t start_layer, uint32_t num_layers,
2402                             enum isl_aux_state aux_state)
2403 {
2404    num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers);
2405
2406    if (_mesa_is_format_color_format(mt->format)) {
2407       assert(mt->mcs_buf != NULL);
2408       assert(mt->num_samples <= 1 || mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS);
2409    } else if (mt->format == MESA_FORMAT_S_UINT8) {
2410       unreachable("Cannot get aux state for stencil");
2411    } else {
2412       assert(intel_miptree_level_has_hiz(mt, level));
2413    }
2414
2415    for (unsigned a = 0; a < num_layers; a++)
2416       mt->aux_state[level][start_layer + a] = aux_state;
2417 }
2418
2419 /* On Gen9 color buffers may be compressed by the hardware (lossless
2420  * compression). There are, however, format restrictions and care needs to be
2421  * taken that the sampler engine is capable for re-interpreting a buffer with
2422  * format different the buffer was originally written with.
2423  *
2424  * For example, SRGB formats are not compressible and the sampler engine isn't
2425  * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying
2426  * color buffer needs to be resolved so that the sampling surface can be
2427  * sampled as non-compressed (i.e., without the auxiliary MCS buffer being
2428  * set).
2429  */
2430 static bool
2431 can_texture_with_ccs(struct brw_context *brw,
2432                      struct intel_mipmap_tree *mt,
2433                      mesa_format view_format)
2434 {
2435    if (!intel_miptree_is_lossless_compressed(brw, mt))
2436       return false;
2437
2438    enum isl_format isl_mt_format = brw_isl_format_for_mesa_format(mt->format);
2439    enum isl_format isl_view_format = brw_isl_format_for_mesa_format(view_format);
2440
2441    if (!isl_formats_are_ccs_e_compatible(&brw->screen->devinfo,
2442                                          isl_mt_format, isl_view_format)) {
2443       perf_debug("Incompatible sampling format (%s) for rbc (%s)\n",
2444                  _mesa_get_format_name(view_format),
2445                  _mesa_get_format_name(mt->format));
2446       return false;
2447    }
2448
2449    return true;
2450 }
2451
2452 static void
2453 intel_miptree_prepare_texture_slices(struct brw_context *brw,
2454                                      struct intel_mipmap_tree *mt,
2455                                      mesa_format view_format,
2456                                      uint32_t start_level, uint32_t num_levels,
2457                                      uint32_t start_layer, uint32_t num_layers,
2458                                      bool *aux_supported_out)
2459 {
2460    bool aux_supported, clear_supported;
2461    if (_mesa_is_format_color_format(mt->format)) {
2462       if (mt->num_samples > 1) {
2463          aux_supported = clear_supported = true;
2464       } else {
2465          aux_supported = can_texture_with_ccs(brw, mt, view_format);
2466
2467          /* Clear color is specified as ints or floats and the conversion is
2468           * done by the sampler.  If we have a texture view, we would have to
2469           * perform the clear color conversion manually.  Just disable clear
2470           * color.
2471           */
2472          clear_supported = aux_supported && (mt->format == view_format);
2473       }
2474    } else if (mt->format == MESA_FORMAT_S_UINT8) {
2475       aux_supported = clear_supported = false;
2476    } else {
2477       aux_supported = clear_supported = intel_miptree_sample_with_hiz(brw, mt);
2478    }
2479
2480    intel_miptree_prepare_access(brw, mt, start_level, num_levels,
2481                                 start_layer, num_layers,
2482                                 aux_supported, clear_supported);
2483    if (aux_supported_out)
2484       *aux_supported_out = aux_supported;
2485 }
2486
2487 void
2488 intel_miptree_prepare_texture(struct brw_context *brw,
2489                               struct intel_mipmap_tree *mt,
2490                               mesa_format view_format,
2491                               bool *aux_supported_out)
2492 {
2493    intel_miptree_prepare_texture_slices(brw, mt, view_format,
2494                                         0, INTEL_REMAINING_LEVELS,
2495                                         0, INTEL_REMAINING_LAYERS,
2496                                         aux_supported_out);
2497 }
2498
2499 void
2500 intel_miptree_prepare_image(struct brw_context *brw,
2501                             struct intel_mipmap_tree *mt)
2502 {
2503    /* The data port doesn't understand any compression */
2504    intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
2505                                 0, INTEL_REMAINING_LAYERS, false, false);
2506 }
2507
2508 void
2509 intel_miptree_prepare_fb_fetch(struct brw_context *brw,
2510                                struct intel_mipmap_tree *mt, uint32_t level,
2511                                uint32_t start_layer, uint32_t num_layers)
2512 {
2513    intel_miptree_prepare_texture_slices(brw, mt, mt->format, level, 1,
2514                                         start_layer, num_layers, NULL);
2515 }
2516
2517 void
2518 intel_miptree_prepare_render(struct brw_context *brw,
2519                              struct intel_mipmap_tree *mt, uint32_t level,
2520                              uint32_t start_layer, uint32_t layer_count,
2521                              bool srgb_enabled)
2522 {
2523    /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of
2524     * the single-sampled color renderbuffers because the CCS buffer isn't
2525     * supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is
2526     * enabled because otherwise the surface state will be programmed with
2527     * the linear equivalent format anyway.
2528     */
2529    if (brw->gen == 9 && srgb_enabled && mt->num_samples <= 1 &&
2530        _mesa_get_srgb_format_linear(mt->format) != mt->format) {
2531
2532       /* Lossless compression is not supported for SRGB formats, it
2533        * should be impossible to get here with such surfaces.
2534        */
2535       assert(!intel_miptree_is_lossless_compressed(brw, mt));
2536       intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
2537                                    false, false);
2538    }
2539
2540    /* For layered rendering non-compressed fast cleared buffers need to be
2541     * resolved. Surface state can carry only one fast color clear value
2542     * while each layer may have its own fast clear color value. For
2543     * compressed buffers color value is available in the color buffer.
2544     */
2545    if (layer_count > 1 &&
2546        !(mt->aux_disable & INTEL_AUX_DISABLE_CCS) &&
2547        !intel_miptree_is_lossless_compressed(brw, mt)) {
2548       assert(brw->gen >= 8);
2549
2550       intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
2551                                    false, false);
2552    }
2553 }
2554
2555 void
2556 intel_miptree_finish_render(struct brw_context *brw,
2557                             struct intel_mipmap_tree *mt, uint32_t level,
2558                             uint32_t start_layer, uint32_t layer_count)
2559 {
2560    assert(_mesa_is_format_color_format(mt->format));
2561    intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
2562                               mt->mcs_buf != NULL);
2563 }
2564
2565 void
2566 intel_miptree_prepare_depth(struct brw_context *brw,
2567                             struct intel_mipmap_tree *mt, uint32_t level,
2568                             uint32_t start_layer, uint32_t layer_count)
2569 {
2570    intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
2571                                 mt->hiz_buf != NULL, mt->hiz_buf != NULL);
2572 }
2573
2574 void
2575 intel_miptree_finish_depth(struct brw_context *brw,
2576                            struct intel_mipmap_tree *mt, uint32_t level,
2577                            uint32_t start_layer, uint32_t layer_count,
2578                            bool depth_written)
2579 {
2580    if (depth_written) {
2581       intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
2582                                  mt->hiz_buf != NULL);
2583    }
2584 }
2585
2586 /**
2587  * Make it possible to share the BO backing the given miptree with another
2588  * process or another miptree.
2589  *
2590  * Fast color clears are unsafe with shared buffers, so we need to resolve and
2591  * then discard the MCS buffer, if present.  We also set the no_ccs flag to
2592  * ensure that no MCS buffer gets allocated in the future.
2593  *
2594  * HiZ is similarly unsafe with shared buffers.
2595  */
2596 void
2597 intel_miptree_make_shareable(struct brw_context *brw,
2598                              struct intel_mipmap_tree *mt)
2599 {
2600    /* MCS buffers are also used for multisample buffers, but we can't resolve
2601     * away a multisample MCS buffer because it's an integral part of how the
2602     * pixel data is stored.  Fortunately this code path should never be
2603     * reached for multisample buffers.
2604     */
2605    assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE || mt->num_samples <= 1);
2606
2607    intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
2608                                 0, INTEL_REMAINING_LAYERS, false, false);
2609
2610    if (mt->mcs_buf) {
2611       mt->aux_disable |= (INTEL_AUX_DISABLE_CCS | INTEL_AUX_DISABLE_MCS);
2612       brw_bo_unreference(mt->mcs_buf->bo);
2613       free(mt->mcs_buf);
2614       mt->mcs_buf = NULL;
2615
2616       /* Any pending MCS/CCS operations are no longer needed. Trying to
2617        * execute any will likely crash due to the missing aux buffer. So let's
2618        * delete all pending ops.
2619        */
2620       free(mt->aux_state);
2621       mt->aux_state = NULL;
2622    }
2623
2624    if (mt->hiz_buf) {
2625       mt->aux_disable |= INTEL_AUX_DISABLE_HIZ;
2626       intel_miptree_aux_buffer_free(mt->hiz_buf);
2627       mt->hiz_buf = NULL;
2628
2629       for (uint32_t l = mt->first_level; l <= mt->last_level; ++l) {
2630          mt->level[l].has_hiz = false;
2631       }
2632
2633       /* Any pending HiZ operations are no longer needed. Trying to execute
2634        * any will likely crash due to the missing aux buffer. So let's delete
2635        * all pending ops.
2636        */
2637       free(mt->aux_state);
2638       mt->aux_state = NULL;
2639    }
2640 }
2641
2642
/**
 * \brief Get pointer offset into stencil buffer.
 *
 * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
 * must decode the tile's layout in software.
 *
 * See
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
 *     Format.
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
 *
 * \param stride    value multiplied by 64 to get the byte size of one row
 *                  of tiles
 * \param x         horizontal byte coordinate within the surface
 * \param y         vertical byte coordinate within the surface
 * \param swizzled  whether to apply the bit-6 swizzle adjustment
 *
 * Even though the returned offset is always positive, the return type is
 * signed due to
 *    commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
 *    mesa: Fix return type of  _mesa_get_format_bytes() (#37351)
 */
static intptr_t
intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
{
   /* A tile covers 64x64 bytes, i.e. 4096 bytes. */
   const uint32_t tile_dim = 64;
   const uint32_t tile_bytes = 4096;
   const uint32_t tile_row_bytes = 64 * stride;

   /* The byte's position relative to the base of its tile. */
   const uint32_t col = x % tile_dim;
   const uint32_t row = y % tile_dim;

   /* Start of the tile that contains (x, y). */
   uint32_t offset = (y / tile_dim) * tile_row_bytes
                   + (x / tile_dim) * tile_bytes;

   /* Within the tile the column and row bits are interleaved, from the
    * 8-byte granularity down to single bytes.
    */
   offset += 512 * (col >> 3);
   offset +=  64 * (row >> 3);
   offset +=  32 * ((row >> 2) & 1);
   offset +=  16 * ((col >> 2) & 1);
   offset +=   8 * ((row >> 1) & 1);
   offset +=   4 * ((col >> 1) & 1);
   offset +=   2 * (row & 1);
   offset +=   1 * (col & 1);

   uintptr_t u = offset;

   /* Adjust for bit6 swizzling: only odd 8-byte columns are affected, and
    * the direction depends on the parity of the 8-row group.
    */
   if (swizzled && ((col >> 3) & 1)) {
      if ((row >> 3) & 1)
         u -= 64;
      else
         u += 64;
   }

   return u;
}
2698
2699 void
2700 intel_miptree_updownsample(struct brw_context *brw,
2701                            struct intel_mipmap_tree *src,
2702                            struct intel_mipmap_tree *dst)
2703 {
2704    unsigned src_w, src_h, dst_w, dst_h;
2705
2706    if (src->surf.size > 0) {
2707       src_w = src->surf.logical_level0_px.width;
2708       src_h = src->surf.logical_level0_px.height;
2709    } else {
2710       src_w = src->logical_width0;
2711       src_h = src->logical_height0;
2712    }
2713
2714    if (dst->surf.size > 0) {
2715       dst_w = dst->surf.logical_level0_px.width;
2716       dst_h = dst->surf.logical_level0_px.height;
2717    } else {
2718       dst_w = dst->logical_width0;
2719       dst_h = dst->logical_height0;
2720    }
2721
2722    brw_blorp_blit_miptrees(brw,
2723                            src, 0 /* level */, 0 /* layer */,
2724                            src->format, SWIZZLE_XYZW,
2725                            dst, 0 /* level */, 0 /* layer */, dst->format,
2726                            0, 0, src_w, src_h,
2727                            0, 0, dst_w, dst_h,
2728                            GL_NEAREST, false, false /*mirror x, y*/,
2729                            false, false);
2730
2731    if (src->stencil_mt) {
2732       if (src->stencil_mt->surf.size > 0) {
2733          src_w = src->stencil_mt->surf.logical_level0_px.width;
2734          src_h = src->stencil_mt->surf.logical_level0_px.height;
2735       } else {
2736          src_w = src->stencil_mt->logical_width0;
2737          src_h = src->stencil_mt->logical_height0;
2738       }
2739
2740       if (dst->stencil_mt->surf.size > 0) {
2741          dst_w = dst->stencil_mt->surf.logical_level0_px.width;
2742          dst_h = dst->stencil_mt->surf.logical_level0_px.height;
2743       } else {
2744          dst_w = dst->stencil_mt->logical_width0;
2745          dst_h = dst->stencil_mt->logical_height0;
2746       }
2747
2748       brw_blorp_blit_miptrees(brw,
2749                               src->stencil_mt, 0 /* level */, 0 /* layer */,
2750                               src->stencil_mt->format, SWIZZLE_XYZW,
2751                               dst->stencil_mt, 0 /* level */, 0 /* layer */,
2752                               dst->stencil_mt->format,
2753                               0, 0, src_w, src_h,
2754                               0, 0, dst_w, dst_h,
2755                               GL_NEAREST, false, false /*mirror x, y*/,
2756                               false, false /* decode/encode srgb */);
2757    }
2758 }
2759
2760 void
2761 intel_update_r8stencil(struct brw_context *brw,
2762                        struct intel_mipmap_tree *mt)
2763 {
2764    assert(brw->gen >= 7);
2765    struct intel_mipmap_tree *src =
2766       mt->format == MESA_FORMAT_S_UINT8 ? mt : mt->stencil_mt;
2767    if (!src || brw->gen >= 8 || !src->r8stencil_needs_update)
2768       return;
2769
2770    if (!mt->r8stencil_mt) {
2771       const uint32_t r8stencil_flags =
2772          MIPTREE_LAYOUT_ACCELERATED_UPLOAD | MIPTREE_LAYOUT_TILING_Y |
2773          MIPTREE_LAYOUT_DISABLE_AUX;
2774       assert(brw->gen > 6); /* Handle MIPTREE_LAYOUT_GEN6_HIZ_STENCIL */
2775       mt->r8stencil_mt = intel_miptree_create(brw,
2776                                               src->target,
2777                                               MESA_FORMAT_R_UINT8,
2778                                               src->first_level,
2779                                               src->last_level,
2780                                               src->logical_width0,
2781                                               src->logical_height0,
2782                                               src->logical_depth0,
2783                                               src->num_samples,
2784                                               r8stencil_flags);
2785       assert(mt->r8stencil_mt);
2786    }
2787
2788    struct intel_mipmap_tree *dst = mt->r8stencil_mt;
2789
2790    for (int level = src->first_level; level <= src->last_level; level++) {
2791       const unsigned depth = src->level[level].depth;
2792
2793       for (unsigned layer = 0; layer < depth; layer++) {
2794          brw_blorp_copy_miptrees(brw,
2795                                  src, level, layer,
2796                                  dst, level, layer,
2797                                  0, 0, 0, 0,
2798                                  minify(src->logical_width0, level),
2799                                  minify(src->logical_height0, level));
2800       }
2801    }
2802
2803    brw_render_cache_set_check_flush(brw, dst->bo);
2804    src->r8stencil_needs_update = false;
2805 }
2806
2807 static void *
2808 intel_miptree_map_raw(struct brw_context *brw,
2809                       struct intel_mipmap_tree *mt,
2810                       GLbitfield mode)
2811 {
2812    struct brw_bo *bo = mt->bo;
2813
2814    if (brw_batch_references(&brw->batch, bo))
2815       intel_batchbuffer_flush(brw);
2816
2817    return brw_bo_map(brw, bo, mode);
2818 }
2819
2820 static void
2821 intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
2822 {
2823    brw_bo_unmap(mt->bo);
2824 }
2825
2826 static void
2827 intel_miptree_map_gtt(struct brw_context *brw,
2828                       struct intel_mipmap_tree *mt,
2829                       struct intel_miptree_map *map,
2830                       unsigned int level, unsigned int slice)
2831 {
2832    unsigned int bw, bh;
2833    void *base;
2834    unsigned int image_x, image_y;
2835    intptr_t x = map->x;
2836    intptr_t y = map->y;
2837
2838    /* For compressed formats, the stride is the number of bytes per
2839     * row of blocks.  intel_miptree_get_image_offset() already does
2840     * the divide.
2841     */
2842    _mesa_get_format_block_size(mt->format, &bw, &bh);
2843    assert(y % bh == 0);
2844    assert(x % bw == 0);
2845    y /= bh;
2846    x /= bw;
2847
2848    base = intel_miptree_map_raw(brw, mt, map->mode) + mt->offset;
2849
2850    if (base == NULL)
2851       map->ptr = NULL;
2852    else {
2853       /* Note that in the case of cube maps, the caller must have passed the
2854        * slice number referencing the face.
2855       */
2856       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2857       x += image_x;
2858       y += image_y;
2859
2860       map->stride = mt->pitch;
2861       map->ptr = base + y * map->stride + x * mt->cpp;
2862    }
2863
2864    DBG("%s: %d,%d %dx%d from mt %p (%s) "
2865        "%"PRIiPTR",%"PRIiPTR" = %p/%d\n", __func__,
2866        map->x, map->y, map->w, map->h,
2867        mt, _mesa_get_format_name(mt->format),
2868        x, y, map->ptr, map->stride);
2869 }
2870
/**
 * Counterpart of intel_miptree_map_gtt(): the mapping is the raw BO mapping,
 * so releasing it is all that is needed.
 */
static void
intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
{
   intel_miptree_unmap_raw(mt);
}
2876
2877 static void
2878 intel_miptree_map_blit(struct brw_context *brw,
2879                        struct intel_mipmap_tree *mt,
2880                        struct intel_miptree_map *map,
2881                        unsigned int level, unsigned int slice)
2882 {
2883    map->linear_mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
2884                                          /* first_level */ 0,
2885                                          /* last_level */ 0,
2886                                          map->w, map->h, 1,
2887                                          /* samples */ 0,
2888                                          MIPTREE_LAYOUT_TILING_NONE);
2889
2890    if (!map->linear_mt) {
2891       fprintf(stderr, "Failed to allocate blit temporary\n");
2892       goto fail;
2893    }
2894    map->stride = map->linear_mt->pitch;
2895
2896    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
2897     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
2898     * invalidate is set, since we'll be writing the whole rectangle from our
2899     * temporary buffer back out.
2900     */
2901    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
2902       if (!intel_miptree_copy(brw,
2903                               mt, level, slice, map->x, map->y,
2904                               map->linear_mt, 0, 0, 0, 0,
2905                               map->w, map->h)) {
2906          fprintf(stderr, "Failed to blit\n");
2907          goto fail;
2908       }
2909    }
2910
2911    map->ptr = intel_miptree_map_raw(brw, map->linear_mt, map->mode);
2912
2913    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
2914        map->x, map->y, map->w, map->h,
2915        mt, _mesa_get_format_name(mt->format),
2916        level, slice, map->ptr, map->stride);
2917
2918    return;
2919
2920 fail:
2921    intel_miptree_release(&map->linear_mt);
2922    map->ptr = NULL;
2923    map->stride = 0;
2924 }
2925
2926 static void
2927 intel_miptree_unmap_blit(struct brw_context *brw,
2928                          struct intel_mipmap_tree *mt,
2929                          struct intel_miptree_map *map,
2930                          unsigned int level,
2931                          unsigned int slice)
2932 {
2933    struct gl_context *ctx = &brw->ctx;
2934
2935    intel_miptree_unmap_raw(map->linear_mt);
2936
2937    if (map->mode & GL_MAP_WRITE_BIT) {
2938       bool ok = intel_miptree_copy(brw,
2939                                    map->linear_mt, 0, 0, 0, 0,
2940                                    mt, level, slice, map->x, map->y,
2941                                    map->w, map->h);
2942       WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
2943    }
2944
2945    intel_miptree_release(&map->linear_mt);
2946 }
2947
2948 /**
2949  * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
2950  */
2951 #if defined(USE_SSE41)
2952 static void
2953 intel_miptree_map_movntdqa(struct brw_context *brw,
2954                            struct intel_mipmap_tree *mt,
2955                            struct intel_miptree_map *map,
2956                            unsigned int level, unsigned int slice)
2957 {
2958    assert(map->mode & GL_MAP_READ_BIT);
2959    assert(!(map->mode & GL_MAP_WRITE_BIT));
2960
2961    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
2962        map->x, map->y, map->w, map->h,
2963        mt, _mesa_get_format_name(mt->format),
2964        level, slice, map->ptr, map->stride);
2965
2966    /* Map the original image */
2967    uint32_t image_x;
2968    uint32_t image_y;
2969    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
2970    image_x += map->x;
2971    image_y += map->y;
2972
2973    void *src = intel_miptree_map_raw(brw, mt, map->mode);
2974    if (!src)
2975       return;
2976
2977    src += mt->offset;
2978
2979    src += image_y * mt->pitch;
2980    src += image_x * mt->cpp;
2981
2982    /* Due to the pixel offsets for the particular image being mapped, our
2983     * src pointer may not be 16-byte aligned.  However, if the pitch is
2984     * divisible by 16, then the amount by which it's misaligned will remain
2985     * consistent from row to row.
2986     */
2987    assert((mt->pitch % 16) == 0);
2988    const int misalignment = ((uintptr_t) src) & 15;
2989
2990    /* Create an untiled temporary buffer for the mapping. */
2991    const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w);
2992
2993    map->stride = ALIGN(misalignment + width_bytes, 16);
2994
2995    map->buffer = _mesa_align_malloc(map->stride * map->h, 16);
2996    /* Offset the destination so it has the same misalignment as src. */
2997    map->ptr = map->buffer + misalignment;
2998
2999    assert((((uintptr_t) map->ptr) & 15) == misalignment);
3000
3001    for (uint32_t y = 0; y < map->h; y++) {
3002       void *dst_ptr = map->ptr + y * map->stride;
3003       void *src_ptr = src + y * mt->pitch;
3004
3005       _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
3006    }
3007
3008    intel_miptree_unmap_raw(mt);
3009 }
3010
3011 static void
3012 intel_miptree_unmap_movntdqa(struct brw_context *brw,
3013                              struct intel_mipmap_tree *mt,
3014                              struct intel_miptree_map *map,
3015                              unsigned int level,
3016                              unsigned int slice)
3017 {
3018    _mesa_align_free(map->buffer);
3019    map->buffer = NULL;
3020    map->ptr = NULL;
3021 }
3022 #endif
3023
3024 static void
3025 intel_miptree_map_s8(struct brw_context *brw,
3026                      struct intel_mipmap_tree *mt,
3027                      struct intel_miptree_map *map,
3028                      unsigned int level, unsigned int slice)
3029 {
3030    map->stride = map->w;
3031    map->buffer = map->ptr = malloc(map->stride * map->h);
3032    if (!map->buffer)
3033       return;
3034
3035    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
3036     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
3037     * invalidate is set, since we'll be writing the whole rectangle from our
3038     * temporary buffer back out.
3039     */
3040    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
3041       /* ISL uses a stencil pitch value that is expected by hardware whereas
3042        * traditional miptree uses half of that. Below the value gets supplied
3043        * to intel_offset_S8() which expects the legacy interpretation.
3044        */
3045       const unsigned pitch = mt->surf.size > 0 ?
3046                              mt->surf.row_pitch / 2 : mt->pitch;
3047       uint8_t *untiled_s8_map = map->ptr;
3048       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_READ_BIT);
3049       unsigned int image_x, image_y;
3050
3051       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3052
3053       for (uint32_t y = 0; y < map->h; y++) {
3054          for (uint32_t x = 0; x < map->w; x++) {
3055             ptrdiff_t offset = intel_offset_S8(pitch,
3056                                                x + image_x + map->x,
3057                                                y + image_y + map->y,
3058                                                brw->has_swizzling);
3059             untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
3060          }
3061       }
3062
3063       intel_miptree_unmap_raw(mt);
3064
3065       DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__,
3066           map->x, map->y, map->w, map->h,
3067           mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
3068    } else {
3069       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
3070           map->x, map->y, map->w, map->h,
3071           mt, map->ptr, map->stride);
3072    }
3073 }
3074
3075 static void
3076 intel_miptree_unmap_s8(struct brw_context *brw,
3077                        struct intel_mipmap_tree *mt,
3078                        struct intel_miptree_map *map,
3079                        unsigned int level,
3080                        unsigned int slice)
3081 {
3082    if (map->mode & GL_MAP_WRITE_BIT) {
3083       /* ISL uses a stencil pitch value that is expected by hardware whereas
3084        * traditional miptree uses half of that. Below the value gets supplied
3085        * to intel_offset_S8() which expects the legacy interpretation.
3086        */
3087       const unsigned pitch = mt->surf.size > 0 ?
3088                              mt->surf.row_pitch / 2: mt->pitch;
3089       unsigned int image_x, image_y;
3090       uint8_t *untiled_s8_map = map->ptr;
3091       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT);
3092
3093       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3094
3095       for (uint32_t y = 0; y < map->h; y++) {
3096          for (uint32_t x = 0; x < map->w; x++) {
3097             ptrdiff_t offset = intel_offset_S8(pitch,
3098                                                image_x + x + map->x,
3099                                                image_y + y + map->y,
3100                                                brw->has_swizzling);
3101             tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
3102          }
3103       }
3104
3105       intel_miptree_unmap_raw(mt);
3106    }
3107
3108    free(map->buffer);
3109 }
3110
3111 static void
3112 intel_miptree_map_etc(struct brw_context *brw,
3113                       struct intel_mipmap_tree *mt,
3114                       struct intel_miptree_map *map,
3115                       unsigned int level,
3116                       unsigned int slice)
3117 {
3118    assert(mt->etc_format != MESA_FORMAT_NONE);
3119    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
3120       assert(mt->format == MESA_FORMAT_R8G8B8X8_UNORM);
3121    }
3122
3123    assert(map->mode & GL_MAP_WRITE_BIT);
3124    assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
3125
3126    map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
3127    map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
3128                                                 map->w, map->h, 1));
3129    map->ptr = map->buffer;
3130 }
3131
3132 static void
3133 intel_miptree_unmap_etc(struct brw_context *brw,
3134                         struct intel_mipmap_tree *mt,
3135                         struct intel_miptree_map *map,
3136                         unsigned int level,
3137                         unsigned int slice)
3138 {
3139    uint32_t image_x;
3140    uint32_t image_y;
3141    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
3142
3143    image_x += map->x;
3144    image_y += map->y;
3145
3146    uint8_t *dst = intel_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT)
3147                 + image_y * mt->pitch
3148                 + image_x * mt->cpp;
3149
3150    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
3151       _mesa_etc1_unpack_rgba8888(dst, mt->pitch,
3152                                  map->ptr, map->stride,
3153                                  map->w, map->h);
3154    else
3155       _mesa_unpack_etc2_format(dst, mt->pitch,
3156                                map->ptr, map->stride,
3157                                map->w, map->h, mt->etc_format);
3158
3159    intel_miptree_unmap_raw(mt);
3160    free(map->buffer);
3161 }
3162
3163 /**
3164  * Mapping function for packed depth/stencil miptrees backed by real separate
3165  * miptrees for depth and stencil.
3166  *
3167  * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
3168  * separate from the depth buffer.  Yet at the GL API level, we have to expose
3169  * packed depth/stencil textures and FBO attachments, and Mesa core expects to
3170  * be able to map that memory for texture storage and glReadPixels-type
3171  * operations.  We give Mesa core that access by mallocing a temporary and
3172  * copying the data between the actual backing store and the temporary.
3173  */
3174 static void
3175 intel_miptree_map_depthstencil(struct brw_context *brw,
3176                                struct intel_mipmap_tree *mt,
3177                                struct intel_miptree_map *map,
3178                                unsigned int level, unsigned int slice)
3179 {
3180    struct intel_mipmap_tree *z_mt = mt;
3181    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
3182    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
3183    int packed_bpp = map_z32f_x24s8 ? 8 : 4;
3184
3185    map->stride = map->w * packed_bpp;
3186    map->buffer = map->ptr = malloc(map->stride * map->h);
3187    if (!map->buffer)
3188       return;
3189
3190    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
3191     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
3192     * invalidate is set, since we'll be writing the whole rectangle from our
3193     * temporary buffer back out.
3194     */
3195    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
3196       /* ISL uses a stencil pitch value that is expected by hardware whereas
3197        * traditional miptree uses half of that. Below the value gets supplied
3198        * to intel_offset_S8() which expects the legacy interpretation.
3199        */
3200       const unsigned s_pitch = s_mt->surf.size > 0 ?
3201                                s_mt->surf.row_pitch / 2 : s_mt->pitch;
3202       uint32_t *packed_map = map->ptr;
3203       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_READ_BIT);
3204       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_READ_BIT);
3205       unsigned int s_image_x, s_image_y;
3206       unsigned int z_image_x, z_image_y;
3207
3208       intel_miptree_get_image_offset(s_mt, level, slice,
3209                                      &s_image_x, &s_image_y);
3210       intel_miptree_get_image_offset(z_mt, level, slice,
3211                                      &z_image_x, &z_image_y);
3212
3213       for (uint32_t y = 0; y < map->h; y++) {
3214          for (uint32_t x = 0; x < map->w; x++) {
3215             int map_x = map->x + x, map_y = map->y + y;
3216             ptrdiff_t s_offset = intel_offset_S8(s_pitch,
3217                                                  map_x + s_image_x,
3218                                                  map_y + s_image_y,
3219                                                  brw->has_swizzling);
3220             ptrdiff_t z_offset = ((map_y + z_image_y) *
3221                                   (z_mt->pitch / 4) +
3222                                   (map_x + z_image_x));
3223             uint8_t s = s_map[s_offset];
3224             uint32_t z = z_map[z_offset];
3225
3226             if (map_z32f_x24s8) {
3227                packed_map[(y * map->w + x) * 2 + 0] = z;
3228                packed_map[(y * map->w + x) * 2 + 1] = s;
3229             } else {
3230                packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
3231             }
3232          }
3233       }
3234
3235       intel_miptree_unmap_raw(s_mt);
3236       intel_miptree_unmap_raw(z_mt);
3237
3238       DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
3239           __func__,
3240           map->x, map->y, map->w, map->h,
3241           z_mt, map->x + z_image_x, map->y + z_image_y,
3242           s_mt, map->x + s_image_x, map->y + s_image_y,
3243           map->ptr, map->stride);
3244    } else {
3245       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
3246           map->x, map->y, map->w, map->h,
3247           mt, map->ptr, map->stride);
3248    }
3249 }
3250
3251 static void
3252 intel_miptree_unmap_depthstencil(struct brw_context *brw,
3253                                  struct intel_mipmap_tree *mt,
3254                                  struct intel_miptree_map *map,
3255                                  unsigned int level,
3256                                  unsigned int slice)
3257 {
3258    struct intel_mipmap_tree *z_mt = mt;
3259    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
3260    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
3261
3262    if (map->mode & GL_MAP_WRITE_BIT) {
3263       /* ISL uses a stencil pitch value that is expected by hardware whereas
3264        * traditional miptree uses half of that. Below the value gets supplied
3265        * to intel_offset_S8() which expects the legacy interpretation.
3266        */
3267       const unsigned s_pitch = s_mt->surf.size > 0 ?
3268                                s_mt->surf.row_pitch / 2 : s_mt->pitch;
3269       uint32_t *packed_map = map->ptr;
3270       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_WRITE_BIT);
3271       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_WRITE_BIT);
3272       unsigned int s_image_x, s_image_y;
3273       unsigned int z_image_x, z_image_y;
3274
3275       intel_miptree_get_image_offset(s_mt, level, slice,
3276                                      &s_image_x, &s_image_y);
3277       intel_miptree_get_image_offset(z_mt, level, slice,
3278                                      &z_image_x, &z_image_y);
3279
3280       for (uint32_t y = 0; y < map->h; y++) {
3281          for (uint32_t x = 0; x < map->w; x++) {
3282             ptrdiff_t s_offset = intel_offset_S8(s_pitch,
3283                                                  x + s_image_x + map->x,
3284                                                  y + s_image_y + map->y,
3285                                                  brw->has_swizzling);
3286             ptrdiff_t z_offset = ((y + z_image_y + map->y) *
3287                                   (z_mt->pitch / 4) +
3288                                   (x + z_image_x + map->x));
3289
3290             if (map_z32f_x24s8) {
3291                z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
3292                s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
3293             } else {
3294                uint32_t packed = packed_map[y * map->w + x];
3295                s_map[s_offset] = packed >> 24;
3296                z_map[z_offset] = packed;
3297             }
3298          }
3299       }
3300
3301       intel_miptree_unmap_raw(s_mt);
3302       intel_miptree_unmap_raw(z_mt);
3303
3304       DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
3305           __func__,
3306           map->x, map->y, map->w, map->h,
3307           z_mt, _mesa_get_format_name(z_mt->format),
3308           map->x + z_image_x, map->y + z_image_y,
3309           s_mt, map->x + s_image_x, map->y + s_image_y,
3310           map->ptr, map->stride);
3311    }
3312
3313    free(map->buffer);
3314 }
3315
3316 /**
3317  * Create and attach a map to the miptree at (level, slice). Return the
3318  * attached map.
3319  */
3320 static struct intel_miptree_map*
3321 intel_miptree_attach_map(struct intel_mipmap_tree *mt,
3322                          unsigned int level,
3323                          unsigned int slice,
3324                          unsigned int x,
3325                          unsigned int y,
3326                          unsigned int w,
3327                          unsigned int h,
3328                          GLbitfield mode)
3329 {
3330    struct intel_miptree_map *map = calloc(1, sizeof(*map));
3331
3332    if (!map)
3333       return NULL;
3334
3335    assert(mt->level[level].slice[slice].map == NULL);
3336    mt->level[level].slice[slice].map = map;
3337
3338    map->mode = mode;
3339    map->x = x;
3340    map->y = y;
3341    map->w = w;
3342    map->h = h;
3343
3344    return map;
3345 }
3346
3347 /**
3348  * Release the map at (level, slice).
3349  */
3350 static void
3351 intel_miptree_release_map(struct intel_mipmap_tree *mt,
3352                          unsigned int level,
3353                          unsigned int slice)
3354 {
3355    struct intel_miptree_map **map;
3356
3357    map = &mt->level[level].slice[slice].map;
3358    free(*map);
3359    *map = NULL;
3360 }
3361
3362 static bool
3363 can_blit_slice(struct intel_mipmap_tree *mt,
3364                unsigned int level, unsigned int slice)
3365 {
3366    /* See intel_miptree_blit() for details on the 32k pitch limit. */
3367    if (mt->pitch >= 32768)
3368       return false;
3369
3370    return true;
3371 }
3372
3373 static bool
3374 use_intel_mipree_map_blit(struct brw_context *brw,
3375                           struct intel_mipmap_tree *mt,
3376                           GLbitfield mode,
3377                           unsigned int level,
3378                           unsigned int slice)
3379 {
3380    if (brw->has_llc &&
3381       /* It's probably not worth swapping to the blit ring because of
3382        * all the overhead involved.
3383        */
3384        !(mode & GL_MAP_WRITE_BIT) &&
3385        !mt->compressed &&
3386        (mt->tiling == I915_TILING_X ||
3387         /* Prior to Sandybridge, the blitter can't handle Y tiling */
3388         (brw->gen >= 6 && mt->tiling == I915_TILING_Y) ||
3389         /* Fast copy blit on skl+ supports all tiling formats. */
3390         brw->gen >= 9) &&
3391        can_blit_slice(mt, level, slice))
3392       return true;
3393
3394    if (mt->tiling != I915_TILING_NONE &&
3395        mt->bo->size >= brw->max_gtt_map_object_size) {
3396       assert(can_blit_slice(mt, level, slice));
3397       return true;
3398    }
3399
3400    return false;
3401 }
3402
3403 /**
3404  * Parameter \a out_stride has type ptrdiff_t not because the buffer stride may
3405  * exceed 32 bits but to diminish the likelihood subtle bugs in pointer
3406  * arithmetic overflow.
3407  *
3408  * If you call this function and use \a out_stride, then you're doing pointer
3409  * arithmetic on \a out_ptr. The type of \a out_stride doesn't prevent all
3410  * bugs.  The caller must still take care to avoid 32-bit overflow errors in
3411  * all arithmetic expressions that contain buffer offsets and pixel sizes,
3412  * which usually have type uint32_t or GLuint.
3413  */
3414 void
3415 intel_miptree_map(struct brw_context *brw,
3416                   struct intel_mipmap_tree *mt,
3417                   unsigned int level,
3418                   unsigned int slice,
3419                   unsigned int x,
3420                   unsigned int y,
3421                   unsigned int w,
3422                   unsigned int h,
3423                   GLbitfield mode,
3424                   void **out_ptr,
3425                   ptrdiff_t *out_stride)
3426 {
3427    struct intel_miptree_map *map;
3428
3429    assert(mt->num_samples <= 1);
3430
3431    map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
3432    if (!map){
3433       *out_ptr = NULL;
3434       *out_stride = 0;
3435       return;
3436    }
3437
3438    intel_miptree_access_raw(brw, mt, level, slice,
3439                             map->mode & GL_MAP_WRITE_BIT);
3440
3441    if (mt->format == MESA_FORMAT_S_UINT8) {
3442       intel_miptree_map_s8(brw, mt, map, level, slice);
3443    } else if (mt->etc_format != MESA_FORMAT_NONE &&
3444               !(mode & BRW_MAP_DIRECT_BIT)) {
3445       intel_miptree_map_etc(brw, mt, map, level, slice);
3446    } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
3447       intel_miptree_map_depthstencil(brw, mt, map, level, slice);
3448    } else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) {
3449       intel_miptree_map_blit(brw, mt, map, level, slice);
3450 #if defined(USE_SSE41)
3451    } else if (!(mode & GL_MAP_WRITE_BIT) &&
3452               !mt->compressed && cpu_has_sse4_1 &&
3453               (mt->pitch % 16 == 0)) {
3454       intel_miptree_map_movntdqa(brw, mt, map, level, slice);
3455 #endif
3456    } else {
3457       intel_miptree_map_gtt(brw, mt, map, level, slice);
3458    }
3459
3460    *out_ptr = map->ptr;
3461    *out_stride = map->stride;
3462
3463    if (map->ptr == NULL)
3464       intel_miptree_release_map(mt, level, slice);
3465 }
3466
3467 void
3468 intel_miptree_unmap(struct brw_context *brw,
3469                     struct intel_mipmap_tree *mt,
3470                     unsigned int level,
3471                     unsigned int slice)
3472 {
3473    struct intel_miptree_map *map = mt->level[level].slice[slice].map;
3474
3475    assert(mt->num_samples <= 1);
3476
3477    if (!map)
3478       return;
3479
3480    DBG("%s: mt %p (%s) level %d slice %d\n", __func__,
3481        mt, _mesa_get_format_name(mt->format), level, slice);
3482
3483    if (mt->format == MESA_FORMAT_S_UINT8) {
3484       intel_miptree_unmap_s8(brw, mt, map, level, slice);
3485    } else if (mt->etc_format != MESA_FORMAT_NONE &&
3486               !(map->mode & BRW_MAP_DIRECT_BIT)) {
3487       intel_miptree_unmap_etc(brw, mt, map, level, slice);
3488    } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
3489       intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
3490    } else if (map->linear_mt) {
3491       intel_miptree_unmap_blit(brw, mt, map, level, slice);
3492 #if defined(USE_SSE41)
3493    } else if (map->buffer && cpu_has_sse4_1) {
3494       intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
3495 #endif
3496    } else {
3497       intel_miptree_unmap_gtt(mt);
3498    }
3499
3500    intel_miptree_release_map(mt, level, slice);
3501 }
3502
3503 enum isl_surf_dim
3504 get_isl_surf_dim(GLenum target)
3505 {
3506    switch (target) {
3507    case GL_TEXTURE_1D:
3508    case GL_TEXTURE_1D_ARRAY:
3509       return ISL_SURF_DIM_1D;
3510
3511    case GL_TEXTURE_2D:
3512    case GL_TEXTURE_2D_ARRAY:
3513    case GL_TEXTURE_RECTANGLE:
3514    case GL_TEXTURE_CUBE_MAP:
3515    case GL_TEXTURE_CUBE_MAP_ARRAY:
3516    case GL_TEXTURE_2D_MULTISAMPLE:
3517    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
3518    case GL_TEXTURE_EXTERNAL_OES:
3519       return ISL_SURF_DIM_2D;
3520
3521    case GL_TEXTURE_3D:
3522       return ISL_SURF_DIM_3D;
3523    }
3524
3525    unreachable("Invalid texture target");
3526 }
3527
3528 enum isl_dim_layout
3529 get_isl_dim_layout(const struct gen_device_info *devinfo, uint32_t tiling,
3530                    GLenum target, enum miptree_array_layout array_layout)
3531 {
3532    if (array_layout == GEN6_HIZ_STENCIL)
3533       return ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ;
3534
3535    switch (target) {
3536    case GL_TEXTURE_1D:
3537    case GL_TEXTURE_1D_ARRAY:
3538       return (devinfo->gen >= 9 && tiling == I915_TILING_NONE ?
3539               ISL_DIM_LAYOUT_GEN9_1D : ISL_DIM_LAYOUT_GEN4_2D);
3540
3541    case GL_TEXTURE_2D:
3542    case GL_TEXTURE_2D_ARRAY:
3543    case GL_TEXTURE_RECTANGLE:
3544    case GL_TEXTURE_2D_MULTISAMPLE:
3545    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
3546    case GL_TEXTURE_EXTERNAL_OES:
3547       return ISL_DIM_LAYOUT_GEN4_2D;
3548
3549    case GL_TEXTURE_CUBE_MAP:
3550    case GL_TEXTURE_CUBE_MAP_ARRAY:
3551       return (devinfo->gen == 4 ? ISL_DIM_LAYOUT_GEN4_3D :
3552               ISL_DIM_LAYOUT_GEN4_2D);
3553
3554    case GL_TEXTURE_3D:
3555       return (devinfo->gen >= 9 ?
3556               ISL_DIM_LAYOUT_GEN4_2D : ISL_DIM_LAYOUT_GEN4_3D);
3557    }
3558
3559    unreachable("Invalid texture target");
3560 }
3561
3562 enum isl_tiling
3563 intel_miptree_get_isl_tiling(const struct intel_mipmap_tree *mt)
3564 {
3565    if (mt->format == MESA_FORMAT_S_UINT8) {
3566       return ISL_TILING_W;
3567    } else {
3568       switch (mt->tiling) {
3569       case I915_TILING_NONE:
3570          return ISL_TILING_LINEAR;
3571       case I915_TILING_X:
3572          return ISL_TILING_X;
3573       case I915_TILING_Y:
3574             return ISL_TILING_Y0;
3575       default:
3576          unreachable("Invalid tiling mode");
3577       }
3578    }
3579 }
3580
3581 void
3582 intel_miptree_get_isl_surf(struct brw_context *brw,
3583                            const struct intel_mipmap_tree *mt,
3584                            struct isl_surf *surf)
3585 {
3586    surf->dim = get_isl_surf_dim(mt->target);
3587    surf->dim_layout = get_isl_dim_layout(&brw->screen->devinfo,
3588                                          mt->tiling, mt->target,
3589                                          mt->array_layout);
3590
3591    if (mt->num_samples > 1) {
3592       switch (mt->msaa_layout) {
3593       case INTEL_MSAA_LAYOUT_IMS:
3594          surf->msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED;
3595          break;
3596       case INTEL_MSAA_LAYOUT_UMS:
3597       case INTEL_MSAA_LAYOUT_CMS:
3598          surf->msaa_layout = ISL_MSAA_LAYOUT_ARRAY;
3599          break;
3600       default:
3601          unreachable("Invalid MSAA layout");
3602       }
3603    } else {
3604       surf->msaa_layout = ISL_MSAA_LAYOUT_NONE;
3605    }
3606
3607    surf->tiling = intel_miptree_get_isl_tiling(mt);
3608
3609    if (mt->format == MESA_FORMAT_S_UINT8) {
3610       /* The ISL definition of row_pitch matches the surface state pitch field
3611        * a bit better than intel_mipmap_tree.  In particular, ISL incorporates
3612        * the factor of 2 for W-tiling in row_pitch.
3613        */
3614       surf->row_pitch = 2 * mt->pitch;
3615    } else {
3616       surf->row_pitch = mt->pitch;
3617    }
3618
3619    surf->format = translate_tex_format(brw, mt->format, false);
3620
3621    if (brw->gen >= 9) {
3622       if (surf->dim == ISL_SURF_DIM_1D && surf->tiling == ISL_TILING_LINEAR) {
3623          /* For gen9 1-D surfaces, intel_mipmap_tree has a bogus alignment. */
3624          surf->image_alignment_el = isl_extent3d(64, 1, 1);
3625       } else {
3626          /* On gen9+, intel_mipmap_tree stores the horizontal and vertical
3627           * alignment in terms of surface elements like we want.
3628           */
3629          surf->image_alignment_el = isl_extent3d(mt->halign, mt->valign, 1);
3630       }
3631    } else {
3632       /* On earlier gens it's stored in pixels. */
3633       unsigned bw, bh;
3634       _mesa_get_format_block_size(mt->format, &bw, &bh);
3635       surf->image_alignment_el =
3636          isl_extent3d(mt->halign / bw, mt->valign / bh, 1);
3637    }
3638
3639    surf->logical_level0_px.width = mt->logical_width0;
3640    surf->logical_level0_px.height = mt->logical_height0;
3641    if (surf->dim == ISL_SURF_DIM_3D) {
3642       surf->logical_level0_px.depth = mt->logical_depth0;
3643       surf->logical_level0_px.array_len = 1;
3644    } else {
3645       surf->logical_level0_px.depth = 1;
3646       surf->logical_level0_px.array_len = mt->logical_depth0;
3647    }
3648
3649    surf->phys_level0_sa.width = mt->physical_width0;
3650    surf->phys_level0_sa.height = mt->physical_height0;
3651    if (surf->dim == ISL_SURF_DIM_3D) {
3652       surf->phys_level0_sa.depth = mt->physical_depth0;
3653       surf->phys_level0_sa.array_len = 1;
3654    } else {
3655       surf->phys_level0_sa.depth = 1;
3656       surf->phys_level0_sa.array_len = mt->physical_depth0;
3657    }
3658
3659    surf->levels = mt->last_level - mt->first_level + 1;
3660    surf->samples = MAX2(mt->num_samples, 1);
3661
3662    surf->size = 0; /* TODO */
3663    surf->alignment = 0; /* TODO */
3664
3665    switch (surf->dim_layout) {
3666    case ISL_DIM_LAYOUT_GEN4_2D:
3667    case ISL_DIM_LAYOUT_GEN4_3D:
3668    case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
3669       if (brw->gen >= 9) {
3670          surf->array_pitch_el_rows = mt->qpitch;
3671       } else {
3672          unsigned bw, bh;
3673          _mesa_get_format_block_size(mt->format, &bw, &bh);
3674          assert(mt->qpitch % bh == 0);
3675          surf->array_pitch_el_rows = mt->qpitch / bh;
3676       }
3677       break;
3678    case ISL_DIM_LAYOUT_GEN9_1D:
3679       surf->array_pitch_el_rows = 1;
3680       break;
3681    }
3682
3683    switch (mt->array_layout) {
3684    case ALL_LOD_IN_EACH_SLICE:
3685       surf->array_pitch_span = ISL_ARRAY_PITCH_SPAN_FULL;
3686       break;
3687    case ALL_SLICES_AT_EACH_LOD:
3688    case GEN6_HIZ_STENCIL:
3689       surf->array_pitch_span = ISL_ARRAY_PITCH_SPAN_COMPACT;
3690       break;
3691    default:
3692       unreachable("Invalid array layout");
3693    }
3694
3695    GLenum base_format = _mesa_get_format_base_format(mt->format);
3696    switch (base_format) {
3697    case GL_DEPTH_COMPONENT:
3698       surf->usage = ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT;
3699       break;
3700    case GL_STENCIL_INDEX:
3701       surf->usage = ISL_SURF_USAGE_STENCIL_BIT;
3702       if (brw->gen >= 8)
3703          surf->usage |= ISL_SURF_USAGE_TEXTURE_BIT;
3704       break;
3705    case GL_DEPTH_STENCIL:
3706       /* In this case we only texture from the depth part */
3707       surf->usage = ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT |
3708                     ISL_SURF_USAGE_TEXTURE_BIT;
3709       break;
3710    default:
3711       surf->usage = ISL_SURF_USAGE_TEXTURE_BIT;
3712       if (brw->mesa_format_supports_render[mt->format])
3713          surf->usage = ISL_SURF_USAGE_RENDER_TARGET_BIT;
3714       break;
3715    }
3716
3717    if (_mesa_is_cube_map_texture(mt->target))
3718       surf->usage |= ISL_SURF_USAGE_CUBE_BIT;
3719 }
3720
3721 enum isl_aux_usage
3722 intel_miptree_get_aux_isl_usage(const struct brw_context *brw,
3723                                 const struct intel_mipmap_tree *mt)
3724 {
3725    if (mt->hiz_buf)
3726       return ISL_AUX_USAGE_HIZ;
3727
3728    if (!mt->mcs_buf)
3729       return ISL_AUX_USAGE_NONE;
3730
3731    if (mt->num_samples > 1) {
3732       assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS);
3733       return ISL_AUX_USAGE_MCS;
3734    }
3735
3736    if (intel_miptree_is_lossless_compressed(brw, mt)) {
3737       assert(brw->gen >= 9);
3738       return ISL_AUX_USAGE_CCS_E;
3739    }
3740
3741    if ((mt->aux_disable & INTEL_AUX_DISABLE_CCS) == 0)
3742       return ISL_AUX_USAGE_CCS_D;
3743
3744    unreachable("Invalid MCS miptree");
3745 }