src/gallium/drivers/ilo/ilo_layout.c

   1 /*
   2  * Mesa 3-D graphics library
   3  *
   4  * Copyright (C) 2014 LunarG, Inc.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included
  14  * in all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22  * DEALINGS IN THE SOFTWARE.
  23  *
  24  * Authors:
  25  *    Chia-I Wu <olv@lunarg.com>
  26  */
  27
  28 #include "ilo_layout.h"
  29
  30 enum {
  31    LAYOUT_TILING_NONE = 1 << INTEL_TILING_NONE,
  32    LAYOUT_TILING_X = 1 << INTEL_TILING_X,
  33    LAYOUT_TILING_Y = 1 << INTEL_TILING_Y,
  34    LAYOUT_TILING_W = 1 << (INTEL_TILING_Y + 1),
  35
  36    LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE |
  37                         LAYOUT_TILING_X |
  38                         LAYOUT_TILING_Y |
  39                         LAYOUT_TILING_W)
  40 };
  41
  42 struct ilo_layout_params {
  43    const struct ilo_dev_info *dev;
  44    const struct pipe_resource *templ;
  45
  46    bool compressed;
  47
  48    unsigned h0, h1;
  49    unsigned max_x, max_y;
  50 };
  51
  52 static void
  53 layout_get_slice_size(const struct ilo_layout *layout,
  54                       const struct ilo_layout_params *params,
  55                       unsigned level, unsigned *width, unsigned *height)
  56 {
  57    const struct pipe_resource *templ = params->templ;
  58    unsigned w, h;
  59
  60    w = u_minify(layout->width0, level);
  61    h = u_minify(layout->height0, level);
  62
  63    /*
  64     * From the Sandy Bridge PRM, volume 1 part 1, page 114:
  65     *
  66     *     "The dimensions of the mip maps are first determined by applying the
  67     *      sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
  68     *      if necessary, they are padded out to compression block boundaries."
  69     */
  70    w = align(w, layout->block_width);
  71    h = align(h, layout->block_height);
  72
  73    /*
  74     * From the Sandy Bridge PRM, volume 1 part 1, page 111:
  75     *
  76     *     "If the surface is multisampled (4x), these values must be adjusted
  77     *      as follows before proceeding:
  78     *
  79     *        W_L = ceiling(W_L / 2) * 4
  80     *        H_L = ceiling(H_L / 2) * 4"
  81     *
  82     * From the Ivy Bridge PRM, volume 1 part 1, page 108:
  83     *
  84     *     "If the surface is multisampled and it is a depth or stencil surface
  85     *      or Multisampled Surface StorageFormat in SURFACE_STATE is
  86     *      MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
  87     *      proceeding:
  88     *
  89     *        #samples  W_L =                    H_L =
  90     *        2         ceiling(W_L / 2) * 4     HL [no adjustment]
  91     *        4         ceiling(W_L / 2) * 4     ceiling(H_L / 2) * 4
  92     *        8         ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 4
  93     *        16        ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 8"
  94     *
  95     * For interleaved samples (4x), where pixels
  96     *
  97     *   (x, y  ) (x+1, y  )
  98     *   (x, y+1) (x+1, y+1)
  99     *
 100     * would be is occupied by
 101     *
 102     *   (x, y  , si0) (x+1, y  , si0) (x, y  , si1) (x+1, y  , si1)
 103     *   (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
 104     *   (x, y  , si2) (x+1, y  , si2) (x, y  , si3) (x+1, y  , si3)
 105     *   (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
 106     *
 107     * Thus the need to
 108     *
 109     *   w = align(w, 2) * 2;
 110     *   y = align(y, 2) * 2;
 111     */
 112    if (layout->interleaved_samples) {
 113       switch (templ->nr_samples) {
 114       case 0:
 115       case 1:
 116          break;
 117       case 2:
 118          w = align(w, 2) * 2;
 119          break;
 120       case 4:
 121          w = align(w, 2) * 2;
 122          h = align(h, 2) * 2;
 123          break;
 124       case 8:
 125          w = align(w, 2) * 4;
 126          h = align(h, 2) * 2;
 127          break;
 128       case 16:
 129          w = align(w, 2) * 4;
 130          h = align(h, 2) * 4;
 131          break;
 132       default:
 133          assert(!"unsupported sample count");
 134          break;
 135       }
 136    }
 137
 138    /*
 139     * From the Ivy Bridge PRM, volume 1 part 1, page 108:
 140     *
 141     *     "For separate stencil buffer, the width must be mutiplied by 2 and
 142     *      height divided by 2..."
 143     *
 144     * To make things easier (for transfer), we will just double the stencil
 145     * stride in 3DSTATE_STENCIL_BUFFER.
 146     */
 147    w = align(w, layout->align_i);
 148    h = align(h, layout->align_j);
 149
 150    *width = w;
 151    *height = h;
 152 }
 153
 154 static unsigned
 155 layout_get_num_layers(const struct ilo_layout *layout,
 156                       const struct ilo_layout_params *params)
 157 {
 158    const struct pipe_resource *templ = params->templ;
 159    unsigned num_layers = templ->array_size;
 160
 161    /* samples of the same index are stored in a layer */
 162    if (templ->nr_samples > 1 && !layout->interleaved_samples)
 163       num_layers *= templ->nr_samples;
 164
 165    return num_layers;
 166 }
 167
 168 static void
 169 layout_init_layer_height(struct ilo_layout *layout,
 170                          struct ilo_layout_params *params)
 171 {
 172    const struct pipe_resource *templ = params->templ;
 173    unsigned num_layers;
 174
 175    if (layout->walk != ILO_LAYOUT_WALK_LAYER)
 176       return;
 177
 178    num_layers = layout_get_num_layers(layout, params);
 179    if (num_layers <= 1)
 180       return;
 181
 182    /*
 183     * From the Sandy Bridge PRM, volume 1 part 1, page 115:
 184     *
 185     *     "The following equation is used for surface formats other than
 186     *      compressed textures:
 187     *
 188     *        QPitch = (h0 + h1 + 11j)"
 189     *
 190     *     "The equation for compressed textures (BC* and FXT1 surface formats)
 191     *      follows:
 192     *
 193     *        QPitch = (h0 + h1 + 11j) / 4"
 194     *
 195     *     "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
 196     *      value calculated in the equation above, for every other odd Surface
 197     *      Height starting from 1 i.e. 1,5,9,13"
 198     *
 199     * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
 200     *
 201     *     "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
 202     *      buffer and stencil buffer have an implied value of ARYSPC_FULL):
 203     *
 204     *        QPitch = (h0 + h1 + 12j)
 205     *        QPitch = (h0 + h1 + 12j) / 4 (compressed)
 206     *
 207     *      (There are many typos or missing words here...)"
 208     *
 209     * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
 210     * the base address.  The PRM divides QPitch by 4 for compressed formats
 211     * because the block height for those formats are 4, and it wants QPitch to
 212     * mean the number of memory rows, as opposed to texel rows, between
 213     * slices.  Since we use texel rows everywhere, we do not need to divide
 214     * QPitch by 4.
 215     */
 216    layout->layer_height = params->h0 + params->h1 +
 217       ((params->dev->gen >= ILO_GEN(7)) ? 12 : 11) * layout->align_j;
 218
 219    if (params->dev->gen == ILO_GEN(6) && templ->nr_samples > 1 &&
 220        layout->height0 % 4 == 1)
 221       layout->layer_height += 4;
 222
 223    params->max_y += layout->layer_height * (num_layers - 1);
 224 }
 225
 226 static void
 227 layout_init_lods(struct ilo_layout *layout,
 228                  struct ilo_layout_params *params)
 229 {
 230    const struct pipe_resource *templ = params->templ;
 231    unsigned cur_x, cur_y;
 232    unsigned lv;
 233
 234    cur_x = 0;
 235    cur_y = 0;
 236    for (lv = 0; lv <= templ->last_level; lv++) {
 237       unsigned lod_w, lod_h;
 238
 239       layout_get_slice_size(layout, params, lv, &lod_w, &lod_h);
 240
 241       layout->lods[lv].x = cur_x;
 242       layout->lods[lv].y = cur_y;
 243       layout->lods[lv].slice_width = lod_w;
 244       layout->lods[lv].slice_height = lod_h;
 245
 246       switch (layout->walk) {
 247       case ILO_LAYOUT_WALK_LOD:
 248          lod_h *= layout_get_num_layers(layout, params);
 249          if (lv == 1)
 250             cur_x += lod_w;
 251          else
 252             cur_y += lod_h;
 253
 254          /* every LOD begins at tile boundaries */
 255          if (templ->last_level > 0) {
 256             assert(layout->format == PIPE_FORMAT_S8_UINT);
 257             cur_x = align(cur_x, 64);
 258             cur_y = align(cur_y, 64);
 259          }
 260          break;
 261       case ILO_LAYOUT_WALK_LAYER:
 262          /* MIPLAYOUT_BELOW */
 263          if (lv == 1)
 264             cur_x += lod_w;
 265          else
 266             cur_y += lod_h;
 267          break;
 268       case ILO_LAYOUT_WALK_3D:
 269          {
 270             const unsigned num_slices = u_minify(templ->depth0, lv);
 271             const unsigned num_slices_per_row = 1 << lv;
 272             const unsigned num_rows =
 273                (num_slices + num_slices_per_row - 1) / num_slices_per_row;
 274
 275             lod_w *= num_slices_per_row;
 276             lod_h *= num_rows;
 277
 278             cur_y += lod_h;
 279          }
 280          break;
 281       }
 282
 283       if (params->max_x < layout->lods[lv].x + lod_w)
 284          params->max_x = layout->lods[lv].x + lod_w;
 285       if (params->max_y < layout->lods[lv].y + lod_h)
 286          params->max_y = layout->lods[lv].y + lod_h;
 287    }
 288
 289    if (layout->walk == ILO_LAYOUT_WALK_LAYER) {
 290       params->h0 = layout->lods[0].slice_height;
 291
 292       if (templ->last_level > 0)
 293          params->h1 = layout->lods[1].slice_height;
 294       else
 295          layout_get_slice_size(layout, params, 1, &cur_x, &params->h1);
 296    }
 297 }
 298
 299 static void
 300 layout_init_alignments(struct ilo_layout *layout,
 301                        struct ilo_layout_params *params)
 302 {
 303    const struct pipe_resource *templ = params->templ;
 304
 305    /*
 306     * From the Sandy Bridge PRM, volume 1 part 1, page 113:
 307     *
 308     *     "surface format           align_i     align_j
 309     *      YUV 4:2:2 formats        4           *see below
 310     *      BC1-5                    4           4
 311     *      FXT1                     8           4
 312     *      all other formats        4           *see below"
 313     *
 314     *     "- align_j = 4 for any depth buffer
 315     *      - align_j = 2 for separate stencil buffer
 316     *      - align_j = 4 for any render target surface is multisampled (4x)
 317     *      - align_j = 4 for any render target surface with Surface Vertical
 318     *        Alignment = VALIGN_4
 319     *      - align_j = 2 for any render target surface with Surface Vertical
 320     *        Alignment = VALIGN_2
 321     *      - align_j = 2 for all other render target surface
 322     *      - align_j = 2 for any sampling engine surface with Surface Vertical
 323     *        Alignment = VALIGN_2
 324     *      - align_j = 4 for any sampling engine surface with Surface Vertical
 325     *        Alignment = VALIGN_4"
 326     *
 327     * From the Sandy Bridge PRM, volume 4 part 1, page 86:
 328     *
 329     *     "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
 330     *      the Surface Format is 96 bits per element (BPE)."
 331     *
 332     * They can be rephrased as
 333     *
 334     *                                  align_i        align_j
 335     *   compressed formats             block width    block height
 336     *   PIPE_FORMAT_S8_UINT            4              2
 337     *   other depth/stencil formats    4              4
 338     *   4x multisampled                4              4
 339     *   bpp 96                         4              2
 340     *   others                         4              2 or 4
 341     */
 342
 343    /*
 344     * From the Ivy Bridge PRM, volume 1 part 1, page 110:
 345     *
 346     *     "surface defined by      surface format     align_i     align_j
 347     *      3DSTATE_DEPTH_BUFFER    D16_UNORM          8           4
 348     *                              not D16_UNORM      4           4
 349     *      3DSTATE_STENCIL_BUFFER  N/A                8           8
 350     *      SURFACE_STATE           BC*, ETC*, EAC*    4           4
 351     *                              FXT1               8           4
 352     *                              all others         (set by SURFACE_STATE)"
 353     *
 354     * From the Ivy Bridge PRM, volume 4 part 1, page 63:
 355     *
 356     *     "- This field (Surface Vertical Aligment) is intended to be set to
 357     *        VALIGN_4 if the surface was rendered as a depth buffer, for a
 358     *        multisampled (4x) render target, or for a multisampled (8x)
 359     *        render target, since these surfaces support only alignment of 4.
 360     *      - Use of VALIGN_4 for other surfaces is supported, but uses more
 361     *        memory.
 362     *      - This field must be set to VALIGN_4 for all tiled Y Render Target
 363     *        surfaces.
 364     *      - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
 365     *        YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
 366     *      - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
 367     *        must be set to VALIGN_4."
 368     *      - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
 369     *
 370     *     "- This field (Surface Horizontal Aligment) is intended to be set to
 371     *        HALIGN_8 only if the surface was rendered as a depth buffer with
 372     *        Z16 format or a stencil buffer, since these surfaces support only
 373     *        alignment of 8.
 374     *      - Use of HALIGN_8 for other surfaces is supported, but uses more
 375     *        memory.
 376     *      - This field must be set to HALIGN_4 if the Surface Format is BC*.
 377     *      - This field must be set to HALIGN_8 if the Surface Format is
 378     *        FXT1."
 379     *
 380     * They can be rephrased as
 381     *
 382     *                                  align_i        align_j
 383     *  compressed formats              block width    block height
 384     *  PIPE_FORMAT_Z16_UNORM           8              4
 385     *  PIPE_FORMAT_S8_UINT             8              8
 386     *  other depth/stencil formats     4              4
 387     *  2x or 4x multisampled           4 or 8         4
 388     *  tiled Y                         4 or 8         4 (if rt)
 389     *  PIPE_FORMAT_R32G32B32_FLOAT     4 or 8         2
 390     *  others                          4 or 8         2 or 4
 391     */
 392
 393    if (params->compressed) {
 394       /* this happens to be the case */
 395       layout->align_i = layout->block_width;
 396       layout->align_j = layout->block_height;
 397    } else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
 398       if (params->dev->gen >= ILO_GEN(7)) {
 399          switch (layout->format) {
 400          case PIPE_FORMAT_Z16_UNORM:
 401             layout->align_i = 8;
 402             layout->align_j = 4;
 403             break;
 404          case PIPE_FORMAT_S8_UINT:
 405             layout->align_i = 8;
 406             layout->align_j = 8;
 407             break;
 408          default:
 409             layout->align_i = 4;
 410             layout->align_j = 4;
 411             break;
 412          }
 413       } else {
 414          switch (layout->format) {
 415          case PIPE_FORMAT_S8_UINT:
 416             layout->align_i = 4;
 417             layout->align_j = 2;
 418             break;
 419          default:
 420             layout->align_i = 4;
 421             layout->align_j = 4;
 422             break;
 423          }
 424       }
 425    } else {
 426       const bool valign_4 = (templ->nr_samples > 1) ||
 427          (params->dev->gen >= ILO_GEN(7) &&
 428           layout->tiling == INTEL_TILING_Y &&
 429           (templ->bind & PIPE_BIND_RENDER_TARGET));
 430
 431       if (valign_4)
 432          assert(layout->block_size != 12);
 433
 434       layout->align_i = 4;
 435       layout->align_j = (valign_4) ? 4 : 2;
 436    }
 437
 438    /*
 439     * the fact that align i and j are multiples of block width and height
 440     * respectively is what makes the size of the bo a multiple of the block
 441     * size, slices start at block boundaries, and many of the computations
 442     * work.
 443     */
 444    assert(layout->align_i % layout->block_width == 0);
 445    assert(layout->align_j % layout->block_height == 0);
 446
 447    /* make sure align() works */
 448    assert(util_is_power_of_two(layout->align_i) &&
 449           util_is_power_of_two(layout->align_j));
 450    assert(util_is_power_of_two(layout->block_width) &&
 451           util_is_power_of_two(layout->block_height));
 452 }
 453
 454 static unsigned
 455 layout_get_valid_tilings(const struct ilo_layout *layout,
 456                          const struct ilo_layout_params *params)
 457 {
 458    const struct pipe_resource *templ = params->templ;
 459    const enum pipe_format format = layout->format;
 460    unsigned valid_tilings = LAYOUT_TILING_ALL;
 461
 462    /*
 463     * From the Sandy Bridge PRM, volume 1 part 2, page 32:
 464     *
 465     *     "Display/Overlay   Y-Major not supported.
 466     *                        X-Major required for Async Flips"
 467     */
 468    if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
 469       valid_tilings &= LAYOUT_TILING_X;
 470
 471    /*
 472     * From the Sandy Bridge PRM, volume 3 part 2, page 158:
 473     *
 474     *     "The cursor surface address must be 4K byte aligned. The cursor must
 475     *      be in linear memory, it cannot be tiled."
 476     */
 477    if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)))
 478       valid_tilings &= LAYOUT_TILING_NONE;
 479
 480    /*
 481     * From the Sandy Bridge PRM, volume 2 part 1, page 318:
 482     *
 483     *     "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
 484     *      Depth Buffer is not supported."
 485     *
 486     *     "The Depth Buffer, if tiled, must use Y-Major tiling."
 487     *
 488     * From the Sandy Bridge PRM, volume 1 part 2, page 22:
 489     *
 490     *     "W-Major Tile Format is used for separate stencil."
 491     */
 492    if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
 493       switch (format) {
 494       case PIPE_FORMAT_S8_UINT:
 495          valid_tilings &= LAYOUT_TILING_W;
 496          break;
 497       default:
 498          valid_tilings &= LAYOUT_TILING_Y;
 499          break;
 500       }
 501    }
 502
 503    if (templ->bind & PIPE_BIND_RENDER_TARGET) {
 504       /*
 505        * From the Sandy Bridge PRM, volume 1 part 2, page 32:
 506        *
 507        *     "NOTE: 128BPE Format Color buffer ( render target ) MUST be
 508        *      either TileX or Linear."
 509        */
 510       if (layout->block_size == 16)
 511          valid_tilings &= ~LAYOUT_TILING_Y;
 512
 513       /*
 514        * From the Ivy Bridge PRM, volume 4 part 1, page 63:
 515        *
 516        *     "This field (Surface Vertical Aligment) must be set to VALIGN_4
 517        *      for all tiled Y Render Target surfaces."
 518        *
 519        *     "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
 520        */
 521       if (params->dev->gen >= ILO_GEN(7) && layout->block_size == 12)
 522          valid_tilings &= ~LAYOUT_TILING_Y;
 523    }
 524
 525    /* no conflicting binding flags */
 526    assert(valid_tilings);
 527
 528    return valid_tilings;
 529 }
 530
 531 static void
 532 layout_init_tiling(struct ilo_layout *layout,
 533                    struct ilo_layout_params *params)
 534 {
 535    const struct pipe_resource *templ = params->templ;
 536    unsigned valid_tilings = layout_get_valid_tilings(layout, params);
 537
 538    /* no hardware support for W-tile */
 539    if (valid_tilings & LAYOUT_TILING_W)
 540       valid_tilings = (valid_tilings & ~LAYOUT_TILING_W) | LAYOUT_TILING_NONE;
 541
 542    layout->valid_tilings = valid_tilings;
 543
 544    if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
 545       /*
 546        * heuristically set a minimum width/height for enabling tiling
 547        */
 548       if (layout->width0 < 64 && (valid_tilings & ~LAYOUT_TILING_X))
 549          valid_tilings &= ~LAYOUT_TILING_X;
 550
 551       if ((layout->width0 < 32 || layout->height0 < 16) &&
 552           (layout->width0 < 16 || layout->height0 < 32) &&
 553           (valid_tilings & ~LAYOUT_TILING_Y))
 554          valid_tilings &= ~LAYOUT_TILING_Y;
 555    } else {
 556       /* force linear if we are not sure where the texture is bound to */
 557       if (valid_tilings & LAYOUT_TILING_NONE)
 558          valid_tilings &= LAYOUT_TILING_NONE;
 559    }
 560
 561    /* prefer tiled over linear */
 562    if (valid_tilings & LAYOUT_TILING_Y)
 563       layout->tiling = INTEL_TILING_Y;
 564    else if (valid_tilings & LAYOUT_TILING_X)
 565       layout->tiling = INTEL_TILING_X;
 566    else
 567       layout->tiling = INTEL_TILING_NONE;
 568 }
 569
 570 static void
 571 layout_init_walk_gen7(struct ilo_layout *layout,
 572                       struct ilo_layout_params *params)
 573 {
 574    const struct pipe_resource *templ = params->templ;
 575
 576    /*
 577     * It is not explicitly states, but render targets are expected to be
 578     * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
 579     * to be IMS (samples interleaved).
 580     *
 581     * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
 582     */
 583    if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
 584       /*
 585        * From the Ivy Bridge PRM, volume 1 part 1, page 111:
 586        *
 587        *     "note that the depth buffer and stencil buffer have an implied
 588        *      value of ARYSPC_FULL"
 589        */
 590       layout->walk = (templ->target == PIPE_TEXTURE_3D) ?
 591          ILO_LAYOUT_WALK_3D : ILO_LAYOUT_WALK_LAYER;
 592
 593       layout->interleaved_samples = true;
 594    } else {
 595       /*
 596        * From the Ivy Bridge PRM, volume 4 part 1, page 66:
 597        *
 598        *     "If Multisampled Surface Storage Format is MSFMT_MSS and Number
 599        *      of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
 600        *      Array Spacing) must be set to ARYSPC_LOD0."
 601        *
 602        * As multisampled resources are not mipmapped, we never use
 603        * ARYSPC_FULL for them.
 604        */
 605       if (templ->nr_samples > 1)
 606          assert(templ->last_level == 0);
 607
 608       layout->walk =
 609          (templ->target == PIPE_TEXTURE_3D) ? ILO_LAYOUT_WALK_3D :
 610          (templ->last_level > 0) ? ILO_LAYOUT_WALK_LAYER :
 611          ILO_LAYOUT_WALK_LOD;
 612
 613       layout->interleaved_samples = false;
 614    }
 615 }
 616
 617 static void
 618 layout_init_walk_gen6(struct ilo_layout *layout,
 619                       struct ilo_layout_params *params)
 620 {
 621    /*
 622     * From the Sandy Bridge PRM, volume 1 part 1, page 115:
 623     *
 624     *     "The separate stencil buffer does not support mip mapping, thus the
 625     *      storage for LODs other than LOD 0 is not needed. The following
 626     *      QPitch equation applies only to the separate stencil buffer:
 627     *
 628     *        QPitch = h_0"
 629     *
 630     * GEN6 does not support compact spacing otherwise.
 631     */
 632    layout->walk =
 633       (params->templ->target == PIPE_TEXTURE_3D) ? ILO_LAYOUT_WALK_3D :
 634       (layout->format == PIPE_FORMAT_S8_UINT) ? ILO_LAYOUT_WALK_LOD :
 635       ILO_LAYOUT_WALK_LAYER;
 636
 637    /* GEN6 supports only interleaved samples */
 638    layout->interleaved_samples = true;
 639 }
 640
 641 static void
 642 layout_init_walk(struct ilo_layout *layout,
 643                  struct ilo_layout_params *params)
 644 {
 645    if (params->dev->gen >= ILO_GEN(7))
 646       layout_init_walk_gen7(layout, params);
 647    else
 648       layout_init_walk_gen6(layout, params);
 649 }
 650
 651 static void
 652 layout_init_size_and_format(struct ilo_layout *layout,
 653                             struct ilo_layout_params *params)
 654 {
 655    const struct pipe_resource *templ = params->templ;
 656    enum pipe_format format = templ->format;
 657    bool require_separate_stencil;
 658
 659    layout->width0 = templ->width0;
 660    layout->height0 = templ->height0;
 661
 662    /*
 663     * From the Sandy Bridge PRM, volume 2 part 1, page 317:
 664     *
 665     *     "This field (Separate Stencil Buffer Enable) must be set to the same
 666     *      value (enabled or disabled) as Hierarchical Depth Buffer Enable."
 667     *
 668     * GEN7+ requires separate stencil buffers.
 669     */
 670    if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
 671       if (params->dev->gen >= ILO_GEN(7))
 672          require_separate_stencil = true;
 673       else
 674          require_separate_stencil = (layout->aux == ILO_LAYOUT_AUX_HIZ);
 675    }
 676
 677    switch (format) {
 678    case PIPE_FORMAT_ETC1_RGB8:
 679       format = PIPE_FORMAT_R8G8B8X8_UNORM;
 680       break;
 681    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
 682       if (require_separate_stencil) {
 683          format = PIPE_FORMAT_Z24X8_UNORM;
 684          layout->separate_stencil = true;
 685       }
 686       break;
 687    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
 688       if (require_separate_stencil) {
 689          format = PIPE_FORMAT_Z32_FLOAT;
 690          layout->separate_stencil = true;
 691       }
 692       break;
 693    default:
 694       break;
 695    }
 696
 697    layout->format = format;
 698    layout->block_width = util_format_get_blockwidth(format);
 699    layout->block_height = util_format_get_blockheight(format);
 700    layout->block_size = util_format_get_blocksize(format);
 701
 702    params->compressed = util_format_is_compressed(format);
 703 }
 704
 705 static bool
 706 layout_want_mcs(struct ilo_layout *layout,
 707                 struct ilo_layout_params *params)
 708 {
 709    const struct pipe_resource *templ = params->templ;
 710    bool want_mcs = false;
 711
 712    /* MCS is for RT on GEN7+ */
 713    if (params->dev->gen < ILO_GEN(7))
 714       return false;
 715
 716    if (templ->target != PIPE_TEXTURE_2D ||
 717        !(templ->bind & PIPE_BIND_RENDER_TARGET))
 718       return false;
 719
 720    /*
 721     * From the Ivy Bridge PRM, volume 4 part 1, page 77:
 722     *
 723     *     "For Render Target and Sampling Engine Surfaces:If the surface is
 724     *      multisampled (Number of Multisamples any value other than
 725     *      MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
 726     *
 727     *     "This field must be set to 0 for all SINT MSRTs when all RT channels
 728     *      are not written"
 729     */
 730    if (templ->nr_samples > 1 && !layout->interleaved_samples &&
 731        !util_format_is_pure_sint(templ->format)) {
 732       want_mcs = true;
 733    } else if (templ->nr_samples <= 1) {
 734       /*
 735        * From the Ivy Bridge PRM, volume 2 part 1, page 326:
 736        *
 737        *     "When MCS is buffer is used for color clear of non-multisampler
 738        *      render target, the following restrictions apply.
 739        *      - Support is limited to tiled render targets.
 740        *      - Support is for non-mip-mapped and non-array surface types
 741        *        only.
 742        *      - Clear is supported only on the full RT; i.e., no partial clear
 743        *        or overlapping clears.
 744        *      - MCS buffer for non-MSRT is supported only for RT formats
 745        *        32bpp, 64bpp and 128bpp.
 746        *      ..."
 747        */
 748       if (layout->tiling != INTEL_TILING_NONE &&
 749           templ->last_level == 0 && templ->array_size == 1) {
 750          switch (layout->block_size) {
 751          case 4:
 752          case 8:
 753          case 16:
 754             want_mcs = true;
 755             break;
 756          default:
 757             break;
 758          }
 759       }
 760    }
 761
 762    return want_mcs;
 763 }
 764
 765 static bool
 766 layout_want_hiz(const struct ilo_layout *layout,
 767                 const struct ilo_layout_params *params)
 768 {
 769    const struct pipe_resource *templ = params->templ;
 770    const struct util_format_description *desc =
 771       util_format_description(templ->format);
 772    bool want_hiz = false;
 773
 774    if (ilo_debug & ILO_DEBUG_NOHIZ)
 775       return false;
 776
 777    if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL))
 778       return false;
 779
 780    if (!util_format_has_depth(desc))
 781       return false;
 782
 783    /* no point in having HiZ */
 784    if (templ->usage == PIPE_USAGE_STAGING)
 785       return false;
 786
 787    if (params->dev->gen >= ILO_GEN(7)) {
 788       want_hiz = true;
 789    } else {
 790       /*
 791        * From the Sandy Bridge PRM, volume 2 part 1, page 312:
 792        *
 793        *     "The hierarchical depth buffer does not support the LOD field, it
 794        *      is assumed by hardware to be zero. A separate hierarachical
 795        *      depth buffer is required for each LOD used, and the
 796        *      corresponding buffer's state delivered to hardware each time a
 797        *      new depth buffer state with modified LOD is delivered."
 798        *
 799        * But we have a stronger requirement.  Because of layer offsetting
 800        * (check out the callers of ilo_layout_get_slice_tile_offset()), we
 801        * already have to require the texture to be non-mipmapped and
 802        * non-array.
 803        */
 804       if (templ->last_level == 0 && templ->array_size == 1 &&
 805           templ->depth0 == 1)
 806          want_hiz = true;
 807    }
 808
 809    return want_hiz;
 810 }
 811
 812 static void
 813 layout_init_aux(struct ilo_layout *layout,
 814                 struct ilo_layout_params *params)
 815 {
 816    if (layout_want_hiz(layout, params))
 817       layout->aux = ILO_LAYOUT_AUX_HIZ;
 818    else if (layout_want_mcs(layout, params))
 819       layout->aux = ILO_LAYOUT_AUX_MCS;
 820 }
 821
 822 static void
 823 layout_align(struct ilo_layout *layout, struct ilo_layout_params *params)
 824 {
 825    const struct pipe_resource *templ = params->templ;
 826    int align_w = 1, align_h = 1, pad_h = 0;
 827
 828    /*
 829     * From the Sandy Bridge PRM, volume 1 part 1, page 118:
 830     *
 831     *     "To determine the necessary padding on the bottom and right side of
 832     *      the surface, refer to the table in Section 7.18.3.4 for the i and j
 833     *      parameters for the surface format in use. The surface must then be
 834     *      extended to the next multiple of the alignment unit size in each
 835     *      dimension, and all texels contained in this extended surface must
 836     *      have valid GTT entries."
 837     *
 838     *     "For cube surfaces, an additional two rows of padding are required
 839     *      at the bottom of the surface. This must be ensured regardless of
 840     *      whether the surface is stored tiled or linear.  This is due to the
 841     *      potential rotation of cache line orientation from memory to cache."
 842     *
 843     *     "For compressed textures (BC* and FXT1 surface formats), padding at
 844     *      the bottom of the surface is to an even compressed row, which is
 845     *      equal to a multiple of 8 uncompressed texel rows. Thus, for padding
 846     *      purposes, these surfaces behave as if j = 8 only for surface
 847     *      padding purposes. The value of 4 for j still applies for mip level
 848     *      alignment and QPitch calculation."
 849     */
 850    if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
 851       align_w = MAX2(align_w, layout->align_i);
 852       align_h = MAX2(align_h, layout->align_j);
 853
 854       if (templ->target == PIPE_TEXTURE_CUBE)
 855          pad_h += 2;
 856
 857       if (params->compressed)
 858          align_h = MAX2(align_h, layout->align_j * 2);
 859    }
 860
 861    /*
 862     * From the Sandy Bridge PRM, volume 1 part 1, page 118:
 863     *
 864     *     "If the surface contains an odd number of rows of data, a final row
 865     *      below the surface must be allocated."
 866     */
 867    if (templ->bind & PIPE_BIND_RENDER_TARGET)
 868       align_h = MAX2(align_h, 2);
 869
 870    /*
 871     * Depth Buffer Clear/Resolve works in 8x4 sample blocks.  In
 872     * ilo_texture_can_enable_hiz(), we always return true for the first slice.
 873     * To avoid out-of-bound access, we have to pad.
 874     */
 875    if (layout->aux == ILO_LAYOUT_AUX_HIZ &&
 876        templ->last_level == 0 &&
 877        templ->array_size == 1 &&
 878        templ->depth0 == 1) {
 879       align_w = MAX2(align_w, 8);
 880       align_h = MAX2(align_h, 4);
 881    }
 882
 883    params->max_x = align(params->max_x, align_w);
 884    params->max_y = align(params->max_y + pad_h, align_h);
 885 }
 886
 887 /* note that this may force the texture to be linear */
 888 static void
 889 layout_calculate_bo_size(struct ilo_layout *layout,
 890                          struct ilo_layout_params *params)
 891 {
 892    assert(params->max_x % layout->block_width == 0);
 893    assert(params->max_y % layout->block_height == 0);
 894    assert(layout->layer_height % layout->block_height == 0);
 895
 896    layout->bo_stride =
 897       (params->max_x / layout->block_width) * layout->block_size;
 898    layout->bo_height = params->max_y / layout->block_height;
 899
 900    while (true) {
 901       unsigned w = layout->bo_stride, h = layout->bo_height;
 902       unsigned align_w, align_h;
 903
 904       /*
 905        * From the Haswell PRM, volume 5, page 163:
 906        *
 907        *     "For linear surfaces, additional padding of 64 bytes is required
 908        *      at the bottom of the surface. This is in addition to the padding
 909        *      required above."
 910        */
 911       if (params->dev->gen >= ILO_GEN(7.5) &&
 912           (params->templ->bind & PIPE_BIND_SAMPLER_VIEW) &&
 913           layout->tiling == INTEL_TILING_NONE) {
 914          layout->bo_height +=
 915             (64 + layout->bo_stride - 1) / layout->bo_stride;
 916       }
 917
 918       /*
 919        * From the Sandy Bridge PRM, volume 4 part 1, page 81:
 920        *
 921        *     "- For linear render target surfaces, the pitch must be a
 922        *        multiple of the element size for non-YUV surface formats.
 923        *        Pitch must be a multiple of 2 * element size for YUV surface
 924        *        formats.
 925        *      - For other linear surfaces, the pitch can be any multiple of
 926        *        bytes.
 927        *      - For tiled surfaces, the pitch must be a multiple of the tile
 928        *        width."
 929        *
 930        * Different requirements may exist when the bo is used in different
 931        * places, but our alignments here should be good enough that we do not
 932        * need to check layout->templ->bind.
 933        */
 934       switch (layout->tiling) {
 935       case INTEL_TILING_X:
 936          align_w = 512;
 937          align_h = 8;
 938          break;
 939       case INTEL_TILING_Y:
 940          align_w = 128;
 941          align_h = 32;
 942          break;
 943       default:
 944          if (layout->format == PIPE_FORMAT_S8_UINT) {
 945             /*
 946              * From the Sandy Bridge PRM, volume 1 part 2, page 22:
 947              *
 948              *     "A 4KB tile is subdivided into 8-high by 8-wide array of
 949              *      Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
 950              *      bytes."
 951              *
 952              * Since we asked for INTEL_TILING_NONE instead of the non-existent
 953              * INTEL_TILING_W, we want to align to W tiles here.
 954              */
 955             align_w = 64;
 956             align_h = 64;
 957          } else {
 958             /* some good enough values */
 959             align_w = 64;
 960             align_h = 2;
 961          }
 962          break;
 963       }
 964
 965       w = align(w, align_w);
 966       h = align(h, align_h);
 967
 968       /* make sure the bo is mappable */
 969       if (layout->tiling != INTEL_TILING_NONE) {
 970          /*
 971           * Usually only the first 256MB of the GTT is mappable.
 972           *
 973           * See also how intel_context::max_gtt_map_object_size is calculated.
 974           */
 975          const size_t mappable_gtt_size = 256 * 1024 * 1024;
 976
 977          /*
 978           * Be conservative.  We may be able to switch from VALIGN_4 to
 979           * VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
 980           */
 981          if (mappable_gtt_size / w / 4 < h) {
 982             if (layout->valid_tilings & LAYOUT_TILING_NONE) {
 983                layout->tiling = INTEL_TILING_NONE;
 984                /* MCS support for non-MSRTs is limited to tiled RTs */
 985                if (layout->aux == ILO_LAYOUT_AUX_MCS &&
 986                    params->templ->nr_samples <= 1)
 987                   layout->aux = ILO_LAYOUT_AUX_NONE;
 988
 989                continue;
 990             } else {
 991                ilo_warn("cannot force texture to be linear\n");
 992             }
 993          }
 994       }
 995
 996       layout->bo_stride = w;
 997       layout->bo_height = h;
 998       break;
 999    }
1000 }
1001
1002 static void
1003 layout_calculate_hiz_size(struct ilo_layout *layout,
1004                           struct ilo_layout_params *params)
1005 {
1006    const struct pipe_resource *templ = params->templ;
1007    const unsigned hz_align_j = 8;
1008    enum ilo_layout_walk_type hz_walk;
1009    unsigned hz_width, hz_height, lv;
1010    unsigned hz_clear_w, hz_clear_h;
1011
1012    assert(layout->aux == ILO_LAYOUT_AUX_HIZ);
1013
1014    assert(layout->walk == ILO_LAYOUT_WALK_LAYER ||
1015           layout->walk == ILO_LAYOUT_WALK_3D);
1016
1017    /*
1018     * From the Sandy Bridge PRM, volume 2 part 1, page 312:
1019     *
1020     *     "The hierarchical depth buffer does not support the LOD field, it is
1021     *      assumed by hardware to be zero. A separate hierarachical depth
1022     *      buffer is required for each LOD used, and the corresponding
1023     *      buffer's state delivered to hardware each time a new depth buffer
1024     *      state with modified LOD is delivered."
1025     *
1026     * We will put all LODs in a single bo with ILO_LAYOUT_WALK_LOD.
1027     */
1028    if (params->dev->gen >= ILO_GEN(7))
1029       hz_walk = layout->walk;
1030    else
1031       hz_walk = ILO_LAYOUT_WALK_LOD;
1032
1033    /*
1034     * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1035     * PRM, volume 2 part 1, page 312-313.
1036     *
1037     * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1038     * memory row.
1039     */
1040    switch (hz_walk) {
1041    case ILO_LAYOUT_WALK_LOD:
1042       {
1043          unsigned lod_tx[PIPE_MAX_TEXTURE_LEVELS];
1044          unsigned lod_ty[PIPE_MAX_TEXTURE_LEVELS];
1045          unsigned cur_tx, cur_ty;
1046
1047          /* figure out the tile offsets of LODs */
1048          hz_width = 0;
1049          hz_height = 0;
1050          cur_tx = 0;
1051          cur_ty = 0;
1052          for (lv = 0; lv <= templ->last_level; lv++) {
1053             unsigned tw, th;
1054
1055             lod_tx[lv] = cur_tx;
1056             lod_ty[lv] = cur_ty;
1057
1058             tw = align(layout->lods[lv].slice_width, 16);
1059             th = align(layout->lods[lv].slice_height, hz_align_j) *
1060                templ->array_size / 2;
1061             /* convert to Y-tiles */
1062             tw = align(tw, 128) / 128;
1063             th = align(th, 32) / 32;
1064
1065             if (hz_width < cur_tx + tw)
1066                hz_width = cur_tx + tw;
1067             if (hz_height < cur_ty + th)
1068                hz_height = cur_ty + th;
1069
1070             if (lv == 1)
1071                cur_tx += tw;
1072             else
1073                cur_ty += th;
1074          }
1075
1076          /* convert tile offsets to memory offsets */
1077          for (lv = 0; lv <= templ->last_level; lv++) {
1078             layout->aux_offsets[lv] =
1079                (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
1080          }
1081          hz_width *= 128;
1082          hz_height *= 32;
1083       }
1084       break;
1085    case ILO_LAYOUT_WALK_LAYER:
1086       {
1087          const unsigned h0 = align(params->h0, hz_align_j);
1088          const unsigned h1 = align(params->h1, hz_align_j);
1089          const unsigned htail =
1090             ((params->dev->gen >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
1091          const unsigned hz_qpitch = h0 + h1 + htail;
1092
1093          hz_width = align(layout->lods[0].slice_width, 16);
1094
1095          hz_height = hz_qpitch * templ->array_size / 2;
1096          if (params->dev->gen >= ILO_GEN(7))
1097             hz_height = align(hz_height, 8);
1098       }
1099       break;
1100    case ILO_LAYOUT_WALK_3D:
1101       hz_width = align(layout->lods[0].slice_width, 16);
1102
1103       hz_height = 0;
1104       for (lv = 0; lv <= templ->last_level; lv++) {
1105          const unsigned h = align(layout->lods[lv].slice_height, hz_align_j);
1106          /* according to the formula, slices are packed together vertically */
1107          hz_height += h * u_minify(templ->depth0, lv);
1108       }
1109       hz_height /= 2;
1110       break;
1111    }
1112
1113    /*
1114     * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
1115     * Experiments on Haswell show that aligning the RECTLIST primitive and
1116     * 3DSTATE_DRAWING_RECTANGLE alone are not enough.  The LOD sizes must be
1117     * aligned.
1118     */
1119    hz_clear_w = 8;
1120    hz_clear_h = 4;
1121    switch (templ->nr_samples) {
1122    case 0:
1123    case 1:
1124    default:
1125       break;
1126    case 2:
1127       hz_clear_w /= 2;
1128       break;
1129    case 4:
1130       hz_clear_w /= 2;
1131       hz_clear_h /= 2;
1132       break;
1133    case 8:
1134       hz_clear_w /= 4;
1135       hz_clear_h /= 2;
1136       break;
1137    case 16:
1138       hz_clear_w /= 4;
1139       hz_clear_h /= 4;
1140       break;
1141    }
1142
1143    for (lv = 0; lv <= templ->last_level; lv++) {
1144       if (u_minify(layout->width0, lv) % hz_clear_w ||
1145           u_minify(layout->height0, lv) % hz_clear_h)
1146          break;
1147       layout->aux_enables |= 1 << lv;
1148    }
1149
1150    /* we padded to allow this in layout_align() */
1151    if (templ->last_level == 0 && templ->array_size == 1 && templ->depth0 == 1)
1152       layout->aux_enables |= 0x1;
1153
1154    /* align to Y-tile */
1155    layout->aux_stride = align(hz_width, 128);
1156    layout->aux_height = align(hz_height, 32);
1157 }
1158
1159 static void
1160 layout_calculate_mcs_size(struct ilo_layout *layout,
1161                           struct ilo_layout_params *params)
1162 {
1163    const struct pipe_resource *templ = params->templ;
1164    int mcs_width, mcs_height, mcs_cpp;
1165    int downscale_x, downscale_y;
1166
1167    assert(layout->aux == ILO_LAYOUT_AUX_MCS);
1168
1169    if (templ->nr_samples > 1) {
1170       /*
1171        * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1172        * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA.  The
1173        * need of scale down could be that the clear rectangle is used to clear
1174        * the MCS instead of the RT.
1175        *
1176        * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT.  The
1177        * 2x2 factor could come from that the hardware writes 128 bits (an
1178        * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1179        * the RT.  For 4X MSAA, we need 8 bits in MCS for every pixel in the
1180        * RT.  Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1181        * pixel block in the RT.
1182        */
1183       switch (templ->nr_samples) {
1184       case 2:
1185       case 4:
1186          downscale_x = 8;
1187          downscale_y = 2;
1188          mcs_cpp = 1;
1189          break;
1190       case 8:
1191          downscale_x = 2;
1192          downscale_y = 2;
1193          mcs_cpp = 4;
1194          break;
1195       case 16:
1196          downscale_x = 2;
1197          downscale_y = 1;
1198          mcs_cpp = 8;
1199          break;
1200       default:
1201          assert(!"unsupported sample count");
1202          return;
1203          break;
1204       }
1205
1206       /*
1207        * It also appears that the 2x2 subspans generated by the scaled-down
1208        * clear rectangle cannot be masked.  The scale-down clear rectangle
1209        * thus must be aligned to 2x2, and we need to pad.
1210        */
1211       mcs_width = align(layout->width0, downscale_x * 2);
1212       mcs_height = align(layout->height0, downscale_y * 2);
1213    } else {
1214       /*
1215        * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1216        *
1217        *     "              Pixels  Lines
1218        *      TiledY RT CL
1219        *          bpp
1220        *          32          8        4
1221        *          64          4        4
1222        *          128         2        4
1223        *
1224        *      TiledX RT CL
1225        *          bpp
1226        *          32          16       2
1227        *          64          8        2
1228        *          128         4        2"
1229        *
1230        * This table and the two following tables define the RT alignments, the
1231        * clear rectangle alignments, and the clear rectangle scale factors.
1232        * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1233        * that the clear rectangle alignments are 16x32 blocks, and the clear
1234        * rectangle scale factors are 8x16 blocks.
1235        *
1236        * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1237        * RT.  Similar to the MSAA cases, we can argue that an OWord maps to
1238        * 8x16 blocks.
1239        *
1240        * One problem with this reasoning is that a Y-tile in MCS has 8x32
1241        * OWords and maps to 64x512 128-byte blocks.  This differs from i965,
1242        * which says that a Y-tile maps to 128x256 blocks (\see
1243        * intel_get_non_msrt_mcs_alignment).  It does not really change
1244        * anything except for the size of the allocated MCS.  Let's see if we
1245        * hit out-of-bound access.
1246        */
1247       switch (layout->tiling) {
1248       case INTEL_TILING_X:
1249          downscale_x = 64 / layout->block_size;
1250          downscale_y = 2;
1251          break;
1252       case INTEL_TILING_Y:
1253          downscale_x = 32 / layout->block_size;
1254          downscale_y = 4;
1255          break;
1256       default:
1257          assert(!"unsupported tiling mode");
1258          return;
1259          break;
1260       }
1261
1262       downscale_x *= 8;
1263       downscale_y *= 16;
1264
1265       /*
1266        * From the Haswell PRM, volume 7, page 652:
1267        *
1268        *     "Clear rectangle must be aligned to two times the number of
1269        *      pixels in the table shown below due to 16X16 hashing across the
1270        *      slice."
1271        *
1272        * The scaled-down clear rectangle must be aligned to 4x4 instead of
1273        * 2x2, and we need to pad.
1274        */
1275       mcs_width = align(layout->width0, downscale_x * 4) / downscale_x;
1276       mcs_height = align(layout->height0, downscale_y * 4) / downscale_y;
1277       mcs_cpp = 16; /* an OWord */
1278    }
1279
1280    layout->aux_enables = (1 << (templ->last_level + 1)) - 1;
1281    /* align to Y-tile */
1282    layout->aux_stride = align(mcs_width * mcs_cpp, 128);
1283    layout->aux_height = align(mcs_height, 32);
1284 }
1285
1286 /**
1287  * The texutre is for transfer only.  We can define our own layout to save
1288  * space.
1289  */
1290 static void
1291 layout_init_for_transfer(struct ilo_layout *layout,
1292                          const struct ilo_dev_info *dev,
1293                          const struct pipe_resource *templ)
1294 {
1295    const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ?
1296       templ->depth0 : templ->array_size;
1297    unsigned layer_width, layer_height;
1298
1299    assert(templ->last_level == 0);
1300    assert(templ->nr_samples <= 1);
1301
1302    layout->aux = ILO_LAYOUT_AUX_NONE;
1303    layout->width0 = templ->width0;
1304    layout->height0 = templ->height0;
1305    layout->format = templ->format;
1306    layout->block_width = util_format_get_blockwidth(templ->format);
1307    layout->block_height = util_format_get_blockheight(templ->format);
1308    layout->block_size = util_format_get_blocksize(templ->format);
1309    layout->walk = ILO_LAYOUT_WALK_LOD;
1310
1311    layout->valid_tilings = LAYOUT_TILING_NONE;
1312    layout->tiling = INTEL_TILING_NONE;
1313
1314    layout->align_i = layout->block_width;
1315    layout->align_j = layout->block_height;
1316
1317    assert(util_is_power_of_two(layout->block_width) &&
1318           util_is_power_of_two(layout->block_height));
1319
1320    /* use packed layout */
1321    layer_width = align(templ->width0, layout->align_i);
1322    layer_height = align(templ->height0, layout->align_j);
1323
1324    layout->lods[0].slice_width = layer_width;
1325    layout->lods[0].slice_height = layer_height;
1326
1327    layout->bo_stride = (layer_width / layout->block_width) * layout->block_size;
1328    layout->bo_stride = align(layout->bo_stride, 64);
1329
1330    layout->bo_height = (layer_height / layout->block_height) * num_layers;
1331 }
1332
1333 /**
1334  * Initialize the layout.  Callers should zero-initialize \p layout first.
1335  */
1336 void ilo_layout_init(struct ilo_layout *layout,
1337                      const struct ilo_dev_info *dev,
1338                      const struct pipe_resource *templ)
1339 {
1340    struct ilo_layout_params params;
1341    bool transfer_only;
1342
1343    /* use transfer layout when the texture is never bound to GPU */
1344    transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE |
1345                                      PIPE_BIND_TRANSFER_READ));
1346    if (transfer_only && templ->last_level == 0 && templ->nr_samples <= 1) {
1347       layout_init_for_transfer(layout, dev, templ);
1348       return;
1349    }
1350
1351    memset(&params, 0, sizeof(params));
1352    params.dev = dev;
1353    params.templ = templ;
1354
1355    /* note that there are dependencies between these functions */
1356    layout_init_aux(layout, &params);
1357    layout_init_size_and_format(layout, &params);
1358    layout_init_walk(layout, &params);
1359    layout_init_tiling(layout, &params);
1360    layout_init_alignments(layout, &params);
1361    layout_init_lods(layout, &params);
1362    layout_init_layer_height(layout, &params);
1363
1364    layout_align(layout, &params);
1365    layout_calculate_bo_size(layout, &params);
1366
1367    switch (layout->aux) {
1368    case ILO_LAYOUT_AUX_HIZ:
1369       layout_calculate_hiz_size(layout, &params);
1370       break;
1371    case ILO_LAYOUT_AUX_MCS:
1372       layout_calculate_mcs_size(layout, &params);
1373       break;
1374    default:
1375       break;
1376    }
1377 }
1378
1379 /**
1380  * Update the tiling mode and bo stride (for imported resources).
1381  */
1382 bool
1383 ilo_layout_update_for_imported_bo(struct ilo_layout *layout,
1384                                   enum intel_tiling_mode tiling,
1385                                   unsigned bo_stride)
1386 {
1387    if (!(layout->valid_tilings & (1 << tiling)))
1388       return false;
1389
1390    if ((tiling == INTEL_TILING_X && bo_stride % 512) ||
1391        (tiling == INTEL_TILING_Y && bo_stride % 128))
1392       return false;
1393
1394    layout->tiling = tiling;
1395    layout->bo_stride = bo_stride;
1396
1397    return true;
1398 }