src/gallium/drivers/ilo/core/ilo_image.c

   1 /*
   2  * Mesa 3-D graphics library
   3  *
   4  * Copyright (C) 2014 LunarG, Inc.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included
  14  * in all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22  * DEALINGS IN THE SOFTWARE.
  23  *
  24  * Authors:
  25  *    Chia-I Wu <olv@lunarg.com>
  26  */
  27
  28 #include "ilo_debug.h"
  29 #include "ilo_image.h"
  30
  31 enum {
  32    IMAGE_TILING_NONE = 1 << GEN6_TILING_NONE,
  33    IMAGE_TILING_X    = 1 << GEN6_TILING_X,
  34    IMAGE_TILING_Y    = 1 << GEN6_TILING_Y,
  35    IMAGE_TILING_W    = 1 << GEN8_TILING_W,
  36
  37    IMAGE_TILING_ALL  = (IMAGE_TILING_NONE |
  38                         IMAGE_TILING_X |
  39                         IMAGE_TILING_Y |
  40                         IMAGE_TILING_W)
  41 };
  42
  43 struct ilo_image_params {
  44    const struct ilo_dev *dev;
  45    const struct pipe_resource *templ;
  46    unsigned valid_tilings;
  47
  48    bool compressed;
  49
  50    unsigned h0, h1;
  51    unsigned max_x, max_y;
  52 };
  53
  54 static void
  55 img_get_slice_size(const struct ilo_image *img,
  56                    const struct ilo_image_params *params,
  57                    unsigned level, unsigned *width, unsigned *height)
  58 {
  59    const struct pipe_resource *templ = params->templ;
  60    unsigned w, h;
  61
  62    w = u_minify(img->width0, level);
  63    h = u_minify(img->height0, level);
  64
  65    /*
  66     * From the Sandy Bridge PRM, volume 1 part 1, page 114:
  67     *
  68     *     "The dimensions of the mip maps are first determined by applying the
  69     *      sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
  70     *      if necessary, they are padded out to compression block boundaries."
  71     */
  72    w = align(w, img->block_width);
  73    h = align(h, img->block_height);
  74
  75    /*
  76     * From the Sandy Bridge PRM, volume 1 part 1, page 111:
  77     *
  78     *     "If the surface is multisampled (4x), these values must be adjusted
  79     *      as follows before proceeding:
  80     *
  81     *        W_L = ceiling(W_L / 2) * 4
  82     *        H_L = ceiling(H_L / 2) * 4"
  83     *
  84     * From the Ivy Bridge PRM, volume 1 part 1, page 108:
  85     *
  86     *     "If the surface is multisampled and it is a depth or stencil surface
  87     *      or Multisampled Surface StorageFormat in SURFACE_STATE is
  88     *      MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
  89     *      proceeding:
  90     *
  91     *        #samples  W_L =                    H_L =
  92     *        2         ceiling(W_L / 2) * 4     HL [no adjustment]
  93     *        4         ceiling(W_L / 2) * 4     ceiling(H_L / 2) * 4
  94     *        8         ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 4
  95     *        16        ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 8"
  96     *
  97     * For interleaved samples (4x), where pixels
  98     *
  99     *   (x, y  ) (x+1, y  )
 100     *   (x, y+1) (x+1, y+1)
 101     *
 102     * would be is occupied by
 103     *
 104     *   (x, y  , si0) (x+1, y  , si0) (x, y  , si1) (x+1, y  , si1)
 105     *   (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
 106     *   (x, y  , si2) (x+1, y  , si2) (x, y  , si3) (x+1, y  , si3)
 107     *   (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
 108     *
 109     * Thus the need to
 110     *
 111     *   w = align(w, 2) * 2;
 112     *   y = align(y, 2) * 2;
 113     */
 114    if (img->interleaved_samples) {
 115       switch (templ->nr_samples) {
 116       case 0:
 117       case 1:
 118          break;
 119       case 2:
 120          w = align(w, 2) * 2;
 121          break;
 122       case 4:
 123          w = align(w, 2) * 2;
 124          h = align(h, 2) * 2;
 125          break;
 126       case 8:
 127          w = align(w, 2) * 4;
 128          h = align(h, 2) * 2;
 129          break;
 130       case 16:
 131          w = align(w, 2) * 4;
 132          h = align(h, 2) * 4;
 133          break;
 134       default:
 135          assert(!"unsupported sample count");
 136          break;
 137       }
 138    }
 139
 140    /*
 141     * From the Ivy Bridge PRM, volume 1 part 1, page 108:
 142     *
 143     *     "For separate stencil buffer, the width must be mutiplied by 2 and
 144     *      height divided by 2..."
 145     *
 146     * To make things easier (for transfer), we will just double the stencil
 147     * stride in 3DSTATE_STENCIL_BUFFER.
 148     */
 149    w = align(w, img->align_i);
 150    h = align(h, img->align_j);
 151
 152    *width = w;
 153    *height = h;
 154 }
 155
 156 static unsigned
 157 img_get_num_layers(const struct ilo_image *img,
 158                    const struct ilo_image_params *params)
 159 {
 160    const struct pipe_resource *templ = params->templ;
 161    unsigned num_layers = templ->array_size;
 162
 163    /* samples of the same index are stored in a layer */
 164    if (templ->nr_samples > 1 && !img->interleaved_samples)
 165       num_layers *= templ->nr_samples;
 166
 167    return num_layers;
 168 }
 169
 170 static void
 171 img_init_layer_height(struct ilo_image *img,
 172                       struct ilo_image_params *params)
 173 {
 174    const struct pipe_resource *templ = params->templ;
 175    unsigned num_layers;
 176
 177    if (img->walk != ILO_IMAGE_WALK_LAYER)
 178       return;
 179
 180    num_layers = img_get_num_layers(img, params);
 181    if (num_layers <= 1)
 182       return;
 183
 184    /*
 185     * From the Sandy Bridge PRM, volume 1 part 1, page 115:
 186     *
 187     *     "The following equation is used for surface formats other than
 188     *      compressed textures:
 189     *
 190     *        QPitch = (h0 + h1 + 11j)"
 191     *
 192     *     "The equation for compressed textures (BC* and FXT1 surface formats)
 193     *      follows:
 194     *
 195     *        QPitch = (h0 + h1 + 11j) / 4"
 196     *
 197     *     "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
 198     *      value calculated in the equation above, for every other odd Surface
 199     *      Height starting from 1 i.e. 1,5,9,13"
 200     *
 201     * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
 202     *
 203     *     "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
 204     *      buffer and stencil buffer have an implied value of ARYSPC_FULL):
 205     *
 206     *        QPitch = (h0 + h1 + 12j)
 207     *        QPitch = (h0 + h1 + 12j) / 4 (compressed)
 208     *
 209     *      (There are many typos or missing words here...)"
 210     *
 211     * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
 212     * the base address.  The PRM divides QPitch by 4 for compressed formats
 213     * because the block height for those formats are 4, and it wants QPitch to
 214     * mean the number of memory rows, as opposed to texel rows, between
 215     * slices.  Since we use texel rows everywhere, we do not need to divide
 216     * QPitch by 4.
 217     */
 218    img->walk_layer_height = params->h0 + params->h1 +
 219       ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * img->align_j;
 220
 221    if (ilo_dev_gen(params->dev) == ILO_GEN(6) && templ->nr_samples > 1 &&
 222        img->height0 % 4 == 1)
 223       img->walk_layer_height += 4;
 224
 225    params->max_y += img->walk_layer_height * (num_layers - 1);
 226 }
 227
 228 static void
 229 img_init_lods(struct ilo_image *img,
 230               struct ilo_image_params *params)
 231 {
 232    const struct pipe_resource *templ = params->templ;
 233    unsigned cur_x, cur_y;
 234    unsigned lv;
 235
 236    cur_x = 0;
 237    cur_y = 0;
 238    for (lv = 0; lv <= templ->last_level; lv++) {
 239       unsigned lod_w, lod_h;
 240
 241       img_get_slice_size(img, params, lv, &lod_w, &lod_h);
 242
 243       img->lods[lv].x = cur_x;
 244       img->lods[lv].y = cur_y;
 245       img->lods[lv].slice_width = lod_w;
 246       img->lods[lv].slice_height = lod_h;
 247
 248       switch (img->walk) {
 249       case ILO_IMAGE_WALK_LAYER:
 250          /* MIPLAYOUT_BELOW */
 251          if (lv == 1)
 252             cur_x += lod_w;
 253          else
 254             cur_y += lod_h;
 255          break;
 256       case ILO_IMAGE_WALK_LOD:
 257          lod_h *= img_get_num_layers(img, params);
 258          if (lv == 1)
 259             cur_x += lod_w;
 260          else
 261             cur_y += lod_h;
 262
 263          /* every LOD begins at tile boundaries */
 264          if (templ->last_level > 0) {
 265             assert(img->format == PIPE_FORMAT_S8_UINT);
 266             cur_x = align(cur_x, 64);
 267             cur_y = align(cur_y, 64);
 268          }
 269          break;
 270       case ILO_IMAGE_WALK_3D:
 271          {
 272             const unsigned num_slices = u_minify(templ->depth0, lv);
 273             const unsigned num_slices_per_row = 1 << lv;
 274             const unsigned num_rows =
 275                (num_slices + num_slices_per_row - 1) / num_slices_per_row;
 276
 277             lod_w *= num_slices_per_row;
 278             lod_h *= num_rows;
 279
 280             cur_y += lod_h;
 281          }
 282          break;
 283       }
 284
 285       if (params->max_x < img->lods[lv].x + lod_w)
 286          params->max_x = img->lods[lv].x + lod_w;
 287       if (params->max_y < img->lods[lv].y + lod_h)
 288          params->max_y = img->lods[lv].y + lod_h;
 289    }
 290
 291    if (img->walk == ILO_IMAGE_WALK_LAYER) {
 292       params->h0 = img->lods[0].slice_height;
 293
 294       if (templ->last_level > 0)
 295          params->h1 = img->lods[1].slice_height;
 296       else
 297          img_get_slice_size(img, params, 1, &cur_x, &params->h1);
 298    }
 299 }
 300
 301 static void
 302 img_init_alignments(struct ilo_image *img,
 303                     const struct ilo_image_params *params)
 304 {
 305    const struct pipe_resource *templ = params->templ;
 306
 307    /*
 308     * From the Sandy Bridge PRM, volume 1 part 1, page 113:
 309     *
 310     *     "surface format           align_i     align_j
 311     *      YUV 4:2:2 formats        4           *see below
 312     *      BC1-5                    4           4
 313     *      FXT1                     8           4
 314     *      all other formats        4           *see below"
 315     *
 316     *     "- align_j = 4 for any depth buffer
 317     *      - align_j = 2 for separate stencil buffer
 318     *      - align_j = 4 for any render target surface is multisampled (4x)
 319     *      - align_j = 4 for any render target surface with Surface Vertical
 320     *        Alignment = VALIGN_4
 321     *      - align_j = 2 for any render target surface with Surface Vertical
 322     *        Alignment = VALIGN_2
 323     *      - align_j = 2 for all other render target surface
 324     *      - align_j = 2 for any sampling engine surface with Surface Vertical
 325     *        Alignment = VALIGN_2
 326     *      - align_j = 4 for any sampling engine surface with Surface Vertical
 327     *        Alignment = VALIGN_4"
 328     *
 329     * From the Sandy Bridge PRM, volume 4 part 1, page 86:
 330     *
 331     *     "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
 332     *      the Surface Format is 96 bits per element (BPE)."
 333     *
 334     * They can be rephrased as
 335     *
 336     *                                  align_i        align_j
 337     *   compressed formats             block width    block height
 338     *   PIPE_FORMAT_S8_UINT            4              2
 339     *   other depth/stencil formats    4              4
 340     *   4x multisampled                4              4
 341     *   bpp 96                         4              2
 342     *   others                         4              2 or 4
 343     */
 344
 345    /*
 346     * From the Ivy Bridge PRM, volume 1 part 1, page 110:
 347     *
 348     *     "surface defined by      surface format     align_i     align_j
 349     *      3DSTATE_DEPTH_BUFFER    D16_UNORM          8           4
 350     *                              not D16_UNORM      4           4
 351     *      3DSTATE_STENCIL_BUFFER  N/A                8           8
 352     *      SURFACE_STATE           BC*, ETC*, EAC*    4           4
 353     *                              FXT1               8           4
 354     *                              all others         (set by SURFACE_STATE)"
 355     *
 356     * From the Ivy Bridge PRM, volume 4 part 1, page 63:
 357     *
 358     *     "- This field (Surface Vertical Aligment) is intended to be set to
 359     *        VALIGN_4 if the surface was rendered as a depth buffer, for a
 360     *        multisampled (4x) render target, or for a multisampled (8x)
 361     *        render target, since these surfaces support only alignment of 4.
 362     *      - Use of VALIGN_4 for other surfaces is supported, but uses more
 363     *        memory.
 364     *      - This field must be set to VALIGN_4 for all tiled Y Render Target
 365     *        surfaces.
 366     *      - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
 367     *        YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
 368     *      - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
 369     *        must be set to VALIGN_4."
 370     *      - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
 371     *
 372     *     "- This field (Surface Horizontal Aligment) is intended to be set to
 373     *        HALIGN_8 only if the surface was rendered as a depth buffer with
 374     *        Z16 format or a stencil buffer, since these surfaces support only
 375     *        alignment of 8.
 376     *      - Use of HALIGN_8 for other surfaces is supported, but uses more
 377     *        memory.
 378     *      - This field must be set to HALIGN_4 if the Surface Format is BC*.
 379     *      - This field must be set to HALIGN_8 if the Surface Format is
 380     *        FXT1."
 381     *
 382     * They can be rephrased as
 383     *
 384     *                                  align_i        align_j
 385     *  compressed formats              block width    block height
 386     *  PIPE_FORMAT_Z16_UNORM           8              4
 387     *  PIPE_FORMAT_S8_UINT             8              8
 388     *  other depth/stencil formats     4              4
 389     *  2x or 4x multisampled           4 or 8         4
 390     *  tiled Y                         4 or 8         4 (if rt)
 391     *  PIPE_FORMAT_R32G32B32_FLOAT     4 or 8         2
 392     *  others                          4 or 8         2 or 4
 393     */
 394
 395    if (params->compressed) {
 396       /* this happens to be the case */
 397       img->align_i = img->block_width;
 398       img->align_j = img->block_height;
 399    } else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
 400       if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) {
 401          switch (img->format) {
 402          case PIPE_FORMAT_Z16_UNORM:
 403             img->align_i = 8;
 404             img->align_j = 4;
 405             break;
 406          case PIPE_FORMAT_S8_UINT:
 407             img->align_i = 8;
 408             img->align_j = 8;
 409             break;
 410          default:
 411             img->align_i = 4;
 412             img->align_j = 4;
 413             break;
 414          }
 415       } else {
 416          switch (img->format) {
 417          case PIPE_FORMAT_S8_UINT:
 418             img->align_i = 4;
 419             img->align_j = 2;
 420             break;
 421          default:
 422             img->align_i = 4;
 423             img->align_j = 4;
 424             break;
 425          }
 426       }
 427    } else {
 428       const bool valign_4 =
 429          (templ->nr_samples > 1) ||
 430          (ilo_dev_gen(params->dev) >= ILO_GEN(8)) ||
 431          (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
 432           img->tiling == GEN6_TILING_Y &&
 433           (templ->bind & PIPE_BIND_RENDER_TARGET));
 434
 435       if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
 436           ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && valign_4)
 437          assert(img->format != PIPE_FORMAT_R32G32B32_FLOAT);
 438
 439       img->align_i = 4;
 440       img->align_j = (valign_4) ? 4 : 2;
 441    }
 442
 443    /*
 444     * the fact that align i and j are multiples of block width and height
 445     * respectively is what makes the size of the bo a multiple of the block
 446     * size, slices start at block boundaries, and many of the computations
 447     * work.
 448     */
 449    assert(img->align_i % img->block_width == 0);
 450    assert(img->align_j % img->block_height == 0);
 451
 452    /* make sure align() works */
 453    assert(util_is_power_of_two(img->align_i) &&
 454           util_is_power_of_two(img->align_j));
 455    assert(util_is_power_of_two(img->block_width) &&
 456           util_is_power_of_two(img->block_height));
 457 }
 458
 459 static void
 460 img_init_tiling(struct ilo_image *img,
 461                 const struct ilo_image_params *params)
 462 {
 463    const struct pipe_resource *templ = params->templ;
 464    unsigned preferred_tilings = params->valid_tilings;
 465
 466    /* no fencing nor BLT support */
 467    if (preferred_tilings & ~IMAGE_TILING_W)
 468       preferred_tilings &= ~IMAGE_TILING_W;
 469
 470    if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
 471       /*
 472        * heuristically set a minimum width/height for enabling tiling
 473        */
 474       if (img->width0 < 64 && (preferred_tilings & ~IMAGE_TILING_X))
 475          preferred_tilings &= ~IMAGE_TILING_X;
 476
 477       if ((img->width0 < 32 || img->height0 < 16) &&
 478           (img->width0 < 16 || img->height0 < 32) &&
 479           (preferred_tilings & ~IMAGE_TILING_Y))
 480          preferred_tilings &= ~IMAGE_TILING_Y;
 481    } else {
 482       /* force linear if we are not sure where the texture is bound to */
 483       if (preferred_tilings & IMAGE_TILING_NONE)
 484          preferred_tilings &= IMAGE_TILING_NONE;
 485    }
 486
 487    /* prefer tiled over linear */
 488    if (preferred_tilings & IMAGE_TILING_Y)
 489       img->tiling = GEN6_TILING_Y;
 490    else if (preferred_tilings & IMAGE_TILING_X)
 491       img->tiling = GEN6_TILING_X;
 492    else if (preferred_tilings & IMAGE_TILING_W)
 493       img->tiling = GEN8_TILING_W;
 494    else
 495       img->tiling = GEN6_TILING_NONE;
 496 }
 497
 498 static void
 499 img_init_walk_gen7(struct ilo_image *img,
 500                    const struct ilo_image_params *params)
 501 {
 502    const struct pipe_resource *templ = params->templ;
 503
 504    /*
 505     * It is not explicitly states, but render targets are expected to be
 506     * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
 507     * to be IMS (samples interleaved).
 508     *
 509     * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
 510     */
 511    if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
 512       /*
 513        * From the Ivy Bridge PRM, volume 1 part 1, page 111:
 514        *
 515        *     "note that the depth buffer and stencil buffer have an implied
 516        *      value of ARYSPC_FULL"
 517        */
 518       img->walk = (templ->target == PIPE_TEXTURE_3D) ?
 519          ILO_IMAGE_WALK_3D : ILO_IMAGE_WALK_LAYER;
 520
 521       img->interleaved_samples = true;
 522    } else {
 523       /*
 524        * From the Ivy Bridge PRM, volume 4 part 1, page 66:
 525        *
 526        *     "If Multisampled Surface Storage Format is MSFMT_MSS and Number
 527        *      of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
 528        *      Array Spacing) must be set to ARYSPC_LOD0."
 529        *
 530        * As multisampled resources are not mipmapped, we never use
 531        * ARYSPC_FULL for them.
 532        */
 533       if (templ->nr_samples > 1)
 534          assert(templ->last_level == 0);
 535
 536       img->walk =
 537          (templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
 538          (templ->last_level > 0) ? ILO_IMAGE_WALK_LAYER :
 539          ILO_IMAGE_WALK_LOD;
 540
 541       img->interleaved_samples = false;
 542    }
 543 }
 544
 545 static void
 546 img_init_walk_gen6(struct ilo_image *img,
 547                    const struct ilo_image_params *params)
 548 {
 549    /*
 550     * From the Sandy Bridge PRM, volume 1 part 1, page 115:
 551     *
 552     *     "The separate stencil buffer does not support mip mapping, thus the
 553     *      storage for LODs other than LOD 0 is not needed. The following
 554     *      QPitch equation applies only to the separate stencil buffer:
 555     *
 556     *        QPitch = h_0"
 557     *
 558     * GEN6 does not support compact spacing otherwise.
 559     */
 560    img->walk =
 561       (params->templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
 562       (img->format == PIPE_FORMAT_S8_UINT) ? ILO_IMAGE_WALK_LOD :
 563       ILO_IMAGE_WALK_LAYER;
 564
 565    /* GEN6 supports only interleaved samples */
 566    img->interleaved_samples = true;
 567 }
 568
 569 static void
 570 img_init_walk(struct ilo_image *img,
 571               const struct ilo_image_params *params)
 572 {
 573    if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
 574       img_init_walk_gen7(img, params);
 575    else
 576       img_init_walk_gen6(img, params);
 577 }
 578
 579 static unsigned
 580 img_get_valid_tilings(const struct ilo_image *img,
 581                       const struct ilo_image_params *params)
 582 {
 583    const struct pipe_resource *templ = params->templ;
 584    const enum pipe_format format = img->format;
 585    unsigned valid_tilings = params->valid_tilings;
 586
 587    /*
 588     * From the Sandy Bridge PRM, volume 1 part 2, page 32:
 589     *
 590     *     "Display/Overlay   Y-Major not supported.
 591     *                        X-Major required for Async Flips"
 592     */
 593    if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
 594       valid_tilings &= IMAGE_TILING_X;
 595
 596    /*
 597     * From the Sandy Bridge PRM, volume 3 part 2, page 158:
 598     *
 599     *     "The cursor surface address must be 4K byte aligned. The cursor must
 600     *      be in linear memory, it cannot be tiled."
 601     */
 602    if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)))
 603       valid_tilings &= IMAGE_TILING_NONE;
 604
 605    /*
 606     * From the Sandy Bridge PRM, volume 2 part 1, page 318:
 607     *
 608     *     "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
 609     *      Depth Buffer is not supported."
 610     *
 611     *     "The Depth Buffer, if tiled, must use Y-Major tiling."
 612     *
 613     * From the Sandy Bridge PRM, volume 1 part 2, page 22:
 614     *
 615     *     "W-Major Tile Format is used for separate stencil."
 616     */
 617    if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
 618       switch (format) {
 619       case PIPE_FORMAT_S8_UINT:
 620          valid_tilings &= IMAGE_TILING_W;
 621          break;
 622       default:
 623          valid_tilings &= IMAGE_TILING_Y;
 624          break;
 625       }
 626    }
 627
 628    if (templ->bind & PIPE_BIND_RENDER_TARGET) {
 629       /*
 630        * From the Sandy Bridge PRM, volume 1 part 2, page 32:
 631        *
 632        *     "NOTE: 128BPE Format Color buffer ( render target ) MUST be
 633        *      either TileX or Linear."
 634        *
 635        * From the Haswell PRM, volume 5, page 32:
 636        *
 637        *     "NOTE: 128 BPP format color buffer (render target) supports
 638        *      Linear, TiledX and TiledY."
 639        */
 640       if (ilo_dev_gen(params->dev) < ILO_GEN(7.5) && img->block_size == 16)
 641          valid_tilings &= ~IMAGE_TILING_Y;
 642
 643       /*
 644        * From the Ivy Bridge PRM, volume 4 part 1, page 63:
 645        *
 646        *     "This field (Surface Vertical Aligment) must be set to VALIGN_4
 647        *      for all tiled Y Render Target surfaces."
 648        *
 649        *     "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
 650        */
 651       if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
 652           ilo_dev_gen(params->dev) <= ILO_GEN(7.5) &&
 653           img->format == PIPE_FORMAT_R32G32B32_FLOAT)
 654          valid_tilings &= ~IMAGE_TILING_Y;
 655
 656       valid_tilings &= ~IMAGE_TILING_W;
 657    }
 658
 659    if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
 660       if (ilo_dev_gen(params->dev) < ILO_GEN(8))
 661          valid_tilings &= ~IMAGE_TILING_W;
 662    }
 663
 664    /* no conflicting binding flags */
 665    assert(valid_tilings);
 666
 667    return valid_tilings;
 668 }
 669
 670 static void
 671 img_init_size_and_format(struct ilo_image *img,
 672                          struct ilo_image_params *params)
 673 {
 674    const struct pipe_resource *templ = params->templ;
 675    enum pipe_format format = templ->format;
 676    bool require_separate_stencil = false;
 677
 678    img->target = templ->target;
 679    img->width0 = templ->width0;
 680    img->height0 = templ->height0;
 681    img->depth0 = templ->depth0;
 682    img->array_size = templ->array_size;
 683    img->level_count = templ->last_level + 1;
 684    img->sample_count = (templ->nr_samples) ? templ->nr_samples : 1;
 685
 686    /*
 687     * From the Sandy Bridge PRM, volume 2 part 1, page 317:
 688     *
 689     *     "This field (Separate Stencil Buffer Enable) must be set to the same
 690     *      value (enabled or disabled) as Hierarchical Depth Buffer Enable."
 691     *
 692     * GEN7+ requires separate stencil buffers.
 693     */
 694    if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
 695       if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
 696          require_separate_stencil = true;
 697       else
 698          require_separate_stencil = (img->aux.type == ILO_IMAGE_AUX_HIZ);
 699    }
 700
 701    switch (format) {
 702    case PIPE_FORMAT_ETC1_RGB8:
 703       format = PIPE_FORMAT_R8G8B8X8_UNORM;
 704       break;
 705    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
 706       if (require_separate_stencil) {
 707          format = PIPE_FORMAT_Z24X8_UNORM;
 708          img->separate_stencil = true;
 709       }
 710       break;
 711    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
 712       if (require_separate_stencil) {
 713          format = PIPE_FORMAT_Z32_FLOAT;
 714          img->separate_stencil = true;
 715       }
 716       break;
 717    default:
 718       break;
 719    }
 720
 721    img->format = format;
 722    img->block_width = util_format_get_blockwidth(format);
 723    img->block_height = util_format_get_blockheight(format);
 724    img->block_size = util_format_get_blocksize(format);
 725
 726    params->valid_tilings = img_get_valid_tilings(img, params);
 727    params->compressed = util_format_is_compressed(img->format);
 728 }
 729
 730 static bool
 731 img_want_mcs(const struct ilo_image *img,
 732              const struct ilo_image_params *params)
 733 {
 734    const struct pipe_resource *templ = params->templ;
 735    bool want_mcs = false;
 736
 737    /* MCS is for RT on GEN7+ */
 738    if (ilo_dev_gen(params->dev) < ILO_GEN(7))
 739       return false;
 740
 741    if (templ->target != PIPE_TEXTURE_2D ||
 742        !(templ->bind & PIPE_BIND_RENDER_TARGET))
 743       return false;
 744
 745    /*
 746     * From the Ivy Bridge PRM, volume 4 part 1, page 77:
 747     *
 748     *     "For Render Target and Sampling Engine Surfaces:If the surface is
 749     *      multisampled (Number of Multisamples any value other than
 750     *      MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
 751     *
 752     *     "This field must be set to 0 for all SINT MSRTs when all RT channels
 753     *      are not written"
 754     */
 755    if (templ->nr_samples > 1 && !util_format_is_pure_sint(templ->format)) {
 756       want_mcs = true;
 757    } else if (templ->nr_samples <= 1) {
 758       /*
 759        * From the Ivy Bridge PRM, volume 2 part 1, page 326:
 760        *
 761        *     "When MCS is buffer is used for color clear of non-multisampler
 762        *      render target, the following restrictions apply.
 763        *      - Support is limited to tiled render targets.
 764        *      - Support is for non-mip-mapped and non-array surface types
 765        *        only.
 766        *      - Clear is supported only on the full RT; i.e., no partial clear
 767        *        or overlapping clears.
 768        *      - MCS buffer for non-MSRT is supported only for RT formats
 769        *        32bpp, 64bpp and 128bpp.
 770        *      ..."
 771        */
 772       if (img->tiling != GEN6_TILING_NONE &&
 773           templ->last_level == 0 && templ->array_size == 1) {
 774          switch (img->block_size) {
 775          case 4:
 776          case 8:
 777          case 16:
 778             want_mcs = true;
 779             break;
 780          default:
 781             break;
 782          }
 783       }
 784    }
 785
 786    return want_mcs;
 787 }
 788
 789 static bool
 790 img_want_hiz(const struct ilo_image *img,
 791              const struct ilo_image_params *params)
 792 {
 793    const struct pipe_resource *templ = params->templ;
 794    const struct util_format_description *desc =
 795       util_format_description(templ->format);
 796
 797    if (ilo_debug & ILO_DEBUG_NOHIZ)
 798       return false;
 799
 800    /* we want 8x4 aligned levels */
 801    if (templ->target == PIPE_TEXTURE_1D)
 802       return false;
 803
 804    if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL))
 805       return false;
 806
 807    if (!util_format_has_depth(desc))
 808       return false;
 809
 810    /* no point in having HiZ */
 811    if (templ->usage == PIPE_USAGE_STAGING)
 812       return false;
 813
 814    /*
 815     * As can be seen in img_calculate_hiz_size(), HiZ may not be enabled
 816     * for every level.  This is generally fine except on GEN6, where HiZ and
 817     * separate stencil are enabled and disabled at the same time.  When the
 818     * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
 819     * can result in incompatible formats.
 820     */
 821    if (ilo_dev_gen(params->dev) == ILO_GEN(6) &&
 822        templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
 823        templ->last_level)
 824       return false;
 825
 826    return true;
 827 }
 828
 829 static void
 830 img_init_aux(struct ilo_image *img,
 831              const struct ilo_image_params *params)
 832 {
 833    if (img_want_hiz(img, params))
 834       img->aux.type = ILO_IMAGE_AUX_HIZ;
 835    else if (img_want_mcs(img, params))
 836       img->aux.type = ILO_IMAGE_AUX_MCS;
 837 }
 838
 839 static void
 840 img_align(struct ilo_image *img, struct ilo_image_params *params)
 841 {
 842    const struct pipe_resource *templ = params->templ;
 843    int align_w = 1, align_h = 1, pad_h = 0;
 844
 845    /*
 846     * From the Sandy Bridge PRM, volume 1 part 1, page 118:
 847     *
 848     *     "To determine the necessary padding on the bottom and right side of
 849     *      the surface, refer to the table in Section 7.18.3.4 for the i and j
 850     *      parameters for the surface format in use. The surface must then be
 851     *      extended to the next multiple of the alignment unit size in each
 852     *      dimension, and all texels contained in this extended surface must
 853     *      have valid GTT entries."
 854     *
 855     *     "For cube surfaces, an additional two rows of padding are required
 856     *      at the bottom of the surface. This must be ensured regardless of
 857     *      whether the surface is stored tiled or linear.  This is due to the
 858     *      potential rotation of cache line orientation from memory to cache."
 859     *
 860     *     "For compressed textures (BC* and FXT1 surface formats), padding at
 861     *      the bottom of the surface is to an even compressed row, which is
 862     *      equal to a multiple of 8 uncompressed texel rows. Thus, for padding
 863     *      purposes, these surfaces behave as if j = 8 only for surface
 864     *      padding purposes. The value of 4 for j still applies for mip level
 865     *      alignment and QPitch calculation."
 866     */
 867    if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
 868       align_w = MAX2(align_w, img->align_i);
 869       align_h = MAX2(align_h, img->align_j);
 870
 871       if (templ->target == PIPE_TEXTURE_CUBE)
 872          pad_h += 2;
 873
 874       if (params->compressed)
 875          align_h = MAX2(align_h, img->align_j * 2);
 876    }
 877
 878    /*
 879     * From the Sandy Bridge PRM, volume 1 part 1, page 118:
 880     *
 881     *     "If the surface contains an odd number of rows of data, a final row
 882     *      below the surface must be allocated."
 883     */
 884    if (templ->bind & PIPE_BIND_RENDER_TARGET)
 885       align_h = MAX2(align_h, 2);
 886
 887    /*
 888     * Depth Buffer Clear/Resolve works in 8x4 sample blocks.  Pad to allow HiZ
 889     * for unaligned non-mipmapped and non-array images.
 890     */
 891    if (img->aux.type == ILO_IMAGE_AUX_HIZ &&
 892        templ->last_level == 0 &&
 893        templ->array_size == 1 &&
 894        templ->depth0 == 1) {
 895       align_w = MAX2(align_w, 8);
 896       align_h = MAX2(align_h, 4);
 897    }
 898
 899    params->max_x = align(params->max_x, align_w);
 900    params->max_y = align(params->max_y + pad_h, align_h);
 901 }
 902
 903 /* note that this may force the texture to be linear */
 904 static void
 905 img_calculate_bo_size(struct ilo_image *img,
 906                       const struct ilo_image_params *params)
 907 {
 908    assert(params->max_x % img->block_width == 0);
 909    assert(params->max_y % img->block_height == 0);
 910    assert(img->walk_layer_height % img->block_height == 0);
 911
 912    img->bo_stride =
 913       (params->max_x / img->block_width) * img->block_size;
 914    img->bo_height = params->max_y / img->block_height;
 915
 916    while (true) {
 917       unsigned w = img->bo_stride, h = img->bo_height;
 918       unsigned align_w, align_h;
 919
 920       /*
 921        * From the Haswell PRM, volume 5, page 163:
 922        *
 923        *     "For linear surfaces, additional padding of 64 bytes is required
 924        *      at the bottom of the surface. This is in addition to the padding
 925        *      required above."
 926        */
 927       if (ilo_dev_gen(params->dev) >= ILO_GEN(7.5) &&
 928           (params->templ->bind & PIPE_BIND_SAMPLER_VIEW) &&
 929           img->tiling == GEN6_TILING_NONE)
 930          h += (64 + img->bo_stride - 1) / img->bo_stride;
 931
 932       /*
 933        * From the Sandy Bridge PRM, volume 4 part 1, page 81:
 934        *
 935        *     "- For linear render target surfaces, the pitch must be a
 936        *        multiple of the element size for non-YUV surface formats.
 937        *        Pitch must be a multiple of 2 * element size for YUV surface
 938        *        formats.
 939        *      - For other linear surfaces, the pitch can be any multiple of
 940        *        bytes.
 941        *      - For tiled surfaces, the pitch must be a multiple of the tile
 942        *        width."
 943        *
 944        * Different requirements may exist when the bo is used in different
 945        * places, but our alignments here should be good enough that we do not
 946        * need to check params->templ->bind.
 947        */
 948       switch (img->tiling) {
 949       case GEN6_TILING_X:
 950          align_w = 512;
 951          align_h = 8;
 952          break;
 953       case GEN6_TILING_Y:
 954          align_w = 128;
 955          align_h = 32;
 956          break;
 957       case GEN8_TILING_W:
 958          /*
 959           * From the Sandy Bridge PRM, volume 1 part 2, page 22:
 960           *
 961           *     "A 4KB tile is subdivided into 8-high by 8-wide array of
 962           *      Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
 963           *      bytes."
 964           */
 965          align_w = 64;
 966          align_h = 64;
 967          break;
 968       default:
 969          assert(img->tiling == GEN6_TILING_NONE);
 970          /* some good enough values */
 971          align_w = 64;
 972          align_h = 2;
 973          break;
 974       }
 975
 976       w = align(w, align_w);
 977       h = align(h, align_h);
 978
 979       /* make sure the bo is mappable */
 980       if (img->tiling != GEN6_TILING_NONE) {
 981          /*
 982           * Usually only the first 256MB of the GTT is mappable.
 983           *
 984           * See also how intel_context::max_gtt_map_object_size is calculated.
 985           */
 986          const size_t mappable_gtt_size = 256 * 1024 * 1024;
 987
 988          /*
 989           * Be conservative.  We may be able to switch from VALIGN_4 to
 990           * VALIGN_2 if the image was Y-tiled, but let's keep it simple.
 991           */
 992          if (mappable_gtt_size / w / 4 < h) {
 993             if (params->valid_tilings & IMAGE_TILING_NONE) {
 994                img->tiling = GEN6_TILING_NONE;
 995                /* MCS support for non-MSRTs is limited to tiled RTs */
 996                if (img->aux.type == ILO_IMAGE_AUX_MCS &&
 997                    params->templ->nr_samples <= 1)
 998                   img->aux.type = ILO_IMAGE_AUX_NONE;
 999
1000                continue;
1001             } else {
1002                ilo_warn("cannot force texture to be linear\n");
1003             }
1004          }
1005       }
1006
1007       img->bo_stride = w;
1008       img->bo_height = h;
1009       break;
1010    }
1011 }
1012
/**
 * Compute the layout of the HiZ auxiliary buffer.
 *
 * On return, img->aux.bo_stride and img->aux.bo_height hold the Y-tile
 * aligned size, img->aux.enables has one bit set per level on which HiZ may
 * be used, and either img->aux.walk_layer_height (layer walk) or
 * img->aux.walk_lod_offsets[] (LOD walk) describes where the data lives.
 *
 * Must only be called when img->aux.type is ILO_IMAGE_AUX_HIZ.
 */
static void
img_calculate_hiz_size(struct ilo_image *img,
                       const struct ilo_image_params *params)
{
   const struct pipe_resource *templ = params->templ;
   /* vertical alignment applied to every slice height below */
   const unsigned hz_align_j = 8;
   enum ilo_image_walk_type hz_walk;
   unsigned hz_width, hz_height, lv;
   unsigned hz_clear_w, hz_clear_h;

   assert(img->aux.type == ILO_IMAGE_AUX_HIZ);

   assert(img->walk == ILO_IMAGE_WALK_LAYER ||
          img->walk == ILO_IMAGE_WALK_3D);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 312:
    *
    *     "The hierarchical depth buffer does not support the LOD field, it is
    *      assumed by hardware to be zero. A separate hierarchical depth
    *      buffer is required for each LOD used, and the corresponding
    *      buffer's state delivered to hardware each time a new depth buffer
    *      state with modified LOD is delivered."
    *
    * We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD.
    *
    * On GEN7+ the HiZ buffer simply follows the walk type of the image.
    */
   if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
      hz_walk = img->walk;
   else
      hz_walk = ILO_IMAGE_WALK_LOD;

   /*
    * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
    * PRM, volume 2 part 1, page 312-313.
    *
    * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
    * memory row.  That packing is why the heights below are halved.
    */
   switch (hz_walk) {
   case ILO_IMAGE_WALK_LAYER:
      {
         /* per-layer pitch: the two aligned heights plus a fixed tail */
         const unsigned h0 = align(params->h0, hz_align_j);
         const unsigned h1 = align(params->h1, hz_align_j);
         const unsigned htail =
            ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
         const unsigned hz_qpitch = h0 + h1 + htail;

         hz_width = align(img->lods[0].slice_width, 16);

         hz_height = hz_qpitch * templ->array_size / 2;
         if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
            hz_height = align(hz_height, 8);

         img->aux.walk_layer_height = hz_qpitch;
      }
      break;
   case ILO_IMAGE_WALK_LOD:
      {
         /* position of each LOD inside the bo, counted in Y-tiles */
         unsigned lod_tx[PIPE_MAX_TEXTURE_LEVELS];
         unsigned lod_ty[PIPE_MAX_TEXTURE_LEVELS];
         unsigned cur_tx, cur_ty;

         /* figure out the tile offsets of LODs */
         hz_width = 0;
         hz_height = 0;
         cur_tx = 0;
         cur_ty = 0;
         for (lv = 0; lv <= templ->last_level; lv++) {
            unsigned tw, th;

            lod_tx[lv] = cur_tx;
            lod_ty[lv] = cur_ty;

            tw = align(img->lods[lv].slice_width, 16);
            th = align(img->lods[lv].slice_height, hz_align_j) *
               templ->array_size / 2;
            /* convert to Y-tiles */
            tw = align(tw, 128) / 128;
            th = align(th, 32) / 32;

            /* grow the bounding box (still in tiles) to cover this LOD */
            if (hz_width < cur_tx + tw)
               hz_width = cur_tx + tw;
            if (hz_height < cur_ty + th)
               hz_height = cur_ty + th;

            /*
             * LOD 1 advances the cursor horizontally; every other LOD
             * advances it vertically.
             */
            if (lv == 1)
               cur_tx += tw;
            else
               cur_ty += th;
         }

         /*
          * convert tile offsets to memory offsets; hz_width is still the
          * width in tiles here, and each tile counts for 4096
          */
         for (lv = 0; lv <= templ->last_level; lv++) {
            img->aux.walk_lod_offsets[lv] =
               (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
         }
         /* leave tile units: a tile is 128 wide by 32 high */
         hz_width *= 128;
         hz_height *= 32;
      }
      break;
   case ILO_IMAGE_WALK_3D:
      hz_width = align(img->lods[0].slice_width, 16);

      hz_height = 0;
      for (lv = 0; lv <= templ->last_level; lv++) {
         const unsigned h = align(img->lods[lv].slice_height, hz_align_j);
         /* according to the formula, slices are packed together vertically */
         hz_height += h * u_minify(templ->depth0, lv);
      }
      hz_height /= 2;
      break;
   default:
      assert(!"unknown HiZ walk");
      hz_width = 0;
      hz_height = 0;
      break;
   }

   /*
    * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
    * Experiments on Haswell show that aligning the RECTLIST primitive and
    * 3DSTATE_DRAWING_RECTANGLE alone are not enough.  The LOD sizes must be
    * aligned.
    *
    * The 8x4 block is counted in samples, so the required pixel alignment
    * shrinks as the sample count grows.
    */
   hz_clear_w = 8;
   hz_clear_h = 4;
   switch (templ->nr_samples) {
   case 0:
   case 1:
   default:
      break;
   case 2:
      hz_clear_w /= 2;
      break;
   case 4:
      hz_clear_w /= 2;
      hz_clear_h /= 2;
      break;
   case 8:
      hz_clear_w /= 4;
      hz_clear_h /= 2;
      break;
   case 16:
      hz_clear_w /= 4;
      hz_clear_h /= 4;
      break;
   }

   /*
    * Enable HiZ level by level, stopping at the first level whose size is
    * not a multiple of the clear block; later levels stay disabled.
    */
   for (lv = 0; lv <= templ->last_level; lv++) {
      if (u_minify(img->width0, lv) % hz_clear_w ||
          u_minify(img->height0, lv) % hz_clear_h)
         break;
      img->aux.enables |= 1 << lv;
   }

   /* we padded to allow this in img_align() */
   if (templ->last_level == 0 && templ->array_size == 1 && templ->depth0 == 1)
      img->aux.enables |= 0x1;

   /* align to Y-tile */
   img->aux.bo_stride = align(hz_width, 128);
   img->aux.bo_height = align(hz_height, 32);
}
1176
1177 static void
1178 img_calculate_mcs_size(struct ilo_image *img,
1179                        const struct ilo_image_params *params)
1180 {
1181    const struct pipe_resource *templ = params->templ;
1182    int mcs_width, mcs_height, mcs_cpp;
1183    int downscale_x, downscale_y;
1184
1185    assert(img->aux.type == ILO_IMAGE_AUX_MCS);
1186
1187    if (templ->nr_samples > 1) {
1188       /*
1189        * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1190        * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA.  The
1191        * need of scale down could be that the clear rectangle is used to clear
1192        * the MCS instead of the RT.
1193        *
1194        * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT.  The
1195        * 2x2 factor could come from that the hardware writes 128 bits (an
1196        * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1197        * the RT.  For 4X MSAA, we need 8 bits in MCS for every pixel in the
1198        * RT.  Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1199        * pixel block in the RT.
1200        */
1201       switch (templ->nr_samples) {
1202       case 2:
1203       case 4:
1204          downscale_x = 8;
1205          downscale_y = 2;
1206          mcs_cpp = 1;
1207          break;
1208       case 8:
1209          downscale_x = 2;
1210          downscale_y = 2;
1211          mcs_cpp = 4;
1212          break;
1213       case 16:
1214          downscale_x = 2;
1215          downscale_y = 1;
1216          mcs_cpp = 8;
1217          break;
1218       default:
1219          assert(!"unsupported sample count");
1220          return;
1221          break;
1222       }
1223
1224       /*
1225        * It also appears that the 2x2 subspans generated by the scaled-down
1226        * clear rectangle cannot be masked.  The scale-down clear rectangle
1227        * thus must be aligned to 2x2, and we need to pad.
1228        */
1229       mcs_width = align(img->width0, downscale_x * 2);
1230       mcs_height = align(img->height0, downscale_y * 2);
1231    } else {
1232       /*
1233        * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1234        *
1235        *     "              Pixels  Lines
1236        *      TiledY RT CL
1237        *          bpp
1238        *          32          8        4
1239        *          64          4        4
1240        *          128         2        4
1241        *
1242        *      TiledX RT CL
1243        *          bpp
1244        *          32          16       2
1245        *          64          8        2
1246        *          128         4        2"
1247        *
1248        * This table and the two following tables define the RT alignments, the
1249        * clear rectangle alignments, and the clear rectangle scale factors.
1250        * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1251        * that the clear rectangle alignments are 16x32 blocks, and the clear
1252        * rectangle scale factors are 8x16 blocks.
1253        *
1254        * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1255        * RT.  Similar to the MSAA cases, we can argue that an OWord maps to
1256        * 8x16 blocks.
1257        *
1258        * One problem with this reasoning is that a Y-tile in MCS has 8x32
1259        * OWords and maps to 64x512 128-byte blocks.  This differs from i965,
1260        * which says that a Y-tile maps to 128x256 blocks (\see
1261        * intel_get_non_msrt_mcs_alignment).  It does not really change
1262        * anything except for the size of the allocated MCS.  Let's see if we
1263        * hit out-of-bound access.
1264        */
1265       switch (img->tiling) {
1266       case GEN6_TILING_X:
1267          downscale_x = 64 / img->block_size;
1268          downscale_y = 2;
1269          break;
1270       case GEN6_TILING_Y:
1271          downscale_x = 32 / img->block_size;
1272          downscale_y = 4;
1273          break;
1274       default:
1275          assert(!"unsupported tiling mode");
1276          return;
1277          break;
1278       }
1279
1280       downscale_x *= 8;
1281       downscale_y *= 16;
1282
1283       /*
1284        * From the Haswell PRM, volume 7, page 652:
1285        *
1286        *     "Clear rectangle must be aligned to two times the number of
1287        *      pixels in the table shown below due to 16X16 hashing across the
1288        *      slice."
1289        *
1290        * The scaled-down clear rectangle must be aligned to 4x4 instead of
1291        * 2x2, and we need to pad.
1292        */
1293       mcs_width = align(img->width0, downscale_x * 4) / downscale_x;
1294       mcs_height = align(img->height0, downscale_y * 4) / downscale_y;
1295       mcs_cpp = 16; /* an OWord */
1296    }
1297
1298    img->aux.enables = (1 << (templ->last_level + 1)) - 1;
1299    /* align to Y-tile */
1300    img->aux.bo_stride = align(mcs_width * mcs_cpp, 128);
1301    img->aux.bo_height = align(mcs_height, 32);
1302 }
1303
1304 static void
1305 img_init(struct ilo_image *img,
1306          struct ilo_image_params *params)
1307 {
1308    /* there are hard dependencies between every function here */
1309
1310    img_init_aux(img, params);
1311    img_init_size_and_format(img, params);
1312    img_init_walk(img, params);
1313    img_init_tiling(img, params);
1314    img_init_alignments(img, params);
1315    img_init_lods(img, params);
1316    img_init_layer_height(img, params);
1317
1318    img_align(img, params);
1319    img_calculate_bo_size(img, params);
1320
1321    img->scanout = (params->templ->bind & PIPE_BIND_SCANOUT);
1322
1323    switch (img->aux.type) {
1324    case ILO_IMAGE_AUX_HIZ:
1325       img_calculate_hiz_size(img, params);
1326       break;
1327    case ILO_IMAGE_AUX_MCS:
1328       img_calculate_mcs_size(img, params);
1329       break;
1330    default:
1331       break;
1332    }
1333 }
1334
1335 /**
1336  * The texutre is for transfer only.  We can define our own layout to save
1337  * space.
1338  */
1339 static void
1340 img_init_for_transfer(struct ilo_image *img,
1341                       const struct ilo_dev *dev,
1342                       const struct pipe_resource *templ)
1343 {
1344    const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ?
1345       templ->depth0 : templ->array_size;
1346    unsigned layer_width, layer_height;
1347
1348    assert(templ->last_level == 0);
1349    assert(templ->nr_samples <= 1);
1350
1351    img->aux.type = ILO_IMAGE_AUX_NONE;
1352
1353    img->target = templ->target;
1354    img->width0 = templ->width0;
1355    img->height0 = templ->height0;
1356    img->depth0 = templ->depth0;
1357    img->array_size = templ->array_size;
1358    img->level_count = 1;
1359    img->sample_count = 1;
1360
1361    img->format = templ->format;
1362    img->block_width = util_format_get_blockwidth(templ->format);
1363    img->block_height = util_format_get_blockheight(templ->format);
1364    img->block_size = util_format_get_blocksize(templ->format);
1365
1366    img->walk = ILO_IMAGE_WALK_LOD;
1367
1368    img->tiling = GEN6_TILING_NONE;
1369
1370    img->align_i = img->block_width;
1371    img->align_j = img->block_height;
1372
1373    assert(util_is_power_of_two(img->block_width) &&
1374           util_is_power_of_two(img->block_height));
1375
1376    /* use packed layout */
1377    layer_width = align(templ->width0, img->align_i);
1378    layer_height = align(templ->height0, img->align_j);
1379
1380    img->lods[0].slice_width = layer_width;
1381    img->lods[0].slice_height = layer_height;
1382
1383    img->bo_stride = (layer_width / img->block_width) * img->block_size;
1384    img->bo_stride = align(img->bo_stride, 64);
1385
1386    img->bo_height = (layer_height / img->block_height) * num_layers;
1387 }
1388
1389 /**
1390  * Initialize the image.  Callers should zero-initialize \p img first.
1391  */
1392 void ilo_image_init(struct ilo_image *img,
1393                     const struct ilo_dev *dev,
1394                     const struct pipe_resource *templ)
1395 {
1396    struct ilo_image_params params;
1397    bool transfer_only;
1398
1399    assert(ilo_is_zeroed(img, sizeof(*img)));
1400
1401    /* use transfer layout when the texture is never bound to GPU */
1402    transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE |
1403                                      PIPE_BIND_TRANSFER_READ));
1404    if (transfer_only && templ->last_level == 0 && templ->nr_samples <= 1) {
1405       img_init_for_transfer(img, dev, templ);
1406       return;
1407    }
1408
1409    memset(&params, 0, sizeof(params));
1410    params.dev = dev;
1411    params.templ = templ;
1412    params.valid_tilings = IMAGE_TILING_ALL;
1413
1414    img_init(img, &params);
1415 }
1416
1417 bool
1418 ilo_image_init_for_imported(struct ilo_image *img,
1419                             const struct ilo_dev *dev,
1420                             const struct pipe_resource *templ,
1421                             enum gen_surface_tiling tiling,
1422                             unsigned bo_stride)
1423 {
1424    struct ilo_image_params params;
1425
1426    assert(ilo_is_zeroed(img, sizeof(*img)));
1427
1428    if ((tiling == GEN6_TILING_X && bo_stride % 512) ||
1429        (tiling == GEN6_TILING_Y && bo_stride % 128) ||
1430        (tiling == GEN8_TILING_W && bo_stride % 64))
1431       return false;
1432
1433    memset(&params, 0, sizeof(params));
1434    params.dev = dev;
1435    params.templ = templ;
1436    params.valid_tilings = 1 << tiling;
1437
1438    img_init(img, &params);
1439
1440    assert(img->tiling == tiling);
1441    if (img->bo_stride > bo_stride)
1442       return false;
1443
1444    img->bo_stride = bo_stride;
1445
1446    /* assume imported RTs are also scanouts */
1447    if (!img->scanout)
1448       img->scanout = (templ->bind & PIPE_BIND_RENDER_TARGET);
1449
1450    return true;
1451 }
1452
1453 bool
1454 ilo_image_disable_aux(struct ilo_image *img, const struct ilo_dev *dev)
1455 {
1456    /* HiZ is required for separate stencil on Gen6 */
1457    if (ilo_dev_gen(dev) == ILO_GEN(6) &&
1458        img->aux.type == ILO_IMAGE_AUX_HIZ &&
1459        img->separate_stencil)
1460       return false;
1461
1462    /* MCS is required for multisample images */
1463    if (img->aux.type == ILO_IMAGE_AUX_MCS &&
1464        img->sample_count > 1)
1465       return false;
1466
1467    img->aux.enables = 0x0;
1468
1469    return true;
1470 }