src/mesa/drivers/dri/i965/brw_tex_layout.c

   1 /*
   2  * Copyright 2006 VMware, Inc.
   3  * Copyright © 2006 Intel Corporation
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining
   6  * a copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sublicense, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the
  14  * next paragraph) shall be included in all copies or substantial
  15  * portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  20  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  21  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  22  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24  */
  25
  26 /**
  27  * \file brw_tex_layout.c
  28  *
  29  * Code to lay out images in a mipmap tree.
  30  *
  31  * \author Keith Whitwell <keithw@vmware.com>
  32  * \author Michel Dänzer <daenzer@vmware.com>
  33  */
  34
  35 #include "intel_mipmap_tree.h"
  36 #include "brw_context.h"
  37 #include "main/macros.h"
  38 #include "main/glformats.h"
  39
  40 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
  41
  42 static unsigned int
  43 intel_horizontal_texture_alignment_unit(struct brw_context *brw,
  44                                         struct intel_mipmap_tree *mt)
  45 {
  46    /**
  47     * From the "Alignment Unit Size" section of various specs, namely:
  48     * - Gen3 Spec: "Memory Data Formats" Volume,         Section 1.20.1.4
  49     * - i965 and G45 PRMs:             Volume 1,         Section 6.17.3.4.
  50     * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
  51     * - BSpec (for Ivybridge and slight variations in separate stencil)
  52     *
  53     * +----------------------------------------------------------------------+
  54     * |                                        | alignment unit width  ("i") |
  55     * | Surface Property                       |-----------------------------|
  56     * |                                        | 915 | 965 | ILK | SNB | IVB |
  57     * +----------------------------------------------------------------------+
  58     * | YUV 4:2:2 format                       |  8  |  4  |  4  |  4  |  4  |
  59     * | BC1-5 compressed format (DXTn/S3TC)    |  4  |  4  |  4  |  4  |  4  |
  60     * | FXT1  compressed format                |  8  |  8  |  8  |  8  |  8  |
  61     * | Depth Buffer (16-bit)                  |  4  |  4  |  4  |  4  |  8  |
  62     * | Depth Buffer (other)                   |  4  |  4  |  4  |  4  |  4  |
  63     * | Separate Stencil Buffer                | N/A | N/A |  8  |  8  |  8  |
  64     * | All Others                             |  4  |  4  |  4  |  4  |  4  |
  65     * +----------------------------------------------------------------------+
  66     *
  67     * On IVB+, non-special cases can be overridden by setting the SURFACE_STATE
  68     * "Surface Horizontal Alignment" field to HALIGN_4 or HALIGN_8.
  69     */
  70     if (_mesa_is_format_compressed(mt->format)) {
  71        /* The hardware alignment requirements for compressed textures
  72         * happen to match the block boundaries.
  73         */
  74       unsigned int i, j;
  75       _mesa_get_format_block_size(mt->format, &i, &j);
  76
  77       /* On Gen9+ we can pick our own alignment for compressed textures but it
  78        * has to be a multiple of the block size. The minimum alignment we can
  79        * pick is 4 so we effectively have to align to 4 times the block
  80        * size
  81        */
  82       if (brw->gen >= 9)
  83          return i * 4;
  84       else
  85          return i;
  86     }
  87
  88    if (mt->format == MESA_FORMAT_S_UINT8)
  89       return 8;
  90
  91    if (brw->gen >= 7 && mt->format == MESA_FORMAT_Z_UNORM16)
  92       return 8;
  93
  94    if (brw->gen == 8 && mt->mcs_mt && mt->num_samples <= 1)
  95       return 16;
  96
  97    return 4;
  98 }
  99
 100 static unsigned int
 101 intel_vertical_texture_alignment_unit(struct brw_context *brw,
 102                                       mesa_format format, bool multisampled)
 103 {
 104    /**
 105     * From the "Alignment Unit Size" section of various specs, namely:
 106     * - Gen3 Spec: "Memory Data Formats" Volume,         Section 1.20.1.4
 107     * - i965 and G45 PRMs:             Volume 1,         Section 6.17.3.4.
 108     * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
 109     * - BSpec (for Ivybridge and slight variations in separate stencil)
 110     *
 111     * +----------------------------------------------------------------------+
 112     * |                                        | alignment unit height ("j") |
 113     * | Surface Property                       |-----------------------------|
 114     * |                                        | 915 | 965 | ILK | SNB | IVB |
 115     * +----------------------------------------------------------------------+
 116     * | BC1-5 compressed format (DXTn/S3TC)    |  4  |  4  |  4  |  4  |  4  |
 117     * | FXT1  compressed format                |  4  |  4  |  4  |  4  |  4  |
 118     * | Depth Buffer                           |  2  |  2  |  2  |  4  |  4  |
 119     * | Separate Stencil Buffer                | N/A | N/A | N/A |  4  |  8  |
 120     * | Multisampled (4x or 8x) render target  | N/A | N/A | N/A |  4  |  4  |
 121     * | All Others                             |  2  |  2  |  2  |  *  |  *  |
 122     * +----------------------------------------------------------------------+
 123     *
 124     * Where "*" means either VALIGN_2 or VALIGN_4 depending on the setting of
 125     * the SURFACE_STATE "Surface Vertical Alignment" field.
 126     */
 127    if (_mesa_is_format_compressed(format))
 128       /* See comment above for the horizontal alignment */
 129       return brw->gen >= 9 ? 16 : 4;
 130
 131    if (format == MESA_FORMAT_S_UINT8)
 132       return brw->gen >= 7 ? 8 : 4;
 133
 134    /* Broadwell only supports VALIGN of 4, 8, and 16.  The BSpec says 4
 135     * should always be used, except for stencil buffers, which should be 8.
 136     */
 137    if (brw->gen >= 8)
 138       return 4;
 139
 140    if (multisampled)
 141       return 4;
 142
 143    GLenum base_format = _mesa_get_format_base_format(format);
 144
 145    if (brw->gen >= 6 &&
 146        (base_format == GL_DEPTH_COMPONENT ||
 147         base_format == GL_DEPTH_STENCIL)) {
 148       return 4;
 149    }
 150
 151    if (brw->gen == 7) {
 152       /* On Gen7, we prefer a vertical alignment of 4 when possible, because
 153        * that allows Y tiled render targets.
 154        *
 155        * From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most
 156        * messages), on p64, under the heading "Surface Vertical Alignment":
 157        *
 158        *     Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL
 159        *     (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY
 160        *     (0x190)
 161        *
 162        *     VALIGN_4 is not supported for surface format R32G32B32_FLOAT.
 163        */
 164       if (base_format == GL_YCBCR_MESA || format == MESA_FORMAT_RGB_FLOAT32)
 165          return 2;
 166
 167       return 4;
 168    }
 169
 170    return 2;
 171 }
 172
 173 static void
 174 gen9_miptree_layout_1d(struct intel_mipmap_tree *mt)
 175 {
 176    unsigned x = 0;
 177    unsigned width = mt->physical_width0;
 178    unsigned depth = mt->physical_depth0; /* number of array layers. */
 179
 180    /* When this layout is used the horizontal alignment is fixed at 64 and the
 181     * hardware ignores the value given in the surface state
 182     */
 183    const unsigned int align_w = 64;
 184
 185    mt->total_height = mt->physical_height0;
 186    mt->total_width = 0;
 187
 188    for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
 189       unsigned img_width;
 190
 191       intel_miptree_set_level_info(mt, level, x, 0, depth);
 192
 193       img_width = ALIGN(width, align_w);
 194
 195       mt->total_width = MAX2(mt->total_width, x + img_width);
 196
 197       x += img_width;
 198
 199       width = minify(width, 1);
 200    }
 201 }
 202
 203 static void
 204 brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
 205 {
 206    unsigned x = 0;
 207    unsigned y = 0;
 208    unsigned width = mt->physical_width0;
 209    unsigned height = mt->physical_height0;
 210    unsigned depth = mt->physical_depth0; /* number of array layers. */
 211    unsigned int bw, bh;
 212
 213    _mesa_get_format_block_size(mt->format, &bw, &bh);
 214
 215    mt->total_width = mt->physical_width0;
 216
 217    if (mt->compressed) {
 218        mt->total_width = ALIGN(mt->physical_width0, mt->align_w);
 219    }
 220
 221    /* May need to adjust width to accommodate the placement of
 222     * the 2nd mipmap.  This occurs when the alignment
 223     * constraints of mipmap placement push the right edge of the
 224     * 2nd mipmap out past the width of its parent.
 225     */
 226    if (mt->first_level != mt->last_level) {
 227        unsigned mip1_width;
 228
 229        if (mt->compressed) {
 230           mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) +
 231              ALIGN(minify(mt->physical_width0, 2), bw);
 232        } else {
 233           mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) +
 234              minify(mt->physical_width0, 2);
 235        }
 236
 237        if (mip1_width > mt->total_width) {
 238            mt->total_width = mip1_width;
 239        }
 240    }
 241
 242    mt->total_height = 0;
 243
 244    for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
 245       unsigned img_height;
 246
 247       intel_miptree_set_level_info(mt, level, x, y, depth);
 248
 249       img_height = ALIGN(height, mt->align_h);
 250       if (mt->compressed)
 251          img_height /= bh;
 252
 253       if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
 254          /* Compact arrays with separated miplevels */
 255          img_height *= depth;
 256       }
 257
 258       /* Because the images are packed better, the final offset
 259        * might not be the maximal one:
 260        */
 261       mt->total_height = MAX2(mt->total_height, y + img_height);
 262
 263       /* Layout_below: step right after second mipmap.
 264        */
 265       if (level == mt->first_level + 1) {
 266          x += ALIGN(width, mt->align_w);
 267       } else {
 268          y += img_height;
 269       }
 270
 271       width  = minify(width, 1);
 272       height = minify(height, 1);
 273
 274       if (mt->target == GL_TEXTURE_3D)
 275          depth = minify(depth, 1);
 276    }
 277 }
 278
 279 unsigned
 280 brw_miptree_get_horizontal_slice_pitch(const struct brw_context *brw,
 281                                        const struct intel_mipmap_tree *mt,
 282                                        unsigned level)
 283 {
 284    assert(brw->gen < 9);
 285
 286    if (mt->target == GL_TEXTURE_3D ||
 287        (brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP)) {
 288       return ALIGN(minify(mt->physical_width0, level), mt->align_w);
 289    } else {
 290       return 0;
 291    }
 292 }
 293
 294 unsigned
 295 brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw,
 296                                      const struct intel_mipmap_tree *mt,
 297                                      unsigned level)
 298 {
 299    if (brw->gen >= 9) {
 300       /* ALL_SLICES_AT_EACH_LOD isn't supported on Gen8+ but this code will
 301        * effectively end up with a packed qpitch anyway whenever
 302        * mt->first_level == mt->last_level.
 303        */
 304       assert(mt->array_layout != ALL_SLICES_AT_EACH_LOD);
 305
 306       /* On Gen9 we can pick whatever qpitch we like as long as it's aligned
 307        * to the vertical alignment so we don't need to add any extra rows.
 308        */
 309       unsigned qpitch = mt->total_height;
 310
 311       /* If the surface might be used as a stencil buffer or HiZ buffer then
 312        * it needs to be a multiple of 8.
 313        */
 314       const GLenum base_format = _mesa_get_format_base_format(mt->format);
 315       if (_mesa_is_depth_or_stencil_format(base_format))
 316          qpitch = ALIGN(qpitch, 8);
 317
 318       /* 3D textures need to be aligned to the tile height. At this point we
 319        * don't know which tiling will be used so let's just align it to 32
 320        */
 321       if (mt->target == GL_TEXTURE_3D)
 322          qpitch = ALIGN(qpitch, 32);
 323
 324       return qpitch;
 325
 326    } else if (mt->target == GL_TEXTURE_3D ||
 327               (brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP) ||
 328               mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
 329       return ALIGN(minify(mt->physical_height0, level), mt->align_h);
 330
 331    } else {
 332       const unsigned h0 = ALIGN(mt->physical_height0, mt->align_h);
 333       const unsigned h1 = ALIGN(minify(mt->physical_height0, 1), mt->align_h);
 334
 335       return h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->align_h;
 336    }
 337 }
 338
 339 static void
 340 align_cube(struct intel_mipmap_tree *mt)
 341 {
 342    /* The 965's sampler lays cachelines out according to how accesses
 343     * in the texture surfaces run, so they may be "vertical" through
 344     * memory.  As a result, the docs say in Surface Padding Requirements:
 345     * Sampling Engine Surfaces that two extra rows of padding are required.
 346     */
 347    if (mt->target == GL_TEXTURE_CUBE_MAP)
 348       mt->total_height += 2;
 349 }
 350
 351 static bool
 352 use_linear_1d_layout(struct brw_context *brw,
 353                      struct intel_mipmap_tree *mt)
 354 {
 355    /* On Gen9+ the mipmap levels of a 1D surface are all laid out in a
 356     * horizontal line. This isn't done for depth/stencil buffers however
 357     * because those will be using a tiled layout
 358     */
 359    if (brw->gen >= 9 &&
 360        (mt->target == GL_TEXTURE_1D ||
 361         mt->target == GL_TEXTURE_1D_ARRAY)) {
 362       GLenum base_format = _mesa_get_format_base_format(mt->format);
 363
 364       if (base_format != GL_DEPTH_COMPONENT &&
 365           base_format != GL_DEPTH_STENCIL &&
 366           base_format != GL_STENCIL_INDEX)
 367          return true;
 368    }
 369
 370    return false;
 371 }
 372
 373 static void
 374 brw_miptree_layout_texture_array(struct brw_context *brw,
 375                                  struct intel_mipmap_tree *mt)
 376 {
 377    unsigned height = mt->physical_height0;
 378    bool layout_1d = use_linear_1d_layout(brw, mt);
 379    int physical_qpitch;
 380
 381    if (layout_1d)
 382       gen9_miptree_layout_1d(mt);
 383    else
 384       brw_miptree_layout_2d(mt);
 385
 386    if (layout_1d) {
 387       physical_qpitch = 1;
 388       /* When using the horizontal layout the qpitch specifies the distance in
 389        * pixels between array slices. The total_width is forced to be a
 390        * multiple of the horizontal alignment in brw_miptree_layout_1d (in
 391        * this case it's always 64). The vertical alignment is ignored.
 392        */
 393       mt->qpitch = mt->total_width;
 394    } else {
 395       mt->qpitch = brw_miptree_get_vertical_slice_pitch(brw, mt, 0);
 396       /* Unlike previous generations the qpitch is a multiple of the
 397        * compressed block size on Gen9 so physical_qpitch matches mt->qpitch.
 398        */
 399       physical_qpitch = (mt->compressed && brw->gen < 9 ? mt->qpitch / 4 :
 400                          mt->qpitch);
 401    }
 402
 403    for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
 404       unsigned img_height;
 405       img_height = ALIGN(height, mt->align_h);
 406       if (mt->compressed)
 407          img_height /= mt->align_h;
 408
 409       for (int q = 0; q < mt->level[level].depth; q++) {
 410          if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
 411             intel_miptree_set_image_offset(mt, level, q, 0, q * img_height);
 412          } else {
 413             intel_miptree_set_image_offset(mt, level, q, 0, q * physical_qpitch);
 414          }
 415       }
 416       height = minify(height, 1);
 417    }
 418    if (mt->array_layout == ALL_LOD_IN_EACH_SLICE)
 419       mt->total_height = physical_qpitch * mt->physical_depth0;
 420
 421    align_cube(mt);
 422 }
 423
 424 static void
 425 brw_miptree_layout_texture_3d(struct brw_context *brw,
 426                               struct intel_mipmap_tree *mt)
 427 {
 428    unsigned yscale = mt->compressed ? 4 : 1;
 429
 430    mt->total_width = 0;
 431    mt->total_height = 0;
 432
 433    unsigned ysum = 0;
 434    for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
 435       unsigned WL = MAX2(mt->physical_width0 >> level, 1);
 436       unsigned HL = MAX2(mt->physical_height0 >> level, 1);
 437       unsigned DL = MAX2(mt->physical_depth0 >> level, 1);
 438       unsigned wL = ALIGN(WL, mt->align_w);
 439       unsigned hL = ALIGN(HL, mt->align_h);
 440
 441       if (mt->target == GL_TEXTURE_CUBE_MAP)
 442          DL = 6;
 443
 444       intel_miptree_set_level_info(mt, level, 0, 0, DL);
 445
 446       for (unsigned q = 0; q < DL; q++) {
 447          unsigned x = (q % (1 << level)) * wL;
 448          unsigned y = ysum + (q >> level) * hL;
 449
 450          intel_miptree_set_image_offset(mt, level, q, x, y / yscale);
 451          mt->total_width = MAX2(mt->total_width, x + wL);
 452          mt->total_height = MAX2(mt->total_height, (y + hL) / yscale);
 453       }
 454
 455       ysum += ALIGN(DL, 1 << level) / (1 << level) * hL;
 456    }
 457
 458    align_cube(mt);
 459 }
 460
 461 void
 462 brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt)
 463 {
 464    bool multisampled = mt->num_samples > 1;
 465    bool gen6_hiz_or_stencil = false;
 466
 467    if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
 468       const GLenum base_format = _mesa_get_format_base_format(mt->format);
 469       gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format);
 470    }
 471
 472    if (gen6_hiz_or_stencil) {
 473       /* On gen6, we use ALL_SLICES_AT_EACH_LOD for stencil/hiz because the
 474        * hardware doesn't support multiple mip levels on stencil/hiz.
 475        *
 476        * PRM Vol 2, Part 1, 7.5.3 Hierarchical Depth Buffer:
 477        * "The hierarchical depth buffer does not support the LOD field"
 478        *
 479        * PRM Vol 2, Part 1, 7.5.4.1 Separate Stencil Buffer:
 480        * "The stencil depth buffer does not support the LOD field"
 481        */
 482       if (mt->format == MESA_FORMAT_S_UINT8) {
 483          /* Stencil uses W tiling, so we force W tiling alignment for the
 484           * ALL_SLICES_AT_EACH_LOD miptree layout.
 485           */
 486          mt->align_w = 64;
 487          mt->align_h = 64;
 488       } else {
 489          /* Depth uses Y tiling, so we force need Y tiling alignment for the
 490           * ALL_SLICES_AT_EACH_LOD miptree layout.
 491           */
 492          mt->align_w = 128 / mt->cpp;
 493          mt->align_h = 32;
 494       }
 495    } else {
 496       mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt);
 497       mt->align_h =
 498          intel_vertical_texture_alignment_unit(brw, mt->format, multisampled);
 499    }
 500
 501    switch (mt->target) {
 502    case GL_TEXTURE_CUBE_MAP:
 503       if (brw->gen == 4) {
 504          /* Gen4 stores cube maps as 3D textures. */
 505          assert(mt->physical_depth0 == 6);
 506          brw_miptree_layout_texture_3d(brw, mt);
 507       } else {
 508          /* All other hardware stores cube maps as 2D arrays. */
 509          brw_miptree_layout_texture_array(brw, mt);
 510       }
 511       break;
 512
 513    case GL_TEXTURE_3D:
 514       if (brw->gen >= 9)
 515          brw_miptree_layout_texture_array(brw, mt);
 516       else
 517          brw_miptree_layout_texture_3d(brw, mt);
 518       break;
 519
 520    case GL_TEXTURE_1D_ARRAY:
 521    case GL_TEXTURE_2D_ARRAY:
 522    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
 523    case GL_TEXTURE_CUBE_MAP_ARRAY:
 524       brw_miptree_layout_texture_array(brw, mt);
 525       break;
 526
 527    default:
 528       switch (mt->msaa_layout) {
 529       case INTEL_MSAA_LAYOUT_UMS:
 530       case INTEL_MSAA_LAYOUT_CMS:
 531          brw_miptree_layout_texture_array(brw, mt);
 532          break;
 533       case INTEL_MSAA_LAYOUT_NONE:
 534       case INTEL_MSAA_LAYOUT_IMS:
 535          if (use_linear_1d_layout(brw, mt))
 536             gen9_miptree_layout_1d(mt);
 537          else
 538             brw_miptree_layout_2d(mt);
 539          break;
 540       }
 541       break;
 542    }
 543    DBG("%s: %dx%dx%d\n", __func__,
 544        mt->total_width, mt->total_height, mt->cpp);
 545
 546    /* On Gen9+ the alignment values are expressed in multiples of the block
 547     * size
 548     */
 549    if (brw->gen >= 9) {
 550       unsigned int i, j;
 551       _mesa_get_format_block_size(mt->format, &i, &j);
 552       mt->align_w /= i;
 553       mt->align_h /= j;
 554    }
 555 }
 556