src/intel/isl/isl_surface_state.c

   1 /*
   2  * Copyright 2016 Intel Corporation
   3  *
   4  *  Permission is hereby granted, free of charge, to any person obtaining a
   5  *  copy of this software and associated documentation files (the "Software"),
   6  *  to deal in the Software without restriction, including without limitation
   7  *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  *  and/or sell copies of the Software, and to permit persons to whom the
   9  *  Software is furnished to do so, subject to the following conditions:
  10  *
  11  *  The above copyright notice and this permission notice (including the next
  12  *  paragraph) shall be included in all copies or substantial portions of the
  13  *  Software.
  14  *
  15  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  *  IN THE SOFTWARE.
  22  */
  23
  24 #include <stdint.h>
  25
  26 #define __gen_address_type uint64_t
  27 #define __gen_user_data void
  28
  29 static uint64_t
  30 __gen_combine_address(__attribute__((unused)) void *data,
  31                       __attribute__((unused)) void *loc, uint64_t addr,
  32                       uint32_t delta)
  33 {
  34    return addr + delta;
  35 }
  36
  37 #include "genxml/gen_macros.h"
  38 #include "genxml/genX_pack.h"
  39
  40 #include "isl_priv.h"
  41
  42 #if GEN_GEN >= 8
  43 static const uint8_t isl_to_gen_halign[] = {
  44     [4] = HALIGN4,
  45     [8] = HALIGN8,
  46     [16] = HALIGN16,
  47 };
  48 #elif GEN_GEN >= 7
  49 static const uint8_t isl_to_gen_halign[] = {
  50     [4] = HALIGN_4,
  51     [8] = HALIGN_8,
  52 };
  53 #endif
  54
  55 #if GEN_GEN >= 8
  56 static const uint8_t isl_to_gen_valign[] = {
  57     [4] = VALIGN4,
  58     [8] = VALIGN8,
  59     [16] = VALIGN16,
  60 };
  61 #elif GEN_GEN >= 6
  62 static const uint8_t isl_to_gen_valign[] = {
  63     [2] = VALIGN_2,
  64     [4] = VALIGN_4,
  65 };
  66 #endif
  67
  68 #if GEN_GEN >= 8
  69 static const uint8_t isl_to_gen_tiling[] = {
  70    [ISL_TILING_LINEAR]  = LINEAR,
  71    [ISL_TILING_X]       = XMAJOR,
  72    [ISL_TILING_Y0]      = YMAJOR,
  73    [ISL_TILING_Yf]      = YMAJOR,
  74    [ISL_TILING_Ys]      = YMAJOR,
  75 #if GEN_GEN <= 11
  76    [ISL_TILING_W]       = WMAJOR,
  77 #endif
  78 };
  79 #endif
  80
  81 #if GEN_GEN >= 7
  82 static const uint32_t isl_to_gen_multisample_layout[] = {
  83    [ISL_MSAA_LAYOUT_NONE]           = MSFMT_MSS,
  84    [ISL_MSAA_LAYOUT_INTERLEAVED]    = MSFMT_DEPTH_STENCIL,
  85    [ISL_MSAA_LAYOUT_ARRAY]          = MSFMT_MSS,
  86 };
  87 #endif
  88
  89 #if GEN_GEN >= 12
  90 static const uint32_t isl_to_gen_aux_mode[] = {
  91    [ISL_AUX_USAGE_NONE] = AUX_NONE,
  92    [ISL_AUX_USAGE_MCS] = AUX_CCS_E,
  93    [ISL_AUX_USAGE_GEN12_CCS_E] = AUX_CCS_E,
  94    [ISL_AUX_USAGE_CCS_E] = AUX_CCS_E,
  95    [ISL_AUX_USAGE_HIZ_CCS_WT] = AUX_CCS_E,
  96    [ISL_AUX_USAGE_MCS_CCS] = AUX_MCS_LCE,
  97    [ISL_AUX_USAGE_STC_CCS] = AUX_CCS_E,
  98 };
  99 #elif GEN_GEN >= 9
 100 static const uint32_t isl_to_gen_aux_mode[] = {
 101    [ISL_AUX_USAGE_NONE] = AUX_NONE,
 102    [ISL_AUX_USAGE_HIZ] = AUX_HIZ,
 103    [ISL_AUX_USAGE_MCS] = AUX_CCS_D,
 104    [ISL_AUX_USAGE_CCS_D] = AUX_CCS_D,
 105    [ISL_AUX_USAGE_CCS_E] = AUX_CCS_E,
 106 };
 107 #elif GEN_GEN >= 8
 108 static const uint32_t isl_to_gen_aux_mode[] = {
 109    [ISL_AUX_USAGE_NONE] = AUX_NONE,
 110    [ISL_AUX_USAGE_HIZ] = AUX_HIZ,
 111    [ISL_AUX_USAGE_MCS] = AUX_MCS,
 112    [ISL_AUX_USAGE_CCS_D] = AUX_MCS,
 113 };
 114 #endif
 115
 116 static uint8_t
 117 get_surftype(enum isl_surf_dim dim, isl_surf_usage_flags_t usage)
 118 {
 119    switch (dim) {
 120    default:
 121       unreachable("bad isl_surf_dim");
 122    case ISL_SURF_DIM_1D:
 123       assert(!(usage & ISL_SURF_USAGE_CUBE_BIT));
 124       return SURFTYPE_1D;
 125    case ISL_SURF_DIM_2D:
 126       if ((usage & ISL_SURF_USAGE_CUBE_BIT) &&
 127           (usage & ISL_SURF_USAGE_TEXTURE_BIT)) {
 128          /* We need SURFTYPE_CUBE to make cube sampling work */
 129          return SURFTYPE_CUBE;
 130       } else {
 131          /* Everything else (render and storage) treat cubes as plain
 132           * 2D array textures
 133           */
 134          return SURFTYPE_2D;
 135       }
 136    case ISL_SURF_DIM_3D:
 137       assert(!(usage & ISL_SURF_USAGE_CUBE_BIT));
 138       return SURFTYPE_3D;
 139    }
 140 }
 141
 142 /**
 143  * Get the horizontal and vertical alignment in the units expected by the
 144  * hardware.  Note that this does NOT give you the actual hardware enum values
 145  * but an index into the isl_to_gen_[hv]align arrays above.
 146  */
 147 UNUSED static struct isl_extent3d
 148 get_image_alignment(const struct isl_surf *surf)
 149 {
 150    if (GEN_GEN >= 9) {
 151       if (isl_tiling_is_std_y(surf->tiling) ||
 152           surf->dim_layout == ISL_DIM_LAYOUT_GEN9_1D) {
 153          /* The hardware ignores the alignment values. Anyway, the surface's
 154           * true alignment is likely outside the enum range of HALIGN* and
 155           * VALIGN*.
 156           */
 157          return isl_extent3d(4, 4, 1);
 158       } else {
 159          /* In Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in units
 160           * of surface elements (not pixels nor samples). For compressed formats,
 161           * a "surface element" is defined as a compression block.  For example,
 162           * if SurfaceVerticalAlignment is VALIGN_4 and SurfaceFormat is an ETC2
 163           * format (ETC2 has a block height of 4), then the vertical alignment is
 164           * 4 compression blocks or, equivalently, 16 pixels.
 165           */
 166          return isl_surf_get_image_alignment_el(surf);
 167       }
 168    } else {
 169       /* Pre-Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in
 170        * units of surface samples.  For example, if SurfaceVerticalAlignment
 171        * is VALIGN_4 and the surface is singlesampled, then for any surface
 172        * format (compressed or not) the vertical alignment is
 173        * 4 pixels.
 174        */
 175       return isl_surf_get_image_alignment_sa(surf);
 176    }
 177 }
 178
 179 #if GEN_GEN >= 8
 180 static uint32_t
 181 get_qpitch(const struct isl_surf *surf)
 182 {
 183    switch (surf->dim_layout) {
 184    default:
 185       unreachable("Bad isl_surf_dim");
 186    case ISL_DIM_LAYOUT_GEN4_2D:
 187       if (GEN_GEN >= 9) {
 188          if (surf->dim == ISL_SURF_DIM_3D && surf->tiling == ISL_TILING_W) {
 189             /* This is rather annoying and completely undocumented.  It
 190              * appears that the hardware has a bug (or undocumented feature)
 191              * regarding stencil buffers most likely related to the way
 192              * W-tiling is handled as modified Y-tiling.  If you bind a 3-D
 193              * stencil buffer normally, and use texelFetch on it, the z or
 194              * array index will get implicitly multiplied by 2 for no obvious
 195              * reason.  The fix appears to be to divide qpitch by 2 for
 196              * W-tiled surfaces.
 197              */
 198             return isl_surf_get_array_pitch_el_rows(surf) / 2;
 199          } else {
 200             return isl_surf_get_array_pitch_el_rows(surf);
 201          }
 202       } else {
 203          /* From the Broadwell PRM for RENDER_SURFACE_STATE.QPitch
 204           *
 205           *    "This field must be set to an integer multiple of the Surface
 206           *    Vertical Alignment. For compressed textures (BC*, FXT1,
 207           *    ETC*, and EAC* Surface Formats), this field is in units of
 208           *    rows in the uncompressed surface, and must be set to an
 209           *    integer multiple of the vertical alignment parameter "j"
 210           *    defined in the Common Surface Formats section."
 211           */
 212          return isl_surf_get_array_pitch_sa_rows(surf);
 213       }
 214    case ISL_DIM_LAYOUT_GEN9_1D:
 215       /* QPitch is usually expressed as rows of surface elements (where
 216        * a surface element is an compression block or a single surface
 217        * sample). Skylake 1D is an outlier.
 218        *
 219        * From the Skylake BSpec >> Memory Views >> Common Surface
 220        * Formats >> Surface Layout and Tiling >> 1D Surfaces:
 221        *
 222        *    Surface QPitch specifies the distance in pixels between array
 223        *    slices.
 224        */
 225       return isl_surf_get_array_pitch_el(surf);
 226    case ISL_DIM_LAYOUT_GEN4_3D:
 227       /* QPitch doesn't make sense for ISL_DIM_LAYOUT_GEN4_3D since it uses a
 228        * different pitch at each LOD.  Also, the QPitch field is ignored for
 229        * these surfaces.  From the Broadwell PRM documentation for QPitch:
 230        *
 231        *    This field specifies the distance in rows between array slices. It
 232        *    is used only in the following cases:
 233        *     - Surface Array is enabled OR
 234        *     - Number of Mulitsamples is not NUMSAMPLES_1 and Multisampled
 235        *       Surface Storage Format set to MSFMT_MSS OR
 236        *     - Surface Type is SURFTYPE_CUBE
 237        *
 238        * None of the three conditions above can possibly apply to a 3D surface
 239        * so it is safe to just set QPitch to 0.
 240        */
 241       return 0;
 242    }
 243 }
 244 #endif /* GEN_GEN >= 8 */
 245
 246 void
 247 isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
 248                             const struct isl_surf_fill_state_info *restrict info)
 249 {
 250    struct GENX(RENDER_SURFACE_STATE) s = { 0 };
 251
 252    s.SurfaceType = get_surftype(info->surf->dim, info->view->usage);
 253
 254    if (info->view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT)
 255       assert(isl_format_supports_rendering(dev->info, info->view->format));
 256    else if (info->view->usage & ISL_SURF_USAGE_TEXTURE_BIT)
 257       assert(isl_format_supports_sampling(dev->info, info->view->format));
 258
 259    /* From the Sky Lake PRM Vol. 2d, RENDER_SURFACE_STATE::SurfaceFormat
 260     *
 261     *    This field cannot be a compressed (BC*, DXT*, FXT*, ETC*, EAC*)
 262     *    format if the Surface Type is SURFTYPE_1D
 263     */
 264    if (info->surf->dim == ISL_SURF_DIM_1D)
 265       assert(!isl_format_is_compressed(info->view->format));
 266
 267    if (isl_format_is_compressed(info->surf->format)) {
 268       /* You're not allowed to make a view of a compressed format with any
 269        * format other than the surface format.  None of the userspace APIs
 270        * allow for this directly and doing so would mess up a number of
 271        * surface parameters such as Width, Height, and alignments.  Ideally,
 272        * we'd like to assert that the two formats match.  However, we have an
 273        * S3TC workaround that requires us to do reinterpretation.  So assert
 274        * that they're at least the same bpb and block size.
 275        */
 276       ASSERTED const struct isl_format_layout *surf_fmtl =
 277          isl_format_get_layout(info->surf->format);
 278       ASSERTED const struct isl_format_layout *view_fmtl =
 279          isl_format_get_layout(info->surf->format);
 280       assert(surf_fmtl->bpb == view_fmtl->bpb);
 281       assert(surf_fmtl->bw == view_fmtl->bw);
 282       assert(surf_fmtl->bh == view_fmtl->bh);
 283    }
 284
 285    s.SurfaceFormat = info->view->format;
 286
 287 #if GEN_GEN >= 12
 288    /* The BSpec description of this field says:
 289     *
 290     *    "This bit field, when set, indicates if the resource is created as
 291     *    Depth/Stencil resource."
 292     *
 293     *    "SW must set this bit for any resource that was created with
 294     *    Depth/Stencil resource flag. Setting this bit allows HW to properly
 295     *    interpret the data-layout for various cases. For any resource that's
 296     *    created without Depth/Stencil resource flag, it must be reset."
 297     *
 298     * Even though the docs for this bit seem to imply that it's required for
 299     * anything which might have been used for depth/stencil, empirical
 300     * evidence suggests that it only affects CCS compression usage.  There are
 301     * a few things which back this up:
 302     *
 303     *  1. The docs are also pretty clear that this bit was added as part
 304     *     of enabling Gen12 depth/stencil lossless compression.
 305     *
 306     *  2. The only new difference between depth/stencil and color images on
 307     *     Gen12 (where the bit was added) is how they treat CCS compression.
 308     *     All other differences such as alignment requirements and MSAA layout
 309     *     are already covered by other bits.
 310     *
 311     * Under these assumptions, it makes sense for ISL to model this bit as
 312     * being an extension of AuxiliarySurfaceMode where STC_CCS and HIZ_CCS_WT
 313     * are indicated by AuxiliarySurfaceMode == CCS_E and DepthStencilResource
 314     * == true.
 315     */
 316    s.DepthStencilResource = info->aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT ||
 317                             info->aux_usage == ISL_AUX_USAGE_STC_CCS;
 318 #endif
 319
 320 #if GEN_GEN <= 5
 321    s.ColorBufferComponentWriteDisables = info->write_disables;
 322 #else
 323    assert(info->write_disables == 0);
 324 #endif
 325
 326 #if GEN_IS_HASWELL
 327    s.IntegerSurfaceFormat =
 328       isl_format_has_int_channel((enum isl_format) s.SurfaceFormat);
 329 #endif
 330
 331    assert(info->surf->logical_level0_px.width > 0 &&
 332           info->surf->logical_level0_px.height > 0);
 333
 334    s.Width = info->surf->logical_level0_px.width - 1;
 335    s.Height = info->surf->logical_level0_px.height - 1;
 336
 337    /* In the gen6 PRM Volume 1 Part 1: Graphics Core, Section 7.18.3.7.1
 338     * (Surface Arrays For all surfaces other than separate stencil buffer):
 339     *
 340     * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the value
 341     *  calculated in the equation above , for every other odd Surface Height
 342     *  starting from 1 i.e. 1,5,9,13"
 343     *
 344     * Since this Qpitch errata only impacts the sampler, we have to adjust the
 345     * input for the rendering surface to achieve the same qpitch. For the
 346     * affected heights, we increment the height by 1 for the rendering
 347     * surface.
 348     */
 349    if (GEN_GEN == 6 && (info->view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) &&
 350        info->surf->samples > 1 &&
 351        (info->surf->logical_level0_px.height % 4) == 1)
 352       s.Height++;
 353
 354    switch (s.SurfaceType) {
 355    case SURFTYPE_1D:
 356    case SURFTYPE_2D:
 357       /* From the Ivy Bridge PRM >> RENDER_SURFACE_STATE::MinimumArrayElement:
 358        *
 359        *    "If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
 360        *    must be set to zero if this surface is used with sampling engine
 361        *    messages."
 362        *
 363        * This restriction appears to exist only on Ivy Bridge.
 364        */
 365       if (GEN_GEN == 7 && !GEN_IS_HASWELL && !ISL_DEV_IS_BAYTRAIL(dev) &&
 366           (info->view->usage & ISL_SURF_USAGE_TEXTURE_BIT) &&
 367           info->surf->samples > 1)
 368          assert(info->view->base_array_layer == 0);
 369
 370       s.MinimumArrayElement = info->view->base_array_layer;
 371
 372       /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth:
 373        *
 374        *    For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced
 375        *    by one for each increase from zero of Minimum Array Element. For
 376        *    example, if Minimum Array Element is set to 1024 on a 2D surface,
 377        *    the range of this field is reduced to [0,1023].
 378        *
 379        * In other words, 'Depth' is the number of array layers.
 380        */
 381       s.Depth = info->view->array_len - 1;
 382
 383       /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent:
 384        *
 385        *    For Render Target and Typed Dataport 1D and 2D Surfaces:
 386        *    This field must be set to the same value as the Depth field.
 387        */
 388       if (info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
 389                                ISL_SURF_USAGE_STORAGE_BIT))
 390          s.RenderTargetViewExtent = s.Depth;
 391       break;
 392    case SURFTYPE_CUBE:
 393       s.MinimumArrayElement = info->view->base_array_layer;
 394       /* Same as SURFTYPE_2D, but divided by 6 */
 395       s.Depth = info->view->array_len / 6 - 1;
 396       if (info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
 397                                ISL_SURF_USAGE_STORAGE_BIT))
 398          s.RenderTargetViewExtent = s.Depth;
 399       break;
 400    case SURFTYPE_3D:
 401       /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth:
 402        *
 403        *    If the volume texture is MIP-mapped, this field specifies the
 404        *    depth of the base MIP level.
 405        */
 406       s.Depth = info->surf->logical_level0_px.depth - 1;
 407
 408       /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent:
 409        *
 410        *    For Render Target and Typed Dataport 3D Surfaces: This field
 411        *    indicates the extent of the accessible 'R' coordinates minus 1 on
 412        *    the LOD currently being rendered to.
 413        *
 414        * The docs specify that this only matters for render targets and
 415        * surfaces used with typed dataport messages.  Prior to Ivy Bridge, the
 416        * Depth field has more bits than RenderTargetViewExtent so we can have
 417        * textures with more levels than we can render to.  In order to prevent
 418        * assert-failures in the packing function below, we only set the field
 419        * when it's actually going to be used by the hardware.
 420        *
 421        * Similaraly, the MinimumArrayElement field is ignored by all hardware
 422        * prior to Sky Lake when texturing and we want it set to 0 anyway.
 423        * Since it's already initialized to 0, we can just leave it alone for
 424        * texture surfaces.
 425        */
 426       if (info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
 427                                ISL_SURF_USAGE_STORAGE_BIT)) {
 428          s.MinimumArrayElement = info->view->base_array_layer;
 429          s.RenderTargetViewExtent = info->view->array_len - 1;
 430       }
 431       break;
 432    default:
 433       unreachable("bad SurfaceType");
 434    }
 435
 436 #if GEN_GEN >= 12
 437    /* GEN:BUG:1806565034: Only set SurfaceArray if arrayed surface is > 1. */
 438    s.SurfaceArray = info->surf->dim != ISL_SURF_DIM_3D &&
 439       info->view->array_len > 1;
 440 #elif GEN_GEN >= 7
 441    s.SurfaceArray = info->surf->dim != ISL_SURF_DIM_3D;
 442 #endif
 443
 444    if (info->view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
 445       /* For render target surfaces, the hardware interprets field
 446        * MIPCount/LOD as LOD. The Broadwell PRM says:
 447        *
 448        *    MIPCountLOD defines the LOD that will be rendered into.
 449        *    SurfaceMinLOD is ignored.
 450        */
 451       s.MIPCountLOD = info->view->base_level;
 452       s.SurfaceMinLOD = 0;
 453    } else {
 454       /* For non render target surfaces, the hardware interprets field
 455        * MIPCount/LOD as MIPCount.  The range of levels accessible by the
 456        * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD].
 457        */
 458       s.SurfaceMinLOD = info->view->base_level;
 459       s.MIPCountLOD = MAX(info->view->levels, 1) - 1;
 460    }
 461
 462 #if GEN_GEN >= 9
 463    /* We don't use miptails yet.  The PRM recommends that you set "Mip Tail
 464     * Start LOD" to 15 to prevent the hardware from trying to use them.
 465     */
 466    s.TiledResourceMode = NONE;
 467    s.MipTailStartLOD = 15;
 468 #endif
 469
 470 #if GEN_GEN >= 6
 471    const struct isl_extent3d image_align = get_image_alignment(info->surf);
 472    s.SurfaceVerticalAlignment = isl_to_gen_valign[image_align.height];
 473 #if GEN_GEN >= 7
 474    s.SurfaceHorizontalAlignment = isl_to_gen_halign[image_align.width];
 475 #endif
 476 #endif
 477
 478    if (info->surf->dim_layout == ISL_DIM_LAYOUT_GEN9_1D) {
 479       /* For gen9 1-D textures, surface pitch is ignored */
 480       s.SurfacePitch = 0;
 481    } else {
 482       s.SurfacePitch = info->surf->row_pitch_B - 1;
 483    }
 484
 485 #if GEN_GEN >= 8
 486    s.SurfaceQPitch = get_qpitch(info->surf) >> 2;
 487 #elif GEN_GEN == 7
 488    s.SurfaceArraySpacing = info->surf->array_pitch_span ==
 489                            ISL_ARRAY_PITCH_SPAN_COMPACT;
 490 #endif
 491
 492 #if GEN_GEN >= 8
 493    assert(GEN_GEN < 12 || info->surf->tiling != ISL_TILING_W);
 494    s.TileMode = isl_to_gen_tiling[info->surf->tiling];
 495 #else
 496    s.TiledSurface = info->surf->tiling != ISL_TILING_LINEAR,
 497    s.TileWalk = info->surf->tiling == ISL_TILING_Y0 ? TILEWALK_YMAJOR :
 498                                                       TILEWALK_XMAJOR,
 499 #endif
 500
 501 #if GEN_GEN >= 8
 502    s.RenderCacheReadWriteMode = WriteOnlyCache;
 503 #else
 504    s.RenderCacheReadWriteMode = 0;
 505 #endif
 506
 507 #if GEN_GEN >= 11
 508    /* We've seen dEQP failures when enabling this bit with UINT formats,
 509     * which particularly affects blorp_copy() operations.  It shouldn't
 510     * have any effect on UINT textures anyway, so disable it for them.
 511     */
 512    s.EnableUnormPathInColorPipe =
 513       !isl_format_has_int_channel(info->view->format);
 514 #endif
 515
 516    s.CubeFaceEnablePositiveZ = 1;
 517    s.CubeFaceEnableNegativeZ = 1;
 518    s.CubeFaceEnablePositiveY = 1;
 519    s.CubeFaceEnableNegativeY = 1;
 520    s.CubeFaceEnablePositiveX = 1;
 521    s.CubeFaceEnableNegativeX = 1;
 522
 523 #if GEN_GEN >= 6
 524    s.NumberofMultisamples = ffs(info->surf->samples) - 1;
 525 #if GEN_GEN >= 7
 526    s.MultisampledSurfaceStorageFormat =
 527       isl_to_gen_multisample_layout[info->surf->msaa_layout];
 528 #endif
 529 #endif
 530
 531 #if (GEN_GEN >= 8 || GEN_IS_HASWELL)
 532    if (info->view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT)
 533       assert(isl_swizzle_supports_rendering(dev->info, info->view->swizzle));
 534
 535    s.ShaderChannelSelectRed = (enum GENX(ShaderChannelSelect)) info->view->swizzle.r;
 536    s.ShaderChannelSelectGreen = (enum GENX(ShaderChannelSelect)) info->view->swizzle.g;
 537    s.ShaderChannelSelectBlue = (enum GENX(ShaderChannelSelect)) info->view->swizzle.b;
 538    s.ShaderChannelSelectAlpha = (enum GENX(ShaderChannelSelect)) info->view->swizzle.a;
 539 #else
 540    assert(isl_swizzle_is_identity(info->view->swizzle));
 541 #endif
 542
 543    s.SurfaceBaseAddress = info->address;
 544
 545 #if GEN_GEN >= 6
 546    s.MOCS = info->mocs;
 547 #endif
 548
 549 #if GEN_GEN > 4 || GEN_IS_G4X
 550    if (info->x_offset_sa != 0 || info->y_offset_sa != 0) {
 551       /* There are fairly strict rules about when the offsets can be used.
 552        * These are mostly taken from the Sky Lake PRM documentation for
 553        * RENDER_SURFACE_STATE.
 554        */
 555       assert(info->surf->tiling != ISL_TILING_LINEAR);
 556       assert(info->surf->dim == ISL_SURF_DIM_2D);
 557       assert(isl_is_pow2(isl_format_get_layout(info->view->format)->bpb));
 558       assert(info->surf->levels == 1);
 559       assert(info->surf->logical_level0_px.array_len == 1);
 560       assert(info->aux_usage == ISL_AUX_USAGE_NONE);
 561
 562       if (GEN_GEN >= 8) {
 563          /* Broadwell added more rules. */
 564          assert(info->surf->samples == 1);
 565          if (isl_format_get_layout(info->view->format)->bpb == 8)
 566             assert(info->x_offset_sa % 16 == 0);
 567          if (isl_format_get_layout(info->view->format)->bpb == 16)
 568             assert(info->x_offset_sa % 8 == 0);
 569       }
 570
 571 #if GEN_GEN >= 7
 572       s.SurfaceArray = false;
 573 #endif
 574    }
 575
 576    const unsigned x_div = 4;
 577    const unsigned y_div = GEN_GEN >= 8 ? 4 : 2;
 578    assert(info->x_offset_sa % x_div == 0);
 579    assert(info->y_offset_sa % y_div == 0);
 580    s.XOffset = info->x_offset_sa / x_div;
 581    s.YOffset = info->y_offset_sa / y_div;
 582 #else
 583    assert(info->x_offset_sa == 0);
 584    assert(info->y_offset_sa == 0);
 585 #endif
 586
 587 #if GEN_GEN >= 7
 588    if (info->aux_usage != ISL_AUX_USAGE_NONE) {
 589       /* Check valid aux usages per-gen */
 590       if (GEN_GEN >= 12) {
 591          assert(info->aux_usage == ISL_AUX_USAGE_MCS ||
 592                 info->aux_usage == ISL_AUX_USAGE_CCS_E ||
 593                 info->aux_usage == ISL_AUX_USAGE_GEN12_CCS_E ||
 594                 info->aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT ||
 595                 info->aux_usage == ISL_AUX_USAGE_MCS_CCS ||
 596                 info->aux_usage == ISL_AUX_USAGE_STC_CCS);
 597       } else if (GEN_GEN >= 9) {
 598          assert(info->aux_usage == ISL_AUX_USAGE_HIZ ||
 599                 info->aux_usage == ISL_AUX_USAGE_MCS ||
 600                 info->aux_usage == ISL_AUX_USAGE_CCS_D ||
 601                 info->aux_usage == ISL_AUX_USAGE_CCS_E);
 602       } else if (GEN_GEN >= 8) {
 603          assert(info->aux_usage == ISL_AUX_USAGE_HIZ ||
 604                 info->aux_usage == ISL_AUX_USAGE_MCS ||
 605                 info->aux_usage == ISL_AUX_USAGE_CCS_D);
 606       } else if (GEN_GEN >= 7) {
 607          assert(info->aux_usage == ISL_AUX_USAGE_MCS ||
 608                 info->aux_usage == ISL_AUX_USAGE_CCS_D);
 609       }
 610
 611       /* The docs don't appear to say anything whatsoever about compression
 612        * and the data port.  Testing seems to indicate that the data port
 613        * completely ignores the AuxiliarySurfaceMode field.
 614        *
 615        * On gen12 HDC supports compression.
 616        */
 617       if (GEN_GEN < 12)
 618          assert(!(info->view->usage & ISL_SURF_USAGE_STORAGE_BIT));
 619
 620       if (isl_surf_usage_is_depth(info->surf->usage))
 621          assert(isl_aux_usage_has_hiz(info->aux_usage));
 622
 623       if (isl_surf_usage_is_stencil(info->surf->usage))
 624          assert(info->aux_usage == ISL_AUX_USAGE_STC_CCS);
 625
 626       if (isl_aux_usage_has_hiz(info->aux_usage)) {
 627          /* For Gen8-10, there are some restrictions around sampling from HiZ.
 628           * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
 629           * say:
 630           *
 631           *    "If this field is set to AUX_HIZ, Number of Multisamples must
 632           *    be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
 633           *
 634           * On Gen12, the docs are a bit less obvious but the restriction is
 635           * the same.  The limitation isn't called out explicitly but the docs
 636           * for the CCS_E value of RENDER_SURFACE_STATE::AuxiliarySurfaceMode
 637           * say:
 638           *
 639           *    "If Number of multisamples > 1, programming this value means
 640           *    MSAA compression is enabled for that surface. Auxillary surface
 641           *    is MSC with tile y."
 642           *
 643           * Since this interpretation ignores whether the surface is
 644           * depth/stencil or not and since multisampled depth buffers use
 645           * ISL_MSAA_LAYOUT_INTERLEAVED which is incompatible with MCS
 646           * compression, this means that we can't even specify MSAA depth CCS
 647           * in RENDER_SURFACE_STATE::AuxiliarySurfaceMode.
 648           */
 649          assert(info->surf->samples == 1);
 650
 651          /* The dimension must not be 3D */
 652          assert(info->surf->dim != ISL_SURF_DIM_3D);
 653
 654          /* The format must be one of the following: */
 655          switch (info->view->format) {
 656          case ISL_FORMAT_R32_FLOAT:
 657          case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
 658          case ISL_FORMAT_R16_UNORM:
 659             break;
 660          default:
 661             assert(!"Incompatible HiZ Sampling format");
 662             break;
 663          }
 664       }
 665
 666 #if GEN_GEN >= 8
 667       s.AuxiliarySurfaceMode = isl_to_gen_aux_mode[info->aux_usage];
 668 #else
 669       s.MCSEnable = true;
 670 #endif
 671    }
 672
 673    /* The auxiliary buffer info is filled when it's useable by the HW.
 674     *
 675     * Starting with Gen12, the only form of compression that can be used
 676     * with RENDER_SURFACE_STATE which requires an aux surface is MCS.
 677     * HiZ still requires a surface but the HiZ surface can only be
 678     * accessed through 3DSTATE_HIER_DEPTH_BUFFER.
 679     *
 680     * On all earlier hardware, an aux surface is required for all forms
 681     * of compression.
 682     */
 683    if ((GEN_GEN < 12 && info->aux_usage != ISL_AUX_USAGE_NONE) ||
 684        (GEN_GEN >= 12 && isl_aux_usage_has_mcs(info->aux_usage))) {
 685
 686       assert(info->aux_surf != NULL);
 687
 688       struct isl_tile_info tile_info;
 689       isl_surf_get_tile_info(info->aux_surf, &tile_info);
 690       uint32_t pitch_in_tiles =
 691          info->aux_surf->row_pitch_B / tile_info.phys_extent_B.width;
 692
 693       s.AuxiliarySurfaceBaseAddress = info->aux_address;
 694       s.AuxiliarySurfacePitch = pitch_in_tiles - 1;
 695
 696 #if GEN_GEN >= 8
 697       /* Auxiliary surfaces in ISL have compressed formats but the hardware
 698        * doesn't expect our definition of the compression, it expects qpitch
 699        * in units of samples on the main surface.
 700        */
 701       s.AuxiliarySurfaceQPitch =
 702          isl_surf_get_array_pitch_sa_rows(info->aux_surf) >> 2;
 703 #endif
 704    }
 705 #endif
 706
 707 #if GEN_GEN >= 8 && GEN_GEN < 11
 708    /* From the CHV PRM, Volume 2d, page 321 (RENDER_SURFACE_STATE dword 0
 709     * bit 9 "Sampler L2 Bypass Mode Disable" Programming Notes):
 710     *
 711     *    This bit must be set for the following surface types: BC2_UNORM
 712     *    BC3_UNORM BC5_UNORM BC5_SNORM BC7_UNORM
 713     */
 714    if (GEN_GEN >= 9 || dev->info->is_cherryview) {
 715       switch (info->view->format) {
 716       case ISL_FORMAT_BC2_UNORM:
 717       case ISL_FORMAT_BC3_UNORM:
 718       case ISL_FORMAT_BC5_UNORM:
 719       case ISL_FORMAT_BC5_SNORM:
 720       case ISL_FORMAT_BC7_UNORM:
 721          s.SamplerL2BypassModeDisable = true;
 722          break;
 723       default:
 724          /* From the SKL PRM, Programming Note under Sampler Output Channel
 725           * Mapping:
 726           *
 727           *    If a surface has an associated HiZ Auxilliary surface, the
 728           *    Sampler L2 Bypass Mode Disable field in the RENDER_SURFACE_STATE
 729           *    must be set.
 730           */
 731          if (GEN_GEN >= 9 && info->aux_usage == ISL_AUX_USAGE_HIZ)
 732             s.SamplerL2BypassModeDisable = true;
 733          break;
 734       }
 735    }
 736 #endif
 737
 738    if (info->aux_usage != ISL_AUX_USAGE_NONE) {
 739       if (info->use_clear_address) {
 740 #if GEN_GEN >= 10
 741          s.ClearValueAddressEnable = true;
 742          s.ClearValueAddress = info->clear_address;
 743 #else
 744          unreachable("Gen9 and earlier do not support indirect clear colors");
 745 #endif
 746       }
 747
 748 #if GEN_GEN == 11
 749       /*
 750        * From BXML > GT > Shared Functions > vol5c Shared Functions >
 751        * [Structure] RENDER_SURFACE_STATE [BDW+] > ClearColorConversionEnable:
 752        *
 753        *   Project: Gen11
 754        *
 755        *   "Enables Pixel backend hw to convert clear values into native format
 756        *    and write back to clear address, so that display and sampler can use
 757        *    the converted value for resolving fast cleared RTs."
 758        *
 759        * Summary:
 760        *   Clear color conversion must be enabled if the clear color is stored
 761        *   indirectly and fast color clears are enabled.
 762        */
 763       if (info->use_clear_address) {
 764          s.ClearColorConversionEnable = true;
 765       }
 766 #endif
 767
 768 #if GEN_GEN >= 12
 769       assert(info->use_clear_address);
 770 #elif GEN_GEN >= 9
 771       if (!info->use_clear_address) {
 772          s.RedClearColor = info->clear_color.u32[0];
 773          s.GreenClearColor = info->clear_color.u32[1];
 774          s.BlueClearColor = info->clear_color.u32[2];
 775          s.AlphaClearColor = info->clear_color.u32[3];
 776       }
 777 #elif GEN_GEN >= 7
 778       /* Prior to Sky Lake, we only have one bit for the clear color which
 779        * gives us 0 or 1 in whatever the surface's format happens to be.
 780        */
 781       if (isl_format_has_int_channel(info->view->format)) {
 782          for (unsigned i = 0; i < 4; i++) {
 783             assert(info->clear_color.u32[i] == 0 ||
 784                    info->clear_color.u32[i] == 1);
 785          }
 786          s.RedClearColor = info->clear_color.u32[0] != 0;
 787          s.GreenClearColor = info->clear_color.u32[1] != 0;
 788          s.BlueClearColor = info->clear_color.u32[2] != 0;
 789          s.AlphaClearColor = info->clear_color.u32[3] != 0;
 790       } else {
 791          for (unsigned i = 0; i < 4; i++) {
 792             assert(info->clear_color.f32[i] == 0.0f ||
 793                    info->clear_color.f32[i] == 1.0f);
 794          }
 795          s.RedClearColor = info->clear_color.f32[0] != 0.0f;
 796          s.GreenClearColor = info->clear_color.f32[1] != 0.0f;
 797          s.BlueClearColor = info->clear_color.f32[2] != 0.0f;
 798          s.AlphaClearColor = info->clear_color.f32[3] != 0.0f;
 799       }
 800 #endif
 801    }
 802
 803    GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &s);
 804 }
 805
 806 void
 807 isl_genX(buffer_fill_state_s)(const struct isl_device *dev, void *state,
 808                               const struct isl_buffer_fill_state_info *restrict info)
 809 {
 810    uint64_t buffer_size = info->size_B;
 811
 812    /* Uniform and Storage buffers need to have surface size not less that the
 813     * aligned 32-bit size of the buffer. To calculate the array lenght on
 814     * unsized arrays in StorageBuffer the last 2 bits store the padding size
 815     * added to the surface, so we can calculate latter the original buffer
 816     * size to know the number of elements.
 817     *
 818     *  surface_size = isl_align(buffer_size, 4) +
 819     *                 (isl_align(buffer_size) - buffer_size)
 820     *
 821     *  buffer_size = (surface_size & ~3) - (surface_size & 3)
 822     */
 823    if (info->format == ISL_FORMAT_RAW  ||
 824        info->stride_B < isl_format_get_layout(info->format)->bpb / 8) {
 825       assert(info->stride_B == 1);
 826       uint64_t aligned_size = isl_align(buffer_size, 4);
 827       buffer_size = aligned_size + (aligned_size - buffer_size);
 828    }
 829
 830    uint32_t num_elements = buffer_size / info->stride_B;
 831
 832    if (GEN_GEN >= 7) {
 833       /* From the IVB PRM, SURFACE_STATE::Height,
 834        *
 835        *    For typed buffer and structured buffer surfaces, the number
 836        *    of entries in the buffer ranges from 1 to 2^27. For raw buffer
 837        *    surfaces, the number of entries in the buffer is the number of bytes
 838        *    which can range from 1 to 2^30.
 839        */
 840       if (info->format == ISL_FORMAT_RAW) {
 841          assert(num_elements <= (1ull << 30));
 842          assert(num_elements > 0);
 843       } else {
 844          assert(num_elements <= (1ull << 27));
 845       }
 846    } else {
 847       assert(num_elements <= (1ull << 27));
 848    }
 849
 850    struct GENX(RENDER_SURFACE_STATE) s = { 0, };
 851
 852    s.SurfaceType = SURFTYPE_BUFFER;
 853    s.SurfaceFormat = info->format;
 854
 855 #if GEN_GEN >= 6
 856    s.SurfaceVerticalAlignment = isl_to_gen_valign[4];
 857 #if GEN_GEN >= 7
 858    s.SurfaceHorizontalAlignment = isl_to_gen_halign[4];
 859    s.SurfaceArray = false;
 860 #endif
 861 #endif
 862
 863 #if GEN_GEN >= 7
 864    s.Height = ((num_elements - 1) >> 7) & 0x3fff;
 865    s.Width = (num_elements - 1) & 0x7f;
 866    s.Depth = ((num_elements - 1) >> 21) & 0x3ff;
 867 #else
 868    s.Height = ((num_elements - 1) >> 7) & 0x1fff;
 869    s.Width = (num_elements - 1) & 0x7f;
 870    s.Depth = ((num_elements - 1) >> 20) & 0x7f;
 871 #endif
 872
 873    if (GEN_GEN == 12 && dev->info->revision == 0) {
 874       /* TGL-LP A0 has a HW bug (fixed in later HW) which causes buffer
 875        * textures with very close base addresses (delta < 64B) to corrupt each
 876        * other.  We can sort-of work around this by making small buffer
 877        * textures 1D textures instead.  This doesn't fix the problem for large
 878        * buffer textures but the liklihood of large, overlapping, and very
 879        * close buffer textures is fairly low and the point is to hack around
 880        * the bug so we can run apps and tests.
 881        */
 882        if (info->format != ISL_FORMAT_RAW &&
 883            info->stride_B == isl_format_get_layout(info->format)->bpb / 8 &&
 884            num_elements <= (1 << 14)) {
 885          s.SurfaceType = SURFTYPE_1D;
 886          s.Width = num_elements - 1;
 887          s.Height = 0;
 888          s.Depth = 0;
 889       }
 890    }
 891
 892    s.SurfacePitch = info->stride_B - 1;
 893
 894 #if GEN_GEN >= 6
 895    s.NumberofMultisamples = MULTISAMPLECOUNT_1;
 896 #endif
 897
 898 #if (GEN_GEN >= 8)
 899    s.TileMode = LINEAR;
 900 #else
 901    s.TiledSurface = false;
 902 #endif
 903
 904 #if (GEN_GEN >= 8)
 905    s.RenderCacheReadWriteMode = WriteOnlyCache;
 906 #else
 907    s.RenderCacheReadWriteMode = 0;
 908 #endif
 909
 910    s.SurfaceBaseAddress = info->address;
 911 #if GEN_GEN >= 6
 912    s.MOCS = info->mocs;
 913 #endif
 914
 915 #if (GEN_GEN >= 8 || GEN_IS_HASWELL)
 916    s.ShaderChannelSelectRed = (enum GENX(ShaderChannelSelect)) info->swizzle.r;
 917    s.ShaderChannelSelectGreen = (enum GENX(ShaderChannelSelect)) info->swizzle.g;
 918    s.ShaderChannelSelectBlue = (enum GENX(ShaderChannelSelect)) info->swizzle.b;
 919    s.ShaderChannelSelectAlpha = (enum GENX(ShaderChannelSelect)) info->swizzle.a;
 920 #endif
 921
 922    GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &s);
 923 }
 924
 925 void
 926 isl_genX(null_fill_state)(void *state, struct isl_extent3d size)
 927 {
 928    struct GENX(RENDER_SURFACE_STATE) s = {
 929       .SurfaceType = SURFTYPE_NULL,
 930       /* We previously had this format set to B8G8R8A8_UNORM but ran into
 931        * hangs on IVB. R32_UINT seems to work for everybody.
 932        *
 933        * https://gitlab.freedesktop.org/mesa/mesa/-/issues/1872
 934        */
 935       .SurfaceFormat = ISL_FORMAT_R32_UINT,
 936 #if GEN_GEN >= 7
 937       .SurfaceArray = size.depth > 1,
 938 #endif
 939 #if GEN_GEN >= 8
 940       .TileMode = YMAJOR,
 941 #else
 942       .TiledSurface = true,
 943       .TileWalk = TILEWALK_YMAJOR,
 944 #endif
 945 #if GEN_GEN == 7
 946       /* According to PRMs: "Volume 4 Part 1: Subsystem and Cores – Shared
 947        * Functions"
 948        *
 949        * RENDER_SURFACE_STATE::Surface Vertical Alignment
 950        *
 951        *    "This field must be set to VALIGN_4 for all tiled Y Render Target
 952        *     surfaces."
 953        *
 954        * Affect IVB, HSW.
 955        */
 956       .SurfaceVerticalAlignment = VALIGN_4,
 957 #endif
 958       .Width = size.width - 1,
 959       .Height = size.height - 1,
 960       .Depth = size.depth - 1,
 961       .RenderTargetViewExtent = size.depth - 1,
 962 #if GEN_GEN <= 5
 963       .ColorBufferComponentWriteDisables = 0xf,
 964 #endif
 965    };
 966    GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &s);
 967 }