src/intel/compiler/brw_nir_lower_image_load_store.c

   1 /*
   2  * Copyright © 2018 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "isl/isl.h"
  25
  26 #include "brw_nir.h"
  27 #include "compiler/nir/nir_builder.h"
  28 #include "compiler/nir/nir_format_convert.h"
  29
  30 /* The higher compiler layers use the GL enums for image formats even if
  31  * they come in from SPIR-V or Vulkan.  We need to turn them into an ISL
  32  * enum before we can use them.
  33  */
  34 static enum isl_format
  35 isl_format_for_gl_format(uint32_t gl_format)
  36 {
  37    switch (gl_format) {
  38    case GL_R8:             return ISL_FORMAT_R8_UNORM;
  39    case GL_R8_SNORM:       return ISL_FORMAT_R8_SNORM;
  40    case GL_R8UI:           return ISL_FORMAT_R8_UINT;
  41    case GL_R8I:            return ISL_FORMAT_R8_SINT;
  42    case GL_RG8:            return ISL_FORMAT_R8G8_UNORM;
  43    case GL_RG8_SNORM:      return ISL_FORMAT_R8G8_SNORM;
  44    case GL_RG8UI:          return ISL_FORMAT_R8G8_UINT;
  45    case GL_RG8I:           return ISL_FORMAT_R8G8_SINT;
  46    case GL_RGBA8:          return ISL_FORMAT_R8G8B8A8_UNORM;
  47    case GL_RGBA8_SNORM:    return ISL_FORMAT_R8G8B8A8_SNORM;
  48    case GL_RGBA8UI:        return ISL_FORMAT_R8G8B8A8_UINT;
  49    case GL_RGBA8I:         return ISL_FORMAT_R8G8B8A8_SINT;
  50    case GL_R11F_G11F_B10F: return ISL_FORMAT_R11G11B10_FLOAT;
  51    case GL_RGB10_A2:       return ISL_FORMAT_R10G10B10A2_UNORM;
  52    case GL_RGB10_A2UI:     return ISL_FORMAT_R10G10B10A2_UINT;
  53    case GL_R16:            return ISL_FORMAT_R16_UNORM;
  54    case GL_R16_SNORM:      return ISL_FORMAT_R16_SNORM;
  55    case GL_R16F:           return ISL_FORMAT_R16_FLOAT;
  56    case GL_R16UI:          return ISL_FORMAT_R16_UINT;
  57    case GL_R16I:           return ISL_FORMAT_R16_SINT;
  58    case GL_RG16:           return ISL_FORMAT_R16G16_UNORM;
  59    case GL_RG16_SNORM:     return ISL_FORMAT_R16G16_SNORM;
  60    case GL_RG16F:          return ISL_FORMAT_R16G16_FLOAT;
  61    case GL_RG16UI:         return ISL_FORMAT_R16G16_UINT;
  62    case GL_RG16I:          return ISL_FORMAT_R16G16_SINT;
  63    case GL_RGBA16:         return ISL_FORMAT_R16G16B16A16_UNORM;
  64    case GL_RGBA16_SNORM:   return ISL_FORMAT_R16G16B16A16_SNORM;
  65    case GL_RGBA16F:        return ISL_FORMAT_R16G16B16A16_FLOAT;
  66    case GL_RGBA16UI:       return ISL_FORMAT_R16G16B16A16_UINT;
  67    case GL_RGBA16I:        return ISL_FORMAT_R16G16B16A16_SINT;
  68    case GL_R32F:           return ISL_FORMAT_R32_FLOAT;
  69    case GL_R32UI:          return ISL_FORMAT_R32_UINT;
  70    case GL_R32I:           return ISL_FORMAT_R32_SINT;
  71    case GL_RG32F:          return ISL_FORMAT_R32G32_FLOAT;
  72    case GL_RG32UI:         return ISL_FORMAT_R32G32_UINT;
  73    case GL_RG32I:          return ISL_FORMAT_R32G32_SINT;
  74    case GL_RGBA32F:        return ISL_FORMAT_R32G32B32A32_FLOAT;
  75    case GL_RGBA32UI:       return ISL_FORMAT_R32G32B32A32_UINT;
  76    case GL_RGBA32I:        return ISL_FORMAT_R32G32B32A32_SINT;
  77    case GL_NONE:           return ISL_FORMAT_UNSUPPORTED;
  78    default:
  79       assert(!"Invalid image format");
  80       return ISL_FORMAT_UNSUPPORTED;
  81    }
  82 }
  83
  84 static nir_ssa_def *
  85 _load_image_param(nir_builder *b, nir_deref_instr *deref, unsigned offset)
  86 {
  87    nir_intrinsic_instr *load =
  88       nir_intrinsic_instr_create(b->shader,
  89                                  nir_intrinsic_image_deref_load_param_intel);
  90    load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
  91    nir_intrinsic_set_base(load, offset / 4);
  92
  93    switch (offset) {
  94    case BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET:
  95       load->num_components = 1;
  96       break;
  97    case BRW_IMAGE_PARAM_OFFSET_OFFSET:
  98    case BRW_IMAGE_PARAM_SWIZZLING_OFFSET:
  99       load->num_components = 2;
 100       break;
 101    case BRW_IMAGE_PARAM_TILING_OFFSET:
 102    case BRW_IMAGE_PARAM_SIZE_OFFSET:
 103       load->num_components = 3;
 104       break;
 105    case BRW_IMAGE_PARAM_STRIDE_OFFSET:
 106       load->num_components = 4;
 107       break;
 108    default:
 109       unreachable("Invalid param offset");
 110    }
 111    nir_ssa_dest_init(&load->instr, &load->dest,
 112                      load->num_components, 32, NULL);
 113
 114    nir_builder_instr_insert(b, &load->instr);
 115    return &load->dest.ssa;
 116 }
 117
 118 #define load_image_param(b, d, o) \
 119    _load_image_param(b, d, BRW_IMAGE_PARAM_##o##_OFFSET)
 120
 121 static nir_ssa_def *
 122 image_coord_is_in_bounds(nir_builder *b, nir_deref_instr *deref,
 123                          nir_ssa_def *coord)
 124 {
 125    nir_ssa_def *size = load_image_param(b, deref, SIZE);
 126    nir_ssa_def *cmp = nir_ilt(b, coord, size);
 127
 128    unsigned coord_comps = glsl_get_sampler_coordinate_components(deref->type);
 129    nir_ssa_def *in_bounds = nir_imm_int(b, NIR_TRUE);
 130    for (unsigned i = 0; i < coord_comps; i++)
 131       in_bounds = nir_iand(b, in_bounds, nir_channel(b, cmp, i));
 132
 133    return in_bounds;
 134 }
 135
 136 /** Calculate the offset in memory of the texel given by \p coord.
 137  *
 138  * This is meant to be used with untyped surface messages to access a tiled
 139  * surface, what involves taking into account the tiling and swizzling modes
 140  * of the surface manually so it will hopefully not happen very often.
 141  *
 142  * The tiling algorithm implemented here matches either the X or Y tiling
 143  * layouts supported by the hardware depending on the tiling coefficients
 144  * passed to the program as uniforms.  See Volume 1 Part 2 Section 4.5
 145  * "Address Tiling Function" of the IVB PRM for an in-depth explanation of
 146  * the hardware tiling format.
 147  */
 148 static nir_ssa_def *
 149 image_address(nir_builder *b, const struct gen_device_info *devinfo,
 150               nir_deref_instr *deref, nir_ssa_def *coord)
 151 {
 152    if (glsl_get_sampler_dim(deref->type) == GLSL_SAMPLER_DIM_1D &&
 153        glsl_sampler_type_is_array(deref->type)) {
 154       /* It's easier if 1D arrays are treated like 2D arrays */
 155       coord = nir_vec3(b, nir_channel(b, coord, 0),
 156                           nir_imm_int(b, 0),
 157                           nir_channel(b, coord, 1));
 158    } else {
 159       unsigned dims = glsl_get_sampler_coordinate_components(deref->type);
 160       coord = nir_channels(b, coord, (1 << dims) - 1);
 161    }
 162
 163    nir_ssa_def *offset = load_image_param(b, deref, OFFSET);
 164    nir_ssa_def *tiling = load_image_param(b, deref, TILING);
 165    nir_ssa_def *stride = load_image_param(b, deref, STRIDE);
 166
 167    /* Shift the coordinates by the fixed surface offset.  It may be non-zero
 168     * if the image is a single slice of a higher-dimensional surface, or if a
 169     * non-zero mipmap level of the surface is bound to the pipeline.  The
 170     * offset needs to be applied here rather than at surface state set-up time
 171     * because the desired slice-level may start mid-tile, so simply shifting
 172     * the surface base address wouldn't give a well-formed tiled surface in
 173     * the general case.
 174     */
 175    nir_ssa_def *xypos = (coord->num_components == 1) ?
 176                         nir_vec2(b, coord, nir_imm_int(b, 0)) :
 177                         nir_channels(b, coord, 0x3);
 178    xypos = nir_iadd(b, xypos, offset);
 179
 180    /* The layout of 3-D textures in memory is sort-of like a tiling
 181     * format.  At each miplevel, the slices are arranged in rows of
 182     * 2^level slices per row.  The slice row is stored in tmp.y and
 183     * the slice within the row is stored in tmp.x.
 184     *
 185     * The layout of 2-D array textures and cubemaps is much simpler:
 186     * Depending on whether the ARYSPC_LOD0 layout is in use it will be
 187     * stored in memory as an array of slices, each one being a 2-D
 188     * arrangement of miplevels, or as a 2D arrangement of miplevels,
 189     * each one being an array of slices.  In either case the separation
 190     * between slices of the same LOD is equal to the qpitch value
 191     * provided as stride.w.
 192     *
 193     * This code can be made to handle either 2D arrays and 3D textures
 194     * by passing in the miplevel as tile.z for 3-D textures and 0 in
 195     * tile.z for 2-D array textures.
 196     *
 197     * See Volume 1 Part 1 of the Gen7 PRM, sections 6.18.4.7 "Surface
 198     * Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion
 199     * of the hardware 3D texture and 2D array layouts.
 200     */
 201    if (coord->num_components > 2) {
 202       /* Decompose z into a major (tmp.y) and a minor (tmp.x)
 203        * index.
 204        */
 205       nir_ssa_def *z = nir_channel(b, coord, 2);
 206       nir_ssa_def *z_x = nir_ubfe(b, z, nir_imm_int(b, 0),
 207                                   nir_channel(b, tiling, 2));
 208       nir_ssa_def *z_y = nir_ushr(b, z, nir_channel(b, tiling, 2));
 209
 210       /* Take into account the horizontal (tmp.x) and vertical (tmp.y)
 211        * slice offset.
 212        */
 213       xypos = nir_iadd(b, xypos, nir_imul(b, nir_vec2(b, z_x, z_y),
 214                                              nir_channels(b, stride, 0xc)));
 215    }
 216
 217    nir_ssa_def *addr;
 218    if (coord->num_components > 1) {
 219       /* Calculate the major/minor x and y indices.  In order to
 220        * accommodate both X and Y tiling, the Y-major tiling format is
 221        * treated as being a bunch of narrow X-tiles placed next to each
 222        * other.  This means that the tile width for Y-tiling is actually
 223        * the width of one sub-column of the Y-major tile where each 4K
 224        * tile has 8 512B sub-columns.
 225        *
 226        * The major Y value is the row of tiles in which the pixel lives.
 227        * The major X value is the tile sub-column in which the pixel
 228        * lives; for X tiling, this is the same as the tile column, for Y
 229        * tiling, each tile has 8 sub-columns.  The minor X and Y indices
 230        * are the position within the sub-column.
 231        */
 232
 233       /* Calculate the minor x and y indices. */
 234       nir_ssa_def *minor = nir_ubfe(b, xypos, nir_imm_int(b, 0),
 235                                        nir_channels(b, tiling, 0x3));
 236       nir_ssa_def *major = nir_ushr(b, xypos, nir_channels(b, tiling, 0x3));
 237
 238       /* Calculate the texel index from the start of the tile row and the
 239        * vertical coordinate of the row.
 240        * Equivalent to:
 241        *   tmp.x = (major.x << tile.y << tile.x) +
 242        *           (minor.y << tile.x) + minor.x
 243        *   tmp.y = major.y << tile.y
 244        */
 245       nir_ssa_def *idx_x, *idx_y;
 246       idx_x = nir_ishl(b, nir_channel(b, major, 0), nir_channel(b, tiling, 1));
 247       idx_x = nir_iadd(b, idx_x, nir_channel(b, minor, 1));
 248       idx_x = nir_ishl(b, idx_x, nir_channel(b, tiling, 0));
 249       idx_x = nir_iadd(b, idx_x, nir_channel(b, minor, 0));
 250       idx_y = nir_ishl(b, nir_channel(b, major, 1), nir_channel(b, tiling, 1));
 251
 252       /* Add it to the start of the tile row. */
 253       nir_ssa_def *idx;
 254       idx = nir_imul(b, idx_y, nir_channel(b, stride, 1));
 255       idx = nir_iadd(b, idx, idx_x);
 256
 257       /* Multiply by the Bpp value. */
 258       addr = nir_imul(b, idx, nir_channel(b, stride, 0));
 259
 260       if (devinfo->gen < 8 && !devinfo->is_baytrail) {
 261          /* Take into account the two dynamically specified shifts.  Both are
 262           * used to implement swizzling of X-tiled surfaces.  For Y-tiled
 263           * surfaces only one bit needs to be XOR-ed with bit 6 of the memory
 264           * address, so a swz value of 0xff (actually interpreted as 31 by the
 265           * hardware) will be provided to cause the relevant bit of tmp.y to
 266           * be zero and turn the first XOR into the identity.  For linear
 267           * surfaces or platforms lacking address swizzling both shifts will
 268           * be 0xff causing the relevant bits of both tmp.x and .y to be zero,
 269           * what effectively disables swizzling.
 270           */
 271          nir_ssa_def *swizzle = load_image_param(b, deref, SWIZZLING);
 272          nir_ssa_def *shift0 = nir_ushr(b, addr, nir_channel(b, swizzle, 0));
 273          nir_ssa_def *shift1 = nir_ushr(b, addr, nir_channel(b, swizzle, 1));
 274
 275          /* XOR tmp.x and tmp.y with bit 6 of the memory address. */
 276          nir_ssa_def *bit = nir_iand(b, nir_ixor(b, shift0, shift1),
 277                                         nir_imm_int(b, 1 << 6));
 278          addr = nir_ixor(b, addr, bit);
 279       }
 280    } else {
 281       /* Multiply by the Bpp/stride value.  Note that the addr.y may be
 282        * non-zero even if the image is one-dimensional because a vertical
 283        * offset may have been applied above to select a non-zero slice or
 284        * level of a higher-dimensional texture.
 285        */
 286       nir_ssa_def *idx;
 287       idx = nir_imul(b, nir_channel(b, xypos, 1), nir_channel(b, stride, 1));
 288       idx = nir_iadd(b, nir_channel(b, xypos, 0), idx);
 289       addr = nir_imul(b, idx, nir_channel(b, stride, 0));
 290    }
 291
 292    return addr;
 293 }
 294
 295 struct format_info {
 296    const struct isl_format_layout *fmtl;
 297    unsigned chans;
 298    unsigned bits[4];
 299 };
 300
 301 static struct format_info
 302 get_format_info(enum isl_format fmt)
 303 {
 304    const struct isl_format_layout *fmtl = isl_format_get_layout(fmt);
 305
 306    return (struct format_info) {
 307       .fmtl = fmtl,
 308       .chans = isl_format_get_num_channels(fmt),
 309       .bits = {
 310          fmtl->channels.r.bits,
 311          fmtl->channels.g.bits,
 312          fmtl->channels.b.bits,
 313          fmtl->channels.a.bits
 314       },
 315    };
 316 }
 317
 318 static nir_ssa_def *
 319 nir_zero_vec(nir_builder *b, unsigned num_components)
 320 {
 321    nir_const_value v;
 322    memset(&v, 0, sizeof(v));
 323
 324    return nir_build_imm(b, num_components, 32, v);
 325 }
 326
 327 static nir_ssa_def *
 328 convert_color_for_load(nir_builder *b, const struct gen_device_info *devinfo,
 329                        nir_ssa_def *color,
 330                        enum isl_format image_fmt, enum isl_format lower_fmt,
 331                        unsigned dest_components)
 332 {
 333    if (image_fmt == lower_fmt)
 334       goto expand_vec;
 335
 336    if (image_fmt == ISL_FORMAT_R11G11B10_FLOAT) {
 337       assert(lower_fmt == ISL_FORMAT_R32_UINT);
 338       color = nir_format_unpack_11f11f10f(b, color);
 339       goto expand_vec;
 340    }
 341
 342    struct format_info image = get_format_info(image_fmt);
 343    struct format_info lower = get_format_info(lower_fmt);
 344
 345    const bool needs_sign_extension =
 346       isl_format_has_snorm_channel(image_fmt) ||
 347       isl_format_has_sint_channel(image_fmt);
 348
 349    /* We only check the red channel to detect if we need to pack/unpack */
 350    assert(image.bits[0] != lower.bits[0] ||
 351           memcmp(image.bits, lower.bits, sizeof(image.bits)) == 0);
 352
 353    if (image.bits[0] != lower.bits[0] && lower_fmt == ISL_FORMAT_R32_UINT) {
 354       if (needs_sign_extension)
 355          color = nir_format_unpack_sint(b, color, image.bits, image.chans);
 356       else
 357          color = nir_format_unpack_uint(b, color, image.bits, image.chans);
 358    } else {
 359       /* All these formats are homogeneous */
 360       for (unsigned i = 1; i < image.chans; i++)
 361          assert(image.bits[i] == image.bits[0]);
 362
 363       /* On IVB, we rely on the undocumented behavior that typed reads from
 364        * surfaces of the unsupported R8 and R16 formats return useful data in
 365        * their least significant bits.  However, the data in the high bits is
 366        * garbage so we have to discard it.
 367        */
 368       if (devinfo->gen == 7 && !devinfo->is_haswell &&
 369           (lower_fmt == ISL_FORMAT_R16_UINT ||
 370            lower_fmt == ISL_FORMAT_R8_UINT))
 371          color = nir_format_mask_uvec(b, color, lower.bits);
 372
 373       if (image.bits[0] != lower.bits[0]) {
 374          color = nir_format_bitcast_uvec_unmasked(b, color, lower.bits[0],
 375                                                   image.bits[0]);
 376       }
 377
 378       if (needs_sign_extension)
 379          color = nir_format_sign_extend_ivec(b, color, image.bits);
 380    }
 381
 382    switch (image.fmtl->channels.r.type) {
 383    case ISL_UNORM:
 384       assert(isl_format_has_uint_channel(lower_fmt));
 385       color = nir_format_unorm_to_float(b, color, image.bits);
 386       break;
 387
 388    case ISL_SNORM:
 389       assert(isl_format_has_uint_channel(lower_fmt));
 390       color = nir_format_snorm_to_float(b, color, image.bits);
 391       break;
 392
 393    case ISL_SFLOAT:
 394       if (image.bits[0] == 16)
 395          color = nir_unpack_half_2x16_split_x(b, color);
 396       break;
 397
 398    case ISL_UINT:
 399    case ISL_SINT:
 400       break;
 401
 402    default:
 403       unreachable("Invalid image channel type");
 404    }
 405
 406 expand_vec:
 407    assert(dest_components == 1 || dest_components == 4);
 408    assert(color->num_components <= dest_components);
 409    if (color->num_components == dest_components)
 410       return color;
 411
 412    nir_ssa_def *comps[4];
 413    for (unsigned i = 0; i < color->num_components; i++)
 414       comps[i] = nir_channel(b, color, i);
 415
 416    for (unsigned i = color->num_components; i < 3; i++)
 417       comps[i] = nir_imm_int(b, 0);
 418
 419    if (color->num_components < 4) {
 420       if (isl_format_has_int_channel(image_fmt))
 421          comps[3] = nir_imm_int(b, 1);
 422       else
 423          comps[3] = nir_imm_float(b, 1);
 424    }
 425
 426    return nir_vec(b, comps, dest_components);
 427 }
 428
 429 static bool
 430 lower_image_load_instr(nir_builder *b,
 431                        const struct gen_device_info *devinfo,
 432                        nir_intrinsic_instr *intrin)
 433 {
 434    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
 435    nir_variable *var = nir_deref_instr_get_variable(deref);
 436    const enum isl_format image_fmt =
 437       isl_format_for_gl_format(var->data.image.format);
 438
 439    if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
 440       const enum isl_format lower_fmt =
 441          isl_lower_storage_image_format(devinfo, image_fmt);
 442       const unsigned dest_components = intrin->num_components;
 443
 444       /* Use an undef to hold the uses of the load while we do the color
 445        * conversion.
 446        */
 447       nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32);
 448       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(placeholder));
 449
 450       intrin->num_components = isl_format_get_num_channels(lower_fmt);
 451       intrin->dest.ssa.num_components = intrin->num_components;
 452
 453       b->cursor = nir_after_instr(&intrin->instr);
 454
 455       nir_ssa_def *color = convert_color_for_load(b, devinfo,
 456                                                   &intrin->dest.ssa,
 457                                                   image_fmt, lower_fmt,
 458                                                   dest_components);
 459
 460       nir_ssa_def_rewrite_uses(placeholder, nir_src_for_ssa(color));
 461       nir_instr_remove(placeholder->parent_instr);
 462    } else {
 463       const struct isl_format_layout *image_fmtl =
 464          isl_format_get_layout(image_fmt);
 465       /* We have a matching typed format for everything 32b and below */
 466       assert(image_fmtl->bpb == 64 || image_fmtl->bpb == 128);
 467       enum isl_format raw_fmt = (image_fmtl->bpb == 64) ?
 468                                 ISL_FORMAT_R32G32_UINT :
 469                                 ISL_FORMAT_R32G32B32A32_UINT;
 470       const unsigned dest_components = intrin->num_components;
 471
 472       b->cursor = nir_instr_remove(&intrin->instr);
 473
 474       nir_ssa_def *coord = intrin->src[1].ssa;
 475
 476       nir_ssa_def *do_load = image_coord_is_in_bounds(b, deref, coord);
 477       if (devinfo->gen == 7 && !devinfo->is_haswell) {
 478          /* Check whether the first stride component (i.e. the Bpp value)
 479           * is greater than four, what on Gen7 indicates that a surface of
 480           * type RAW has been bound for untyped access.  Reading or writing
 481           * to a surface of type other than RAW using untyped surface
 482           * messages causes a hang on IVB and VLV.
 483           */
 484          nir_ssa_def *stride = load_image_param(b, deref, STRIDE);
 485          nir_ssa_def *is_raw =
 486             nir_ilt(b, nir_imm_int(b, 4), nir_channel(b, stride, 0));
 487          do_load = nir_iand(b, do_load, is_raw);
 488       }
 489       nir_push_if(b, do_load);
 490
 491       nir_ssa_def *addr = image_address(b, devinfo, deref, coord);
 492       nir_intrinsic_instr *load =
 493          nir_intrinsic_instr_create(b->shader,
 494                                     nir_intrinsic_image_deref_load_raw_intel);
 495       load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
 496       load->src[1] = nir_src_for_ssa(addr);
 497       load->num_components = image_fmtl->bpb / 32;
 498       nir_ssa_dest_init(&load->instr, &load->dest,
 499                         load->num_components, 32, NULL);
 500       nir_builder_instr_insert(b, &load->instr);
 501
 502       nir_push_else(b, NULL);
 503
 504       nir_ssa_def *zero = nir_zero_vec(b, load->num_components);
 505
 506       nir_pop_if(b, NULL);
 507
 508       nir_ssa_def *value = nir_if_phi(b, &load->dest.ssa, zero);
 509
 510       nir_ssa_def *color = convert_color_for_load(b, devinfo, value,
 511                                                   image_fmt, raw_fmt,
 512                                                   dest_components);
 513
 514       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(color));
 515    }
 516
 517    return true;
 518 }
 519
 520 static nir_ssa_def *
 521 convert_color_for_store(nir_builder *b, const struct gen_device_info *devinfo,
 522                         nir_ssa_def *color,
 523                         enum isl_format image_fmt, enum isl_format lower_fmt)
 524 {
 525    struct format_info image = get_format_info(image_fmt);
 526    struct format_info lower = get_format_info(lower_fmt);
 527
 528    color = nir_channels(b, color, (1 << image.chans) - 1);
 529
 530    if (image_fmt == lower_fmt)
 531       return color;
 532
 533    if (image_fmt == ISL_FORMAT_R11G11B10_FLOAT) {
 534       assert(lower_fmt == ISL_FORMAT_R32_UINT);
 535       return nir_format_pack_11f11f10f(b, color);
 536    }
 537
 538    switch (image.fmtl->channels.r.type) {
 539    case ISL_UNORM:
 540       assert(isl_format_has_uint_channel(lower_fmt));
 541       color = nir_format_float_to_unorm(b, color, image.bits);
 542       break;
 543
 544    case ISL_SNORM:
 545       assert(isl_format_has_uint_channel(lower_fmt));
 546       color = nir_format_float_to_snorm(b, color, image.bits);
 547       break;
 548
 549    case ISL_SFLOAT:
 550       if (image.bits[0] == 16) {
 551          nir_ssa_def *f16comps[4];
 552          for (unsigned i = 0; i < image.chans; i++) {
 553             f16comps[i] = nir_pack_half_2x16_split(b, nir_channel(b, color, i),
 554                                                       nir_imm_float(b, 0));
 555          }
 556          color = nir_vec(b, f16comps, image.chans);
 557       }
 558       break;
 559
 560    case ISL_UINT:
 561       if (image.bits[0] < 32) {
 562          nir_const_value max;
 563          for (unsigned i = 0; i < image.chans; i++) {
 564             assert(image.bits[i] < 32);
 565             max.u32[i] = (1u << image.bits[i]) - 1;
 566          }
 567          color = nir_umin(b, color, nir_build_imm(b, image.chans, 32, max));
 568       }
 569       break;
 570
 571    case ISL_SINT:
 572       if (image.bits[0] < 32) {
 573          nir_const_value min, max;
 574          for (unsigned i = 0; i < image.chans; i++) {
 575             assert(image.bits[i] < 32);
 576             max.i32[i] = (1 << (image.bits[i] - 1)) - 1;
 577             min.i32[i] = -(1 << (image.bits[i] - 1));
 578          }
 579          color = nir_imin(b, color, nir_build_imm(b, image.chans, 32, max));
 580          color = nir_imax(b, color, nir_build_imm(b, image.chans, 32, min));
 581       }
 582       break;
 583
 584    default:
 585       unreachable("Invalid image channel type");
 586    }
 587
 588    if (image.bits[0] < 32 &&
 589        (isl_format_has_snorm_channel(image_fmt) ||
 590         isl_format_has_sint_channel(image_fmt)))
 591       color = nir_format_mask_uvec(b, color, image.bits);
 592
 593    if (image.bits[0] != lower.bits[0] && lower_fmt == ISL_FORMAT_R32_UINT) {
 594       color = nir_format_pack_uint(b, color, image.bits, image.chans);
 595    } else {
 596       /* All these formats are homogeneous */
 597       for (unsigned i = 1; i < image.chans; i++)
 598          assert(image.bits[i] == image.bits[0]);
 599
 600       if (image.bits[0] != lower.bits[0]) {
 601          color = nir_format_bitcast_uvec_unmasked(b, color, image.bits[0],
 602                                                   lower.bits[0]);
 603       }
 604    }
 605
 606    return color;
 607 }
 608
 609 static bool
 610 lower_image_store_instr(nir_builder *b,
 611                         const struct gen_device_info *devinfo,
 612                         nir_intrinsic_instr *intrin)
 613 {
 614    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
 615    nir_variable *var = nir_deref_instr_get_variable(deref);
 616
 617    /* For write-only surfaces, we trust that the hardware can just do the
 618     * conversion for us.
 619     */
 620    if (var->data.image.access & ACCESS_NON_READABLE)
 621       return false;
 622
 623    const enum isl_format image_fmt =
 624       isl_format_for_gl_format(var->data.image.format);
 625
 626    if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
 627       const enum isl_format lower_fmt =
 628          isl_lower_storage_image_format(devinfo, image_fmt);
 629
 630       /* Color conversion goes before the store */
 631       b->cursor = nir_before_instr(&intrin->instr);
 632
 633       nir_ssa_def *color = convert_color_for_store(b, devinfo,
 634                                                    intrin->src[3].ssa,
 635                                                    image_fmt, lower_fmt);
 636       intrin->num_components = isl_format_get_num_channels(lower_fmt);
 637       nir_instr_rewrite_src(&intrin->instr, &intrin->src[3],
 638                             nir_src_for_ssa(color));
 639    } else {
 640       const struct isl_format_layout *image_fmtl =
 641          isl_format_get_layout(image_fmt);
 642       /* We have a matching typed format for everything 32b and below */
 643       assert(image_fmtl->bpb == 64 || image_fmtl->bpb == 128);
 644       enum isl_format raw_fmt = (image_fmtl->bpb == 64) ?
 645                                 ISL_FORMAT_R32G32_UINT :
 646                                 ISL_FORMAT_R32G32B32A32_UINT;
 647
 648       b->cursor = nir_instr_remove(&intrin->instr);
 649
 650       nir_ssa_def *coord = intrin->src[1].ssa;
 651
 652       nir_ssa_def *do_store = image_coord_is_in_bounds(b, deref, coord);
 653       if (devinfo->gen == 7 && !devinfo->is_haswell) {
 654          /* Check whether the first stride component (i.e. the Bpp value)
 655           * is greater than four, what on Gen7 indicates that a surface of
 656           * type RAW has been bound for untyped access.  Reading or writing
 657           * to a surface of type other than RAW using untyped surface
 658           * messages causes a hang on IVB and VLV.
 659           */
 660          nir_ssa_def *stride = load_image_param(b, deref, STRIDE);
 661          nir_ssa_def *is_raw =
 662             nir_ilt(b, nir_imm_int(b, 4), nir_channel(b, stride, 0));
 663          do_store = nir_iand(b, do_store, is_raw);
 664       }
 665       nir_push_if(b, do_store);
 666
 667       nir_ssa_def *addr = image_address(b, devinfo, deref, coord);
 668       nir_ssa_def *color = convert_color_for_store(b, devinfo,
 669                                                    intrin->src[3].ssa,
 670                                                    image_fmt, raw_fmt);
 671
 672       nir_intrinsic_instr *store =
 673          nir_intrinsic_instr_create(b->shader,
 674                                     nir_intrinsic_image_deref_store_raw_intel);
 675       store->src[0] = nir_src_for_ssa(&deref->dest.ssa);
 676       store->src[1] = nir_src_for_ssa(addr);
 677       store->src[2] = nir_src_for_ssa(color);
 678       store->num_components = image_fmtl->bpb / 32;
 679       nir_builder_instr_insert(b, &store->instr);
 680
 681       nir_pop_if(b, NULL);
 682    }
 683
 684    return true;
 685 }
 686
 687 static bool
 688 lower_image_atomic_instr(nir_builder *b,
 689                          const struct gen_device_info *devinfo,
 690                          nir_intrinsic_instr *intrin)
 691 {
 692    if (devinfo->is_haswell || devinfo->gen >= 8)
 693       return false;
 694
 695    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
 696
 697    b->cursor = nir_instr_remove(&intrin->instr);
 698
 699    /* Use an undef to hold the uses of the load conversion. */
 700    nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32);
 701    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(placeholder));
 702
 703    /* Check the first component of the size field to find out if the
 704     * image is bound.  Necessary on IVB for typed atomics because
 705     * they don't seem to respect null surfaces and will happily
 706     * corrupt or read random memory when no image is bound.
 707     */
 708    nir_ssa_def *size = load_image_param(b, deref, SIZE);
 709    nir_ssa_def *zero = nir_imm_int(b, 0);
 710    nir_push_if(b, nir_ine(b, nir_channel(b, size, 0), zero));
 711
 712    nir_builder_instr_insert(b, &intrin->instr);
 713
 714    nir_pop_if(b, NULL);
 715
 716    nir_ssa_def *result = nir_if_phi(b, &intrin->dest.ssa, zero);
 717    nir_ssa_def_rewrite_uses(placeholder, nir_src_for_ssa(result));
 718
 719    return true;
 720 }
 721
 722 static bool
 723 lower_image_size_instr(nir_builder *b,
 724                        const struct gen_device_info *devinfo,
 725                        nir_intrinsic_instr *intrin)
 726 {
 727    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
 728    nir_variable *var = nir_deref_instr_get_variable(deref);
 729
 730    /* For write-only images, we have an actual image surface so we fall back
 731     * and let the back-end emit a TXS for this.
 732     */
 733    if (var->data.image.access & ACCESS_NON_READABLE)
 734       return false;
 735
 736    /* If we have a matching typed format, then we have an actual image surface
 737     * so we fall back and let the back-end emit a TXS for this.
 738     */
 739    const enum isl_format image_fmt =
 740       isl_format_for_gl_format(var->data.image.format);
 741    if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt))
 742       return false;
 743
 744    b->cursor = nir_instr_remove(&intrin->instr);
 745
 746    nir_ssa_def *size = load_image_param(b, deref, SIZE);
 747
 748    nir_ssa_def *comps[4] = { NULL, NULL, NULL, NULL };
 749
 750    enum glsl_sampler_dim dim = glsl_get_sampler_dim(deref->type);
 751    unsigned coord_comps = glsl_get_sampler_coordinate_components(deref->type);
 752    for (unsigned c = 0; c < coord_comps; c++) {
 753       if (c == 2 && dim == GLSL_SAMPLER_DIM_CUBE) {
 754          comps[2] = nir_idiv(b, nir_channel(b, size, 2), nir_imm_int(b, 6));
 755       } else {
 756          comps[c] = nir_channel(b, size, c);
 757       }
 758    }
 759
 760    for (unsigned c = coord_comps; c < intrin->dest.ssa.num_components; ++c)
 761       comps[c] = nir_imm_int(b, 1);
 762
 763    nir_ssa_def *vec = nir_vec(b, comps, intrin->dest.ssa.num_components);
 764    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(vec));
 765
 766    return true;
 767 }
 768
 769 bool
 770 brw_nir_lower_image_load_store(nir_shader *shader,
 771                                const struct gen_device_info *devinfo)
 772 {
 773    bool progress = false;
 774
 775    nir_foreach_function(function, shader) {
 776       if (function->impl == NULL)
 777          continue;
 778
 779       nir_foreach_block_safe(block, function->impl) {
 780          nir_builder b;
 781          nir_builder_init(&b, function->impl);
 782
 783          nir_foreach_instr_safe(instr, block) {
 784             if (instr->type != nir_instr_type_intrinsic)
 785                continue;
 786
 787             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 788             switch (intrin->intrinsic) {
 789             case nir_intrinsic_image_deref_load:
 790                if (lower_image_load_instr(&b, devinfo, intrin))
 791                   progress = true;
 792                break;
 793
 794             case nir_intrinsic_image_deref_store:
 795                if (lower_image_store_instr(&b, devinfo, intrin))
 796                   progress = true;
 797                break;
 798
 799             case nir_intrinsic_image_deref_atomic_add:
 800             case nir_intrinsic_image_deref_atomic_min:
 801             case nir_intrinsic_image_deref_atomic_max:
 802             case nir_intrinsic_image_deref_atomic_and:
 803             case nir_intrinsic_image_deref_atomic_or:
 804             case nir_intrinsic_image_deref_atomic_xor:
 805             case nir_intrinsic_image_deref_atomic_exchange:
 806             case nir_intrinsic_image_deref_atomic_comp_swap:
 807                if (lower_image_atomic_instr(&b, devinfo, intrin))
 808                   progress = true;
 809                break;
 810
 811             case nir_intrinsic_image_deref_size:
 812                if (lower_image_size_instr(&b, devinfo, intrin))
 813                   progress = true;
 814                break;
 815
 816             default:
 817                /* Nothing to do */
 818                break;
 819             }
 820          }
 821       }
 822
 823       nir_metadata_preserve(function->impl, nir_metadata_block_index |
 824                                             nir_metadata_dominance);
 825    }
 826
 827    return progress;
 828 }
 829
 830 void
 831 brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin,
 832                                 nir_ssa_def *index)
 833 {
 834    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
 835    nir_variable *var = nir_deref_instr_get_variable(deref);
 836
 837    switch (intrin->intrinsic) {
 838 #define CASE(op) \
 839    case nir_intrinsic_image_deref_##op: \
 840       intrin->intrinsic = nir_intrinsic_image_##op; \
 841       break;
 842    CASE(load)
 843    CASE(store)
 844    CASE(atomic_add)
 845    CASE(atomic_min)
 846    CASE(atomic_max)
 847    CASE(atomic_and)
 848    CASE(atomic_or)
 849    CASE(atomic_xor)
 850    CASE(atomic_exchange)
 851    CASE(atomic_comp_swap)
 852    CASE(atomic_fadd)
 853    CASE(size)
 854    CASE(samples)
 855    CASE(load_raw_intel)
 856    CASE(store_raw_intel)
 857 #undef CASE
 858    default:
 859       unreachable("Unhanded image intrinsic");
 860    }
 861
 862    nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type));
 863    nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type));
 864    nir_intrinsic_set_access(intrin, var->data.image.access);
 865    nir_intrinsic_set_format(intrin, var->data.image.format);
 866
 867    nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
 868                          nir_src_for_ssa(index));
 869 }