src/intel/compiler/brw_nir_lower_image_load_store.c

   1 /*
   2  * Copyright © 2018 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "isl/isl.h"
  25
  26 #include "brw_nir.h"
  27 #include "compiler/nir/nir_builder.h"
  28 #include "compiler/nir/nir_format_convert.h"
  29
  30 static nir_ssa_def *
  31 _load_image_param(nir_builder *b, nir_deref_instr *deref, unsigned offset)
  32 {
  33    nir_intrinsic_instr *load =
  34       nir_intrinsic_instr_create(b->shader,
  35                                  nir_intrinsic_image_deref_load_param_intel);
  36    load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
  37    nir_intrinsic_set_base(load, offset / 4);
  38
  39    switch (offset) {
  40    case BRW_IMAGE_PARAM_OFFSET_OFFSET:
  41    case BRW_IMAGE_PARAM_SWIZZLING_OFFSET:
  42       load->num_components = 2;
  43       break;
  44    case BRW_IMAGE_PARAM_TILING_OFFSET:
  45    case BRW_IMAGE_PARAM_SIZE_OFFSET:
  46       load->num_components = 3;
  47       break;
  48    case BRW_IMAGE_PARAM_STRIDE_OFFSET:
  49       load->num_components = 4;
  50       break;
  51    default:
  52       unreachable("Invalid param offset");
  53    }
  54    nir_ssa_dest_init(&load->instr, &load->dest,
  55                      load->num_components, 32, NULL);
  56
  57    nir_builder_instr_insert(b, &load->instr);
  58    return &load->dest.ssa;
  59 }
  60
  61 #define load_image_param(b, d, o) \
  62    _load_image_param(b, d, BRW_IMAGE_PARAM_##o##_OFFSET)
  63
  64 static nir_ssa_def *
  65 image_coord_is_in_bounds(nir_builder *b, nir_deref_instr *deref,
  66                          nir_ssa_def *coord)
  67 {
  68    nir_ssa_def *size = load_image_param(b, deref, SIZE);
  69    nir_ssa_def *cmp = nir_ilt(b, coord, size);
  70
  71    unsigned coord_comps = glsl_get_sampler_coordinate_components(deref->type);
  72    nir_ssa_def *in_bounds = nir_imm_true(b);
  73    for (unsigned i = 0; i < coord_comps; i++)
  74       in_bounds = nir_iand(b, in_bounds, nir_channel(b, cmp, i));
  75
  76    return in_bounds;
  77 }
  78
  79 /** Calculate the offset in memory of the texel given by \p coord.
  80  *
  81  * This is meant to be used with untyped surface messages to access a tiled
  82  * surface, what involves taking into account the tiling and swizzling modes
  83  * of the surface manually so it will hopefully not happen very often.
  84  *
  85  * The tiling algorithm implemented here matches either the X or Y tiling
  86  * layouts supported by the hardware depending on the tiling coefficients
  87  * passed to the program as uniforms.  See Volume 1 Part 2 Section 4.5
  88  * "Address Tiling Function" of the IVB PRM for an in-depth explanation of
  89  * the hardware tiling format.
  90  */
  91 static nir_ssa_def *
  92 image_address(nir_builder *b, const struct gen_device_info *devinfo,
  93               nir_deref_instr *deref, nir_ssa_def *coord)
  94 {
  95    if (glsl_get_sampler_dim(deref->type) == GLSL_SAMPLER_DIM_1D &&
  96        glsl_sampler_type_is_array(deref->type)) {
  97       /* It's easier if 1D arrays are treated like 2D arrays */
  98       coord = nir_vec3(b, nir_channel(b, coord, 0),
  99                           nir_imm_int(b, 0),
 100                           nir_channel(b, coord, 1));
 101    } else {
 102       unsigned dims = glsl_get_sampler_coordinate_components(deref->type);
 103       coord = nir_channels(b, coord, (1 << dims) - 1);
 104    }
 105
 106    nir_ssa_def *offset = load_image_param(b, deref, OFFSET);
 107    nir_ssa_def *tiling = load_image_param(b, deref, TILING);
 108    nir_ssa_def *stride = load_image_param(b, deref, STRIDE);
 109
 110    /* Shift the coordinates by the fixed surface offset.  It may be non-zero
 111     * if the image is a single slice of a higher-dimensional surface, or if a
 112     * non-zero mipmap level of the surface is bound to the pipeline.  The
 113     * offset needs to be applied here rather than at surface state set-up time
 114     * because the desired slice-level may start mid-tile, so simply shifting
 115     * the surface base address wouldn't give a well-formed tiled surface in
 116     * the general case.
 117     */
 118    nir_ssa_def *xypos = (coord->num_components == 1) ?
 119                         nir_vec2(b, coord, nir_imm_int(b, 0)) :
 120                         nir_channels(b, coord, 0x3);
 121    xypos = nir_iadd(b, xypos, offset);
 122
 123    /* The layout of 3-D textures in memory is sort-of like a tiling
 124     * format.  At each miplevel, the slices are arranged in rows of
 125     * 2^level slices per row.  The slice row is stored in tmp.y and
 126     * the slice within the row is stored in tmp.x.
 127     *
 128     * The layout of 2-D array textures and cubemaps is much simpler:
 129     * Depending on whether the ARYSPC_LOD0 layout is in use it will be
 130     * stored in memory as an array of slices, each one being a 2-D
 131     * arrangement of miplevels, or as a 2D arrangement of miplevels,
 132     * each one being an array of slices.  In either case the separation
 133     * between slices of the same LOD is equal to the qpitch value
 134     * provided as stride.w.
 135     *
 136     * This code can be made to handle either 2D arrays and 3D textures
 137     * by passing in the miplevel as tile.z for 3-D textures and 0 in
 138     * tile.z for 2-D array textures.
 139     *
 140     * See Volume 1 Part 1 of the Gen7 PRM, sections 6.18.4.7 "Surface
 141     * Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion
 142     * of the hardware 3D texture and 2D array layouts.
 143     */
 144    if (coord->num_components > 2) {
 145       /* Decompose z into a major (tmp.y) and a minor (tmp.x)
 146        * index.
 147        */
 148       nir_ssa_def *z = nir_channel(b, coord, 2);
 149       nir_ssa_def *z_x = nir_ubfe(b, z, nir_imm_int(b, 0),
 150                                   nir_channel(b, tiling, 2));
 151       nir_ssa_def *z_y = nir_ushr(b, z, nir_channel(b, tiling, 2));
 152
 153       /* Take into account the horizontal (tmp.x) and vertical (tmp.y)
 154        * slice offset.
 155        */
 156       xypos = nir_iadd(b, xypos, nir_imul(b, nir_vec2(b, z_x, z_y),
 157                                              nir_channels(b, stride, 0xc)));
 158    }
 159
 160    nir_ssa_def *addr;
 161    if (coord->num_components > 1) {
 162       /* Calculate the major/minor x and y indices.  In order to
 163        * accommodate both X and Y tiling, the Y-major tiling format is
 164        * treated as being a bunch of narrow X-tiles placed next to each
 165        * other.  This means that the tile width for Y-tiling is actually
 166        * the width of one sub-column of the Y-major tile where each 4K
 167        * tile has 8 512B sub-columns.
 168        *
 169        * The major Y value is the row of tiles in which the pixel lives.
 170        * The major X value is the tile sub-column in which the pixel
 171        * lives; for X tiling, this is the same as the tile column, for Y
 172        * tiling, each tile has 8 sub-columns.  The minor X and Y indices
 173        * are the position within the sub-column.
 174        */
 175
 176       /* Calculate the minor x and y indices. */
 177       nir_ssa_def *minor = nir_ubfe(b, xypos, nir_imm_int(b, 0),
 178                                        nir_channels(b, tiling, 0x3));
 179       nir_ssa_def *major = nir_ushr(b, xypos, nir_channels(b, tiling, 0x3));
 180
 181       /* Calculate the texel index from the start of the tile row and the
 182        * vertical coordinate of the row.
 183        * Equivalent to:
 184        *   tmp.x = (major.x << tile.y << tile.x) +
 185        *           (minor.y << tile.x) + minor.x
 186        *   tmp.y = major.y << tile.y
 187        */
 188       nir_ssa_def *idx_x, *idx_y;
 189       idx_x = nir_ishl(b, nir_channel(b, major, 0), nir_channel(b, tiling, 1));
 190       idx_x = nir_iadd(b, idx_x, nir_channel(b, minor, 1));
 191       idx_x = nir_ishl(b, idx_x, nir_channel(b, tiling, 0));
 192       idx_x = nir_iadd(b, idx_x, nir_channel(b, minor, 0));
 193       idx_y = nir_ishl(b, nir_channel(b, major, 1), nir_channel(b, tiling, 1));
 194
 195       /* Add it to the start of the tile row. */
 196       nir_ssa_def *idx;
 197       idx = nir_imul(b, idx_y, nir_channel(b, stride, 1));
 198       idx = nir_iadd(b, idx, idx_x);
 199
 200       /* Multiply by the Bpp value. */
 201       addr = nir_imul(b, idx, nir_channel(b, stride, 0));
 202
 203       if (devinfo->gen < 8 && !devinfo->is_baytrail) {
 204          /* Take into account the two dynamically specified shifts.  Both are
 205           * used to implement swizzling of X-tiled surfaces.  For Y-tiled
 206           * surfaces only one bit needs to be XOR-ed with bit 6 of the memory
 207           * address, so a swz value of 0xff (actually interpreted as 31 by the
 208           * hardware) will be provided to cause the relevant bit of tmp.y to
 209           * be zero and turn the first XOR into the identity.  For linear
 210           * surfaces or platforms lacking address swizzling both shifts will
 211           * be 0xff causing the relevant bits of both tmp.x and .y to be zero,
 212           * what effectively disables swizzling.
 213           */
 214          nir_ssa_def *swizzle = load_image_param(b, deref, SWIZZLING);
 215          nir_ssa_def *shift0 = nir_ushr(b, addr, nir_channel(b, swizzle, 0));
 216          nir_ssa_def *shift1 = nir_ushr(b, addr, nir_channel(b, swizzle, 1));
 217
 218          /* XOR tmp.x and tmp.y with bit 6 of the memory address. */
 219          nir_ssa_def *bit = nir_iand(b, nir_ixor(b, shift0, shift1),
 220                                         nir_imm_int(b, 1 << 6));
 221          addr = nir_ixor(b, addr, bit);
 222       }
 223    } else {
 224       /* Multiply by the Bpp/stride value.  Note that the addr.y may be
 225        * non-zero even if the image is one-dimensional because a vertical
 226        * offset may have been applied above to select a non-zero slice or
 227        * level of a higher-dimensional texture.
 228        */
 229       nir_ssa_def *idx;
 230       idx = nir_imul(b, nir_channel(b, xypos, 1), nir_channel(b, stride, 1));
 231       idx = nir_iadd(b, nir_channel(b, xypos, 0), idx);
 232       addr = nir_imul(b, idx, nir_channel(b, stride, 0));
 233    }
 234
 235    return addr;
 236 }
 237
 238 struct format_info {
 239    const struct isl_format_layout *fmtl;
 240    unsigned chans;
 241    unsigned bits[4];
 242 };
 243
 244 static struct format_info
 245 get_format_info(enum isl_format fmt)
 246 {
 247    const struct isl_format_layout *fmtl = isl_format_get_layout(fmt);
 248
 249    return (struct format_info) {
 250       .fmtl = fmtl,
 251       .chans = isl_format_get_num_channels(fmt),
 252       .bits = {
 253          fmtl->channels.r.bits,
 254          fmtl->channels.g.bits,
 255          fmtl->channels.b.bits,
 256          fmtl->channels.a.bits
 257       },
 258    };
 259 }
 260
 261 static nir_ssa_def *
 262 convert_color_for_load(nir_builder *b, const struct gen_device_info *devinfo,
 263                        nir_ssa_def *color,
 264                        enum isl_format image_fmt, enum isl_format lower_fmt,
 265                        unsigned dest_components)
 266 {
 267    if (image_fmt == lower_fmt)
 268       goto expand_vec;
 269
 270    if (image_fmt == ISL_FORMAT_R11G11B10_FLOAT) {
 271       assert(lower_fmt == ISL_FORMAT_R32_UINT);
 272       color = nir_format_unpack_11f11f10f(b, color);
 273       goto expand_vec;
 274    }
 275
 276    struct format_info image = get_format_info(image_fmt);
 277    struct format_info lower = get_format_info(lower_fmt);
 278
 279    const bool needs_sign_extension =
 280       isl_format_has_snorm_channel(image_fmt) ||
 281       isl_format_has_sint_channel(image_fmt);
 282
 283    /* We only check the red channel to detect if we need to pack/unpack */
 284    assert(image.bits[0] != lower.bits[0] ||
 285           memcmp(image.bits, lower.bits, sizeof(image.bits)) == 0);
 286
 287    if (image.bits[0] != lower.bits[0] && lower_fmt == ISL_FORMAT_R32_UINT) {
 288       if (needs_sign_extension)
 289          color = nir_format_unpack_sint(b, color, image.bits, image.chans);
 290       else
 291          color = nir_format_unpack_uint(b, color, image.bits, image.chans);
 292    } else {
 293       /* All these formats are homogeneous */
 294       for (unsigned i = 1; i < image.chans; i++)
 295          assert(image.bits[i] == image.bits[0]);
 296
 297       /* On IVB, we rely on the undocumented behavior that typed reads from
 298        * surfaces of the unsupported R8 and R16 formats return useful data in
 299        * their least significant bits.  However, the data in the high bits is
 300        * garbage so we have to discard it.
 301        */
 302       if (devinfo->gen == 7 && !devinfo->is_haswell &&
 303           (lower_fmt == ISL_FORMAT_R16_UINT ||
 304            lower_fmt == ISL_FORMAT_R8_UINT))
 305          color = nir_format_mask_uvec(b, color, lower.bits);
 306
 307       if (image.bits[0] != lower.bits[0]) {
 308          color = nir_format_bitcast_uvec_unmasked(b, color, lower.bits[0],
 309                                                   image.bits[0]);
 310       }
 311
 312       if (needs_sign_extension)
 313          color = nir_format_sign_extend_ivec(b, color, image.bits);
 314    }
 315
 316    switch (image.fmtl->channels.r.type) {
 317    case ISL_UNORM:
 318       assert(isl_format_has_uint_channel(lower_fmt));
 319       color = nir_format_unorm_to_float(b, color, image.bits);
 320       break;
 321
 322    case ISL_SNORM:
 323       assert(isl_format_has_uint_channel(lower_fmt));
 324       color = nir_format_snorm_to_float(b, color, image.bits);
 325       break;
 326
 327    case ISL_SFLOAT:
 328       if (image.bits[0] == 16)
 329          color = nir_unpack_half_2x16_split_x(b, color);
 330       break;
 331
 332    case ISL_UINT:
 333    case ISL_SINT:
 334       break;
 335
 336    default:
 337       unreachable("Invalid image channel type");
 338    }
 339
 340 expand_vec:
 341    assert(dest_components == 1 || dest_components == 4);
 342    assert(color->num_components <= dest_components);
 343    if (color->num_components == dest_components)
 344       return color;
 345
 346    nir_ssa_def *comps[4];
 347    for (unsigned i = 0; i < color->num_components; i++)
 348       comps[i] = nir_channel(b, color, i);
 349
 350    for (unsigned i = color->num_components; i < 3; i++)
 351       comps[i] = nir_imm_int(b, 0);
 352
 353    if (color->num_components < 4) {
 354       if (isl_format_has_int_channel(image_fmt))
 355          comps[3] = nir_imm_int(b, 1);
 356       else
 357          comps[3] = nir_imm_float(b, 1);
 358    }
 359
 360    return nir_vec(b, comps, dest_components);
 361 }
 362
 363 static bool
 364 lower_image_load_instr(nir_builder *b,
 365                        const struct gen_device_info *devinfo,
 366                        nir_intrinsic_instr *intrin)
 367 {
 368    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
 369    nir_variable *var = nir_deref_instr_get_variable(deref);
 370    const enum isl_format image_fmt =
 371       isl_format_for_pipe_format(var->data.image.format);
 372
 373    if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
 374       const enum isl_format lower_fmt =
 375          isl_lower_storage_image_format(devinfo, image_fmt);
 376       const unsigned dest_components = intrin->num_components;
 377
 378       /* Use an undef to hold the uses of the load while we do the color
 379        * conversion.
 380        */
 381       nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32);
 382       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(placeholder));
 383
 384       intrin->num_components = isl_format_get_num_channels(lower_fmt);
 385       intrin->dest.ssa.num_components = intrin->num_components;
 386
 387       b->cursor = nir_after_instr(&intrin->instr);
 388
 389       nir_ssa_def *color = convert_color_for_load(b, devinfo,
 390                                                   &intrin->dest.ssa,
 391                                                   image_fmt, lower_fmt,
 392                                                   dest_components);
 393
 394       nir_ssa_def_rewrite_uses(placeholder, nir_src_for_ssa(color));
 395       nir_instr_remove(placeholder->parent_instr);
 396    } else {
 397       const struct isl_format_layout *image_fmtl =
 398          isl_format_get_layout(image_fmt);
 399       /* We have a matching typed format for everything 32b and below */
 400       assert(image_fmtl->bpb == 64 || image_fmtl->bpb == 128);
 401       enum isl_format raw_fmt = (image_fmtl->bpb == 64) ?
 402                                 ISL_FORMAT_R32G32_UINT :
 403                                 ISL_FORMAT_R32G32B32A32_UINT;
 404       const unsigned dest_components = intrin->num_components;
 405
 406       b->cursor = nir_instr_remove(&intrin->instr);
 407
 408       nir_ssa_def *coord = intrin->src[1].ssa;
 409
 410       nir_ssa_def *do_load = image_coord_is_in_bounds(b, deref, coord);
 411       if (devinfo->gen == 7 && !devinfo->is_haswell) {
 412          /* Check whether the first stride component (i.e. the Bpp value)
 413           * is greater than four, what on Gen7 indicates that a surface of
 414           * type RAW has been bound for untyped access.  Reading or writing
 415           * to a surface of type other than RAW using untyped surface
 416           * messages causes a hang on IVB and VLV.
 417           */
 418          nir_ssa_def *stride = load_image_param(b, deref, STRIDE);
 419          nir_ssa_def *is_raw =
 420             nir_ilt(b, nir_imm_int(b, 4), nir_channel(b, stride, 0));
 421          do_load = nir_iand(b, do_load, is_raw);
 422       }
 423       nir_push_if(b, do_load);
 424
 425       nir_ssa_def *addr = image_address(b, devinfo, deref, coord);
 426       nir_intrinsic_instr *load =
 427          nir_intrinsic_instr_create(b->shader,
 428                                     nir_intrinsic_image_deref_load_raw_intel);
 429       load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
 430       load->src[1] = nir_src_for_ssa(addr);
 431       load->num_components = image_fmtl->bpb / 32;
 432       nir_ssa_dest_init(&load->instr, &load->dest,
 433                         load->num_components, 32, NULL);
 434       nir_builder_instr_insert(b, &load->instr);
 435
 436       nir_push_else(b, NULL);
 437
 438       nir_ssa_def *zero = nir_imm_zero(b, load->num_components, 32);
 439
 440       nir_pop_if(b, NULL);
 441
 442       nir_ssa_def *value = nir_if_phi(b, &load->dest.ssa, zero);
 443
 444       nir_ssa_def *color = convert_color_for_load(b, devinfo, value,
 445                                                   image_fmt, raw_fmt,
 446                                                   dest_components);
 447
 448       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(color));
 449    }
 450
 451    return true;
 452 }
 453
 454 static nir_ssa_def *
 455 convert_color_for_store(nir_builder *b, const struct gen_device_info *devinfo,
 456                         nir_ssa_def *color,
 457                         enum isl_format image_fmt, enum isl_format lower_fmt)
 458 {
 459    struct format_info image = get_format_info(image_fmt);
 460    struct format_info lower = get_format_info(lower_fmt);
 461
 462    color = nir_channels(b, color, (1 << image.chans) - 1);
 463
 464    if (image_fmt == lower_fmt)
 465       return color;
 466
 467    if (image_fmt == ISL_FORMAT_R11G11B10_FLOAT) {
 468       assert(lower_fmt == ISL_FORMAT_R32_UINT);
 469       return nir_format_pack_11f11f10f(b, color);
 470    }
 471
 472    switch (image.fmtl->channels.r.type) {
 473    case ISL_UNORM:
 474       assert(isl_format_has_uint_channel(lower_fmt));
 475       color = nir_format_float_to_unorm(b, color, image.bits);
 476       break;
 477
 478    case ISL_SNORM:
 479       assert(isl_format_has_uint_channel(lower_fmt));
 480       color = nir_format_float_to_snorm(b, color, image.bits);
 481       break;
 482
 483    case ISL_SFLOAT:
 484       if (image.bits[0] == 16)
 485          color = nir_format_float_to_half(b, color);
 486       break;
 487
 488    case ISL_UINT:
 489       color = nir_format_clamp_uint(b, color, image.bits);
 490       break;
 491
 492    case ISL_SINT:
 493       color = nir_format_clamp_sint(b, color, image.bits);
 494       break;
 495
 496    default:
 497       unreachable("Invalid image channel type");
 498    }
 499
 500    if (image.bits[0] < 32 &&
 501        (isl_format_has_snorm_channel(image_fmt) ||
 502         isl_format_has_sint_channel(image_fmt)))
 503       color = nir_format_mask_uvec(b, color, image.bits);
 504
 505    if (image.bits[0] != lower.bits[0] && lower_fmt == ISL_FORMAT_R32_UINT) {
 506       color = nir_format_pack_uint(b, color, image.bits, image.chans);
 507    } else {
 508       /* All these formats are homogeneous */
 509       for (unsigned i = 1; i < image.chans; i++)
 510          assert(image.bits[i] == image.bits[0]);
 511
 512       if (image.bits[0] != lower.bits[0]) {
 513          color = nir_format_bitcast_uvec_unmasked(b, color, image.bits[0],
 514                                                   lower.bits[0]);
 515       }
 516    }
 517
 518    return color;
 519 }
 520
 521 static bool
 522 lower_image_store_instr(nir_builder *b,
 523                         const struct gen_device_info *devinfo,
 524                         nir_intrinsic_instr *intrin)
 525 {
 526    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
 527    nir_variable *var = nir_deref_instr_get_variable(deref);
 528
 529    /* For write-only surfaces, we trust that the hardware can just do the
 530     * conversion for us.
 531     */
 532    if (var->data.access & ACCESS_NON_READABLE)
 533       return false;
 534
 535    const enum isl_format image_fmt =
 536       isl_format_for_pipe_format(var->data.image.format);
 537
 538    if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
 539       const enum isl_format lower_fmt =
 540          isl_lower_storage_image_format(devinfo, image_fmt);
 541
 542       /* Color conversion goes before the store */
 543       b->cursor = nir_before_instr(&intrin->instr);
 544
 545       nir_ssa_def *color = convert_color_for_store(b, devinfo,
 546                                                    intrin->src[3].ssa,
 547                                                    image_fmt, lower_fmt);
 548       intrin->num_components = isl_format_get_num_channels(lower_fmt);
 549       nir_instr_rewrite_src(&intrin->instr, &intrin->src[3],
 550                             nir_src_for_ssa(color));
 551    } else {
 552       const struct isl_format_layout *image_fmtl =
 553          isl_format_get_layout(image_fmt);
 554       /* We have a matching typed format for everything 32b and below */
 555       assert(image_fmtl->bpb == 64 || image_fmtl->bpb == 128);
 556       enum isl_format raw_fmt = (image_fmtl->bpb == 64) ?
 557                                 ISL_FORMAT_R32G32_UINT :
 558                                 ISL_FORMAT_R32G32B32A32_UINT;
 559
 560       b->cursor = nir_instr_remove(&intrin->instr);
 561
 562       nir_ssa_def *coord = intrin->src[1].ssa;
 563
 564       nir_ssa_def *do_store = image_coord_is_in_bounds(b, deref, coord);
 565       if (devinfo->gen == 7 && !devinfo->is_haswell) {
 566          /* Check whether the first stride component (i.e. the Bpp value)
 567           * is greater than four, what on Gen7 indicates that a surface of
 568           * type RAW has been bound for untyped access.  Reading or writing
 569           * to a surface of type other than RAW using untyped surface
 570           * messages causes a hang on IVB and VLV.
 571           */
 572          nir_ssa_def *stride = load_image_param(b, deref, STRIDE);
 573          nir_ssa_def *is_raw =
 574             nir_ilt(b, nir_imm_int(b, 4), nir_channel(b, stride, 0));
 575          do_store = nir_iand(b, do_store, is_raw);
 576       }
 577       nir_push_if(b, do_store);
 578
 579       nir_ssa_def *addr = image_address(b, devinfo, deref, coord);
 580       nir_ssa_def *color = convert_color_for_store(b, devinfo,
 581                                                    intrin->src[3].ssa,
 582                                                    image_fmt, raw_fmt);
 583
 584       nir_intrinsic_instr *store =
 585          nir_intrinsic_instr_create(b->shader,
 586                                     nir_intrinsic_image_deref_store_raw_intel);
 587       store->src[0] = nir_src_for_ssa(&deref->dest.ssa);
 588       store->src[1] = nir_src_for_ssa(addr);
 589       store->src[2] = nir_src_for_ssa(color);
 590       store->num_components = image_fmtl->bpb / 32;
 591       nir_builder_instr_insert(b, &store->instr);
 592
 593       nir_pop_if(b, NULL);
 594    }
 595
 596    return true;
 597 }
 598
 599 static bool
 600 lower_image_atomic_instr(nir_builder *b,
 601                          const struct gen_device_info *devinfo,
 602                          nir_intrinsic_instr *intrin)
 603 {
 604    if (devinfo->is_haswell || devinfo->gen >= 8)
 605       return false;
 606
 607    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
 608
 609    b->cursor = nir_instr_remove(&intrin->instr);
 610
 611    /* Use an undef to hold the uses of the load conversion. */
 612    nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32);
 613    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(placeholder));
 614
 615    /* Check the first component of the size field to find out if the
 616     * image is bound.  Necessary on IVB for typed atomics because
 617     * they don't seem to respect null surfaces and will happily
 618     * corrupt or read random memory when no image is bound.
 619     */
 620    nir_ssa_def *size = load_image_param(b, deref, SIZE);
 621    nir_ssa_def *zero = nir_imm_int(b, 0);
 622    nir_push_if(b, nir_ine(b, nir_channel(b, size, 0), zero));
 623
 624    nir_builder_instr_insert(b, &intrin->instr);
 625
 626    nir_pop_if(b, NULL);
 627
 628    nir_ssa_def *result = nir_if_phi(b, &intrin->dest.ssa, zero);
 629    nir_ssa_def_rewrite_uses(placeholder, nir_src_for_ssa(result));
 630
 631    return true;
 632 }
 633
 634 static bool
 635 lower_image_size_instr(nir_builder *b,
 636                        const struct gen_device_info *devinfo,
 637                        nir_intrinsic_instr *intrin)
 638 {
 639    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
 640    nir_variable *var = nir_deref_instr_get_variable(deref);
 641
 642    /* For write-only images, we have an actual image surface so we fall back
 643     * and let the back-end emit a TXS for this.
 644     */
 645    if (var->data.access & ACCESS_NON_READABLE)
 646       return false;
 647
 648    /* If we have a matching typed format, then we have an actual image surface
 649     * so we fall back and let the back-end emit a TXS for this.
 650     */
 651    const enum isl_format image_fmt =
 652       isl_format_for_pipe_format(var->data.image.format);
 653    if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt))
 654       return false;
 655
 656    b->cursor = nir_instr_remove(&intrin->instr);
 657
 658    nir_ssa_def *size = load_image_param(b, deref, SIZE);
 659
 660    nir_ssa_def *comps[4] = { NULL, NULL, NULL, NULL };
 661
 662    enum glsl_sampler_dim dim = glsl_get_sampler_dim(deref->type);
 663    unsigned coord_comps = glsl_get_sampler_coordinate_components(deref->type);
 664    for (unsigned c = 0; c < coord_comps; c++) {
 665       if (c == 2 && dim == GLSL_SAMPLER_DIM_CUBE) {
 666          comps[2] = nir_idiv(b, nir_channel(b, size, 2), nir_imm_int(b, 6));
 667       } else {
 668          comps[c] = nir_channel(b, size, c);
 669       }
 670    }
 671
 672    for (unsigned c = coord_comps; c < intrin->dest.ssa.num_components; ++c)
 673       comps[c] = nir_imm_int(b, 1);
 674
 675    nir_ssa_def *vec = nir_vec(b, comps, intrin->dest.ssa.num_components);
 676    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(vec));
 677
 678    return true;
 679 }
 680
 681 bool
 682 brw_nir_lower_image_load_store(nir_shader *shader,
 683                                const struct gen_device_info *devinfo,
 684                                bool *uses_atomic_load_store)
 685 {
 686    bool progress = false;
 687
 688    nir_foreach_function(function, shader) {
 689       if (function->impl == NULL)
 690          continue;
 691
 692       bool impl_progress = false;
 693       nir_foreach_block_safe(block, function->impl) {
 694          nir_builder b;
 695          nir_builder_init(&b, function->impl);
 696
 697          nir_foreach_instr_safe(instr, block) {
 698             if (instr->type != nir_instr_type_intrinsic)
 699                continue;
 700
 701             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 702             switch (intrin->intrinsic) {
 703             case nir_intrinsic_image_deref_load:
 704                if (lower_image_load_instr(&b, devinfo, intrin))
 705                   impl_progress = true;
 706                break;
 707
 708             case nir_intrinsic_image_deref_store:
 709                if (lower_image_store_instr(&b, devinfo, intrin))
 710                   impl_progress = true;
 711                break;
 712
 713             case nir_intrinsic_image_deref_atomic_add:
 714             case nir_intrinsic_image_deref_atomic_imin:
 715             case nir_intrinsic_image_deref_atomic_umin:
 716             case nir_intrinsic_image_deref_atomic_imax:
 717             case nir_intrinsic_image_deref_atomic_umax:
 718             case nir_intrinsic_image_deref_atomic_and:
 719             case nir_intrinsic_image_deref_atomic_or:
 720             case nir_intrinsic_image_deref_atomic_xor:
 721             case nir_intrinsic_image_deref_atomic_exchange:
 722             case nir_intrinsic_image_deref_atomic_comp_swap:
 723                if (uses_atomic_load_store)
 724                   *uses_atomic_load_store = true;
 725                if (lower_image_atomic_instr(&b, devinfo, intrin))
 726                   impl_progress = true;
 727                break;
 728
 729             case nir_intrinsic_image_deref_size:
 730                if (lower_image_size_instr(&b, devinfo, intrin))
 731                   impl_progress = true;
 732                break;
 733
 734             default:
 735                /* Nothing to do */
 736                break;
 737             }
 738          }
 739       }
 740
 741       if (impl_progress) {
 742          progress = true;
 743          nir_metadata_preserve(function->impl, nir_metadata_none);
 744       } else {
 745          nir_metadata_preserve(function->impl, nir_metadata_all);
 746       }
 747    }
 748
 749    return progress;
 750 }