i965/urb: fixes division by zero
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_surface_builder.cpp
index a0bedf26bc9ea2c204dcb375c8eb058ff813554d..fc1fc139dde2fa1d228d5f604773d5671dcdb335 100644 (file)
@@ -21,6 +21,7 @@
  * IN THE SOFTWARE.
  */
 
+#include "isl/isl.h"
 #include "brw_fs_surface_builder.h"
 #include "brw_fs.h"
 
@@ -44,7 +45,7 @@ namespace brw {
              */
             const fs_reg usurface = bld.emit_uniformize(surface);
             const fs_reg srcs[] = {
-               addr, src, usurface, fs_reg(dims), fs_reg(arg)
+               addr, src, usurface, brw_imm_ud(dims), brw_imm_ud(arg)
             };
             const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, rsize);
             fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
@@ -163,14 +164,234 @@ namespace brw {
 }
 
 namespace {
+   namespace image_format_info {
+      /* The higher compiler layers use the GL enums for image formats even if
+       * they come in from SPIR-V or Vulkan.  We need to turn them into an ISL
+       * enum before we can use them.
+       */
+      enum isl_format
+      isl_format_for_gl_format(uint32_t gl_format)
+      {
+         switch (gl_format) {
+         case GL_R8:             return ISL_FORMAT_R8_UNORM;
+         case GL_R8_SNORM:       return ISL_FORMAT_R8_SNORM;
+         case GL_R8UI:           return ISL_FORMAT_R8_UINT;
+         case GL_R8I:            return ISL_FORMAT_R8_SINT;
+         case GL_RG8:            return ISL_FORMAT_R8G8_UNORM;
+         case GL_RG8_SNORM:      return ISL_FORMAT_R8G8_SNORM;
+         case GL_RG8UI:          return ISL_FORMAT_R8G8_UINT;
+         case GL_RG8I:           return ISL_FORMAT_R8G8_SINT;
+         case GL_RGBA8:          return ISL_FORMAT_R8G8B8A8_UNORM;
+         case GL_RGBA8_SNORM:    return ISL_FORMAT_R8G8B8A8_SNORM;
+         case GL_RGBA8UI:        return ISL_FORMAT_R8G8B8A8_UINT;
+         case GL_RGBA8I:         return ISL_FORMAT_R8G8B8A8_SINT;
+         case GL_R11F_G11F_B10F: return ISL_FORMAT_R11G11B10_FLOAT;
+         case GL_RGB10_A2:       return ISL_FORMAT_R10G10B10A2_UNORM;
+         case GL_RGB10_A2UI:     return ISL_FORMAT_R10G10B10A2_UINT;
+         case GL_R16:            return ISL_FORMAT_R16_UNORM;
+         case GL_R16_SNORM:      return ISL_FORMAT_R16_SNORM;
+         case GL_R16F:           return ISL_FORMAT_R16_FLOAT;
+         case GL_R16UI:          return ISL_FORMAT_R16_UINT;
+         case GL_R16I:           return ISL_FORMAT_R16_SINT;
+         case GL_RG16:           return ISL_FORMAT_R16G16_UNORM;
+         case GL_RG16_SNORM:     return ISL_FORMAT_R16G16_SNORM;
+         case GL_RG16F:          return ISL_FORMAT_R16G16_FLOAT;
+         case GL_RG16UI:         return ISL_FORMAT_R16G16_UINT;
+         case GL_RG16I:          return ISL_FORMAT_R16G16_SINT;
+         case GL_RGBA16:         return ISL_FORMAT_R16G16B16A16_UNORM;
+         case GL_RGBA16_SNORM:   return ISL_FORMAT_R16G16B16A16_SNORM;
+         case GL_RGBA16F:        return ISL_FORMAT_R16G16B16A16_FLOAT;
+         case GL_RGBA16UI:       return ISL_FORMAT_R16G16B16A16_UINT;
+         case GL_RGBA16I:        return ISL_FORMAT_R16G16B16A16_SINT;
+         case GL_R32F:           return ISL_FORMAT_R32_FLOAT;
+         case GL_R32UI:          return ISL_FORMAT_R32_UINT;
+         case GL_R32I:           return ISL_FORMAT_R32_SINT;
+         case GL_RG32F:          return ISL_FORMAT_R32G32_FLOAT;
+         case GL_RG32UI:         return ISL_FORMAT_R32G32_UINT;
+         case GL_RG32I:          return ISL_FORMAT_R32G32_SINT;
+         case GL_RGBA32F:        return ISL_FORMAT_R32G32B32A32_FLOAT;
+         case GL_RGBA32UI:       return ISL_FORMAT_R32G32B32A32_UINT;
+         case GL_RGBA32I:        return ISL_FORMAT_R32G32B32A32_SINT;
+         /* GL_NONE is used for images with no declared format (write-only
+          * access, see emit_image_store).
+          */
+         case GL_NONE:           return ISL_FORMAT_UNSUPPORTED;
+         default:
+            assert(!"Invalid image format");
+            return ISL_FORMAT_UNSUPPORTED;
+         }
+      }
+
+      /**
+       * Simple 4-tuple of scalars used to pass around per-color component
+       * values.
+       */
+      struct color_u {
+         /* Broadcast constructor: replicate \p x into all four channels
+          * (defaults to all-zero).
+          */
+         color_u(unsigned x = 0) : r(x), g(x), b(x), a(x)
+         {
+         }
+
+         color_u(unsigned r, unsigned g, unsigned b, unsigned a) :
+            r(r), g(g), b(b), a(a)
+         {
+         }
+
+         /* Indexed channel access: 0 = r, 1 = g, 2 = b, 3 = a. */
+         unsigned
+         operator[](unsigned i) const
+         {
+            const unsigned xs[] = { r, g, b, a };
+            return xs[i];
+         }
+
+         unsigned r, g, b, a;
+      };
+
+      /**
+       * Return the per-channel bitfield widths for a given image format.
+       * A width of zero means the channel is absent from the format (see
+       * e.g. is_homogeneous()).
+       */
+      inline color_u
+      get_bit_widths(isl_format format)
+      {
+         const isl_format_layout *fmtl = isl_format_get_layout(format);
+
+         return color_u(fmtl->channels.r.bits,
+                        fmtl->channels.g.bits,
+                        fmtl->channels.b.bits,
+                        fmtl->channels.a.bits);
+      }
+
+      /**
+       * Return the per-channel bitfield shifts for a given image format.
+       * Shifts are cumulative starting from the least significant bit, so
+       * R is at bit 0 and each subsequent channel follows the previous one.
+       */
+      inline color_u
+      get_bit_shifts(isl_format format)
+      {
+         const color_u widths = get_bit_widths(format);
+         return color_u(0, widths.r, widths.r + widths.g,
+                        widths.r + widths.g + widths.b);
+      }
+
+      /**
+       * Return true if all present components have the same bit width.
+       * Channels with zero width (i.e. absent from the format) are ignored.
+       */
+      inline bool
+      is_homogeneous(isl_format format)
+      {
+         const color_u widths = get_bit_widths(format);
+         return ((widths.g == 0 || widths.g == widths.r) &&
+                 (widths.b == 0 || widths.b == widths.r) &&
+                 (widths.a == 0 || widths.a == widths.r));
+      }
+
+      /**
+       * Return true if the format conversion boils down to a trivial copy:
+       * either all present channels are 32 bits wide (raw copy), or the
+       * hardware's lowered format is the format itself.
+       */
+      inline bool
+      is_conversion_trivial(const brw_device_info *devinfo, isl_format format)
+      {
+         return (get_bit_widths(format).r == 32 && is_homogeneous(format)) ||
+                 format == isl_lower_storage_image_format(devinfo, format);
+      }
+
+      /**
+       * Return true if the hardware natively supports some format with
+       * compatible bitfield layout, but possibly different data types.
+       * Compares the per-channel widths of \p format against those of its
+       * lowered (hardware) counterpart.
+       */
+      inline bool
+      has_supported_bit_layout(const brw_device_info *devinfo,
+                               isl_format format)
+      {
+         const color_u widths = get_bit_widths(format);
+         const color_u lower_widths = get_bit_widths(
+            isl_lower_storage_image_format(devinfo, format));
+
+         return (widths.r == lower_widths.r &&
+                 widths.g == lower_widths.g &&
+                 widths.b == lower_widths.b &&
+                 widths.a == lower_widths.a);
+      }
+
+      /**
+       * Return true if we are required to spread individual components over
+       * several components of the format used by the hardware (RG32 and
+       * friends implemented as RGBA16UI).  Detected by the lowered format
+       * having more channels than the original.
+       */
+      inline bool
+      has_split_bit_layout(const brw_device_info *devinfo, isl_format format)
+      {
+         const isl_format lower_format =
+            isl_lower_storage_image_format(devinfo, format);
+
+         return (isl_format_get_num_channels(format) <
+                 isl_format_get_num_channels(lower_format));
+      }
+
+      /**
+       * Return true if the hardware returns garbage in the unused high bits
+       * of each component.  This may happen on IVB because we rely on the
+       * undocumented behavior that typed reads from surfaces of the
+       * unsupported R8 and R16 formats return useful data in their least
+       * significant bits.
+       */
+      inline bool
+      has_undefined_high_bits(const brw_device_info *devinfo,
+                              isl_format format)
+      {
+         const isl_format lower_format =
+            isl_lower_storage_image_format(devinfo, format);
+
+         /* Gen7 non-Haswell, i.e. Ivybridge/Valleyview. */
+         return (devinfo->gen == 7 && !devinfo->is_haswell &&
+                 (lower_format == ISL_FORMAT_R16_UINT ||
+                  lower_format == ISL_FORMAT_R8_UINT));
+      }
+
+      /**
+       * Return true if the format represents values as signed integers
+       * requiring sign extension when unpacking, i.e. it has an SNORM or
+       * SINT channel.
+       */
+      inline bool
+      needs_sign_extension(isl_format format)
+      {
+         return isl_format_has_snorm_channel(format) ||
+                isl_format_has_sint_channel(format);
+      }
+   }
+
    namespace image_validity {
+      /**
+       * Check whether the bound image is suitable for untyped access.
+       * Returns the predicate mode that subsequent surface operations
+       * should be predicated on.
+       */
+      brw_predicate
+      emit_untyped_image_check(const fs_builder &bld, const fs_reg &image,
+                               brw_predicate pred)
+      {
+         const brw_device_info *devinfo = bld.shader->devinfo;
+         const fs_reg stride = offset(image, bld, BRW_IMAGE_PARAM_STRIDE_OFFSET);
+
+         if (devinfo->gen == 7 && !devinfo->is_haswell) {
+            /* Check whether the first stride component (i.e. the Bpp value)
+             * is greater than four, what on Gen7 indicates that a surface of
+             * type RAW has been bound for untyped access.  Reading or writing
+             * to a surface of type other than RAW using untyped surface
+             * messages causes a hang on IVB and VLV.
+             */
+            set_predicate(pred,
+                          bld.CMP(bld.null_reg_ud(), stride, brw_imm_d(4),
+                                  BRW_CONDITIONAL_G));
+
+            return BRW_PREDICATE_NORMAL;
+         } else {
+            /* More recent generations handle the format mismatch
+             * gracefully.
+             */
+            return pred;
+         }
+      }
+
       /**
        * Check whether there is an image bound at the given index and write
        * the comparison result to f0.0.  Returns an appropriate predication
        * mode to use on subsequent image operations.
        */
       brw_predicate
-      emit_surface_check(const fs_builder &bld, const fs_reg &image)
+      emit_typed_atomic_check(const fs_builder &bld, const fs_reg &image)
       {
          const brw_device_info *devinfo = bld.shader->devinfo;
          const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET);
@@ -183,7 +404,7 @@ namespace {
              */
             bld.CMP(bld.null_reg_ud(),
                     retype(size, BRW_REGISTER_TYPE_UD),
-                    fs_reg(0), BRW_CONDITIONAL_NZ);
+                    brw_imm_d(0), BRW_CONDITIONAL_NZ);
 
             return BRW_PREDICATE_NORMAL;
          } else {
@@ -217,6 +438,59 @@ namespace {
    }
 
    namespace image_coordinates {
+      /**
+       * Return the total number of coordinates needed to address a texel of
+       * the surface, which may be more than the sum of \p surf_dims and \p
+       * arr_dims if padding is required.
+       */
+      unsigned
+      num_image_coordinates(const fs_builder &bld,
+                            unsigned surf_dims, unsigned arr_dims,
+                            isl_format format)
+      {
+         /* HSW in vec4 mode and our software coordinate handling for untyped
+          * reads want the array index to be at the Z component.
+          */
+         const bool array_index_at_z =
+            format != ISL_FORMAT_UNSUPPORTED &&
+            !isl_has_matching_typed_storage_image_format(
+               bld.shader->devinfo, format);
+         /* Only the 1-D array case needs an extra (zero) padding coordinate
+          * to push the array index to the Z component.
+          */
+         const unsigned zero_dims =
+            ((surf_dims == 1 && arr_dims == 1 && array_index_at_z) ? 1 : 0);
+
+         return surf_dims + zero_dims + arr_dims;
+      }
+
+      /**
+       * Transform image coordinates into the form expected by the
+       * implementation.  Returns \p addr unchanged when no padding
+       * coordinate is needed (see num_image_coordinates()).
+       */
+      fs_reg
+      emit_image_coordinates(const fs_builder &bld, const fs_reg &addr,
+                             unsigned surf_dims, unsigned arr_dims,
+                             isl_format format)
+      {
+         const unsigned dims =
+            num_image_coordinates(bld, surf_dims, arr_dims, format);
+
+         if (dims > surf_dims + arr_dims) {
+            assert(surf_dims == 1 && arr_dims == 1 && dims == 3);
+            /* The array index is required to be passed in as the Z component,
+             * insert a zero at the Y component to shift it to the right
+             * position.
+             *
+             * FINISHME: Factor out this frequently recurring pattern into a
+             * helper function.
+             */
+            const fs_reg srcs[] = { addr, brw_imm_d(0), offset(addr, bld, 1) };
+            const fs_reg dst = bld.vgrf(addr.type, dims);
+            bld.LOAD_PAYLOAD(dst, srcs, dims, 0);
+            return dst;
+         } else {
+            return addr;
+         }
+      }
+
       /**
        * Calculate the offset in memory of the texel given by \p coord.
        *
@@ -258,7 +532,7 @@ namespace {
             bld.ADD(offset(addr, bld, c), offset(off, bld, c),
                     (c < dims ?
                      offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, c) :
-                     fs_reg(0)));
+                     fs_reg(brw_imm_d(0))));
 
          /* The layout of 3-D textures in memory is sort-of like a tiling
           * format.  At each miplevel, the slices are arranged in rows of
@@ -285,7 +559,7 @@ namespace {
             /* Decompose z into a major (tmp.y) and a minor (tmp.x)
              * index.
              */
-            bld.BFE(offset(tmp, bld, 0), offset(tile, bld, 2), fs_reg(0),
+            bld.BFE(offset(tmp, bld, 0), offset(tile, bld, 2), brw_imm_d(0),
                     offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2));
             bld.SHR(offset(tmp, bld, 1),
                     offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2),
@@ -319,7 +593,7 @@ namespace {
             for (unsigned c = 0; c < 2; ++c) {
                /* Calculate the minor x and y indices. */
                bld.BFE(offset(minor, bld, c), offset(tile, bld, c),
-                       fs_reg(0), offset(addr, bld, c));
+                       brw_imm_d(0), offset(addr, bld, c));
 
                /* Calculate the major x and y indices. */
                bld.SHR(offset(major, bld, c),
@@ -365,7 +639,7 @@ namespace {
 
                /* XOR tmp.x and tmp.y with bit 6 of the memory address. */
                bld.XOR(tmp, tmp, offset(tmp, bld, 1));
-               bld.AND(tmp, tmp, fs_reg(1 << 6));
+               bld.AND(tmp, tmp, brw_imm_d(1 << 6));
                bld.XOR(dst, dst, tmp);
             }
 
@@ -384,4 +658,537 @@ namespace {
          return dst;
       }
    }
+
+   namespace image_format_conversion {
+      using image_format_info::color_u;
+
+      namespace {
+         /**
+          * Maximum representable value in an unsigned integer with the given
+          * number of bits.
+          *
+          * NOTE(review): (1 << n) is undefined for n >= 32 — callers appear
+          * to avoid that (32-bit homogeneous formats take the trivial
+          * conversion path instead); TODO confirm.
+          */
+         inline unsigned
+         scale(unsigned n)
+         {
+            return (1 << n) - 1;
+         }
+      }
+
+      /**
+       * Pack the vector \p src in a bitfield given the per-component bit
+       * shifts and widths.  Note that bitfield components are not allowed to
+       * cross 32-bit boundaries.
+       */
+      fs_reg
+      emit_pack(const fs_builder &bld, const fs_reg &src,
+                const color_u &shifts, const color_u &widths)
+      {
+         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
+         /* Tracks which dwords of dst have been written, so the first write
+          * to a dword uses MOV rather than OR-ing into uninitialized data.
+          */
+         bool seen[4] = {};
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (widths[c]) {
+               const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
+
+               /* Shift each component left to the correct bitfield position. */
+               bld.SHL(tmp, offset(src, bld, c), brw_imm_ud(shifts[c] % 32));
+
+               /* Add everything up. */
+               if (seen[shifts[c] / 32]) {
+                  bld.OR(offset(dst, bld, shifts[c] / 32),
+                         offset(dst, bld, shifts[c] / 32), tmp);
+               } else {
+                  bld.MOV(offset(dst, bld, shifts[c] / 32), tmp);
+                  seen[shifts[c] / 32] = true;
+               }
+            }
+         }
+
+         return dst;
+      }
+
+      /**
+       * Unpack a vector from the bitfield \p src given the per-component bit
+       * shifts and widths.  Note that bitfield components are not allowed to
+       * cross 32-bit boundaries.  The SHL/ASR pair positions each field at
+       * the top of the dword and shifts it back down, so a signed \p src
+       * type gets sign extension for free.
+       */
+      fs_reg
+      emit_unpack(const fs_builder &bld, const fs_reg &src,
+                  const color_u &shifts, const color_u &widths)
+      {
+         const fs_reg dst = bld.vgrf(src.type, 4);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (widths[c]) {
+               /* Shift left to discard the most significant bits. */
+               bld.SHL(offset(dst, bld, c),
+                       offset(src, bld, shifts[c] / 32),
+                       brw_imm_ud(32 - shifts[c] % 32 - widths[c]));
+
+               /* Shift back to the least significant bits using an arithmetic
+                * shift to get sign extension on signed types.
+                */
+               bld.ASR(offset(dst, bld, c),
+                       offset(dst, bld, c), brw_imm_ud(32 - widths[c]));
+            }
+         }
+
+         return dst;
+      }
+
+      /**
+       * Convert an integer vector into another integer vector of the
+       * specified bit widths, properly handling overflow.  Absent channels
+       * (width zero) are skipped.
+       */
+      fs_reg
+      emit_convert_to_integer(const fs_builder &bld, const fs_reg &src,
+                              const color_u &widths, bool is_signed)
+      {
+         /* One bit is reserved for the sign when the target is signed. */
+         const unsigned s = (is_signed ? 1 : 0);
+         const fs_reg dst = bld.vgrf(
+            is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
+         assert(src.type == dst.type);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (widths[c]) {
+               /* Clamp to the maximum value. */
+               bld.emit_minmax(offset(dst, bld, c), offset(src, bld, c),
+                               brw_imm_d((int)scale(widths[c] - s)),
+                               BRW_CONDITIONAL_L);
+
+               /* Clamp to the minimum value. */
+               if (is_signed)
+                  bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
+                                  brw_imm_d(-(int)scale(widths[c] - s) - 1),
+                                  BRW_CONDITIONAL_GE);
+
+               /* Mask off all but the bits we actually want.  Otherwise, if
+                * we pass a negative number into the hardware when it's
+                * expecting something like UINT8, it will happily clamp it to
+                * +255 for us.
+                */
+               if (is_signed && widths[c] < 32)
+                  bld.AND(offset(dst, bld, c), offset(dst, bld, c),
+                          brw_imm_d(scale(widths[c])));
+            }
+         }
+
+         return dst;
+      }
+
+      /**
+       * Convert a normalized fixed-point vector of the specified signedness
+       * and bit widths into a floating point vector.
+       */
+      fs_reg
+      emit_convert_from_scaled(const fs_builder &bld, const fs_reg &src,
+                               const color_u &widths, bool is_signed)
+      {
+         /* One bit is reserved for the sign when the source is signed. */
+         const unsigned s = (is_signed ? 1 : 0);
+         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (widths[c]) {
+               /* Convert to float. */
+               bld.MOV(offset(dst, bld, c), offset(src, bld, c));
+
+               /* Divide by the normalization constants (as a multiply by
+                * the precomputed reciprocal).
+                */
+               bld.MUL(offset(dst, bld, c), offset(dst, bld, c),
+                       brw_imm_f(1.0f / scale(widths[c] - s)));
+
+               /* Clamp to the minimum value. */
+               if (is_signed)
+                  bld.emit_minmax(offset(dst, bld, c),
+                                  offset(dst, bld, c), brw_imm_f(-1.0f),
+                                  BRW_CONDITIONAL_GE);
+            }
+         }
+         return dst;
+      }
+
+      /**
+       * Convert a floating-point vector into a normalized fixed-point vector
+       * of the specified signedness and bit widths.
+       */
+      fs_reg
+      emit_convert_to_scaled(const fs_builder &bld, const fs_reg &src,
+                             const color_u &widths, bool is_signed)
+      {
+         /* One bit is reserved for the sign when the target is signed. */
+         const unsigned s = (is_signed ? 1 : 0);
+         const fs_reg dst = bld.vgrf(
+            is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
+         /* fdst aliases dst reinterpreted as float for the intermediate
+          * floating-point math.
+          */
+         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (widths[c]) {
+               /* Clamp the normalized floating-point argument. */
+               if (is_signed) {
+                  bld.emit_minmax(offset(fdst, bld, c), offset(src, bld, c),
+                                  brw_imm_f(-1.0f), BRW_CONDITIONAL_GE);
+
+                  bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
+                                  brw_imm_f(1.0f), BRW_CONDITIONAL_L);
+               } else {
+                  set_saturate(true, bld.MOV(offset(fdst, bld, c),
+                                             offset(src, bld, c)));
+               }
+
+               /* Multiply by the normalization constants. */
+               bld.MUL(offset(fdst, bld, c), offset(fdst, bld, c),
+                       brw_imm_f((float)scale(widths[c] - s)));
+
+               /* Convert to integer. */
+               bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c));
+               bld.MOV(offset(dst, bld, c), offset(fdst, bld, c));
+
+               /* Mask off all but the bits we actually want.  Otherwise, if
+                * we pass a negative number into the hardware when it's
+                * expecting something like UINT8, it will happily clamp it to
+                * +255 for us.
+                */
+               if (is_signed && widths[c] < 32)
+                  bld.AND(offset(dst, bld, c), offset(dst, bld, c),
+                          brw_imm_d(scale(widths[c])));
+            }
+         }
+
+         return dst;
+      }
+
+      /**
+       * Convert a floating point vector of the specified bit widths into a
+       * 32-bit floating point vector.  Uses the hardware's half-float
+       * conversion; 10- and 11-bit floats are first widened to 15 bits
+       * (see below).
+       */
+      fs_reg
+      emit_convert_from_float(const fs_builder &bld, const fs_reg &src,
+                              const color_u &widths)
+      {
+         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
+         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (widths[c]) {
+               bld.MOV(offset(dst, bld, c), offset(src, bld, c));
+
+               /* Extend 10-bit and 11-bit floating point numbers to 15 bits.
+                * This works because they have a 5-bit exponent just like the
+                * 16-bit floating point format, and they have no sign bit.
+                */
+               if (widths[c] < 16)
+                  bld.SHL(offset(dst, bld, c),
+                          offset(dst, bld, c), brw_imm_ud(15 - widths[c]));
+
+               /* Convert to 32-bit floating point. */
+               bld.F16TO32(offset(fdst, bld, c), offset(dst, bld, c));
+            }
+         }
+
+         return fdst;
+      }
+
+      /**
+       * Convert a vector into a floating point vector of the specified bit
+       * widths.
+       */
+      fs_reg
+      emit_convert_to_float(const fs_builder &bld, const fs_reg &src,
+                            const color_u &widths)
+      {
+         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
+         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (widths[c]) {
+               bld.MOV(offset(fdst, bld, c), offset(src, bld, c));
+
+               /* Clamp negative values to zero: the 10-bit and 11-bit
+                * floating point formats have no sign bit (see below).
+                */
+               if (widths[c] < 16)
+                  bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
+                                  brw_imm_f(0.0f), BRW_CONDITIONAL_GE);
+
+               /* Convert to 16-bit floating-point. */
+               bld.F32TO16(offset(dst, bld, c), offset(fdst, bld, c));
+
+               /* Discard the least significant bits to get floating point
+                * numbers of the requested width.  This works because the
+                * 10-bit and 11-bit floating point formats have a 5-bit
+                * exponent just like the 16-bit format, and they have no sign
+                * bit.
+                */
+               if (widths[c] < 16)
+                  bld.SHR(offset(dst, bld, c), offset(dst, bld, c),
+                          brw_imm_ud(15 - widths[c]));
+            }
+         }
+
+         return dst;
+      }
+
+      /**
+       * Fill missing components of a vector with 0, 0, 0, 1.  Channels with
+       * zero width are considered missing and get the corresponding default
+       * value; present channels are copied through unchanged.
+       */
+      fs_reg
+      emit_pad(const fs_builder &bld, const fs_reg &src,
+               const color_u &widths)
+      {
+         const fs_reg dst = bld.vgrf(src.type, 4);
+         /* Default texel values for absent r, g, b and a respectively. */
+         const unsigned pad[] = { 0, 0, 0, 1 };
+
+         for (unsigned c = 0; c < 4; ++c)
+            bld.MOV(offset(dst, bld, c),
+                    widths[c] ? offset(src, bld, c)
+                              : fs_reg(brw_imm_ud(pad[c])));
+
+         return dst;
+      }
+   }
+}
+
+namespace brw {
+   namespace image_access {
+      /**
+       * Load a vector from a surface of the given format and dimensionality
+       * at the given coordinates.  \p surf_dims and \p arr_dims give the
+       * number of non-array and array coordinates of the image respectively.
+       * \p gl_format is the GL enum of the image format, translated to ISL
+       * via isl_format_for_gl_format().
+       */
+      fs_reg
+      emit_image_load(const fs_builder &bld,
+                      const fs_reg &image, const fs_reg &addr,
+                      unsigned surf_dims, unsigned arr_dims,
+                      unsigned gl_format)
+      {
+         using namespace image_format_info;
+         using namespace image_format_conversion;
+         using namespace image_validity;
+         using namespace image_coordinates;
+         using namespace surface_access;
+         const brw_device_info *devinfo = bld.shader->devinfo;
+         const isl_format format = isl_format_for_gl_format(gl_format);
+         const isl_format lower_format =
+            isl_lower_storage_image_format(devinfo, format);
+         fs_reg tmp;
+
+         /* Transform the image coordinates into actual surface coordinates. */
+         const fs_reg saddr =
+            emit_image_coordinates(bld, addr, surf_dims, arr_dims, format);
+         const unsigned dims =
+            num_image_coordinates(bld, surf_dims, arr_dims, format);
+
+         if (isl_has_matching_typed_storage_image_format(devinfo, format)) {
+            /* Hopefully we get here most of the time... */
+            tmp = emit_typed_read(bld, image, saddr, dims,
+                                  isl_format_get_num_channels(lower_format));
+         } else {
+            /* Untyped surface reads return 32 bits of the surface per
+             * component, without any sort of unpacking or type conversion,
+             */
+            const unsigned size = isl_format_get_layout(format)->bs / 4;
+            /* they don't properly handle out of bounds access, so we have to
+             * check manually if the coordinates are valid and predicate the
+             * surface read on the result,
+             */
+            const brw_predicate pred =
+               emit_untyped_image_check(bld, image,
+                                        emit_bounds_check(bld, image,
+                                                          saddr, dims));
+
+            /* and they don't know about surface coordinates, we need to
+             * convert them to a raw memory offset.
+             */
+            const fs_reg laddr = emit_address_calculation(bld, image, saddr, dims);
+
+            tmp = emit_untyped_read(bld, image, laddr, 1, size, pred);
+
+            /* An out of bounds surface access should give zero as result. */
+            for (unsigned c = 0; c < size; ++c)
+               set_predicate(pred, bld.SEL(offset(tmp, bld, c),
+                                           offset(tmp, bld, c), brw_imm_d(0)));
+         }
+
+         /* Set the register type to D instead of UD if the data type is
+          * represented as a signed integer in memory so that sign extension
+          * is handled correctly by unpack.
+          */
+         if (needs_sign_extension(format))
+            tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+
+         if (!has_supported_bit_layout(devinfo, format)) {
+            /* Unpack individual vector components from the bitfield if the
+             * hardware is unable to do it for us.
+             */
+            if (has_split_bit_layout(devinfo, format))
+               tmp = emit_pack(bld, tmp, get_bit_shifts(lower_format),
+                               get_bit_widths(lower_format));
+            else
+               tmp = emit_unpack(bld, tmp, get_bit_shifts(format),
+                                 get_bit_widths(format));
+
+         } else if ((needs_sign_extension(format) &&
+                     !is_conversion_trivial(devinfo, format)) ||
+                    has_undefined_high_bits(devinfo, format)) {
+            /* Perform a trivial unpack even though the bit layout matches in
+             * order to get the most significant bits of each component
+             * initialized properly.
+             */
+            tmp = emit_unpack(bld, tmp, color_u(0, 32, 64, 96),
+                              get_bit_widths(format));
+         }
+
+         if (!isl_format_has_int_channel(format)) {
+            if (is_conversion_trivial(devinfo, format)) {
+               /* Just need to cast the vector to the target type. */
+               tmp = retype(tmp, BRW_REGISTER_TYPE_F);
+            } else {
+               /* Do the right sort of type conversion to float. */
+               if (isl_format_has_float_channel(format))
+                  tmp = emit_convert_from_float(
+                     bld, tmp, get_bit_widths(format));
+               else
+                  tmp = emit_convert_from_scaled(
+                     bld, tmp, get_bit_widths(format),
+                     isl_format_has_snorm_channel(format));
+            }
+         }
+
+         /* Initialize missing components of the result. */
+         return emit_pad(bld, tmp, get_bit_widths(format));
+      }
+
+      /**
+       * Store a vector in a surface of the given format and dimensionality at
+       * the given coordinates.  \p surf_dims and \p arr_dims give the number
+       * of non-array and array coordinates of the image respectively.  A
+       * \p gl_format of GL_NONE means the format is unknown, which implies
+       * write-only access; no data conversion or manual packing is performed
+       * in that case.
+       */
+      void
+      emit_image_store(const fs_builder &bld, const fs_reg &image,
+                       const fs_reg &addr, const fs_reg &src,
+                       unsigned surf_dims, unsigned arr_dims,
+                       unsigned gl_format)
+      {
+         using namespace image_format_info;
+         using namespace image_format_conversion;
+         using namespace image_validity;
+         using namespace image_coordinates;
+         using namespace surface_access;
+         const isl_format format = isl_format_for_gl_format(gl_format);
+         const brw_device_info *devinfo = bld.shader->devinfo;
+
+         /* Transform the image coordinates into actual surface coordinates. */
+         const fs_reg saddr =
+            emit_image_coordinates(bld, addr, surf_dims, arr_dims, format);
+         const unsigned dims =
+            num_image_coordinates(bld, surf_dims, arr_dims, format);
+
+         if (gl_format == GL_NONE) {
+            /* We don't know what the format is, but that's fine because it
+             * implies write-only access, and typed surface writes are always
+             * able to take care of type conversion and packing for us.
+             */
+            emit_typed_write(bld, image, saddr, src, dims, 4);
+
+         } else {
+            /* Format the hardware message will actually use.  NOTE(review):
+             * presumably a directly-supported format of the same size as
+             * \p format -- confirm against isl_lower_storage_image_format().
+             */
+            const isl_format lower_format =
+               isl_lower_storage_image_format(devinfo, format);
+            fs_reg tmp = src;
+
+            if (!is_conversion_trivial(devinfo, format)) {
+               /* Do the right sort of type conversion: float, integer or
+                * [s|u]norm-scaled depending on the format's channel type.
+                */
+               if (isl_format_has_float_channel(format))
+                  tmp = emit_convert_to_float(bld, tmp, get_bit_widths(format));
+
+               else if (isl_format_has_int_channel(format))
+                  tmp = emit_convert_to_integer(bld, tmp, get_bit_widths(format),
+                                                isl_format_has_sint_channel(format));
+
+               else
+                  tmp = emit_convert_to_scaled(bld, tmp, get_bit_widths(format),
+                                               isl_format_has_snorm_channel(format));
+            }
+
+            /* We're down to bit manipulation at this point. */
+            tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+
+            if (!has_supported_bit_layout(devinfo, format)) {
+               /* Pack the vector components into a bitfield if the hardware
+                * is unable to do it for us.  For "split" layouts the shifts
+                * and widths of the lowered format are used instead of the
+                * requested format's.
+                */
+               if (has_split_bit_layout(devinfo, format))
+                  tmp = emit_unpack(bld, tmp, get_bit_shifts(lower_format),
+                                    get_bit_widths(lower_format));
+
+               else
+                  tmp = emit_pack(bld, tmp, get_bit_shifts(format),
+                                  get_bit_widths(format));
+            }
+
+            if (isl_has_matching_typed_storage_image_format(devinfo, format)) {
+               /* Hopefully we get here most of the time... */
+               emit_typed_write(bld, image, saddr, tmp, dims,
+                                isl_format_get_num_channels(lower_format));
+
+            } else {
+               /* Untyped surface writes store 32 bits of the surface per
+                * component, without any sort of packing or type conversion,
+                * NOTE(review): assumes ->bs is the surface-element size in
+                * bytes, hence bs / 4 dwords per element -- confirm in isl.
+                */
+               const unsigned size = isl_format_get_layout(format)->bs / 4;
+
+               /* they don't properly handle out of bounds access, so we have
+                * to check manually if the coordinates are valid and predicate
+                * the surface write on the result,
+                */
+               const brw_predicate pred =
+                  emit_untyped_image_check(bld, image,
+                                           emit_bounds_check(bld, image,
+                                                             saddr, dims));
+
+               /* and, phew, they don't know about surface coordinates, we
+                * need to convert them to a raw memory offset.
+                */
+               const fs_reg laddr = emit_address_calculation(
+                  bld, image, saddr, dims);
+
+               emit_untyped_write(bld, image, laddr, tmp, 1, size, pred);
+            }
+         }
+      }
+
+      /**
+       * Perform an atomic read-modify-write operation in a surface of the
+       * given dimensionality at the given coordinates.  \p surf_dims and \p
+       * arr_dims give the number of non-array and array coordinates of the
+       * image respectively.  Main building block of the imageAtomic GLSL
+       * built-ins.  \p rsize gives the number of components of the returned
+       * result (the unbound-surface zero-fill below is skipped when it is
+       * zero) and \p op encodes the atomic operation to perform.  Returns
+       * the result of the atomic, zero-filled if the surface turns out to
+       * be unbound.
+       */
+      fs_reg
+      emit_image_atomic(const fs_builder &bld,
+                        const fs_reg &image, const fs_reg &addr,
+                        const fs_reg &src0, const fs_reg &src1,
+                        unsigned surf_dims, unsigned arr_dims,
+                        unsigned rsize, unsigned op)
+      {
+         using namespace image_validity;
+         using namespace image_coordinates;
+         using namespace surface_access;
+         /* Avoid performing an atomic operation on an unbound surface. */
+         const brw_predicate pred = emit_typed_atomic_check(bld, image);
+
+         /* Transform the image coordinates into actual surface coordinates.
+          * NOTE(review): a fixed R32_UINT format is passed here, presumably
+          * because image atomics only operate on single-component 32-bit
+          * data -- confirm against the callers.
+          */
+         const fs_reg saddr =
+            emit_image_coordinates(bld, addr, surf_dims, arr_dims,
+                                   ISL_FORMAT_R32_UINT);
+         const unsigned dims =
+            num_image_coordinates(bld, surf_dims, arr_dims,
+                                  ISL_FORMAT_R32_UINT);
+
+         /* Thankfully we can do without untyped atomics here. */
+         const fs_reg tmp = emit_typed_atomic(bld, image, saddr, src0, src1,
+                                              dims, rsize, op, pred);
+
+         /* An unbound surface access should give zero as result.  The SEL
+          * is skipped when no result is expected (rsize == 0) or when no
+          * predication was required above (pred is BRW_PREDICATE_NONE,
+          * which is zero).
+          */
+         if (rsize && pred)
+            set_predicate(pred, bld.SEL(tmp, tmp, brw_imm_d(0)));
+
+         /* Hand the result back in the type the caller supplied. */
+         return retype(tmp, src0.type);
+      }
+   }
 }