src/intel/compiler/brw_vec4_surface_builder.cpp

   1 /*
   2  * Copyright © 2013-2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "brw_vec4_surface_builder.h"
  25
  26 using namespace brw;
  27
  28 namespace {
  29    namespace array_utils {
  30       /**
  31        * Copy one every \p src_stride logical components of the argument into
  32        * one every \p dst_stride logical components of the result.
  33        */
  34       static src_reg
  35       emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size,
  36                   unsigned dst_stride, unsigned src_stride)
  37       {
  38          if (src_stride == 1 && dst_stride == 1) {
  39             return src;
  40          } else {
  41             const dst_reg dst = bld.vgrf(src.type,
  42                                          DIV_ROUND_UP(size * dst_stride, 4));
  43
  44             for (unsigned i = 0; i < size; ++i)
  45                bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4),
  46                                  1 << (i * dst_stride % 4)),
  47                        swizzle(offset(src, 8, i * src_stride / 4),
  48                                brw_swizzle_for_mask(1 << (i * src_stride % 4))));
  49
  50             return src_reg(dst);
  51          }
  52       }
  53
  54       /**
  55        * Convert a VEC4 into an array of registers with the layout expected by
  56        * the recipient shared unit.  If \p has_simd4x2 is true the argument is
  57        * left unmodified in SIMD4x2 form, otherwise it will be rearranged into
  58        * a SIMD8 vector.
  59        */
  60       static src_reg
  61       emit_insert(const vec4_builder &bld, const src_reg &src,
  62                   unsigned n, bool has_simd4x2)
  63       {
  64          if (src.file == BAD_FILE || n == 0) {
  65             return src_reg();
  66
  67          } else {
  68             /* Pad unused components with zeroes. */
  69             const unsigned mask = (1 << n) - 1;
  70             const dst_reg tmp = bld.vgrf(src.type);
  71
  72             bld.MOV(writemask(tmp, mask), src);
  73             if (n < 4)
  74                bld.MOV(writemask(tmp, ~mask), brw_imm_d(0));
  75
  76             return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
  77          }
  78       }
  79
  80       /**
  81        * Convert an array of registers back into a VEC4 according to the
  82        * layout expected from some shared unit.  If \p has_simd4x2 is true the
  83        * argument is left unmodified in SIMD4x2 form, otherwise it will be
  84        * rearranged from SIMD8 form.
  85        */
  86       static src_reg
  87       emit_extract(const vec4_builder &bld, const src_reg src,
  88                    unsigned n, bool has_simd4x2)
  89       {
  90          if (src.file == BAD_FILE || n == 0) {
  91             return src_reg();
  92
  93          } else {
  94             return emit_stride(bld, src, n, 1, has_simd4x2 ? 1 : 4);
  95          }
  96       }
  97    }
  98 }
  99
 100 namespace brw {
 101    namespace surface_access {
 102       namespace {
 103          using namespace array_utils;
 104
 105          /**
 106           * Generate a send opcode for a surface message and return the
 107           * result.
 108           */
 109          src_reg
 110          emit_send(const vec4_builder &bld, enum opcode op,
 111                    const src_reg &header,
 112                    const src_reg &addr, unsigned addr_sz,
 113                    const src_reg &src, unsigned src_sz,
 114                    const src_reg &surface,
 115                    unsigned arg, unsigned ret_sz,
 116                    brw_predicate pred = BRW_PREDICATE_NONE)
 117          {
 118             /* Calculate the total number of components of the payload. */
 119             const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1);
 120             const unsigned sz = header_sz + addr_sz + src_sz;
 121
 122             /* Construct the payload. */
 123             const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
 124             unsigned n = 0;
 125
 126             if (header_sz)
 127                bld.exec_all().MOV(offset(payload, 8, n++),
 128                                   retype(header, BRW_REGISTER_TYPE_UD));
 129
 130             for (unsigned i = 0; i < addr_sz; i++)
 131                bld.MOV(offset(payload, 8, n++),
 132                        offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i));
 133
 134             for (unsigned i = 0; i < src_sz; i++)
 135                bld.MOV(offset(payload, 8, n++),
 136                        offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i));
 137
 138             /* Reduce the dynamically uniform surface index to a single
 139              * scalar.
 140              */
 141             const src_reg usurface = bld.emit_uniformize(surface);
 142
 143             /* Emit the message send instruction. */
 144             const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz);
 145             vec4_instruction *inst =
 146                bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg));
 147             inst->mlen = sz;
 148             inst->size_written = ret_sz * REG_SIZE;
 149             inst->header_size = header_sz;
 150             inst->predicate = pred;
 151
 152             return src_reg(dst);
 153          }
 154       }
 155
 156       /**
 157        * Emit an untyped surface read opcode.  \p dims determines the number
 158        * of components of the address and \p size the number of components of
 159        * the returned value.
 160        */
 161       src_reg
 162       emit_untyped_read(const vec4_builder &bld,
 163                         const src_reg &surface, const src_reg &addr,
 164                         unsigned dims, unsigned size,
 165                         brw_predicate pred)
 166       {
 167          return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ, src_reg(),
 168                           emit_insert(bld, addr, dims, true), 1,
 169                           src_reg(), 0,
 170                           surface, size, 1, pred);
 171       }
 172
 173       /**
 174        * Emit an untyped surface write opcode.  \p dims determines the number
 175        * of components of the address and \p size the number of components of
 176        * the argument.
 177        */
 178       void
 179       emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
 180                          const src_reg &addr, const src_reg &src,
 181                          unsigned dims, unsigned size,
 182                          brw_predicate pred)
 183       {
 184          const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
 185                                    bld.shader->devinfo->is_haswell);
 186          emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
 187                    emit_insert(bld, addr, dims, has_simd4x2),
 188                    has_simd4x2 ? 1 : dims,
 189                    emit_insert(bld, src, size, has_simd4x2),
 190                    has_simd4x2 ? 1 : size,
 191                    surface, size, 0, pred);
 192       }
 193
 194       /**
 195        * Emit an untyped surface atomic opcode.  \p dims determines the number
 196        * of components of the address and \p rsize the number of components of
 197        * the returned value (either zero or one).
 198        */
 199       src_reg
 200       emit_untyped_atomic(const vec4_builder &bld,
 201                           const src_reg &surface, const src_reg &addr,
 202                           const src_reg &src0, const src_reg &src1,
 203                           unsigned dims, unsigned rsize, unsigned op,
 204                           brw_predicate pred)
 205       {
 206          const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
 207                                    bld.shader->devinfo->is_haswell);
 208
 209          /* Zip the components of both sources, they are represented as the X
 210           * and Y components of the same vector.
 211           */
 212          const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
 213          const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
 214
 215          if (size >= 1) {
 216             bld.MOV(writemask(srcs, WRITEMASK_X),
 217                     swizzle(src0, BRW_SWIZZLE_XXXX));
 218          }
 219
 220          if (size >= 2) {
 221             bld.MOV(writemask(srcs, WRITEMASK_Y),
 222                     swizzle(src1, BRW_SWIZZLE_XXXX));
 223          }
 224
 225          return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC, src_reg(),
 226                           emit_insert(bld, addr, dims, has_simd4x2),
 227                           has_simd4x2 ? 1 : dims,
 228                           emit_insert(bld, src_reg(srcs), size, has_simd4x2),
 229                           has_simd4x2 && size ? 1 : size,
 230                           surface, op, rsize, pred);
 231       }
 232
 233       namespace {
 234          /**
 235           * Initialize the header present in typed surface messages.
 236           */
 237          src_reg
 238          emit_typed_message_header(const vec4_builder &bld)
 239          {
 240             const vec4_builder ubld = bld.exec_all();
 241             const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);
 242
 243             ubld.MOV(dst, brw_imm_d(0));
 244
 245             if (bld.shader->devinfo->gen == 7 &&
 246                 !bld.shader->devinfo->is_haswell) {
 247                /* The sample mask is used on IVB for the SIMD8 messages that
 248                 * have no SIMD4x2 variant.  We only use the two X channels
 249                 * in that case, mask everything else out.
 250                 */
 251                ubld.MOV(writemask(dst, WRITEMASK_W), brw_imm_d(0x11));
 252             }
 253
 254             return src_reg(dst);
 255          }
 256       }
 257
 258       /**
 259        * Emit a typed surface read opcode.  \p dims determines the number of
 260        * components of the address and \p size the number of components of the
 261        * returned value.
 262        */
 263       src_reg
 264       emit_typed_read(const vec4_builder &bld, const src_reg &surface,
 265                       const src_reg &addr, unsigned dims, unsigned size)
 266       {
 267          const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
 268                                    bld.shader->devinfo->is_haswell);
 269          const src_reg tmp =
 270             emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_READ,
 271                       emit_typed_message_header(bld),
 272                       emit_insert(bld, addr, dims, has_simd4x2),
 273                       has_simd4x2 ? 1 : dims,
 274                       src_reg(), 0,
 275                       surface, size,
 276                       has_simd4x2 ? 1 : size);
 277
 278          return emit_extract(bld, tmp, size, has_simd4x2);
 279       }
 280
 281       /**
 282        * Emit a typed surface write opcode.  \p dims determines the number of
 283        * components of the address and \p size the number of components of the
 284        * argument.
 285        */
 286       void
 287       emit_typed_write(const vec4_builder &bld, const src_reg &surface,
 288                        const src_reg &addr, const src_reg &src,
 289                        unsigned dims, unsigned size)
 290       {
 291          const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
 292                                    bld.shader->devinfo->is_haswell);
 293          emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_WRITE,
 294                    emit_typed_message_header(bld),
 295                    emit_insert(bld, addr, dims, has_simd4x2),
 296                    has_simd4x2 ? 1 : dims,
 297                    emit_insert(bld, src, size, has_simd4x2),
 298                    has_simd4x2 ? 1 : size,
 299                    surface, size, 0);
 300       }
 301
 302       /**
 303        * Emit a typed surface atomic opcode.  \p dims determines the number of
 304        * components of the address and \p rsize the number of components of
 305        * the returned value (either zero or one).
 306        */
 307       src_reg
 308       emit_typed_atomic(const vec4_builder &bld,
 309                         const src_reg &surface, const src_reg &addr,
 310                         const src_reg &src0, const src_reg &src1,
 311                         unsigned dims, unsigned rsize, unsigned op,
 312                         brw_predicate pred)
 313       {
 314          const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
 315                                    bld.shader->devinfo->is_haswell);
 316
 317          /* Zip the components of both sources, they are represented as the X
 318           * and Y components of the same vector.
 319           */
 320          const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
 321          const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
 322
 323          if (size >= 1)
 324             bld.MOV(writemask(srcs, WRITEMASK_X), src0);
 325          if (size >= 2)
 326             bld.MOV(writemask(srcs, WRITEMASK_Y), src1);
 327
 328          return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC,
 329                           emit_typed_message_header(bld),
 330                           emit_insert(bld, addr, dims, has_simd4x2),
 331                           has_simd4x2 ? 1 : dims,
 332                           emit_insert(bld, src_reg(srcs), size, has_simd4x2),
 333                           has_simd4x2 ? 1 : size,
 334                           surface, op, rsize, pred);
 335       }
 336    }
 337 }