nir/spirv: initial handling of OpenCL.std extension opcodes
[mesa.git] / src/compiler/spirv/vtn_alu.c
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <math.h>
#include "vtn_private.h"

/*
 * Normally, a column vector in SPIR-V corresponds to a single NIR SSA
 * definition. But for matrix multiplies, we want to use one routine for
 * multiplying a matrix by a matrix and pretend that vectors are matrices
 * with one column. So we "wrap" these things, and unwrap the result before
 * we send it off.
 */

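/* For example, a vec4 operand gets wrapped as a one-column matrix:
 * dest->elems[0] points at the original vec4, so the multiply loops below
 * can index columns uniformly for vectors and matrices alike.
 */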
static struct vtn_ssa_value *
wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val)
{
   if (val == NULL)
      return NULL;

   if (glsl_type_is_matrix(val->type))
      return val;

   struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value);
   dest->type = val->type;
   dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1);
   dest->elems[0] = val;

   return dest;
}

static struct vtn_ssa_value *
unwrap_matrix(struct vtn_ssa_value *val)
{
   if (glsl_type_is_matrix(val->type))
      return val;

   return val->elems[0];
}

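/* Multiplies two (possibly wrapped) matrices. Transposed sources are used
 * when available: transpose(A) * transpose(B) is computed as transpose(B * A),
 * and a transposed src0 lets us take row-by-column dot products directly.
 */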
static struct vtn_ssa_value *
matrix_multiply(struct vtn_builder *b,
                struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1)
{
   struct vtn_ssa_value *src0 = wrap_matrix(b, _src0);
   struct vtn_ssa_value *src1 = wrap_matrix(b, _src1);
   struct vtn_ssa_value *src0_transpose = wrap_matrix(b, _src0->transposed);
   struct vtn_ssa_value *src1_transpose = wrap_matrix(b, _src1->transposed);

   unsigned src0_rows = glsl_get_vector_elements(src0->type);
   unsigned src0_columns = glsl_get_matrix_columns(src0->type);
   unsigned src1_columns = glsl_get_matrix_columns(src1->type);

   const struct glsl_type *dest_type;
   if (src1_columns > 1) {
      dest_type = glsl_matrix_type(glsl_get_base_type(src0->type),
                                   src0_rows, src1_columns);
   } else {
      dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows);
   }
   struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type);

   dest = wrap_matrix(b, dest);

   bool transpose_result = false;
   if (src0_transpose && src1_transpose) {
      /* transpose(A) * transpose(B) = transpose(B * A) */
      src1 = src0_transpose;
      src0 = src1_transpose;
      src0_transpose = NULL;
      src1_transpose = NULL;
      transpose_result = true;
   }

   if (src0_transpose && !src1_transpose &&
       glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) {
      /* We already have the rows of src0 and the columns of src1 available,
       * so we can just take the dot product of each row with each column to
       * get the result.
       */

      for (unsigned i = 0; i < src1_columns; i++) {
         nir_ssa_def *vec_src[4];
         for (unsigned j = 0; j < src0_rows; j++) {
            vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def,
                                          src1->elems[i]->def);
         }
         dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows);
      }
   } else {
      /* We don't handle the case where src1 is transposed but not src0, since
       * the general case only uses individual components of src1 so the
       * optimizer should chew through the transpose we emitted for src1.
       */

      for (unsigned i = 0; i < src1_columns; i++) {
         /* dest[i] = sum(src0[j] * src1[i][j] for all j) */
         dest->elems[i]->def =
            nir_fmul(&b->nb, src0->elems[0]->def,
                     nir_channel(&b->nb, src1->elems[i]->def, 0));
         for (unsigned j = 1; j < src0_columns; j++) {
            dest->elems[i]->def =
               nir_fadd(&b->nb, dest->elems[i]->def,
                        nir_fmul(&b->nb, src0->elems[j]->def,
                                 nir_channel(&b->nb, src1->elems[i]->def, j)));
         }
      }
   }

   dest = unwrap_matrix(dest);

   if (transpose_result)
      dest = vtn_ssa_transpose(b, dest);

   return dest;
}

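/* Multiplies every column of the matrix by the scalar, using nir_imul for
 * integer base types and nir_fmul for floating-point ones.
 */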
static struct vtn_ssa_value *
mat_times_scalar(struct vtn_builder *b,
                 struct vtn_ssa_value *mat,
                 nir_ssa_def *scalar)
{
   struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type);
   for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) {
      if (glsl_base_type_is_integer(glsl_get_base_type(mat->type)))
         dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar);
      else
         dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar);
   }

   return dest;
}

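/* Handles the ALU opcodes where at least one operand is a matrix. Negate,
 * add and subtract are applied column by column. OpVectorTimesMatrix is
 * lowered as transpose(M) * v, which produces the same components as the
 * row-vector product v * M.
 */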
static void
vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode,
                      struct vtn_value *dest,
                      struct vtn_ssa_value *src0, struct vtn_ssa_value *src1)
{
   switch (opcode) {
   case SpvOpFNegate: {
      dest->ssa = vtn_create_ssa_value(b, src0->type);
      unsigned cols = glsl_get_matrix_columns(src0->type);
      for (unsigned i = 0; i < cols; i++)
         dest->ssa->elems[i]->def = nir_fneg(&b->nb, src0->elems[i]->def);
      break;
   }

   case SpvOpFAdd: {
      dest->ssa = vtn_create_ssa_value(b, src0->type);
      unsigned cols = glsl_get_matrix_columns(src0->type);
      for (unsigned i = 0; i < cols; i++)
         dest->ssa->elems[i]->def =
            nir_fadd(&b->nb, src0->elems[i]->def, src1->elems[i]->def);
      break;
   }

   case SpvOpFSub: {
      dest->ssa = vtn_create_ssa_value(b, src0->type);
      unsigned cols = glsl_get_matrix_columns(src0->type);
      for (unsigned i = 0; i < cols; i++)
         dest->ssa->elems[i]->def =
            nir_fsub(&b->nb, src0->elems[i]->def, src1->elems[i]->def);
      break;
   }

   case SpvOpTranspose:
      dest->ssa = vtn_ssa_transpose(b, src0);
      break;

   case SpvOpMatrixTimesScalar:
      if (src0->transposed) {
         dest->ssa = vtn_ssa_transpose(b, mat_times_scalar(b, src0->transposed,
                                                           src1->def));
      } else {
         dest->ssa = mat_times_scalar(b, src0, src1->def);
      }
      break;

   case SpvOpVectorTimesMatrix:
   case SpvOpMatrixTimesVector:
   case SpvOpMatrixTimesMatrix:
      if (opcode == SpvOpVectorTimesMatrix) {
         dest->ssa = matrix_multiply(b, vtn_ssa_transpose(b, src1), src0);
      } else {
         dest->ssa = matrix_multiply(b, src0, src1);
      }
      break;

   default: vtn_fail("unknown matrix opcode");
   }
}

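/* Maps a SPIR-V ALU opcode onto the corresponding nir_op. *swap is set when
 * the caller has to exchange the first two sources (greater-than and
 * less-than-or-equal are expressed through their flipped counterparts).
 */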
nir_op
vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b,
                                SpvOp opcode, bool *swap,
                                unsigned src_bit_size, unsigned dst_bit_size)
{
   /* Indicates that the first two arguments should be swapped. This is
    * used for implementing greater-than and less-than-or-equal.
    */
   *swap = false;

   switch (opcode) {
   case SpvOpSNegate:            return nir_op_ineg;
   case SpvOpFNegate:            return nir_op_fneg;
   case SpvOpNot:                return nir_op_inot;
   case SpvOpIAdd:               return nir_op_iadd;
   case SpvOpFAdd:               return nir_op_fadd;
   case SpvOpISub:               return nir_op_isub;
   case SpvOpFSub:               return nir_op_fsub;
   case SpvOpIMul:               return nir_op_imul;
   case SpvOpFMul:               return nir_op_fmul;
   case SpvOpUDiv:               return nir_op_udiv;
   case SpvOpSDiv:               return nir_op_idiv;
   case SpvOpFDiv:               return nir_op_fdiv;
   case SpvOpUMod:               return nir_op_umod;
   case SpvOpSMod:               return nir_op_imod;
   case SpvOpFMod:               return nir_op_fmod;
   case SpvOpSRem:               return nir_op_irem;
   case SpvOpFRem:               return nir_op_frem;

   case SpvOpShiftRightLogical:     return nir_op_ushr;
   case SpvOpShiftRightArithmetic:  return nir_op_ishr;
   case SpvOpShiftLeftLogical:      return nir_op_ishl;
   case SpvOpLogicalOr:             return nir_op_ior;
   case SpvOpLogicalEqual:          return nir_op_ieq;
   case SpvOpLogicalNotEqual:       return nir_op_ine;
   case SpvOpLogicalAnd:            return nir_op_iand;
   case SpvOpLogicalNot:            return nir_op_inot;
   case SpvOpBitwiseOr:             return nir_op_ior;
   case SpvOpBitwiseXor:            return nir_op_ixor;
   case SpvOpBitwiseAnd:            return nir_op_iand;
   case SpvOpSelect:                return nir_op_bcsel;
   case SpvOpIEqual:                return nir_op_ieq;

   case SpvOpBitFieldInsert:        return nir_op_bitfield_insert;
   case SpvOpBitFieldSExtract:      return nir_op_ibitfield_extract;
   case SpvOpBitFieldUExtract:      return nir_op_ubitfield_extract;
   case SpvOpBitReverse:            return nir_op_bitfield_reverse;
   case SpvOpBitCount:              return nir_op_bit_count;

   /* The ordered / unordered comparison operators need a special
    * implementation on top of the plain comparison op returned here, since
    * they also have to check whether the operands are ordered.
    */
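   /* For example, vtn_handle_alu() builds SpvOpFUnordLessThan as
    *    flt(a, b) || (a != a) || (b != b)
    * and SpvOpFOrdNotEqual as
    *    fne(a, b) && (a == a) && (b == b)
    * on top of the comparison op returned from this switch.
    */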
   case SpvOpFOrdEqual:                            return nir_op_feq;
   case SpvOpFUnordEqual:                          return nir_op_feq;
   case SpvOpINotEqual:                            return nir_op_ine;
   case SpvOpFOrdNotEqual:                         return nir_op_fne;
   case SpvOpFUnordNotEqual:                       return nir_op_fne;
   case SpvOpULessThan:                            return nir_op_ult;
   case SpvOpSLessThan:                            return nir_op_ilt;
   case SpvOpFOrdLessThan:                         return nir_op_flt;
   case SpvOpFUnordLessThan:                       return nir_op_flt;
   case SpvOpUGreaterThan:          *swap = true;  return nir_op_ult;
   case SpvOpSGreaterThan:          *swap = true;  return nir_op_ilt;
   case SpvOpFOrdGreaterThan:       *swap = true;  return nir_op_flt;
   case SpvOpFUnordGreaterThan:     *swap = true;  return nir_op_flt;
   case SpvOpULessThanEqual:        *swap = true;  return nir_op_uge;
   case SpvOpSLessThanEqual:        *swap = true;  return nir_op_ige;
   case SpvOpFOrdLessThanEqual:     *swap = true;  return nir_op_fge;
   case SpvOpFUnordLessThanEqual:   *swap = true;  return nir_op_fge;
   case SpvOpUGreaterThanEqual:                    return nir_op_uge;
   case SpvOpSGreaterThanEqual:                    return nir_op_ige;
   case SpvOpFOrdGreaterThanEqual:                 return nir_op_fge;
   case SpvOpFUnordGreaterThanEqual:               return nir_op_fge;

   /* Conversions: */
   case SpvOpQuantizeToF16:         return nir_op_fquantize2f16;
   case SpvOpUConvert:
   case SpvOpConvertFToU:
   case SpvOpConvertFToS:
   case SpvOpConvertSToF:
   case SpvOpConvertUToF:
   case SpvOpSConvert:
   case SpvOpFConvert: {
      nir_alu_type src_type;
      nir_alu_type dst_type;

      switch (opcode) {
      case SpvOpConvertFToS:
         src_type = nir_type_float;
         dst_type = nir_type_int;
         break;
      case SpvOpConvertFToU:
         src_type = nir_type_float;
         dst_type = nir_type_uint;
         break;
      case SpvOpFConvert:
         src_type = dst_type = nir_type_float;
         break;
      case SpvOpConvertSToF:
         src_type = nir_type_int;
         dst_type = nir_type_float;
         break;
      case SpvOpSConvert:
         src_type = dst_type = nir_type_int;
         break;
      case SpvOpConvertUToF:
         src_type = nir_type_uint;
         dst_type = nir_type_float;
         break;
      case SpvOpUConvert:
         src_type = dst_type = nir_type_uint;
         break;
      default:
         unreachable("Invalid opcode");
      }
      src_type |= src_bit_size;
      dst_type |= dst_bit_size;
      return nir_type_conversion_op(src_type, dst_type, nir_rounding_mode_undef);
   }
   /* Derivatives: */
   case SpvOpDPdx:         return nir_op_fddx;
   case SpvOpDPdy:         return nir_op_fddy;
   case SpvOpDPdxFine:     return nir_op_fddx_fine;
   case SpvOpDPdyFine:     return nir_op_fddy_fine;
   case SpvOpDPdxCoarse:   return nir_op_fddx_coarse;
   case SpvOpDPdyCoarse:   return nir_op_fddy_coarse;

   default:
      vtn_fail("No NIR equivalent: %u", opcode);
   }
}

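/* Decoration callback: if the result value carries NoContraction, mark the
 * NIR builder exact so the generated ops are not subject to unsafe
 * floating-point optimizations such as contraction or reassociation.
 */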
static void
handle_no_contraction(struct vtn_builder *b, struct vtn_value *val, int member,
                      const struct vtn_decoration *dec, void *_void)
{
   vtn_assert(dec->scope == VTN_DEC_DECORATION);
   if (dec->decoration != SpvDecorationNoContraction)
      return;

   b->nb.exact = true;
}

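/* Decoration callback that translates an FPRoundingMode decoration into the
 * nir_rounding_mode used when picking the conversion op for OpFConvert.
 */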
static void
handle_rounding_mode(struct vtn_builder *b, struct vtn_value *val, int member,
                     const struct vtn_decoration *dec, void *_out_rounding_mode)
{
   nir_rounding_mode *out_rounding_mode = _out_rounding_mode;
   assert(dec->scope == VTN_DEC_DECORATION);
   if (dec->decoration != SpvDecorationFPRoundingMode)
      return;
   switch (dec->literals[0]) {
   case SpvFPRoundingModeRTE:
      *out_rounding_mode = nir_rounding_mode_rtne;
      break;
   case SpvFPRoundingModeRTZ:
      *out_rounding_mode = nir_rounding_mode_rtz;
      break;
   default:
      unreachable("Not supported rounding mode");
      break;
   }
}

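/* Main entry point for SPIR-V ALU instructions: w[1] is the result type id,
 * w[2] the result id, and w[3] onwards the operand ids. Matrix operands are
 * dispatched to vtn_handle_matrix_alu(); everything else is handled below.
 */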
void
vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
               const uint32_t *w, unsigned count)
{
   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
   const struct glsl_type *type =
      vtn_value(b, w[1], vtn_value_type_type)->type->type;

   vtn_foreach_decoration(b, val, handle_no_contraction, NULL);

   /* Collect the various SSA sources */
   const unsigned num_inputs = count - 3;
   struct vtn_ssa_value *vtn_src[4] = { NULL, };
   for (unsigned i = 0; i < num_inputs; i++)
      vtn_src[i] = vtn_ssa_value(b, w[i + 3]);

   if (glsl_type_is_matrix(vtn_src[0]->type) ||
       (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) {
      vtn_handle_matrix_alu(b, opcode, val, vtn_src[0], vtn_src[1]);
      b->nb.exact = b->exact;
      return;
   }

   val->ssa = vtn_create_ssa_value(b, type);
   nir_ssa_def *src[4] = { NULL, };
   for (unsigned i = 0; i < num_inputs; i++) {
      vtn_assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type));
      src[i] = vtn_src[i]->def;
   }

   switch (opcode) {
   case SpvOpAny:
      if (src[0]->num_components == 1) {
         val->ssa->def = nir_imov(&b->nb, src[0]);
      } else {
         nir_op op;
         switch (src[0]->num_components) {
         case 2:  op = nir_op_bany_inequal2; break;
         case 3:  op = nir_op_bany_inequal3; break;
         case 4:  op = nir_op_bany_inequal4; break;
         default: vtn_fail("invalid number of components");
         }
         val->ssa->def = nir_build_alu(&b->nb, op, src[0],
                                       nir_imm_false(&b->nb),
                                       NULL, NULL);
      }
      break;

   case SpvOpAll:
      if (src[0]->num_components == 1) {
         val->ssa->def = nir_imov(&b->nb, src[0]);
      } else {
         nir_op op;
         switch (src[0]->num_components) {
         case 2:  op = nir_op_ball_iequal2; break;
         case 3:  op = nir_op_ball_iequal3; break;
         case 4:  op = nir_op_ball_iequal4; break;
         default: vtn_fail("invalid number of components");
         }
         val->ssa->def = nir_build_alu(&b->nb, op, src[0],
                                       nir_imm_true(&b->nb),
                                       NULL, NULL);
      }
      break;

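   /* OpOuterProduct builds the result one column at a time: column i is
    * src[0] scaled by component i of src[1].
    */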
   case SpvOpOuterProduct: {
      for (unsigned i = 0; i < src[1]->num_components; i++) {
         val->ssa->elems[i]->def =
            nir_fmul(&b->nb, src[0], nir_channel(&b->nb, src[1], i));
      }
      break;
   }

   case SpvOpDot:
      val->ssa->def = nir_fdot(&b->nb, src[0], src[1]);
      break;

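   /* The extended-arithmetic opcodes return a two-member struct: member 0 is
    * the low bits of the result and member 1 is the carry, borrow, or high
    * bits. The extended multiplies go through a 64-bit multiply and split the
    * result back into two 32-bit halves.
    */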
   case SpvOpIAddCarry:
      vtn_assert(glsl_type_is_struct(val->ssa->type));
      val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]);
      val->ssa->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]);
      break;

   case SpvOpISubBorrow:
      vtn_assert(glsl_type_is_struct(val->ssa->type));
      val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]);
      val->ssa->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]);
      break;

   case SpvOpUMulExtended: {
      vtn_assert(glsl_type_is_struct(val->ssa->type));
      nir_ssa_def *umul = nir_umul_2x32_64(&b->nb, src[0], src[1]);
      val->ssa->elems[0]->def = nir_unpack_64_2x32_split_x(&b->nb, umul);
      val->ssa->elems[1]->def = nir_unpack_64_2x32_split_y(&b->nb, umul);
      break;
   }

   case SpvOpSMulExtended: {
      vtn_assert(glsl_type_is_struct(val->ssa->type));
      nir_ssa_def *smul = nir_imul_2x32_64(&b->nb, src[0], src[1]);
      val->ssa->elems[0]->def = nir_unpack_64_2x32_split_x(&b->nb, smul);
      val->ssa->elems[1]->def = nir_unpack_64_2x32_split_y(&b->nb, smul);
      break;
   }

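   /* fwidth(p) is defined as abs(dFdx(p)) + abs(dFdy(p)); the Fine and
    * Coarse variants just use the corresponding derivative opcodes.
    */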
   case SpvOpFwidth:
      val->ssa->def = nir_fadd(&b->nb,
                               nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])),
                               nir_fabs(&b->nb, nir_fddy(&b->nb, src[0])));
      break;
   case SpvOpFwidthFine:
      val->ssa->def = nir_fadd(&b->nb,
                               nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])),
                               nir_fabs(&b->nb, nir_fddy_fine(&b->nb, src[0])));
      break;
   case SpvOpFwidthCoarse:
      val->ssa->def = nir_fadd(&b->nb,
                               nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])),
                               nir_fabs(&b->nb, nir_fddy_coarse(&b->nb, src[0])));
      break;

   case SpvOpVectorTimesScalar:
      /* The builder will take care of splatting for us. */
      val->ssa->def = nir_fmul(&b->nb, src[0], src[1]);
      break;

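   /* isnan(x) is implemented as x != x, since NaN is the only value that
    * compares unequal to itself, and isinf(x) as |x| == +INFINITY.
    */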
   case SpvOpIsNan:
      val->ssa->def = nir_fne(&b->nb, src[0], src[0]);
      break;

   case SpvOpIsInf: {
      nir_ssa_def *inf = nir_imm_floatN_t(&b->nb, INFINITY, src[0]->bit_size);
      val->ssa->def = nir_ieq(&b->nb, nir_fabs(&b->nb, src[0]), inf);
      break;
   }

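   /* Unordered comparisons are true whenever either operand is NaN, so OR
    * the plain comparison with a NaN check on each operand.
    */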
   case SpvOpFUnordEqual:
   case SpvOpFUnordNotEqual:
   case SpvOpFUnordLessThan:
   case SpvOpFUnordGreaterThan:
   case SpvOpFUnordLessThanEqual:
   case SpvOpFUnordGreaterThanEqual: {
      bool swap;
      unsigned src_bit_size = glsl_get_bit_size(vtn_src[0]->type);
      unsigned dst_bit_size = glsl_get_bit_size(type);
      nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap,
                                                  src_bit_size, dst_bit_size);

      if (swap) {
         nir_ssa_def *tmp = src[0];
         src[0] = src[1];
         src[1] = tmp;
      }

      val->ssa->def =
         nir_ior(&b->nb,
                 nir_build_alu(&b->nb, op, src[0], src[1], NULL, NULL),
                 nir_ior(&b->nb,
                         nir_fne(&b->nb, src[0], src[0]),
                         nir_fne(&b->nb, src[1], src[1])));
      break;
   }

   case SpvOpFOrdNotEqual: {
      /* For all the SpvOpFOrd* comparisons apart from NotEqual, the value
       * from the ALU will probably already be false if the operands are not
       * ordered, so we don't need to handle those specially. NotEqual is the
       * exception because nir_op_fne is an unordered comparison and returns
       * true when either operand is NaN, so AND in an explicit ordered check.
       */
      bool swap;
      unsigned src_bit_size = glsl_get_bit_size(vtn_src[0]->type);
      unsigned dst_bit_size = glsl_get_bit_size(type);
      nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap,
                                                  src_bit_size, dst_bit_size);

      assert(!swap);

      val->ssa->def =
         nir_iand(&b->nb,
                  nir_build_alu(&b->nb, op, src[0], src[1], NULL, NULL),
                  nir_iand(&b->nb,
                           nir_feq(&b->nb, src[0], src[0]),
                           nir_feq(&b->nb, src[1], src[1])));
      break;
   }

   case SpvOpBitcast:
      /* From the definition of OpBitcast in the SPIR-V 1.2 spec:
       *
       *    "If Result Type has the same number of components as Operand, they
       *    must also have the same component width, and results are computed
       *    per component.
       *
       *    If Result Type has a different number of components than Operand,
       *    the total number of bits in Result Type must equal the total
       *    number of bits in Operand. Let L be the type, either Result Type
       *    or Operand's type, that has the larger number of components. Let S
       *    be the other type, with the smaller number of components. The
       *    number of components in L must be an integer multiple of the
       *    number of components in S. The first component (that is, the only
       *    or lowest-numbered component) of S maps to the first components of
       *    L, and so on, up to the last component of S mapping to the last
       *    components of L. Within this mapping, any single component of S
       *    (mapping to multiple components of L) maps its lower-ordered bits
       *    to the lower-numbered components of L."
       */
      vtn_fail_if(src[0]->num_components * src[0]->bit_size !=
                  glsl_get_vector_elements(type) * glsl_get_bit_size(type),
                  "Source and destination of OpBitcast must have the same "
                  "total number of bits");
      val->ssa->def = nir_bitcast_vector(&b->nb, src[0],
                                         glsl_get_bit_size(type));
      break;

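   /* OpFConvert honors an FPRoundingMode decoration on the result, so look
    * it up before choosing the conversion op.
    */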
   case SpvOpFConvert: {
      nir_alu_type src_alu_type = nir_get_nir_type_for_glsl_type(vtn_src[0]->type);
      nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(type);
      nir_rounding_mode rounding_mode = nir_rounding_mode_undef;

      vtn_foreach_decoration(b, val, handle_rounding_mode, &rounding_mode);
      nir_op op = nir_type_conversion_op(src_alu_type, dst_alu_type, rounding_mode);

      val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], NULL, NULL);
      break;
   }

   case SpvOpBitFieldInsert:
   case SpvOpBitFieldSExtract:
   case SpvOpBitFieldUExtract:
   case SpvOpShiftLeftLogical:
   case SpvOpShiftRightArithmetic:
   case SpvOpShiftRightLogical: {
      bool swap;
      unsigned src0_bit_size = glsl_get_bit_size(vtn_src[0]->type);
      unsigned dst_bit_size = glsl_get_bit_size(type);
      nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap,
                                                  src0_bit_size, dst_bit_size);

      assert(op == nir_op_ushr || op == nir_op_ishr || op == nir_op_ishl ||
             op == nir_op_bitfield_insert || op == nir_op_ubitfield_extract ||
             op == nir_op_ibitfield_extract);

      for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
         unsigned src_bit_size =
            nir_alu_type_get_type_size(nir_op_infos[op].input_types[i]);
         if (src_bit_size == 0)
            continue;
         if (src_bit_size != src[i]->bit_size) {
            assert(src_bit_size == 32);
            /* Convert the Shift, Offset and Count operands to 32 bits, which
             * is the bitsize supported by the NIR instructions. See the
             * discussion here:
             *
             * https://lists.freedesktop.org/archives/mesa-dev/2018-April/193026.html
             */
            src[i] = nir_u2u32(&b->nb, src[i]);
         }
      }
      val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]);
      break;
   }

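   /* OpSignBitSet shifts the sign bit down: ushr yields 0 or 1 for scalars
    * and ishr replicates the sign bit across the value for vectors. The
    * result is then converted to 32 bits if the source size differs.
    */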
   case SpvOpSignBitSet: {
      unsigned src_bit_size = glsl_get_bit_size(vtn_src[0]->type);
      if (src[0]->num_components == 1)
         val->ssa->def =
            nir_ushr(&b->nb, src[0], nir_imm_int(&b->nb, src_bit_size - 1));
      else
         val->ssa->def =
            nir_ishr(&b->nb, src[0], nir_imm_int(&b->nb, src_bit_size - 1));

      if (src_bit_size != 32)
         val->ssa->def = nir_u2u32(&b->nb, val->ssa->def);

      break;
   }

   default: {
      bool swap;
      unsigned src_bit_size = glsl_get_bit_size(vtn_src[0]->type);
      unsigned dst_bit_size = glsl_get_bit_size(type);
      nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap,
                                                  src_bit_size, dst_bit_size);

      if (swap) {
         nir_ssa_def *tmp = src[0];
         src[0] = src[1];
         src[1] = tmp;
      }

      switch (op) {
      case nir_op_ishl:
      case nir_op_ishr:
      case nir_op_ushr:
         if (src[1]->bit_size != 32)
            src[1] = nir_u2u32(&b->nb, src[1]);
         break;
      default:
         break;
      }

      val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]);
      break;
   } /* default */
   }

   b->nb.exact = b->exact;
}