src/compiler/spirv/vtn_amd.c

   1 /*
   2  * Copyright © 2018 Valve Corporation
   3  * Copyright © 2017 Red Hat
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice (including the next
  13  * paragraph) shall be included in all copies or substantial portions of the
  14  * Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22  * IN THE SOFTWARE.
  23  *
  24  */
  25
  26 #include "vtn_private.h"
  27 #include "GLSL.ext.AMD.h"
  28
  29 bool
  30 vtn_handle_amd_gcn_shader_instruction(struct vtn_builder *b, SpvOp ext_opcode,
  31                                       const uint32_t *w, unsigned count)
  32 {
  33    const struct glsl_type *dest_type =
  34                            vtn_value(b, w[1], vtn_value_type_type)->type->type;
  35    struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
  36    val->ssa = vtn_create_ssa_value(b, dest_type);
  37
  38    switch ((enum GcnShaderAMD)ext_opcode) {
  39    case CubeFaceIndexAMD:
  40       val->ssa->def = nir_cube_face_index(&b->nb, vtn_ssa_value(b, w[5])->def);
  41           break;
  42    case CubeFaceCoordAMD:
  43       val->ssa->def = nir_cube_face_coord(&b->nb, vtn_ssa_value(b, w[5])->def);
  44       break;
  45    case TimeAMD: {
  46       nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader,
  47                                     nir_intrinsic_shader_clock);
  48       nir_ssa_dest_init(&intrin->instr, &intrin->dest, 2, 32, NULL);
  49       nir_builder_instr_insert(&b->nb, &intrin->instr);
  50       val->ssa->def = nir_pack_64_2x32(&b->nb, &intrin->dest.ssa);
  51       break;
  52    }
  53    default:
  54       unreachable("Invalid opcode");
  55    }
  56    return true;
  57 }
  58
  59 bool
  60 vtn_handle_amd_shader_ballot_instruction(struct vtn_builder *b, SpvOp ext_opcode,
  61                                          const uint32_t *w, unsigned count)
  62 {
  63    const struct glsl_type *dest_type =
  64                            vtn_value(b, w[1], vtn_value_type_type)->type->type;
  65    struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
  66    val->ssa = vtn_create_ssa_value(b, dest_type);
  67
  68    unsigned num_args;
  69    nir_intrinsic_op op;
  70    switch ((enum ShaderBallotAMD)ext_opcode) {
  71    case SwizzleInvocationsAMD:
  72       num_args = 1;
  73       op = nir_intrinsic_quad_swizzle_amd;
  74       break;
  75    case SwizzleInvocationsMaskedAMD:
  76       num_args = 1;
  77       op = nir_intrinsic_masked_swizzle_amd;
  78       break;
  79    case WriteInvocationAMD:
  80       num_args = 3;
  81       op = nir_intrinsic_write_invocation_amd;
  82       break;
  83    case MbcntAMD:
  84       num_args = 1;
  85       op = nir_intrinsic_mbcnt_amd;
  86       break;
  87    default:
  88       unreachable("Invalid opcode");
  89    }
  90
  91    nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader, op);
  92    nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest, dest_type, NULL);
  93    intrin->num_components = intrin->dest.ssa.num_components;
  94
  95    for (unsigned i = 0; i < num_args; i++)
  96       intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 5])->def);
  97
  98    if (intrin->intrinsic == nir_intrinsic_quad_swizzle_amd) {
  99       struct vtn_value *val = vtn_value(b, w[6], vtn_value_type_constant);
 100       unsigned mask = val->constant->values[0].u32 |
 101                       val->constant->values[1].u32 << 2 |
 102                       val->constant->values[2].u32 << 4 |
 103                       val->constant->values[3].u32 << 6;
 104       nir_intrinsic_set_swizzle_mask(intrin, mask);
 105
 106    } else if (intrin->intrinsic == nir_intrinsic_masked_swizzle_amd) {
 107       struct vtn_value *val = vtn_value(b, w[6], vtn_value_type_constant);
 108       unsigned mask = val->constant->values[0].u32 |
 109                       val->constant->values[1].u32 << 5 |
 110                       val->constant->values[2].u32 << 10;
 111       nir_intrinsic_set_swizzle_mask(intrin, mask);
 112    }
 113
 114    nir_builder_instr_insert(&b->nb, &intrin->instr);
 115    val->ssa->def = &intrin->dest.ssa;
 116
 117    return true;
 118 }
 119
 120 bool
 121 vtn_handle_amd_shader_trinary_minmax_instruction(struct vtn_builder *b, SpvOp ext_opcode,
 122                                                  const uint32_t *w, unsigned count)
 123 {
 124    struct nir_builder *nb = &b->nb;
 125    const struct glsl_type *dest_type =
 126       vtn_value(b, w[1], vtn_value_type_type)->type->type;
 127    struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
 128    val->ssa = vtn_create_ssa_value(b, dest_type);
 129
 130    unsigned num_inputs = count - 5;
 131    assert(num_inputs == 3);
 132    nir_ssa_def *src[3] = { NULL, };
 133    for (unsigned i = 0; i < num_inputs; i++)
 134       src[i] = vtn_ssa_value(b, w[i + 5])->def;
 135
 136    switch ((enum ShaderTrinaryMinMaxAMD)ext_opcode) {
 137    case FMin3AMD:
 138       val->ssa->def = nir_fmin3(nb, src[0], src[1], src[2]);
 139       break;
 140    case UMin3AMD:
 141       val->ssa->def = nir_umin3(nb, src[0], src[1], src[2]);
 142       break;
 143    case SMin3AMD:
 144       val->ssa->def = nir_imin3(nb, src[0], src[1], src[2]);
 145       break;
 146    case FMax3AMD:
 147       val->ssa->def = nir_fmax3(nb, src[0], src[1], src[2]);
 148       break;
 149    case UMax3AMD:
 150       val->ssa->def = nir_umax3(nb, src[0], src[1], src[2]);
 151       break;
 152    case SMax3AMD:
 153       val->ssa->def = nir_imax3(nb, src[0], src[1], src[2]);
 154       break;
 155    case FMid3AMD:
 156       val->ssa->def = nir_fmed3(nb, src[0], src[1], src[2]);
 157       break;
 158    case UMid3AMD:
 159       val->ssa->def = nir_umed3(nb, src[0], src[1], src[2]);
 160       break;
 161    case SMid3AMD:
 162       val->ssa->def = nir_imed3(nb, src[0], src[1], src[2]);
 163       break;
 164    default:
 165       unreachable("unknown opcode\n");
 166       break;
 167    }
 168
 169    return true;
 170 }