src/compiler/spirv/vtn_amd.c

   1 /*
   2  * Copyright © 2018 Valve Corporation
   3  * Copyright © 2017 Red Hat
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice (including the next
  13  * paragraph) shall be included in all copies or substantial portions of the
  14  * Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22  * IN THE SOFTWARE.
  23  *
  24  */
  25
  26 #include "vtn_private.h"
  27 #include "GLSL.ext.AMD.h"
  28
  29 bool
  30 vtn_handle_amd_gcn_shader_instruction(struct vtn_builder *b, SpvOp ext_opcode,
  31                                       const uint32_t *w, unsigned count)
  32 {
  33    nir_ssa_def *def;
  34    switch ((enum GcnShaderAMD)ext_opcode) {
  35    case CubeFaceIndexAMD:
  36       def = nir_cube_face_index(&b->nb, vtn_get_nir_ssa(b, w[5]));
  37       break;
  38    case CubeFaceCoordAMD:
  39       def = nir_cube_face_coord(&b->nb, vtn_get_nir_ssa(b, w[5]));
  40       break;
  41    case TimeAMD: {
  42       nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader,
  43                                     nir_intrinsic_shader_clock);
  44       nir_ssa_dest_init(&intrin->instr, &intrin->dest, 2, 32, NULL);
  45       nir_intrinsic_set_memory_scope(intrin, NIR_SCOPE_SUBGROUP);
  46       nir_builder_instr_insert(&b->nb, &intrin->instr);
  47       def = nir_pack_64_2x32(&b->nb, &intrin->dest.ssa);
  48       break;
  49    }
  50    default:
  51       unreachable("Invalid opcode");
  52    }
  53
  54    vtn_push_nir_ssa(b, w[2], def);
  55
  56    return true;
  57 }
  58
  59 bool
  60 vtn_handle_amd_shader_ballot_instruction(struct vtn_builder *b, SpvOp ext_opcode,
  61                                          const uint32_t *w, unsigned count)
  62 {
  63    unsigned num_args;
  64    nir_intrinsic_op op;
  65    switch ((enum ShaderBallotAMD)ext_opcode) {
  66    case SwizzleInvocationsAMD:
  67       num_args = 1;
  68       op = nir_intrinsic_quad_swizzle_amd;
  69       break;
  70    case SwizzleInvocationsMaskedAMD:
  71       num_args = 1;
  72       op = nir_intrinsic_masked_swizzle_amd;
  73       break;
  74    case WriteInvocationAMD:
  75       num_args = 3;
  76       op = nir_intrinsic_write_invocation_amd;
  77       break;
  78    case MbcntAMD:
  79       num_args = 1;
  80       op = nir_intrinsic_mbcnt_amd;
  81       break;
  82    default:
  83       unreachable("Invalid opcode");
  84    }
  85
  86    const struct glsl_type *dest_type = vtn_get_type(b, w[1])->type;
  87    nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader, op);
  88    nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest, dest_type, NULL);
  89    if (nir_intrinsic_infos[op].src_components[0] == 0)
  90       intrin->num_components = intrin->dest.ssa.num_components;
  91
  92    for (unsigned i = 0; i < num_args; i++)
  93       intrin->src[i] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[i + 5]));
  94
  95    if (intrin->intrinsic == nir_intrinsic_quad_swizzle_amd) {
  96       struct vtn_value *val = vtn_value(b, w[6], vtn_value_type_constant);
  97       unsigned mask = val->constant->values[0].u32 |
  98                       val->constant->values[1].u32 << 2 |
  99                       val->constant->values[2].u32 << 4 |
 100                       val->constant->values[3].u32 << 6;
 101       nir_intrinsic_set_swizzle_mask(intrin, mask);
 102
 103    } else if (intrin->intrinsic == nir_intrinsic_masked_swizzle_amd) {
 104       struct vtn_value *val = vtn_value(b, w[6], vtn_value_type_constant);
 105       unsigned mask = val->constant->values[0].u32 |
 106                       val->constant->values[1].u32 << 5 |
 107                       val->constant->values[2].u32 << 10;
 108       nir_intrinsic_set_swizzle_mask(intrin, mask);
 109    }
 110
 111    nir_builder_instr_insert(&b->nb, &intrin->instr);
 112    vtn_push_nir_ssa(b, w[2], &intrin->dest.ssa);
 113
 114    return true;
 115 }
 116
 117 bool
 118 vtn_handle_amd_shader_trinary_minmax_instruction(struct vtn_builder *b, SpvOp ext_opcode,
 119                                                  const uint32_t *w, unsigned count)
 120 {
 121    struct nir_builder *nb = &b->nb;
 122
 123    unsigned num_inputs = count - 5;
 124    assert(num_inputs == 3);
 125    nir_ssa_def *src[3] = { NULL, };
 126    for (unsigned i = 0; i < num_inputs; i++)
 127       src[i] = vtn_get_nir_ssa(b, w[i + 5]);
 128
 129    /* place constants at src[1-2] for easier constant-folding */
 130    for (unsigned i = 1; i <= 2; i++) {
 131       if (nir_src_as_const_value(nir_src_for_ssa(src[0]))) {
 132          nir_ssa_def* tmp = src[i];
 133          src[i] = src[0];
 134          src[0] = tmp;
 135       }
 136    }
 137    nir_ssa_def *def;
 138    switch ((enum ShaderTrinaryMinMaxAMD)ext_opcode) {
 139    case FMin3AMD:
 140       def = nir_fmin(nb, src[0], nir_fmin(nb, src[1], src[2]));
 141       break;
 142    case UMin3AMD:
 143       def = nir_umin(nb, src[0], nir_umin(nb, src[1], src[2]));
 144       break;
 145    case SMin3AMD:
 146       def = nir_imin(nb, src[0], nir_imin(nb, src[1], src[2]));
 147       break;
 148    case FMax3AMD:
 149       def = nir_fmax(nb, src[0], nir_fmax(nb, src[1], src[2]));
 150       break;
 151    case UMax3AMD:
 152       def = nir_umax(nb, src[0], nir_umax(nb, src[1], src[2]));
 153       break;
 154    case SMax3AMD:
 155       def = nir_imax(nb, src[0], nir_imax(nb, src[1], src[2]));
 156       break;
 157    case FMid3AMD:
 158       def = nir_fmin(nb, nir_fmax(nb, src[0], nir_fmin(nb, src[1], src[2])),
 159                      nir_fmax(nb, src[1], src[2]));
 160       break;
 161    case UMid3AMD:
 162       def = nir_umin(nb, nir_umax(nb, src[0], nir_umin(nb, src[1], src[2])),
 163                      nir_umax(nb, src[1], src[2]));
 164       break;
 165    case SMid3AMD:
 166       def = nir_imin(nb, nir_imax(nb, src[0], nir_imin(nb, src[1], src[2])),
 167                      nir_imax(nb, src[1], src[2]));
 168       break;
 169    default:
 170       unreachable("unknown opcode\n");
 171       break;
 172    }
 173
 174    vtn_push_nir_ssa(b, w[2], def);
 175
 176    return true;
 177 }
 178
 179 bool
 180 vtn_handle_amd_shader_explicit_vertex_parameter_instruction(struct vtn_builder *b, SpvOp ext_opcode,
 181                                                             const uint32_t *w, unsigned count)
 182 {
 183    nir_intrinsic_op op;
 184    switch ((enum ShaderExplicitVertexParameterAMD)ext_opcode) {
 185    case InterpolateAtVertexAMD:
 186       op = nir_intrinsic_interp_deref_at_vertex;
 187       break;
 188    default:
 189       unreachable("unknown opcode");
 190    }
 191
 192    nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader, op);
 193
 194    struct vtn_pointer *ptr =
 195       vtn_value(b, w[5], vtn_value_type_pointer)->pointer;
 196    nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
 197
 198    /* If the value we are interpolating has an index into a vector then
 199     * interpolate the vector and index the result of that instead. This is
 200     * necessary because the index will get generated as a series of nir_bcsel
 201     * instructions so it would no longer be an input variable.
 202     */
 203    const bool vec_array_deref = deref->deref_type == nir_deref_type_array &&
 204       glsl_type_is_vector(nir_deref_instr_parent(deref)->type);
 205
 206    nir_deref_instr *vec_deref = NULL;
 207    if (vec_array_deref) {
 208       vec_deref = deref;
 209       deref = nir_deref_instr_parent(deref);
 210    }
 211    intrin->src[0] = nir_src_for_ssa(&deref->dest.ssa);
 212    intrin->src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[6]));
 213
 214    intrin->num_components = glsl_get_vector_elements(deref->type);
 215    nir_ssa_dest_init(&intrin->instr, &intrin->dest,
 216                      glsl_get_vector_elements(deref->type),
 217                      glsl_get_bit_size(deref->type), NULL);
 218
 219    nir_builder_instr_insert(&b->nb, &intrin->instr);
 220
 221    nir_ssa_def *def;
 222    if (vec_array_deref) {
 223       assert(vec_deref);
 224       def = nir_vector_extract(&b->nb, &intrin->dest.ssa,
 225                                vec_deref->arr.index.ssa);
 226    } else {
 227       def = &intrin->dest.ssa;
 228    }
 229    vtn_push_nir_ssa(b, w[2], def);
 230
 231    return true;
 232 }