Revert "nir: Add an option for lowering TessLevelInner/Outer to vecs"
src/compiler/spirv/vtn_amd.c
/*
 * Copyright © 2018 Valve Corporation
 * Copyright © 2017 Red Hat
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "vtn_private.h"
#include "GLSL.ext.AMD.h"

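/* Handlers for the AMD SPIR-V vendor extended instruction sets.  For every
 * OpExtInst, w[1] is the result type id, w[2] is the result id, and the
 * instruction's own operands start at w[5].
 */

/* SPV_AMD_gcn_shader: cube map face helpers and the TimeAMD shader clock. */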
bool
vtn_handle_amd_gcn_shader_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                                      const uint32_t *w, unsigned count)
{
   const struct glsl_type *dest_type =
      vtn_value(b, w[1], vtn_value_type_type)->type->type;
   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
   val->ssa = vtn_create_ssa_value(b, dest_type);

   switch ((enum GcnShaderAMD)ext_opcode) {
   case CubeFaceIndexAMD:
      val->ssa->def = nir_cube_face_index(&b->nb, vtn_ssa_value(b, w[5])->def);
      break;
   case CubeFaceCoordAMD:
      val->ssa->def = nir_cube_face_coord(&b->nb, vtn_ssa_value(b, w[5])->def);
      break;
   case TimeAMD: {
      nir_intrinsic_instr *intrin =
         nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_shader_clock);
      nir_ssa_dest_init(&intrin->instr, &intrin->dest, 2, 32, NULL);
      nir_intrinsic_set_memory_scope(intrin, NIR_SCOPE_SUBGROUP);
      nir_builder_instr_insert(&b->nb, &intrin->instr);
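      /* shader_clock produces two 32-bit halves; pack them into the
       * 64-bit result that TimeAMD returns.
       */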
      val->ssa->def = nir_pack_64_2x32(&b->nb, &intrin->dest.ssa);
      break;
   }
   default:
      unreachable("Invalid opcode");
   }
   return true;
}

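/* SPV_AMD_shader_ballot: cross-invocation swizzles, write-invocation and
 * mbcnt, all lowered to AMD-specific NIR intrinsics.
 */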
bool
vtn_handle_amd_shader_ballot_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                                         const uint32_t *w, unsigned count)
{
   const struct glsl_type *dest_type =
      vtn_value(b, w[1], vtn_value_type_type)->type->type;
   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
   val->ssa = vtn_create_ssa_value(b, dest_type);

   unsigned num_args;
   nir_intrinsic_op op;
   switch ((enum ShaderBallotAMD)ext_opcode) {
   case SwizzleInvocationsAMD:
      num_args = 1;
      op = nir_intrinsic_quad_swizzle_amd;
      break;
   case SwizzleInvocationsMaskedAMD:
      num_args = 1;
      op = nir_intrinsic_masked_swizzle_amd;
      break;
   case WriteInvocationAMD:
      num_args = 3;
      op = nir_intrinsic_write_invocation_amd;
      break;
   case MbcntAMD:
      num_args = 1;
      op = nir_intrinsic_mbcnt_amd;
      break;
   default:
      unreachable("Invalid opcode");
   }

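   /* Size the destination from the SPIR-V result type.  Intrinsics whose
    * first source takes its width from num_components get that matched to
    * the destination as well.
    */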
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader, op);
   nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest, dest_type, NULL);
   if (nir_intrinsic_infos[op].src_components[0] == 0)
      intrin->num_components = intrin->dest.ssa.num_components;

   for (unsigned i = 0; i < num_args; i++)
      intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 5])->def);

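   /* Both swizzle variants take their selector as a constant operand in
    * w[6].  SwizzleInvocationsAMD packs four 2-bit lane selectors;
    * SwizzleInvocationsMaskedAMD packs three 5-bit and/or/xor masks.
    */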
   if (intrin->intrinsic == nir_intrinsic_quad_swizzle_amd) {
      struct vtn_value *val = vtn_value(b, w[6], vtn_value_type_constant);
      unsigned mask = val->constant->values[0].u32 |
                      val->constant->values[1].u32 << 2 |
                      val->constant->values[2].u32 << 4 |
                      val->constant->values[3].u32 << 6;
      nir_intrinsic_set_swizzle_mask(intrin, mask);

   } else if (intrin->intrinsic == nir_intrinsic_masked_swizzle_amd) {
      struct vtn_value *val = vtn_value(b, w[6], vtn_value_type_constant);
      unsigned mask = val->constant->values[0].u32 |
                      val->constant->values[1].u32 << 5 |
                      val->constant->values[2].u32 << 10;
      nir_intrinsic_set_swizzle_mask(intrin, mask);
   }

   nir_builder_instr_insert(&b->nb, &intrin->instr);
   val->ssa->def = &intrin->dest.ssa;

   return true;
}

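/* SPV_AMD_shader_trinary_minmax: three-operand min/max/median, mapped
 * directly onto the corresponding NIR ALU builders.
 */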
bool
vtn_handle_amd_shader_trinary_minmax_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                                                 const uint32_t *w, unsigned count)
{
   struct nir_builder *nb = &b->nb;
   const struct glsl_type *dest_type =
      vtn_value(b, w[1], vtn_value_type_type)->type->type;
   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
   val->ssa = vtn_create_ssa_value(b, dest_type);

   unsigned num_inputs = count - 5;
   assert(num_inputs == 3);
   nir_ssa_def *src[3] = { NULL, };
   for (unsigned i = 0; i < num_inputs; i++)
      src[i] = vtn_ssa_value(b, w[i + 5])->def;

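   /* Every opcode maps 1:1 onto a NIR three-source helper; the signed
    * integer variants use imin3/imax3/imed3 and the unsigned ones the
    * u-prefixed forms.
    */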
   switch ((enum ShaderTrinaryMinMaxAMD)ext_opcode) {
   case FMin3AMD:
      val->ssa->def = nir_fmin3(nb, src[0], src[1], src[2]);
      break;
   case UMin3AMD:
      val->ssa->def = nir_umin3(nb, src[0], src[1], src[2]);
      break;
   case SMin3AMD:
      val->ssa->def = nir_imin3(nb, src[0], src[1], src[2]);
      break;
   case FMax3AMD:
      val->ssa->def = nir_fmax3(nb, src[0], src[1], src[2]);
      break;
   case UMax3AMD:
      val->ssa->def = nir_umax3(nb, src[0], src[1], src[2]);
      break;
   case SMax3AMD:
      val->ssa->def = nir_imax3(nb, src[0], src[1], src[2]);
      break;
   case FMid3AMD:
      val->ssa->def = nir_fmed3(nb, src[0], src[1], src[2]);
      break;
   case UMid3AMD:
      val->ssa->def = nir_umed3(nb, src[0], src[1], src[2]);
      break;
   case SMid3AMD:
      val->ssa->def = nir_imed3(nb, src[0], src[1], src[2]);
      break;
   default:
      unreachable("unknown opcode");
   }

   return true;
}

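/* SPV_AMD_shader_explicit_vertex_parameter: InterpolateAtVertexAMD reads a
 * fragment input, without interpolation, at one specific vertex of the
 * primitive.
 */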
bool
vtn_handle_amd_shader_explicit_vertex_parameter_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                                                            const uint32_t *w, unsigned count)
{
   const struct glsl_type *dest_type =
      vtn_value(b, w[1], vtn_value_type_type)->type->type;

   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
   val->ssa = vtn_create_ssa_value(b, dest_type);

   nir_intrinsic_op op;
   switch ((enum ShaderExplicitVertexParameterAMD)ext_opcode) {
   case InterpolateAtVertexAMD:
      op = nir_intrinsic_interp_deref_at_vertex;
      break;
   default:
      unreachable("unknown opcode");
   }

   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader, op);

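   /* w[5] is the pointer to the input variable and w[6] the vertex index. */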
   struct vtn_pointer *ptr =
      vtn_value(b, w[5], vtn_value_type_pointer)->pointer;
   nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);

   /* If the value we are interpolating has an index into a vector then
    * interpolate the vector and index the result of that instead. This is
    * necessary because the index will get generated as a series of nir_bcsel
    * instructions so it would no longer be an input variable.
    */
   const bool vec_array_deref = deref->deref_type == nir_deref_type_array &&
      glsl_type_is_vector(nir_deref_instr_parent(deref)->type);

   nir_deref_instr *vec_deref = NULL;
   if (vec_array_deref) {
      vec_deref = deref;
      deref = nir_deref_instr_parent(deref);
   }
   intrin->src[0] = nir_src_for_ssa(&deref->dest.ssa);
   intrin->src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);

   intrin->num_components = glsl_get_vector_elements(deref->type);
   nir_ssa_dest_init(&intrin->instr, &intrin->dest,
                     glsl_get_vector_elements(deref->type),
                     glsl_get_bit_size(deref->type), NULL);

   nir_builder_instr_insert(&b->nb, &intrin->instr);

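   /* When we interpolated the whole vector, extract the originally
    * requested component from the result.
    */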
   if (vec_array_deref) {
      assert(vec_deref);
      val->ssa->def = nir_vector_extract(&b->nb, &intrin->dest.ssa,
                                         vec_deref->arr.index.ssa);
   } else {
      val->ssa->def = &intrin->dest.ssa;
   }

   return true;
}