gallivm/nir: add sample_mask_in support
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_nir_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2019 Red Hat.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **************************************************************************/
25
26 #include "lp_bld_nir.h"
27 #include "lp_bld_init.h"
28 #include "lp_bld_flow.h"
29 #include "lp_bld_logic.h"
30 #include "lp_bld_gather.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_struct.h"
33 #include "lp_bld_arit.h"
34 #include "lp_bld_bitarit.h"
35 #include "lp_bld_coro.h"
36 #include "lp_bld_printf.h"
37 #include "util/u_math.h"
38 /*
39 * combine the execution mask if there is one with the current mask.
40 */
41 static LLVMValueRef
42 mask_vec(struct lp_build_nir_context *bld_base)
43 {
44 struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
45 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
46 struct lp_exec_mask *exec_mask = &bld->exec_mask;
47 LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
48 if (!exec_mask->has_mask) {
49 return bld_mask;
50 }
51 if (!bld_mask)
52 return exec_mask->exec_mask;
53 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
54 exec_mask->exec_mask, "");
55 }
56
57 static LLVMValueRef
58 emit_fetch_64bit(
59 struct lp_build_nir_context * bld_base,
60 LLVMValueRef input,
61 LLVMValueRef input2)
62 {
63 struct gallivm_state *gallivm = bld_base->base.gallivm;
64 LLVMBuilderRef builder = gallivm->builder;
65 LLVMValueRef res;
66 int i;
67 LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
68 int len = bld_base->base.type.length * 2;
69 assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
70
71 for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
72 shuffles[i] = lp_build_const_int32(gallivm, i / 2);
73 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
74 }
75 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
76
77 return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
78 }
79
80 static void
81 emit_store_64bit_split(struct lp_build_nir_context *bld_base,
82 LLVMValueRef value,
83 LLVMValueRef split_values[2])
84 {
85 struct gallivm_state *gallivm = bld_base->base.gallivm;
86 LLVMBuilderRef builder = gallivm->builder;
87 unsigned i;
88 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
89 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
90 int len = bld_base->base.type.length * 2;
91
92 value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
93 for (i = 0; i < bld_base->base.type.length; i++) {
94 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
95 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
96 }
97
98 split_values[0] = LLVMBuildShuffleVector(builder, value,
99 LLVMGetUndef(LLVMTypeOf(value)),
100 LLVMConstVector(shuffles,
101 bld_base->base.type.length),
102 "");
103 split_values[1] = LLVMBuildShuffleVector(builder, value,
104 LLVMGetUndef(LLVMTypeOf(value)),
105 LLVMConstVector(shuffles2,
106 bld_base->base.type.length),
107 "");
108 }
109
110 static void
111 emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
112 LLVMValueRef chan_ptr,
113 LLVMValueRef chan_ptr2,
114 LLVMValueRef value)
115 {
116 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
117 struct lp_build_context *float_bld = &bld_base->base;
118 LLVMValueRef split_vals[2];
119
120 emit_store_64bit_split(bld_base, value, split_vals);
121
122 lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
123 lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
124 }
125
126 static LLVMValueRef
127 get_soa_array_offsets(struct lp_build_context *uint_bld,
128 LLVMValueRef indirect_index,
129 int num_components,
130 unsigned chan_index,
131 bool need_perelement_offset)
132 {
133 struct gallivm_state *gallivm = uint_bld->gallivm;
134 LLVMValueRef chan_vec =
135 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
136 LLVMValueRef length_vec =
137 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
138 LLVMValueRef index_vec;
139
140 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
141 index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));
142 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
143 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
144
145 if (need_perelement_offset) {
146 LLVMValueRef pixel_offsets;
147 unsigned i;
148 /* build pixel offset vector: {0, 1, 2, 3, ...} */
149 pixel_offsets = uint_bld->undef;
150 for (i = 0; i < uint_bld->type.length; i++) {
151 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
152 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
153 ii, ii, "");
154 }
155 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
156 }
157 return index_vec;
158 }
159
/*
 * Gather scalars from 'base_ptr' at per-lane 'indexes' into one vector.
 *
 * If 'indexes2' is non-NULL the gather produces a double-width result:
 * even result lanes come from 'indexes', odd lanes from 'indexes2'
 * (used for reassembling 64-bit values from two 32-bit streams).
 * 'overflow_mask', when given, marks lanes that indexed out of bounds;
 * those lanes fetch from index 0 and are zeroed afterwards.
 */
static LLVMValueRef
build_gather(struct lp_build_nir_context *bld_base,
             struct lp_build_context *bld,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef res;
   unsigned i;

   /* Double-width gathers accumulate into a float vector twice as long. */
   if (indexes2)
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
   else
      res = bld->undef;
   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (indexes2)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
      LLVMValueRef si, di;
      LLVMValueRef index;
      LLVMValueRef scalar_ptr, scalar;

      /* di: destination lane; si: source lane (halved when interleaving). */
      di = lp_build_const_int32(gallivm, i);
      if (indexes2)
         si = lp_build_const_int32(gallivm, i >> 1);
      else
         si = di;

      /* Odd destination lanes pull from the second index vector. */
      if (indexes2 && (i & 1)) {
         index = LLVMBuildExtractElement(builder,
                                         indexes2, si, "");
      } else {
         index = LLVMBuildExtractElement(builder,
                                         indexes, si, "");
      }
      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   }

   if (overflow_mask) {
      if (indexes2) {
         /* Zero overflowed lanes at 64-bit granularity: widen the 32-bit
          * mask with sext so each bit pattern covers a whole double. */
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                       bld_base->dbl_bld.int_vec_type, "");
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
                               bld_base->dbl_bld.zero, res);
      } else
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}
243
/**
 * Scatter/store vector.
 *
 * Writes each lane of 'values' to base_ptr[indexes[lane]].  When the
 * exec mask is active, masked-off lanes are preserved by re-reading the
 * destination and selecting between old and new values before storing
 * (read-modify-write, not an atomic operation).
 */
static void
emit_mask_scatter(struct lp_build_nir_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;
   /* Only predicate the stores when control flow produced a mask. */
   LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      /* Debug hook: flip to 1 to trace every scattered store. */
      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         /* Predicated lane: blend with the existing destination value. */
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->uint_elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}
285
286 static void emit_load_var(struct lp_build_nir_context *bld_base,
287 nir_variable_mode deref_mode,
288 unsigned num_components,
289 unsigned bit_size,
290 nir_variable *var,
291 unsigned vertex_index,
292 LLVMValueRef indir_vertex_index,
293 unsigned const_index,
294 LLVMValueRef indir_index,
295 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
296 {
297 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
298 struct gallivm_state *gallivm = bld_base->base.gallivm;
299 int dmul = bit_size == 64 ? 2 : 1;
300 switch (deref_mode) {
301 case nir_var_shader_in:
302 for (unsigned i = 0; i < num_components; i++) {
303 int idx = (i * dmul) + var->data.location_frac;
304 if (bld->gs_iface) {
305 LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
306 LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
307 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
308 LLVMValueRef result2;
309 result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
310 false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
311 if (bit_size == 64) {
312 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
313 result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
314 false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
315 result[i] = emit_fetch_64bit(bld_base, result[i], result2);
316 }
317 } else if (bld->tes_iface) {
318 LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
319 LLVMValueRef attrib_index_val;
320 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
321 LLVMValueRef result2;
322
323 if (indir_index)
324 attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
325 else
326 attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
327 if (var->data.patch) {
328 result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
329 indir_index ? true : false, attrib_index_val, swizzle_index_val);
330 if (bit_size == 64) {
331 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
332 result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
333 indir_index ? true : false, attrib_index_val, swizzle_index_val);
334 result[i] = emit_fetch_64bit(bld_base, result[i], result2);
335 }
336 }
337 else {
338 result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
339 indir_vertex_index ? true : false,
340 indir_vertex_index ? indir_vertex_index : vertex_index_val,
341 indir_index ? true : false, attrib_index_val, swizzle_index_val);
342 if (bit_size == 64) {
343 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
344 result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
345 indir_vertex_index ? true : false,
346 indir_vertex_index ? indir_vertex_index : vertex_index_val,
347 indir_index ? true : false, attrib_index_val, swizzle_index_val);
348 result[i] = emit_fetch_64bit(bld_base, result[i], result2);
349 }
350 }
351 } else if (bld->tcs_iface) {
352 LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
353 LLVMValueRef attrib_index_val;
354 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
355
356 if (indir_index)
357 attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
358 else
359 attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
360 result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
361 indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
362 indir_index ? true : false, attrib_index_val, swizzle_index_val);
363 if (bit_size == 64) {
364 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
365 LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
366 indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
367 indir_index ? true : false, attrib_index_val, swizzle_index_val);
368 result[i] = emit_fetch_64bit(bld_base, result[i], result2);
369 }
370 } else {
371 if (indir_index) {
372 LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
373 LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld,
374 attrib_index_val, 4, idx,
375 TRUE);
376 LLVMValueRef index_vec2 = NULL;
377 LLVMTypeRef fptr_type;
378 LLVMValueRef inputs_array;
379 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
380 inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, fptr_type, "");
381
382 if (bit_size == 64)
383 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
384 indir_index, 4, idx + 1, TRUE);
385
386 /* Gather values from the input register array */
387 result[i] = build_gather(bld_base, &bld_base->base, inputs_array, index_vec, NULL, index_vec2);
388 } else {
389 if (bld->indirects & nir_var_shader_in) {
390 LLVMValueRef lindex = lp_build_const_int32(gallivm,
391 var->data.driver_location * 4 + idx);
392 LLVMValueRef input_ptr = lp_build_pointer_get(gallivm->builder,
393 bld->inputs_array, lindex);
394 if (bit_size == 64) {
395 LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
396 var->data.driver_location * 4 + (idx + 1));
397 LLVMValueRef input_ptr2 = lp_build_pointer_get(gallivm->builder,
398 bld->inputs_array, lindex2);
399 result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
400 } else {
401 result[i] = input_ptr;
402 }
403 } else {
404 if (bit_size == 64) {
405 LLVMValueRef tmp[2];
406 tmp[0] = bld->inputs[var->data.driver_location + const_index][idx];
407 tmp[1] = bld->inputs[var->data.driver_location + const_index][idx + 1];
408 result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
409 } else {
410 result[i] = bld->inputs[var->data.driver_location + const_index][idx];
411 }
412 }
413 }
414 }
415 }
416 break;
417 case nir_var_shader_out:
418 for (unsigned i = 0; i < num_components; i++) {
419 int idx = (i * dmul) + var->data.location_frac;
420 if (bld->tcs_iface) {
421 LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
422 LLVMValueRef attrib_index_val;
423 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
424
425 if (indir_index)
426 attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
427 else
428 attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
429
430 result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
431 indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
432 indir_index ? true : false, attrib_index_val, swizzle_index_val, 0);
433 if (bit_size == 64) {
434 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
435 LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
436 indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
437 indir_index ? true : false, attrib_index_val, swizzle_index_val, 0);
438 result[i] = emit_fetch_64bit(bld_base, result[i], result2);
439 }
440 }
441 }
442 break;
443 default:
444 break;
445 }
446 }
447
448 static void emit_store_chan(struct lp_build_nir_context *bld_base,
449 nir_variable_mode deref_mode,
450 unsigned bit_size,
451 unsigned location, unsigned comp,
452 unsigned chan,
453 LLVMValueRef dst)
454 {
455 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
456 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
457 struct lp_build_context *float_bld = &bld_base->base;
458
459 if (bit_size == 64) {
460 chan *= 2;
461 chan += comp;
462 if (chan >= 4) {
463 chan -= 4;
464 location++;
465 }
466 emit_store_64bit_chan(bld_base, bld->outputs[location][chan],
467 bld->outputs[location][chan + 1], dst);
468 } else {
469 dst = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");
470 lp_exec_mask_store(&bld->exec_mask, float_bld, dst,
471 bld->outputs[location][chan + comp]);
472 }
473 }
474
/*
 * Store one channel of a TCS output through the TCS interface, applying
 * the combined execution mask.  64-bit values are split into two 32-bit
 * halves stored at consecutive swizzle indices; a swizzle past channel 3
 * wraps into the next attribute location.
 */
static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
                                unsigned bit_size,
                                unsigned location,
                                unsigned const_index,
                                LLVMValueRef indir_vertex_index,
                                LLVMValueRef indir_index,
                                unsigned comp,
                                unsigned chan,
                                LLVMValueRef chan_val)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   unsigned swizzle = chan;
   /* Compute the effective 32-bit swizzle slot for this channel. */
   if (bit_size == 64) {
      swizzle *= 2;
      swizzle += comp;
      if (swizzle >= 4) {
         swizzle -= 4;
         location++;
      }
   } else
      swizzle += comp;
   LLVMValueRef attrib_index_val;
   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);

   /* Indirect indices are vectors added to the base location; otherwise a
    * plain scalar attribute index is enough. */
   if (indir_index)
      attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
   else
      attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
   if (bit_size == 64) {
      /* Store low then high half at consecutive swizzle slots. */
      LLVMValueRef split_vals[2];
      LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
      emit_store_64bit_split(bld_base, chan_val, split_vals);
      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                        indir_vertex_index ? true : false,
                                        indir_vertex_index,
                                        indir_index ? true : false,
                                        attrib_index_val, swizzle_index_val,
                                        split_vals[0], mask_vec(bld_base));
      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                        indir_vertex_index ? true : false,
                                        indir_vertex_index,
                                        indir_index ? true : false,
                                        attrib_index_val, swizzle_index_val2,
                                        split_vals[1], mask_vec(bld_base));
   } else {
      chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                        indir_vertex_index ? true : false,
                                        indir_vertex_index,
                                        indir_index ? true : false,
                                        attrib_index_val, swizzle_index_val,
                                        chan_val, mask_vec(bld_base));
   }
}
531
532 static void emit_store_var(struct lp_build_nir_context *bld_base,
533 nir_variable_mode deref_mode,
534 unsigned num_components,
535 unsigned bit_size,
536 nir_variable *var,
537 unsigned writemask,
538 LLVMValueRef indir_vertex_index,
539 unsigned const_index,
540 LLVMValueRef indir_index,
541 LLVMValueRef dst)
542 {
543 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
544 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
545 switch (deref_mode) {
546 case nir_var_shader_out: {
547 unsigned location = var->data.driver_location;
548 unsigned comp = var->data.location_frac;
549 if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
550 if (var->data.location == FRAG_RESULT_STENCIL)
551 comp = 1;
552 else if (var->data.location == FRAG_RESULT_DEPTH)
553 comp = 2;
554 }
555
556 for (unsigned chan = 0; chan < num_components; chan++) {
557 if (writemask & (1u << chan)) {
558 LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");
559 if (bld->tcs_iface) {
560 emit_store_tcs_chan(bld_base, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
561 } else
562 emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);
563 }
564 }
565 break;
566 }
567 default:
568 break;
569 }
570 }
571
/*
 * Load a NIR register.  Register arrays are read with a per-lane gather
 * (the indirect index is clamped to the last array element); plain
 * registers load each component directly from its alloca slot.
 * Returns a single vector for one component, or an aggregate of 'nc'
 * vectors otherwise.
 */
static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
                                  struct lp_build_context *reg_bld,
                                  const nir_reg_src *reg,
                                  LLVMValueRef indir_src,
                                  LLVMValueRef reg_storage)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int nc = reg->reg->num_components;
   LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   if (reg->reg->num_array_elems) {
      /* Array register: compute per-lane offsets and gather. */
      LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
      if (reg->indirect) {
         /* Clamp the indirect index so out-of-bounds lanes read the
          * last valid element instead of stray memory. */
         LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
         indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
         indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
      }
      reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
      for (unsigned i = 0; i < nc; i++) {
         LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
         vals[i] = build_gather(bld_base, reg_bld, reg_storage, indirect_offset, NULL, NULL);
      }
   } else {
      /* Plain register: one direct load per component. */
      for (unsigned i = 0; i < nc; i++) {
         LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
                                                                                    lp_build_const_int32(gallivm, i));
         vals[i] = LLVMBuildLoad(builder, this_storage, "");
      }
   }
   return nc == 1 ? vals[0] : lp_nir_array_build_gather_values(builder, vals, nc);
}
604
/*
 * Store to a NIR register, honoring 'writemask' and the execution mask.
 * Array registers use a predicated per-lane scatter (with the indirect
 * index clamped to the array bounds); plain registers use a masked
 * store per component.
 */
static void emit_store_reg(struct lp_build_nir_context *bld_base,
                           struct lp_build_context *reg_bld,
                           const nir_reg_dest *reg,
                           unsigned writemask,
                           LLVMValueRef indir_src,
                           LLVMValueRef reg_storage,
                           LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   int nc = reg->reg->num_components;
   if (reg->reg->num_array_elems > 0) {
      /* Array register: scatter each written component per lane. */
      LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
      if (reg->indirect) {
         /* Clamp so out-of-bounds lanes write the last valid element. */
         LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
         indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
         indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
      }
      reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
      for (unsigned i = 0; i < nc; i++) {
         if (!(writemask & (1 << i)))
            continue;
         LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
         dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
         emit_mask_scatter(bld, reg_storage, indirect_offset, dst[i], &bld->exec_mask);
      }
      return;
   }

   /* Plain register: masked store of every component.
    * NOTE(review): this path ignores 'writemask' — presumably callers
    * always pass a full mask for non-array registers; verify. */
   for (unsigned i = 0; i < nc; i++) {
      LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
                                                                                 lp_build_const_int32(gallivm, i));
      dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i], this_storage);
   }
}
643
/*
 * Load 'nc' components of a compute kernel argument from the kernel
 * argument buffer.  The byte offset is scaled down to an element index
 * for the requested bit size, then each component is loaded as a scalar
 * and broadcast across the SIMD lanes.
 *
 * NOTE(review): only offset_is_uniform is handled; with a non-uniform
 * offset, result[] is left unwritten — presumably NIR guarantees
 * kernel-arg offsets are uniform here; confirm against callers.
 */
static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
                                 unsigned nc,
                                 unsigned bit_size,
                                 unsigned offset_bit_size,
                                 bool offset_is_uniform,
                                 LLVMValueRef offset,
                                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
   LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr;
   unsigned size_shift = 0;
   struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);
   /* Convert the byte offset to an element index (shift by log2 size). */
   if (bit_size == 16)
      size_shift = 1;
   else if (bit_size == 32)
      size_shift = 2;
   else if (bit_size == 64)
      size_shift = 3;
   if (size_shift)
      offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift));

   /* View the argument buffer as an array of bit_size-wide integers. */
   LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
   kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");

   if (offset_is_uniform) {
      /* All lanes share the offset: load lane 0's value and broadcast. */
      offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");

      for (unsigned c = 0; c < nc; c++) {
         LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), "");

         LLVMValueRef scalar = lp_build_pointer_get(builder, kernel_args_ptr, this_offset);
         result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
      }
   }
}
682
683 static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
684 {
685 LLVMBuilderRef builder = gallivm->builder;
686 switch (bit_size) {
687 case 8:
688 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
689 break;
690 case 16:
691 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), "");
692 break;
693 case 32:
694 default:
695 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
696 break;
697 case 64:
698 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), "");
699 break;
700 }
701 return addr_ptr;
702 }
703
/*
 * Load 'nc' components from global memory at per-lane 64/32-bit
 * addresses in 'addr'.  Since each lane may point anywhere, the load is
 * emitted as a runtime loop over lanes, inserting each scalar into an
 * accumulator alloca and reloading the full vector at the end.
 */
static void emit_load_global(struct lp_build_nir_context *bld_base,
                             unsigned nc,
                             unsigned bit_size,
                             unsigned addr_bit_size,
                             LLVMValueRef addr,
                             LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *res_bld;

   res_bld = get_int_bld(bld_base, true, bit_size);

   for (unsigned c = 0; c < nc; c++) {
      /* Accumulate per-lane scalars into this vector alloca. */
      LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, "");

      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

      /* Extract this lane's address and load component c from it. */
      LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
                                                      loop_state.counter, "");
      addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);

      LLVMValueRef value_ptr = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));

      LLVMValueRef temp_res;
      temp_res = LLVMBuildLoad(builder, result, "");
      temp_res = LLVMBuildInsertElement(builder, temp_res, value_ptr, loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, result);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);
      outval[c] = LLVMBuildLoad(builder, result, "");
   }
}
739
/*
 * Store selected components of 'dst' to global memory at per-lane
 * addresses in 'addr'.  Emitted as a runtime loop over lanes, with each
 * lane's store wrapped in an if-block on the execution mask so inactive
 * lanes write nothing.
 */
static void emit_store_global(struct lp_build_nir_context *bld_base,
                              unsigned writemask,
                              unsigned nc, unsigned bit_size,
                              unsigned addr_bit_size,
                              LLVMValueRef addr,
                              LLVMValueRef dst)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   for (unsigned c = 0; c < nc; c++) {
      if (!(writemask & (1u << c)))
         continue;
      LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");

      LLVMValueRef exec_mask = mask_vec(bld_base);
      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                       loop_state.counter, "");

      LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
                                                      loop_state.counter, "");
      addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
      /* Reinterpret the extracted lane as an integer of the store width. */
      switch (bit_size) {
      case 32:
         value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt32TypeInContext(gallivm->context), "");
         break;
      case 64:
         value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt64TypeInContext(gallivm->context), "");
         break;
      default:
         break;
      }
      struct lp_build_if_state ifthen;

      /* Only store when this lane's exec-mask bit is set. */
      LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
      lp_build_if(&ifthen, gallivm, cond);
      lp_build_pointer_set(builder, addr_ptr, lp_build_const_int32(gallivm, c), value_ptr);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);
   }
}
786
787 static void emit_atomic_global(struct lp_build_nir_context *bld_base,
788 nir_intrinsic_op nir_op,
789 unsigned addr_bit_size,
790 LLVMValueRef addr,
791 LLVMValueRef val, LLVMValueRef val2,
792 LLVMValueRef *result)
793 {
794 struct gallivm_state *gallivm = bld_base->base.gallivm;
795 LLVMBuilderRef builder = gallivm->builder;
796 struct lp_build_context *uint_bld = &bld_base->uint_bld;
797
798 LLVMValueRef atom_res = lp_build_alloca(gallivm,
799 uint_bld->vec_type, "");
800 LLVMValueRef exec_mask = mask_vec(bld_base);
801 struct lp_build_loop_state loop_state;
802 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
803
804 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
805 loop_state.counter, "");
806
807 LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
808 loop_state.counter, "");
809 addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
810 struct lp_build_if_state ifthen;
811 LLVMValueRef cond, temp_res;
812 LLVMValueRef scalar;
813 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
814 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
815 lp_build_if(&ifthen, gallivm, cond);
816
817 if (nir_op == nir_intrinsic_global_atomic_comp_swap) {
818 LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
819 loop_state.counter, "");
820 cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
821 scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
822 cas_src_ptr,
823 LLVMAtomicOrderingSequentiallyConsistent,
824 LLVMAtomicOrderingSequentiallyConsistent,
825 false);
826 scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
827 } else {
828 LLVMAtomicRMWBinOp op;
829 switch (nir_op) {
830 case nir_intrinsic_global_atomic_add:
831 op = LLVMAtomicRMWBinOpAdd;
832 break;
833 case nir_intrinsic_global_atomic_exchange:
834 op = LLVMAtomicRMWBinOpXchg;
835 break;
836 case nir_intrinsic_global_atomic_and:
837 op = LLVMAtomicRMWBinOpAnd;
838 break;
839 case nir_intrinsic_global_atomic_or:
840 op = LLVMAtomicRMWBinOpOr;
841 break;
842 case nir_intrinsic_global_atomic_xor:
843 op = LLVMAtomicRMWBinOpXor;
844 break;
845 case nir_intrinsic_global_atomic_umin:
846 op = LLVMAtomicRMWBinOpUMin;
847 break;
848 case nir_intrinsic_global_atomic_umax:
849 op = LLVMAtomicRMWBinOpUMax;
850 break;
851 case nir_intrinsic_global_atomic_imin:
852 op = LLVMAtomicRMWBinOpMin;
853 break;
854 case nir_intrinsic_global_atomic_imax:
855 op = LLVMAtomicRMWBinOpMax;
856 break;
857 default:
858 unreachable("unknown atomic op");
859 }
860
861 scalar = LLVMBuildAtomicRMW(builder, op,
862 addr_ptr, value_ptr,
863 LLVMAtomicOrderingSequentiallyConsistent,
864 false);
865 }
866 temp_res = LLVMBuildLoad(builder, atom_res, "");
867 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
868 LLVMBuildStore(builder, temp_res, atom_res);
869 lp_build_else(&ifthen);
870 temp_res = LLVMBuildLoad(builder, atom_res, "");
871 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
872 LLVMBuildStore(builder, temp_res, atom_res);
873 lp_build_endif(&ifthen);
874 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
875 NULL, LLVMIntUGE);
876 *result = LLVMBuildLoad(builder, atom_res, "");
877 }
878
/* Load nc components of bit_size (32 or 64) from a uniform buffer.
 *
 * index selects the constant buffer; offset is a per-lane byte offset.
 * When offset_is_uniform, one scalar load per component is broadcast to
 * all lanes; otherwise a per-lane gather with out-of-bounds masking is
 * emitted.
 */
static void emit_load_ubo(struct lp_build_nir_context *bld_base,
                          unsigned nc,
                          unsigned bit_size,
                          bool offset_is_uniform,
                          LLVMValueRef index,
                          LLVMValueRef offset,
                          LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld_broad = bit_size == 64 ? &bld_base->dbl_bld : &bld_base->base;
   LLVMValueRef consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, index);
   unsigned size_shift = 0;
   /* Convert the byte offset to an element index for the load. */
   if (bit_size == 32)
      size_shift = 2;
   else if (bit_size == 64)
      size_shift = 3;
   if (size_shift)
      offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift));
   if (bit_size == 64) {
      /* Reinterpret the buffer as an array of 64-bit elements. */
      LLVMTypeRef dptr_type = LLVMPointerType(bld_base->dbl_bld.elem_type, 0);
      consts_ptr = LLVMBuildBitCast(builder, consts_ptr, dptr_type, "");
   }

   if (offset_is_uniform) {
      /* All lanes read the same address: load once, broadcast. */
      offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");

      for (unsigned c = 0; c < nc; c++) {
         LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");

         LLVMValueRef scalar = lp_build_pointer_get(builder, consts_ptr, this_offset);
         result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
      }
   } else {
      LLVMValueRef overflow_mask;
      LLVMValueRef num_consts = lp_build_array_get(gallivm, bld->const_sizes_ptr, index);

      /* NOTE(review): the stored size is scaled by 16 here, which suggests
       * const_sizes_ptr holds the buffer size in vec4 units -- confirm
       * against the code that fills that array in. */
      num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 4), "");
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      for (unsigned c = 0; c < nc; c++) {
         LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
         /* Lanes indexing past the end of the buffer are masked off in
          * the gather. */
         overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                          this_offset, num_consts);

         result[c] = build_gather(bld_base, bld_broad, consts_ptr, this_offset, overflow_mask, NULL);
      }
   }
}
929
930
/* Load nc components of bit_size (32 or 64) from an SSBO or, when index
 * is NULL, from workgroup-shared memory.
 *
 * Each vector lane is loaded through a scalar IR loop so that inactive
 * or out-of-bounds lanes can be skipped; those lanes read back as zero.
 */
static void emit_load_mem(struct lp_build_nir_context *bld_base,
                          unsigned nc,
                          unsigned bit_size,
                          LLVMValueRef index,
                          LLVMValueRef offset,
                          LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef ssbo_ptr = NULL;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *uint64_bld = &bld_base->uint64_bld;
   LLVMValueRef ssbo_limit = NULL;

   if (index) {
      /* Buffer size in elements, taken from the first lane's buffer
       * index (non-uniform buffer indexing is not handled here). */
      LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
      ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, bit_size == 64 ? 3 : 2), "");
      ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);

      ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
   } else
      ssbo_ptr = bld->shared_ptr;

   /* Byte offset -> element index. */
   offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, bit_size == 64 ? 3 : 2), "");
   for (unsigned c = 0; c < nc; c++) {
      LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
      LLVMValueRef exec_mask = mask_vec(bld_base);

      if (ssbo_limit) {
         /* Disable lanes that would read past the end of the buffer. */
         LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
         exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
      }

      LLVMValueRef result = lp_build_alloca(gallivm, bit_size == 64 ? uint64_bld->vec_type : uint_bld->vec_type, "");
      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

      struct lp_build_if_state ifthen;
      LLVMValueRef cond, temp_res;

      /* Scalarize the index for the lane selected by the IR loop
       * counter (this reuses the C variable; the vector value is not
       * needed afterwards). */
      loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
                                           loop_state.counter, "");

      cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");

      lp_build_if(&ifthen, gallivm, cond);
      LLVMValueRef scalar;
      if (bit_size == 64) {
         /* View the buffer as 64-bit elements for the load. */
         LLVMValueRef ssbo_ptr2 = LLVMBuildBitCast(builder, ssbo_ptr, LLVMPointerType(uint64_bld->elem_type, 0), "");
         scalar = lp_build_pointer_get(builder, ssbo_ptr2, loop_index);
      } else
         scalar = lp_build_pointer_get(builder, ssbo_ptr, loop_index);

      temp_res = LLVMBuildLoad(builder, result, "");
      temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, result);
      lp_build_else(&ifthen);
      /* Inactive / out-of-bounds lanes yield zero. */
      temp_res = LLVMBuildLoad(builder, result, "");
      LLVMValueRef zero;
      if (bit_size == 64)
         zero = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
      else
         zero = lp_build_const_int32(gallivm, 0);
      temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, result);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);
      outval[c] = LLVMBuildLoad(gallivm->builder, result, "");
   }
}
1004
/* Store the components of dst selected by writemask (nc total, each
 * bit_size wide) to an SSBO or, when index is NULL, to workgroup-shared
 * memory.
 *
 * Each vector lane is stored through a scalar IR loop so that inactive
 * or out-of-bounds lanes can be skipped.
 */
static void emit_store_mem(struct lp_build_nir_context *bld_base,
                           unsigned writemask,
                           unsigned nc,
                           unsigned bit_size,
                           LLVMValueRef index,
                           LLVMValueRef offset,
                           LLVMValueRef dst)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef ssbo_ptr;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef ssbo_limit = NULL;

   if (index) {
      /* Buffer size in elements, from the first lane's buffer index. */
      LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
      ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, bit_size == 64 ? 3 : 2), "");
      ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
      ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
   } else
      ssbo_ptr = bld->shared_ptr;

   /* Byte offset -> element index. */
   offset = lp_build_shr_imm(uint_bld, offset, bit_size == 64 ? 3 : 2);
   for (unsigned c = 0; c < nc; c++) {
      if (!(writemask & (1u << c)))
         continue;
      LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
      /* A single-component source is not an aggregate. */
      LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");

      LLVMValueRef exec_mask = mask_vec(bld_base);
      if (ssbo_limit) {
         /* Disable lanes that would write past the end of the buffer. */
         LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
         exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
      }

      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                       loop_state.counter, "");
      /* Values are carried in float vectors; reinterpret as integers
       * of the store width. */
      if (bit_size == 64)
         value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, bld_base->uint64_bld.elem_type, "");
      else
         value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
      struct lp_build_if_state ifthen;
      LLVMValueRef cond;

      /* Scalarize the index for the lane selected by the IR loop
       * counter (reuses the C variable; the vector is not needed
       * afterwards). */
      loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
                                           loop_state.counter, "");
      cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
      lp_build_if(&ifthen, gallivm, cond);
      if (bit_size == 64) {
         /* View the buffer as 64-bit elements for the store. */
         LLVMValueRef ssbo_ptr2 = LLVMBuildBitCast(builder, ssbo_ptr, LLVMPointerType(bld_base->uint64_bld.elem_type, 0), "");
         lp_build_pointer_set(builder, ssbo_ptr2, loop_index, value_ptr);
      } else
         lp_build_pointer_set(builder, ssbo_ptr, loop_index, value_ptr);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);
   }
}
1067
/* Emit a 32-bit atomic op on an SSBO or, when index is NULL, on
 * workgroup-shared memory.
 *
 * Each vector lane runs through a scalar IR loop: lanes that are
 * inactive or out of bounds skip the atomic and produce 0 in *result.
 */
static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_op nir_op,
                            LLVMValueRef index, LLVMValueRef offset,
                            LLVMValueRef val, LLVMValueRef val2,
                            LLVMValueRef *result)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef ssbo_ptr;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef ssbo_limit = NULL;

   if (index) {
      /* Buffer size in dwords, from the first lane's buffer index. */
      LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
      ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, 2), "");
      ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
      ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
   } else
      ssbo_ptr = bld->shared_ptr;

   /* Byte offset -> dword index. */
   offset = lp_build_shr_imm(uint_bld, offset, 2);
   /* Per-lane results are accumulated into this alloca'd vector. */
   LLVMValueRef atom_res = lp_build_alloca(gallivm,
                                           uint_bld->vec_type, "");

   LLVMValueRef exec_mask = mask_vec(bld_base);
   if (ssbo_limit) {
      /* Disable lanes whose index is past the end of the buffer. */
      LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, offset, ssbo_limit);
      exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
   }

   struct lp_build_loop_state loop_state;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                    loop_state.counter, "");
   /* Atomic operands must be integer typed. */
   value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");

   /* Scalarize the offset for the current lane (reuses the C variable;
    * the vector offset is not needed after this point). */
   offset = LLVMBuildExtractElement(gallivm->builder, offset,
                                    loop_state.counter, "");

   LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, ssbo_ptr,
                                          &offset, 1, "");

   struct lp_build_if_state ifthen;
   LLVMValueRef cond, temp_res;
   LLVMValueRef scalar;
   cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
   cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
   lp_build_if(&ifthen, gallivm, cond);

   if (nir_op == nir_intrinsic_ssbo_atomic_comp_swap || nir_op == nir_intrinsic_shared_atomic_comp_swap) {
      LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
                                                         loop_state.counter, "");
      cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
      scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
                                      cas_src_ptr,
                                      LLVMAtomicOrderingSequentiallyConsistent,
                                      LLVMAtomicOrderingSequentiallyConsistent,
                                      false);
      /* cmpxchg returns { old value, success }; keep the old value. */
      scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
   } else {
      LLVMAtomicRMWBinOp op;

      switch (nir_op) {
      case nir_intrinsic_shared_atomic_add:
      case nir_intrinsic_ssbo_atomic_add:
         op = LLVMAtomicRMWBinOpAdd;
         break;
      case nir_intrinsic_shared_atomic_exchange:
      case nir_intrinsic_ssbo_atomic_exchange:
         op = LLVMAtomicRMWBinOpXchg;
         break;
      case nir_intrinsic_shared_atomic_and:
      case nir_intrinsic_ssbo_atomic_and:
         op = LLVMAtomicRMWBinOpAnd;
         break;
      case nir_intrinsic_shared_atomic_or:
      case nir_intrinsic_ssbo_atomic_or:
         op = LLVMAtomicRMWBinOpOr;
         break;
      case nir_intrinsic_shared_atomic_xor:
      case nir_intrinsic_ssbo_atomic_xor:
         op = LLVMAtomicRMWBinOpXor;
         break;
      case nir_intrinsic_shared_atomic_umin:
      case nir_intrinsic_ssbo_atomic_umin:
         op = LLVMAtomicRMWBinOpUMin;
         break;
      case nir_intrinsic_shared_atomic_umax:
      case nir_intrinsic_ssbo_atomic_umax:
         op = LLVMAtomicRMWBinOpUMax;
         break;
      case nir_intrinsic_ssbo_atomic_imin:
      case nir_intrinsic_shared_atomic_imin:
         op = LLVMAtomicRMWBinOpMin;
         break;
      case nir_intrinsic_ssbo_atomic_imax:
      case nir_intrinsic_shared_atomic_imax:
         op = LLVMAtomicRMWBinOpMax;
         break;
      default:
         unreachable("unknown atomic op");
      }
      scalar = LLVMBuildAtomicRMW(builder, op,
                                  scalar_ptr, value_ptr,
                                  LLVMAtomicOrderingSequentiallyConsistent,
                                  false);
   }
   temp_res = LLVMBuildLoad(builder, atom_res, "");
   temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
   LLVMBuildStore(builder, temp_res, atom_res);
   lp_build_else(&ifthen);
   /* Inactive / out-of-bounds lanes get 0 in the result. */
   temp_res = LLVMBuildLoad(builder, atom_res, "");
   temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
   LLVMBuildStore(builder, temp_res, atom_res);
   lp_build_endif(&ifthen);

   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                          NULL, LLVMIntUGE);
   *result = LLVMBuildLoad(builder, atom_res, "");
}
1190
1191 static void emit_barrier(struct lp_build_nir_context *bld_base)
1192 {
1193 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1194 struct gallivm_state * gallivm = bld_base->base.gallivm;
1195
1196 LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
1197
1198 lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
1199 LLVMPositionBuilderAtEnd(gallivm->builder, resume);
1200 }
1201
1202 static LLVMValueRef emit_get_buffer_size(struct lp_build_nir_context *bld_base,
1203 LLVMValueRef index)
1204 {
1205 struct gallivm_state *gallivm = bld_base->base.gallivm;
1206 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1207 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1208 struct lp_build_context *bld_broad = &bld_base->uint_bld;
1209 LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr,
1210 LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
1211 return lp_build_broadcast_scalar(bld_broad, size_ptr);
1212 }
1213
1214 static void emit_image_op(struct lp_build_nir_context *bld_base,
1215 struct lp_img_params *params)
1216 {
1217 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1218 params->type = bld_base->base.type;
1219 params->context_ptr = bld->context_ptr;
1220 params->thread_data_ptr = bld->thread_data_ptr;
1221 params->exec_mask = mask_vec(bld_base);
1222 bld->image->emit_op(bld->image,
1223 bld->bld_base.base.gallivm,
1224 params);
1225
1226 }
1227
1228 static void emit_image_size(struct lp_build_nir_context *bld_base,
1229 struct lp_sampler_size_query_params *params)
1230 {
1231 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1232
1233 params->int_type = bld_base->int_bld.type;
1234 params->context_ptr = bld->context_ptr;
1235
1236 bld->image->emit_size_query(bld->image,
1237 bld->bld_base.base.gallivm,
1238 params);
1239
1240 }
1241
1242 static void init_var_slots(struct lp_build_nir_context *bld_base,
1243 nir_variable *var, unsigned sc)
1244 {
1245 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1246 unsigned slots = glsl_count_attribute_slots(var->type, false) * 4;
1247
1248 if (!bld->outputs)
1249 return;
1250 for (unsigned comp = sc; comp < slots + sc; comp++) {
1251 unsigned this_loc = var->data.driver_location + (comp / 4);
1252 unsigned this_chan = comp % 4;
1253
1254 if (!bld->outputs[this_loc][this_chan])
1255 bld->outputs[this_loc][this_chan] = lp_build_alloca(bld_base->base.gallivm,
1256 bld_base->base.vec_type, "output");
1257 }
1258 }
1259
1260 static void emit_var_decl(struct lp_build_nir_context *bld_base,
1261 nir_variable *var)
1262 {
1263 unsigned sc = var->data.location_frac;
1264 switch (var->data.mode) {
1265 case nir_var_shader_out: {
1266 if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
1267 if (var->data.location == FRAG_RESULT_STENCIL)
1268 sc = 1;
1269 else if (var->data.location == FRAG_RESULT_DEPTH)
1270 sc = 2;
1271 }
1272 init_var_slots(bld_base, var, sc);
1273 break;
1274 }
1275 default:
1276 break;
1277 }
1278 }
1279
1280 static void emit_tex(struct lp_build_nir_context *bld_base,
1281 struct lp_sampler_params *params)
1282 {
1283 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1284
1285 params->type = bld_base->base.type;
1286 params->context_ptr = bld->context_ptr;
1287 params->thread_data_ptr = bld->thread_data_ptr;
1288
1289 bld->sampler->emit_tex_sample(bld->sampler,
1290 bld->bld_base.base.gallivm,
1291 params);
1292 }
1293
1294 static void emit_tex_size(struct lp_build_nir_context *bld_base,
1295 struct lp_sampler_size_query_params *params)
1296 {
1297 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1298
1299 params->int_type = bld_base->int_bld.type;
1300 params->context_ptr = bld->context_ptr;
1301
1302 bld->sampler->emit_size_query(bld->sampler,
1303 bld->bld_base.base.gallivm,
1304 params);
1305 }
1306
1307 static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
1308 nir_intrinsic_instr *instr,
1309 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1310 {
1311 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1312 struct gallivm_state *gallivm = bld_base->base.gallivm;
1313 switch (instr->intrinsic) {
1314 case nir_intrinsic_load_instance_id:
1315 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1316 break;
1317 case nir_intrinsic_load_base_instance:
1318 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1319 break;
1320 case nir_intrinsic_load_base_vertex:
1321 result[0] = bld->system_values.basevertex;
1322 break;
1323 case nir_intrinsic_load_vertex_id:
1324 result[0] = bld->system_values.vertex_id;
1325 break;
1326 case nir_intrinsic_load_primitive_id:
1327 result[0] = bld->system_values.prim_id;
1328 break;
1329 case nir_intrinsic_load_work_group_id:
1330 for (unsigned i = 0; i < 3; i++)
1331 result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_id, lp_build_const_int32(gallivm, i), ""));
1332 break;
1333 case nir_intrinsic_load_local_invocation_id:
1334 for (unsigned i = 0; i < 3; i++)
1335 result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, i, "");
1336 break;
1337 case nir_intrinsic_load_num_work_groups:
1338 for (unsigned i = 0; i < 3; i++)
1339 result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.grid_size, lp_build_const_int32(gallivm, i), ""));
1340 break;
1341 case nir_intrinsic_load_invocation_id:
1342 if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
1343 result[0] = bld->system_values.invocation_id;
1344 else
1345 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1346 break;
1347 case nir_intrinsic_load_front_face:
1348 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1349 break;
1350 case nir_intrinsic_load_draw_id:
1351 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1352 break;
1353 default:
1354 break;
1355 case nir_intrinsic_load_local_group_size:
1356 for (unsigned i = 0; i < 3; i++)
1357 result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, i), ""));
1358 break;
1359 case nir_intrinsic_load_work_dim:
1360 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
1361 break;
1362 case nir_intrinsic_load_tess_coord:
1363 for (unsigned i = 0; i < 3; i++) {
1364 result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
1365 }
1366 break;
1367 case nir_intrinsic_load_tess_level_outer:
1368 for (unsigned i = 0; i < 4; i++)
1369 result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
1370 break;
1371 case nir_intrinsic_load_tess_level_inner:
1372 for (unsigned i = 0; i < 2; i++)
1373 result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
1374 break;
1375 case nir_intrinsic_load_patch_vertices_in:
1376 result[0] = bld->system_values.vertices_in;
1377 break;
1378 case nir_intrinsic_load_sample_id:
1379 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
1380 break;
1381 case nir_intrinsic_load_sample_pos:
1382 for (unsigned i = 0; i < 2; i++) {
1383 LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
1384 idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
1385 LLVMValueRef val = lp_build_array_get(gallivm, bld->system_values.sample_pos, idx);
1386 result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
1387 }
1388 break;
1389 case nir_intrinsic_load_sample_mask_in:
1390 result[0] = bld->system_values.sample_mask_in;
1391 break;
1392 }
1393 }
1394
1395 static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
1396 LLVMValueRef *dst)
1397 {
1398 struct gallivm_state *gallivm = bld_base->base.gallivm;
1399 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1400 *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), lp_build_const_int_vec(gallivm, uint_bld->type, -1));
1401 }
1402
1403 static void bgnloop(struct lp_build_nir_context *bld_base)
1404 {
1405 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1406 lp_exec_bgnloop(&bld->exec_mask, true);
1407 }
1408
1409 static void endloop(struct lp_build_nir_context *bld_base)
1410 {
1411 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1412 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
1413 }
1414
1415 static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
1416 {
1417 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1418 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1419 lp_exec_mask_cond_push(&bld->exec_mask, LLVMBuildBitCast(builder, cond, bld_base->base.int_vec_type, ""));
1420 }
1421
1422 static void else_stmt(struct lp_build_nir_context *bld_base)
1423 {
1424 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1425 lp_exec_mask_cond_invert(&bld->exec_mask);
1426 }
1427
1428 static void endif_stmt(struct lp_build_nir_context *bld_base)
1429 {
1430 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1431 lp_exec_mask_cond_pop(&bld->exec_mask);
1432 }
1433
1434 static void break_stmt(struct lp_build_nir_context *bld_base)
1435 {
1436 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1437
1438 lp_exec_break(&bld->exec_mask, NULL, false);
1439 }
1440
1441 static void continue_stmt(struct lp_build_nir_context *bld_base)
1442 {
1443 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1444 lp_exec_continue(&bld->exec_mask);
1445 }
1446
1447 static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
1448 {
1449 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1450 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1451 LLVMValueRef mask;
1452
1453 if (!cond) {
1454 if (bld->exec_mask.has_mask) {
1455 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1456 } else {
1457 mask = LLVMConstNull(bld->bld_base.base.int_vec_type);
1458 }
1459 } else {
1460 mask = LLVMBuildNot(builder, cond, "");
1461 if (bld->exec_mask.has_mask) {
1462 LLVMValueRef invmask;
1463 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1464 mask = LLVMBuildOr(builder, mask, invmask, "");
1465 }
1466 }
1467 lp_build_mask_update(bld->mask, mask);
1468 }
1469
1470 static void
1471 increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,
1472 LLVMValueRef ptr,
1473 LLVMValueRef mask)
1474 {
1475 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1476 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
1477
1478 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
1479
1480 LLVMBuildStore(builder, current_vec, ptr);
1481 }
1482
1483 static void
1484 clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,
1485 LLVMValueRef ptr,
1486 LLVMValueRef mask)
1487 {
1488 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1489 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
1490
1491 current_vec = lp_build_select(&bld_base->uint_bld,
1492 mask,
1493 bld_base->uint_bld.zero,
1494 current_vec);
1495
1496 LLVMBuildStore(builder, current_vec, ptr);
1497 }
1498
1499 static LLVMValueRef
1500 clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,
1501 LLVMValueRef current_mask_vec,
1502 LLVMValueRef total_emitted_vertices_vec)
1503 {
1504 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1505 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
1506 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
1507 total_emitted_vertices_vec,
1508 bld->max_output_vertices_vec);
1509
1510 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
1511 }
1512
/* Geometry shader EmitVertex() for the given stream: hand the current
 * outputs to the GS interface, then bump the per-stream vertex
 * counters for the lanes that actually emitted. */
static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   assert(bld->gs_iface->emit_vertex);
   LLVMValueRef total_emitted_vertices_vec =
      LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
   /* Lanes that already hit max_output_vertices must not emit again. */
   LLVMValueRef mask = mask_vec(bld_base);
   mask = clamp_mask_to_max_output_vertices(bld, mask,
                                            total_emitted_vertices_vec);
   bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
                              bld->outputs,
                              total_emitted_vertices_vec,
                              lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));

   /* Subtracting the all-ones mask increments active lanes by one. */
   increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
                             mask);
   increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id],
                             mask);
}
1534
/* Geometry shader EndPrimitive() for the lanes set in mask: notify the
 * GS interface (stream 0 only), bump the per-stream primitive counter,
 * and reset the per-primitive vertex counter for those lanes. */
static void
end_primitive_masked(struct lp_build_nir_context * bld_base,
                     LLVMValueRef mask, uint32_t stream_id)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef emitted_vertices_vec =
      LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr[stream_id], "");
   LLVMValueRef emitted_prims_vec =
      LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr[stream_id], "");
   LLVMValueRef total_emitted_vertices_vec =
      LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");

   /* Ending a primitive only counts for lanes that emitted at least
    * one vertex since the last EndPrimitive. */
   LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
                                            PIPE_FUNC_NOTEQUAL,
                                            emitted_vertices_vec,
                                            uint_bld->zero);
   mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
   if (stream_id == 0)
      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
                                   total_emitted_vertices_vec,
                                   emitted_vertices_vec, emitted_prims_vec, mask_vec(bld_base));
   increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
                             mask);
   /* Start counting vertices for the next primitive from zero. */
   clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
                                mask);
}
1564
1565 static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
1566 {
1567 ASSERTED struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1568
1569 assert(bld->gs_iface->end_primitive);
1570
1571 LLVMValueRef mask = mask_vec(bld_base);
1572 end_primitive_masked(bld_base, mask, stream_id);
1573 }
1574
1575 static void
1576 emit_prologue(struct lp_build_nir_soa_context *bld)
1577 {
1578 struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
1579 if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
1580 uint32_t num_inputs = util_bitcount64(bld->bld_base.shader->info.inputs_read);
1581 unsigned index, chan;
1582 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
1583 LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4);
1584 bld->inputs_array = lp_build_array_alloca(gallivm,
1585 vec_type, array_size,
1586 "input_array");
1587
1588 for (index = 0; index < num_inputs; ++index) {
1589 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
1590 LLVMValueRef lindex =
1591 lp_build_const_int32(gallivm, index * 4 + chan);
1592 LLVMValueRef input_ptr =
1593 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
1594 &lindex, 1, "");
1595 LLVMValueRef value = bld->inputs[index][chan];
1596 if (value)
1597 LLVMBuildStore(gallivm->builder, value, input_ptr);
1598 }
1599 }
1600 }
1601 }
1602
/*
 * NIR vote_any / vote_all / vote_ieq: reduce @src across the active lanes
 * of the execution mask into a single scalar, then broadcast it to all
 * lanes in result[0].
 *
 * The reduction is emitted as runtime loops over the vector lanes, with a
 * per-lane if-guard so only lanes enabled in the exec mask contribute.
 */
static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;

   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;

   /* Per-lane "is this lane active" condition vector. */
   LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");

   /* Scalar accumulator for the reduction, kept in an alloca because it is
    * updated from inside runtime control flow.
    */
   LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   LLVMValueRef init_val = NULL;
   if (instr->intrinsic == nir_intrinsic_vote_ieq) {
      /* for equal we unfortunately have to loop and find the first valid one. */
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
      LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");

      struct lp_build_if_state ifthen;
      lp_build_if(&ifthen, gallivm, if_cond);
      /* Each active lane overwrites the slot, so init_val ends up holding
       * the value of the LAST active lane (any active lane's value works
       * as the comparison reference for vote_ieq).
       */
      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                       loop_state.counter, "");
      LLVMBuildStore(builder, value_ptr, res_store);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                             NULL, LLVMIntUGE);
      init_val = LLVMBuildLoad(builder, res_store, "");
   } else {
      /* vote_any starts at 0 and ORs lanes in; vote_all starts at ~0 and
       * ANDs lanes in.
       */
      LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
   }

   /* Main reduction loop over all lanes; inactive lanes are skipped. */
   LLVMValueRef res;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                    loop_state.counter, "");
   struct lp_build_if_state ifthen;
   LLVMValueRef if_cond;
   if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");

   lp_build_if(&ifthen, gallivm, if_cond);
   res = LLVMBuildLoad(builder, res_store, "");

   if (instr->intrinsic == nir_intrinsic_vote_ieq) {
      /* NOTE(review): the accumulator still holds the reference value from
       * the search loop above and the per-lane equality result is OR'd into
       * it, rather than AND-ing equality across lanes — looks questionable
       * for vote_ieq; confirm against current upstream behavior.
       */
      LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
      tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
      res = LLVMBuildOr(builder, res, tmp, "");
   } else if (instr->intrinsic == nir_intrinsic_vote_any)
      res = LLVMBuildOr(builder, res, value_ptr, "");
   else
      res = LLVMBuildAnd(builder, res, value_ptr, "");
   LLVMBuildStore(builder, res, res_store);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);
   /* All lanes receive the same reduced value. */
   result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));
}
1658
1659 static void
1660 emit_interp_at(struct lp_build_nir_context *bld_base,
1661 unsigned num_components,
1662 nir_variable *var,
1663 bool centroid,
1664 bool sample,
1665 unsigned const_index,
1666 LLVMValueRef indir_index,
1667 LLVMValueRef offsets[2],
1668 LLVMValueRef dst[4])
1669 {
1670 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1671
1672 for (unsigned i = 0; i < num_components; i++) {
1673 dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
1674 const_index + var->data.driver_location, i + var->data.location_frac,
1675 centroid, sample, indir_index, offsets);
1676 }
1677 }
1678
/*
 * Main entry point: translate a NIR shader to LLVM IR in SoA layout.
 *
 * Sets up the lp_build_nir_soa_context (arithmetic build contexts for every
 * width/signedness combination, the per-stage interface pointers and the
 * NIR callback table), runs the prologue and the NIR->LLVM walk, and for
 * geometry shaders emits the epilogue that flushes pending primitives.
 *
 * @param gallivm  gallivm/LLVM state the IR is emitted into.
 * @param shader   the NIR shader to translate.
 * @param params   caller-provided pointers: exec mask, inputs, constants,
 *                 SSBOs, samplers, images, stage interfaces, etc.
 * @param outputs  per-attribute/channel output value slots filled by the
 *                 generated code.
 */
void lp_build_nir_soa(struct gallivm_state *gallivm,
                      struct nir_shader *shader,
                      const struct lp_build_tgsi_params *params,
                      LLVMValueRef (*outputs)[4])
{
   struct lp_build_nir_soa_context bld;
   struct lp_type type = params->type;
   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   /* Scalar (single-element) variants of the base type. */
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type)));
   /* Double-width float context (e.g. fp64 when base is fp32). */
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   /* 64-bit integer contexts (double the base integer width). */
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   /* 16-bit integer contexts (half the base integer width). */
   {
      struct lp_type uint16_type;
      uint16_type = lp_uint_type(type);
      uint16_type.width /= 2;
      lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);
   }
   {
      struct lp_type int16_type;
      int16_type = lp_int_type(type);
      int16_type.width /= 2;
      lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);
   }
   /* 8-bit integer contexts (quarter of the base integer width). */
   {
      struct lp_type uint8_type;
      uint8_type = lp_uint_type(type);
      uint8_type.width /= 4;
      lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);
   }
   {
      struct lp_type int8_type;
      int8_type = lp_int_type(type);
      int8_type.width /= 4;
      lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);
   }
   /* Wire up the NIR visitor callback table to the SoA implementations. */
   bld.bld_base.load_var = emit_load_var;
   bld.bld_base.store_var = emit_store_var;
   bld.bld_base.load_reg = emit_load_reg;
   bld.bld_base.store_reg = emit_store_reg;
   bld.bld_base.emit_var_decl = emit_var_decl;
   bld.bld_base.load_ubo = emit_load_ubo;
   bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
   bld.bld_base.load_global = emit_load_global;
   bld.bld_base.store_global = emit_store_global;
   bld.bld_base.atomic_global = emit_atomic_global;
   bld.bld_base.tex = emit_tex;
   bld.bld_base.tex_size = emit_tex_size;
   bld.bld_base.bgnloop = bgnloop;
   bld.bld_base.endloop = endloop;
   bld.bld_base.if_cond = if_cond;
   bld.bld_base.else_stmt = else_stmt;
   bld.bld_base.endif_stmt = endif_stmt;
   bld.bld_base.break_stmt = break_stmt;
   bld.bld_base.continue_stmt = continue_stmt;
   bld.bld_base.sysval_intrin = emit_sysval_intrin;
   bld.bld_base.discard = discard;
   bld.bld_base.emit_vertex = emit_vertex;
   bld.bld_base.end_primitive = end_primitive;
   bld.bld_base.load_mem = emit_load_mem;
   bld.bld_base.store_mem = emit_store_mem;
   bld.bld_base.get_buffer_size = emit_get_buffer_size;
   bld.bld_base.atomic_mem = emit_atomic_mem;
   bld.bld_base.barrier = emit_barrier;
   bld.bld_base.image_op = emit_image_op;
   bld.bld_base.image_size = emit_image_size;
   bld.bld_base.vote = emit_vote;
   bld.bld_base.helper_invocation = emit_helper_invocation;
   bld.bld_base.interp_at = emit_interp_at;

   /* Copy the caller-supplied resource/state pointers into the context. */
   bld.mask = params->mask;
   bld.inputs = params->inputs;
   bld.outputs = outputs;
   bld.consts_ptr = params->consts_ptr;
   bld.const_sizes_ptr = params->const_sizes_ptr;
   bld.ssbo_ptr = params->ssbo_ptr;
   bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
   bld.sampler = params->sampler;
   // bld.bld_base.info = params->info;

   bld.context_ptr = params->context_ptr;
   bld.thread_data_ptr = params->thread_data_ptr;
   bld.image = params->image;
   bld.shared_ptr = params->shared_ptr;
   bld.coro = params->coro;
   bld.kernel_args_ptr = params->kernel_args;
   /* Track indirect input addressing so emit_prologue() can spill inputs
    * into an indexable array.
    */
   bld.indirects = 0;
   if (params->info->indirect_files & (1 << TGSI_FILE_INPUT))
      bld.indirects |= nir_var_shader_in;

   bld.gs_iface = params->gs_iface;
   bld.tcs_iface = params->tcs_iface;
   bld.tes_iface = params->tes_iface;
   bld.fs_iface = params->fs_iface;
   if (bld.gs_iface) {
      struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;

      bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                                           shader->info.gs.vertices_out);
      /* Per-stream vertex/primitive counters used by emit_vertex /
       * end_primitive.
       */
      for (int i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         bld.emitted_prims_vec_ptr[i] =
            lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
         bld.emitted_vertices_vec_ptr[i] =
            lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
         bld.total_emitted_vertices_vec_ptr[i] =
            lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
      }
   }
   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *params->system_values;

   bld.bld_base.shader = shader;

   /* Spill indirectly-addressed inputs, then walk the NIR shader. */
   emit_prologue(&bld);
   lp_build_nir_llvm(&bld.bld_base, shader);

   if (bld.gs_iface) {
      LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;

      /* Implicitly end any primitive still open on stream 0, then report
       * the final per-stream counts to the GS interface.
       */
      end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), 0);
      for (int i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         total_emitted_vertices_vec =
            LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr[i], "");

         emitted_prims_vec =
            LLVMBuildLoad(builder, bld.emitted_prims_vec_ptr[i], "");
         bld.gs_iface->gs_epilogue(bld.gs_iface,
                                   total_emitted_vertices_vec,
                                   emitted_prims_vec, i);
      }
   }
   lp_exec_mask_fini(&bld.exec_mask);
}