llvmpipe: add framebuffer fetching support (v1.1)
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_nir_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2019 Red Hat.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **************************************************************************/
25
26 #include "lp_bld_nir.h"
27 #include "lp_bld_init.h"
28 #include "lp_bld_flow.h"
29 #include "lp_bld_logic.h"
30 #include "lp_bld_gather.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_struct.h"
33 #include "lp_bld_arit.h"
34 #include "lp_bld_bitarit.h"
35 #include "lp_bld_coro.h"
36 #include "lp_bld_printf.h"
37 #include "util/u_math.h"
38 /*
39 * combine the execution mask if there is one with the current mask.
40 */
41 static LLVMValueRef
42 mask_vec(struct lp_build_nir_context *bld_base)
43 {
44 struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
45 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
46 struct lp_exec_mask *exec_mask = &bld->exec_mask;
47 LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
48 if (!exec_mask->has_mask) {
49 return bld_mask;
50 }
51 if (!bld_mask)
52 return exec_mask->exec_mask;
53 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
54 exec_mask->exec_mask, "");
55 }
56
57 static LLVMValueRef
58 emit_fetch_64bit(
59 struct lp_build_nir_context * bld_base,
60 LLVMValueRef input,
61 LLVMValueRef input2)
62 {
63 struct gallivm_state *gallivm = bld_base->base.gallivm;
64 LLVMBuilderRef builder = gallivm->builder;
65 LLVMValueRef res;
66 int i;
67 LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
68 int len = bld_base->base.type.length * 2;
69 assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
70
71 for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
72 #if UTIL_ARCH_LITTLE_ENDIAN
73 shuffles[i] = lp_build_const_int32(gallivm, i / 2);
74 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
75 #else
76 shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
77 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
78 #endif
79 }
80 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
81
82 return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
83 }
84
85 static void
86 emit_store_64bit_split(struct lp_build_nir_context *bld_base,
87 LLVMValueRef value,
88 LLVMValueRef split_values[2])
89 {
90 struct gallivm_state *gallivm = bld_base->base.gallivm;
91 LLVMBuilderRef builder = gallivm->builder;
92 unsigned i;
93 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
94 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
95 int len = bld_base->base.type.length * 2;
96
97 value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
98 for (i = 0; i < bld_base->base.type.length; i++) {
99 #if UTIL_ARCH_LITTLE_ENDIAN
100 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
101 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
102 #else
103 shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
104 shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
105 #endif
106 }
107
108 split_values[0] = LLVMBuildShuffleVector(builder, value,
109 LLVMGetUndef(LLVMTypeOf(value)),
110 LLVMConstVector(shuffles,
111 bld_base->base.type.length),
112 "");
113 split_values[1] = LLVMBuildShuffleVector(builder, value,
114 LLVMGetUndef(LLVMTypeOf(value)),
115 LLVMConstVector(shuffles2,
116 bld_base->base.type.length),
117 "");
118 }
119
120 static void
121 emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
122 LLVMValueRef chan_ptr,
123 LLVMValueRef chan_ptr2,
124 LLVMValueRef value)
125 {
126 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
127 struct lp_build_context *float_bld = &bld_base->base;
128 LLVMValueRef split_vals[2];
129
130 emit_store_64bit_split(bld_base, value, split_vals);
131
132 lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
133 lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
134 }
135
136 static LLVMValueRef
137 get_soa_array_offsets(struct lp_build_context *uint_bld,
138 LLVMValueRef indirect_index,
139 int num_components,
140 unsigned chan_index,
141 bool need_perelement_offset)
142 {
143 struct gallivm_state *gallivm = uint_bld->gallivm;
144 LLVMValueRef chan_vec =
145 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
146 LLVMValueRef length_vec =
147 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
148 LLVMValueRef index_vec;
149
150 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
151 index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));
152 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
153 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
154
155 if (need_perelement_offset) {
156 LLVMValueRef pixel_offsets;
157 unsigned i;
158 /* build pixel offset vector: {0, 1, 2, 3, ...} */
159 pixel_offsets = uint_bld->undef;
160 for (i = 0; i < uint_bld->type.length; i++) {
161 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
162 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
163 ii, ii, "");
164 }
165 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
166 }
167 return index_vec;
168 }
169
170 static LLVMValueRef
171 build_gather(struct lp_build_nir_context *bld_base,
172 struct lp_build_context *bld,
173 LLVMValueRef base_ptr,
174 LLVMValueRef indexes,
175 LLVMValueRef overflow_mask,
176 LLVMValueRef indexes2)
177 {
178 struct gallivm_state *gallivm = bld_base->base.gallivm;
179 LLVMBuilderRef builder = gallivm->builder;
180 struct lp_build_context *uint_bld = &bld_base->uint_bld;
181 LLVMValueRef res;
182 unsigned i;
183
184 if (indexes2)
185 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
186 else
187 res = bld->undef;
188 /*
189 * overflow_mask is a vector telling us which channels
190 * in the vector overflowed. We use the overflow behavior for
191 * constant buffers which is defined as:
192 * Out of bounds access to constant buffer returns 0 in all
193 * components. Out of bounds behavior is always with respect
194 * to the size of the buffer bound at that slot.
195 */
196
197 if (overflow_mask) {
198 /*
199 * We avoid per-element control flow here (also due to llvm going crazy,
200 * though I suspect it's better anyway since overflow is likely rare).
201 * Note that since we still fetch from buffers even if num_elements was
202 * zero (in this case we'll fetch from index zero) the jit func callers
203 * MUST provide valid fake constant buffers of size 4x32 (the values do
204 * not matter), otherwise we'd still need (not per element though)
205 * control flow.
206 */
207 indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
208 if (indexes2)
209 indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
210 }
211
212 /*
213 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
214 */
215 for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
216 LLVMValueRef si, di;
217 LLVMValueRef index;
218 LLVMValueRef scalar_ptr, scalar;
219
220 di = lp_build_const_int32(gallivm, i);
221 if (indexes2)
222 si = lp_build_const_int32(gallivm, i >> 1);
223 else
224 si = di;
225
226 if (indexes2 && (i & 1)) {
227 index = LLVMBuildExtractElement(builder,
228 indexes2, si, "");
229 } else {
230 index = LLVMBuildExtractElement(builder,
231 indexes, si, "");
232 }
233 scalar_ptr = LLVMBuildGEP(builder, base_ptr,
234 &index, 1, "gather_ptr");
235 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
236
237 res = LLVMBuildInsertElement(builder, res, scalar, di, "");
238 }
239
240 if (overflow_mask) {
241 if (indexes2) {
242 res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
243 overflow_mask = LLVMBuildSExt(builder, overflow_mask,
244 bld_base->dbl_bld.int_vec_type, "");
245 res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
246 bld_base->dbl_bld.zero, res);
247 } else
248 res = lp_build_select(bld, overflow_mask, bld->zero, res);
249 }
250
251 return res;
252 }
253
254 /**
255 * Scatter/store vector.
256 */
257 static void
258 emit_mask_scatter(struct lp_build_nir_soa_context *bld,
259 LLVMValueRef base_ptr,
260 LLVMValueRef indexes,
261 LLVMValueRef values,
262 struct lp_exec_mask *mask)
263 {
264 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
265 LLVMBuilderRef builder = gallivm->builder;
266 unsigned i;
267 LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
268
269 /*
270 * Loop over elements of index_vec, store scalar value.
271 */
272 for (i = 0; i < bld->bld_base.base.type.length; i++) {
273 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
274 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
275 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
276 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
277 LLVMValueRef scalar_pred = pred ?
278 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
279
280 if (0)
281 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
282 ii, val, index, scalar_ptr);
283
284 if (scalar_pred) {
285 LLVMValueRef real_val, dst_val;
286 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
287 real_val = lp_build_select(&bld->uint_elem_bld, scalar_pred, val, dst_val);
288 LLVMBuildStore(builder, real_val, scalar_ptr);
289 }
290 else {
291 LLVMBuildStore(builder, val, scalar_ptr);
292 }
293 }
294 }
295
296 static void emit_load_var(struct lp_build_nir_context *bld_base,
297 nir_variable_mode deref_mode,
298 unsigned num_components,
299 unsigned bit_size,
300 nir_variable *var,
301 unsigned vertex_index,
302 LLVMValueRef indir_vertex_index,
303 unsigned const_index,
304 LLVMValueRef indir_index,
305 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
306 {
307 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
308 struct gallivm_state *gallivm = bld_base->base.gallivm;
309 int dmul = bit_size == 64 ? 2 : 1;
310 switch (deref_mode) {
311 case nir_var_shader_in:
312 for (unsigned i = 0; i < num_components; i++) {
313 int idx = (i * dmul) + var->data.location_frac;
314 if (bld->gs_iface) {
315 LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
316 LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
317 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
318 LLVMValueRef result2;
319 result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
320 false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
321 if (bit_size == 64) {
322 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
323 result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
324 false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
325 result[i] = emit_fetch_64bit(bld_base, result[i], result2);
326 }
327 } else if (bld->tes_iface) {
328 LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
329 LLVMValueRef attrib_index_val;
330 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
331 LLVMValueRef result2;
332
333 if (indir_index)
334 attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
335 else
336 attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
337 if (var->data.patch) {
338 result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
339 indir_index ? true : false, attrib_index_val, swizzle_index_val);
340 if (bit_size == 64) {
341 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
342 result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
343 indir_index ? true : false, attrib_index_val, swizzle_index_val);
344 result[i] = emit_fetch_64bit(bld_base, result[i], result2);
345 }
346 }
347 else {
348 result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
349 indir_vertex_index ? true : false,
350 indir_vertex_index ? indir_vertex_index : vertex_index_val,
351 indir_index ? true : false, attrib_index_val, swizzle_index_val);
352 if (bit_size == 64) {
353 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
354 result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
355 indir_vertex_index ? true : false,
356 indir_vertex_index ? indir_vertex_index : vertex_index_val,
357 indir_index ? true : false, attrib_index_val, swizzle_index_val);
358 result[i] = emit_fetch_64bit(bld_base, result[i], result2);
359 }
360 }
361 } else if (bld->tcs_iface) {
362 LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
363 LLVMValueRef attrib_index_val;
364 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
365
366 if (indir_index)
367 attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
368 else
369 attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
370 result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
371 indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
372 indir_index ? true : false, attrib_index_val, swizzle_index_val);
373 if (bit_size == 64) {
374 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
375 LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
376 indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
377 indir_index ? true : false, attrib_index_val, swizzle_index_val);
378 result[i] = emit_fetch_64bit(bld_base, result[i], result2);
379 }
380 } else {
381 if (indir_index) {
382 LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
383 LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld,
384 attrib_index_val, 4, idx,
385 TRUE);
386 LLVMValueRef index_vec2 = NULL;
387 LLVMTypeRef fptr_type;
388 LLVMValueRef inputs_array;
389 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
390 inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, fptr_type, "");
391
392 if (bit_size == 64)
393 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
394 indir_index, 4, idx + 1, TRUE);
395
396 /* Gather values from the input register array */
397 result[i] = build_gather(bld_base, &bld_base->base, inputs_array, index_vec, NULL, index_vec2);
398 } else {
399 if (bld->indirects & nir_var_shader_in) {
400 LLVMValueRef lindex = lp_build_const_int32(gallivm,
401 var->data.driver_location * 4 + idx);
402 LLVMValueRef input_ptr = lp_build_pointer_get(gallivm->builder,
403 bld->inputs_array, lindex);
404 if (bit_size == 64) {
405 LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
406 var->data.driver_location * 4 + (idx + 1));
407 LLVMValueRef input_ptr2 = lp_build_pointer_get(gallivm->builder,
408 bld->inputs_array, lindex2);
409 result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
410 } else {
411 result[i] = input_ptr;
412 }
413 } else {
414 if (bit_size == 64) {
415 LLVMValueRef tmp[2];
416 tmp[0] = bld->inputs[var->data.driver_location + const_index][idx];
417 tmp[1] = bld->inputs[var->data.driver_location + const_index][idx + 1];
418 result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
419 } else {
420 result[i] = bld->inputs[var->data.driver_location + const_index][idx];
421 }
422 }
423 }
424 }
425 }
426 break;
427 case nir_var_shader_out:
428 if (bld->fs_iface && bld->fs_iface->fb_fetch) {
429 bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.driver_location, result);
430 return;
431 }
432 for (unsigned i = 0; i < num_components; i++) {
433 int idx = (i * dmul) + var->data.location_frac;
434 if (bld->tcs_iface) {
435 LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
436 LLVMValueRef attrib_index_val;
437 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
438
439 if (indir_index)
440 attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
441 else
442 attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
443
444 result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
445 indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
446 indir_index ? true : false, attrib_index_val, swizzle_index_val, 0);
447 if (bit_size == 64) {
448 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
449 LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
450 indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
451 indir_index ? true : false, attrib_index_val, swizzle_index_val, 0);
452 result[i] = emit_fetch_64bit(bld_base, result[i], result2);
453 }
454 }
455 }
456 break;
457 default:
458 break;
459 }
460 }
461
462 static void emit_store_chan(struct lp_build_nir_context *bld_base,
463 nir_variable_mode deref_mode,
464 unsigned bit_size,
465 unsigned location, unsigned comp,
466 unsigned chan,
467 LLVMValueRef dst)
468 {
469 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
470 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
471 struct lp_build_context *float_bld = &bld_base->base;
472
473 if (bit_size == 64) {
474 chan *= 2;
475 chan += comp;
476 if (chan >= 4) {
477 chan -= 4;
478 location++;
479 }
480 emit_store_64bit_chan(bld_base, bld->outputs[location][chan],
481 bld->outputs[location][chan + 1], dst);
482 } else {
483 dst = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");
484 lp_exec_mask_store(&bld->exec_mask, float_bld, dst,
485 bld->outputs[location][chan + comp]);
486 }
487 }
488
489 static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
490 unsigned bit_size,
491 unsigned location,
492 unsigned const_index,
493 LLVMValueRef indir_vertex_index,
494 LLVMValueRef indir_index,
495 unsigned comp,
496 unsigned chan,
497 LLVMValueRef chan_val)
498 {
499 struct gallivm_state *gallivm = bld_base->base.gallivm;
500 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
501 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
502 unsigned swizzle = chan;
503 if (bit_size == 64) {
504 swizzle *= 2;
505 swizzle += comp;
506 if (swizzle >= 4) {
507 swizzle -= 4;
508 location++;
509 }
510 } else
511 swizzle += comp;
512 LLVMValueRef attrib_index_val;
513 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);
514
515 if (indir_index)
516 attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
517 else
518 attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
519 if (bit_size == 64) {
520 LLVMValueRef split_vals[2];
521 LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
522 emit_store_64bit_split(bld_base, chan_val, split_vals);
523 bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
524 indir_vertex_index ? true : false,
525 indir_vertex_index,
526 indir_index ? true : false,
527 attrib_index_val, swizzle_index_val,
528 split_vals[0], mask_vec(bld_base));
529 bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
530 indir_vertex_index ? true : false,
531 indir_vertex_index,
532 indir_index ? true : false,
533 attrib_index_val, swizzle_index_val2,
534 split_vals[1], mask_vec(bld_base));
535 } else {
536 chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
537 bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
538 indir_vertex_index ? true : false,
539 indir_vertex_index,
540 indir_index ? true : false,
541 attrib_index_val, swizzle_index_val,
542 chan_val, mask_vec(bld_base));
543 }
544 }
545
546 static void emit_store_var(struct lp_build_nir_context *bld_base,
547 nir_variable_mode deref_mode,
548 unsigned num_components,
549 unsigned bit_size,
550 nir_variable *var,
551 unsigned writemask,
552 LLVMValueRef indir_vertex_index,
553 unsigned const_index,
554 LLVMValueRef indir_index,
555 LLVMValueRef dst)
556 {
557 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
558 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
559 switch (deref_mode) {
560 case nir_var_shader_out: {
561 unsigned location = var->data.driver_location;
562 unsigned comp = var->data.location_frac;
563 if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
564 if (var->data.location == FRAG_RESULT_STENCIL)
565 comp = 1;
566 else if (var->data.location == FRAG_RESULT_DEPTH)
567 comp = 2;
568 }
569
570 for (unsigned chan = 0; chan < num_components; chan++) {
571 if (writemask & (1u << chan)) {
572 LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");
573 if (bld->tcs_iface) {
574 emit_store_tcs_chan(bld_base, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
575 } else
576 emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);
577 }
578 }
579 break;
580 }
581 default:
582 break;
583 }
584 }
585
586 static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
587 struct lp_build_context *reg_bld,
588 const nir_reg_src *reg,
589 LLVMValueRef indir_src,
590 LLVMValueRef reg_storage)
591 {
592 struct gallivm_state *gallivm = bld_base->base.gallivm;
593 LLVMBuilderRef builder = gallivm->builder;
594 int nc = reg->reg->num_components;
595 LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };
596 struct lp_build_context *uint_bld = &bld_base->uint_bld;
597 if (reg->reg->num_array_elems) {
598 LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
599 if (reg->indirect) {
600 LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
601 indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
602 indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
603 }
604 reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
605 for (unsigned i = 0; i < nc; i++) {
606 LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
607 vals[i] = build_gather(bld_base, reg_bld, reg_storage, indirect_offset, NULL, NULL);
608 }
609 } else {
610 for (unsigned i = 0; i < nc; i++) {
611 LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
612 lp_build_const_int32(gallivm, i));
613 vals[i] = LLVMBuildLoad(builder, this_storage, "");
614 }
615 }
616 return nc == 1 ? vals[0] : lp_nir_array_build_gather_values(builder, vals, nc);
617 }
618
619 static void emit_store_reg(struct lp_build_nir_context *bld_base,
620 struct lp_build_context *reg_bld,
621 const nir_reg_dest *reg,
622 unsigned writemask,
623 LLVMValueRef indir_src,
624 LLVMValueRef reg_storage,
625 LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
626 {
627 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
628 struct gallivm_state *gallivm = bld_base->base.gallivm;
629 LLVMBuilderRef builder = gallivm->builder;
630 struct lp_build_context *uint_bld = &bld_base->uint_bld;
631 int nc = reg->reg->num_components;
632 if (reg->reg->num_array_elems > 0) {
633 LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
634 if (reg->indirect) {
635 LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
636 indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
637 indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
638 }
639 reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
640 for (unsigned i = 0; i < nc; i++) {
641 if (!(writemask & (1 << i)))
642 continue;
643 LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
644 dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
645 emit_mask_scatter(bld, reg_storage, indirect_offset, dst[i], &bld->exec_mask);
646 }
647 return;
648 }
649
650 for (unsigned i = 0; i < nc; i++) {
651 LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
652 lp_build_const_int32(gallivm, i));
653 dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
654 lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i], this_storage);
655 }
656 }
657
658 static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
659 unsigned nc,
660 unsigned bit_size,
661 unsigned offset_bit_size,
662 bool offset_is_uniform,
663 LLVMValueRef offset,
664 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
665 {
666 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
667 struct gallivm_state *gallivm = bld_base->base.gallivm;
668 LLVMBuilderRef builder = gallivm->builder;
669 struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
670 LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr;
671 unsigned size_shift = 0;
672 struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);
673 if (bit_size == 16)
674 size_shift = 1;
675 else if (bit_size == 32)
676 size_shift = 2;
677 else if (bit_size == 64)
678 size_shift = 3;
679 if (size_shift)
680 offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift));
681
682 LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
683 kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");
684
685 if (offset_is_uniform) {
686 offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");
687
688 for (unsigned c = 0; c < nc; c++) {
689 LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), "");
690
691 LLVMValueRef scalar = lp_build_pointer_get(builder, kernel_args_ptr, this_offset);
692 result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
693 }
694 }
695 }
696
697 static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
698 {
699 LLVMBuilderRef builder = gallivm->builder;
700 switch (bit_size) {
701 case 8:
702 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
703 break;
704 case 16:
705 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), "");
706 break;
707 case 32:
708 default:
709 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
710 break;
711 case 64:
712 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), "");
713 break;
714 }
715 return addr_ptr;
716 }
717
718 static void emit_load_global(struct lp_build_nir_context *bld_base,
719 unsigned nc,
720 unsigned bit_size,
721 unsigned addr_bit_size,
722 LLVMValueRef addr,
723 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
724 {
725 struct gallivm_state *gallivm = bld_base->base.gallivm;
726 LLVMBuilderRef builder = gallivm->builder;
727 struct lp_build_context *uint_bld = &bld_base->uint_bld;
728 struct lp_build_context *res_bld;
729
730 res_bld = get_int_bld(bld_base, true, bit_size);
731
732 for (unsigned c = 0; c < nc; c++) {
733 LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, "");
734
735 struct lp_build_loop_state loop_state;
736 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
737
738 LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
739 loop_state.counter, "");
740 addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
741
742 LLVMValueRef value_ptr = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));
743
744 LLVMValueRef temp_res;
745 temp_res = LLVMBuildLoad(builder, result, "");
746 temp_res = LLVMBuildInsertElement(builder, temp_res, value_ptr, loop_state.counter, "");
747 LLVMBuildStore(builder, temp_res, result);
748 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
749 NULL, LLVMIntUGE);
750 outval[c] = LLVMBuildLoad(builder, result, "");
751 }
752 }
753
754 static void emit_store_global(struct lp_build_nir_context *bld_base,
755 unsigned writemask,
756 unsigned nc, unsigned bit_size,
757 unsigned addr_bit_size,
758 LLVMValueRef addr,
759 LLVMValueRef dst)
760 {
761 struct gallivm_state *gallivm = bld_base->base.gallivm;
762 LLVMBuilderRef builder = gallivm->builder;
763 struct lp_build_context *uint_bld = &bld_base->uint_bld;
764
765 for (unsigned c = 0; c < nc; c++) {
766 if (!(writemask & (1u << c)))
767 continue;
768 LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
769
770 LLVMValueRef exec_mask = mask_vec(bld_base);
771 struct lp_build_loop_state loop_state;
772 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
773 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
774 loop_state.counter, "");
775
776 LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
777 loop_state.counter, "");
778 addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
779 switch (bit_size) {
780 case 32:
781 value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt32TypeInContext(gallivm->context), "");
782 break;
783 case 64:
784 value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt64TypeInContext(gallivm->context), "");
785 break;
786 default:
787 break;
788 }
789 struct lp_build_if_state ifthen;
790
791 LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
792 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
793 lp_build_if(&ifthen, gallivm, cond);
794 lp_build_pointer_set(builder, addr_ptr, lp_build_const_int32(gallivm, c), value_ptr);
795 lp_build_endif(&ifthen);
796 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
797 NULL, LLVMIntUGE);
798 }
799 }
800
801 static void emit_atomic_global(struct lp_build_nir_context *bld_base,
802 nir_intrinsic_op nir_op,
803 unsigned addr_bit_size,
804 LLVMValueRef addr,
805 LLVMValueRef val, LLVMValueRef val2,
806 LLVMValueRef *result)
807 {
808 struct gallivm_state *gallivm = bld_base->base.gallivm;
809 LLVMBuilderRef builder = gallivm->builder;
810 struct lp_build_context *uint_bld = &bld_base->uint_bld;
811
812 LLVMValueRef atom_res = lp_build_alloca(gallivm,
813 uint_bld->vec_type, "");
814 LLVMValueRef exec_mask = mask_vec(bld_base);
815 struct lp_build_loop_state loop_state;
816 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
817
818 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
819 loop_state.counter, "");
820
821 LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
822 loop_state.counter, "");
823 addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
824 struct lp_build_if_state ifthen;
825 LLVMValueRef cond, temp_res;
826 LLVMValueRef scalar;
827 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
828 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
829 lp_build_if(&ifthen, gallivm, cond);
830
831 if (nir_op == nir_intrinsic_global_atomic_comp_swap) {
832 LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
833 loop_state.counter, "");
834 cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
835 scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
836 cas_src_ptr,
837 LLVMAtomicOrderingSequentiallyConsistent,
838 LLVMAtomicOrderingSequentiallyConsistent,
839 false);
840 scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
841 } else {
842 LLVMAtomicRMWBinOp op;
843 switch (nir_op) {
844 case nir_intrinsic_global_atomic_add:
845 op = LLVMAtomicRMWBinOpAdd;
846 break;
847 case nir_intrinsic_global_atomic_exchange:
848 op = LLVMAtomicRMWBinOpXchg;
849 break;
850 case nir_intrinsic_global_atomic_and:
851 op = LLVMAtomicRMWBinOpAnd;
852 break;
853 case nir_intrinsic_global_atomic_or:
854 op = LLVMAtomicRMWBinOpOr;
855 break;
856 case nir_intrinsic_global_atomic_xor:
857 op = LLVMAtomicRMWBinOpXor;
858 break;
859 case nir_intrinsic_global_atomic_umin:
860 op = LLVMAtomicRMWBinOpUMin;
861 break;
862 case nir_intrinsic_global_atomic_umax:
863 op = LLVMAtomicRMWBinOpUMax;
864 break;
865 case nir_intrinsic_global_atomic_imin:
866 op = LLVMAtomicRMWBinOpMin;
867 break;
868 case nir_intrinsic_global_atomic_imax:
869 op = LLVMAtomicRMWBinOpMax;
870 break;
871 default:
872 unreachable("unknown atomic op");
873 }
874
875 scalar = LLVMBuildAtomicRMW(builder, op,
876 addr_ptr, value_ptr,
877 LLVMAtomicOrderingSequentiallyConsistent,
878 false);
879 }
880 temp_res = LLVMBuildLoad(builder, atom_res, "");
881 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
882 LLVMBuildStore(builder, temp_res, atom_res);
883 lp_build_else(&ifthen);
884 temp_res = LLVMBuildLoad(builder, atom_res, "");
885 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
886 LLVMBuildStore(builder, temp_res, atom_res);
887 lp_build_endif(&ifthen);
888 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
889 NULL, LLVMIntUGE);
890 *result = LLVMBuildLoad(builder, atom_res, "");
891 }
892
893 static void emit_load_ubo(struct lp_build_nir_context *bld_base,
894 unsigned nc,
895 unsigned bit_size,
896 bool offset_is_uniform,
897 LLVMValueRef index,
898 LLVMValueRef offset,
899 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
900 {
901 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
902 struct gallivm_state *gallivm = bld_base->base.gallivm;
903 LLVMBuilderRef builder = gallivm->builder;
904 struct lp_build_context *uint_bld = &bld_base->uint_bld;
905 struct lp_build_context *bld_broad = bit_size == 64 ? &bld_base->dbl_bld : &bld_base->base;
906 LLVMValueRef consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, index);
907 unsigned size_shift = 0;
908 if (bit_size == 32)
909 size_shift = 2;
910 else if (bit_size == 64)
911 size_shift = 3;
912 if (size_shift)
913 offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift));
914 if (bit_size == 64) {
915 LLVMTypeRef dptr_type = LLVMPointerType(bld_base->dbl_bld.elem_type, 0);
916 consts_ptr = LLVMBuildBitCast(builder, consts_ptr, dptr_type, "");
917 }
918
919 if (offset_is_uniform) {
920 offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");
921
922 for (unsigned c = 0; c < nc; c++) {
923 LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
924
925 LLVMValueRef scalar = lp_build_pointer_get(builder, consts_ptr, this_offset);
926 result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
927 }
928 } else {
929 LLVMValueRef overflow_mask;
930 LLVMValueRef num_consts = lp_build_array_get(gallivm, bld->const_sizes_ptr, index);
931
932 num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 4), "");
933 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
934 for (unsigned c = 0; c < nc; c++) {
935 LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
936 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
937 this_offset, num_consts);
938
939 result[c] = build_gather(bld_base, bld_broad, consts_ptr, this_offset, overflow_mask, NULL);
940 }
941 }
942 }
943
944
945 static void emit_load_mem(struct lp_build_nir_context *bld_base,
946 unsigned nc,
947 unsigned bit_size,
948 LLVMValueRef index,
949 LLVMValueRef offset,
950 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
951 {
952 struct gallivm_state *gallivm = bld_base->base.gallivm;
953 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
954 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
955 LLVMValueRef ssbo_ptr = NULL;
956 struct lp_build_context *uint_bld = &bld_base->uint_bld;
957 struct lp_build_context *uint64_bld = &bld_base->uint64_bld;
958 LLVMValueRef ssbo_limit = NULL;
959
960 if (index) {
961 LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
962 ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, bit_size == 64 ? 3 : 2), "");
963 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
964
965 ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
966 } else
967 ssbo_ptr = bld->shared_ptr;
968
969 offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, bit_size == 64 ? 3 : 2), "");
970 for (unsigned c = 0; c < nc; c++) {
971 LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
972 LLVMValueRef exec_mask = mask_vec(bld_base);
973
974 if (ssbo_limit) {
975 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
976 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
977 }
978
979 LLVMValueRef result = lp_build_alloca(gallivm, bit_size == 64 ? uint64_bld->vec_type : uint_bld->vec_type, "");
980 struct lp_build_loop_state loop_state;
981 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
982
983 struct lp_build_if_state ifthen;
984 LLVMValueRef cond, temp_res;
985
986 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
987 loop_state.counter, "");
988
989 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
990 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
991
992 lp_build_if(&ifthen, gallivm, cond);
993 LLVMValueRef scalar;
994 if (bit_size == 64) {
995 LLVMValueRef ssbo_ptr2 = LLVMBuildBitCast(builder, ssbo_ptr, LLVMPointerType(uint64_bld->elem_type, 0), "");
996 scalar = lp_build_pointer_get(builder, ssbo_ptr2, loop_index);
997 } else
998 scalar = lp_build_pointer_get(builder, ssbo_ptr, loop_index);
999
1000 temp_res = LLVMBuildLoad(builder, result, "");
1001 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1002 LLVMBuildStore(builder, temp_res, result);
1003 lp_build_else(&ifthen);
1004 temp_res = LLVMBuildLoad(builder, result, "");
1005 LLVMValueRef zero;
1006 if (bit_size == 64)
1007 zero = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
1008 else
1009 zero = lp_build_const_int32(gallivm, 0);
1010 temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
1011 LLVMBuildStore(builder, temp_res, result);
1012 lp_build_endif(&ifthen);
1013 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1014 NULL, LLVMIntUGE);
1015 outval[c] = LLVMBuildLoad(gallivm->builder, result, "");
1016 }
1017 }
1018
1019 static void emit_store_mem(struct lp_build_nir_context *bld_base,
1020 unsigned writemask,
1021 unsigned nc,
1022 unsigned bit_size,
1023 LLVMValueRef index,
1024 LLVMValueRef offset,
1025 LLVMValueRef dst)
1026 {
1027 struct gallivm_state *gallivm = bld_base->base.gallivm;
1028 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1029 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1030 LLVMValueRef ssbo_ptr;
1031 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1032 LLVMValueRef ssbo_limit = NULL;
1033
1034 if (index) {
1035 LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
1036 ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, bit_size == 64 ? 3 : 2), "");
1037 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
1038 ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
1039 } else
1040 ssbo_ptr = bld->shared_ptr;
1041
1042 offset = lp_build_shr_imm(uint_bld, offset, bit_size == 64 ? 3 : 2);
1043 for (unsigned c = 0; c < nc; c++) {
1044 if (!(writemask & (1u << c)))
1045 continue;
1046 LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
1047 LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1048
1049 LLVMValueRef exec_mask = mask_vec(bld_base);
1050 if (ssbo_limit) {
1051 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
1052 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
1053 }
1054
1055 struct lp_build_loop_state loop_state;
1056 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1057 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1058 loop_state.counter, "");
1059 if (bit_size == 64)
1060 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, bld_base->uint64_bld.elem_type, "");
1061 else
1062 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
1063 struct lp_build_if_state ifthen;
1064 LLVMValueRef cond;
1065
1066 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
1067 loop_state.counter, "");
1068 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1069 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1070 lp_build_if(&ifthen, gallivm, cond);
1071 if (bit_size == 64) {
1072 LLVMValueRef ssbo_ptr2 = LLVMBuildBitCast(builder, ssbo_ptr, LLVMPointerType(bld_base->uint64_bld.elem_type, 0), "");
1073 lp_build_pointer_set(builder, ssbo_ptr2, loop_index, value_ptr);
1074 } else
1075 lp_build_pointer_set(builder, ssbo_ptr, loop_index, value_ptr);
1076 lp_build_endif(&ifthen);
1077 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1078 NULL, LLVMIntUGE);
1079 }
1080 }
1081
1082 static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
1083 nir_intrinsic_op nir_op,
1084 LLVMValueRef index, LLVMValueRef offset,
1085 LLVMValueRef val, LLVMValueRef val2,
1086 LLVMValueRef *result)
1087 {
1088 struct gallivm_state *gallivm = bld_base->base.gallivm;
1089 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1090 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1091 LLVMValueRef ssbo_ptr;
1092 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1093 LLVMValueRef ssbo_limit = NULL;
1094
1095 if (index) {
1096 LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
1097 ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, 2), "");
1098 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
1099 ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
1100 } else
1101 ssbo_ptr = bld->shared_ptr;
1102
1103 offset = lp_build_shr_imm(uint_bld, offset, 2);
1104 LLVMValueRef atom_res = lp_build_alloca(gallivm,
1105 uint_bld->vec_type, "");
1106
1107 LLVMValueRef exec_mask = mask_vec(bld_base);
1108 if (ssbo_limit) {
1109 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, offset, ssbo_limit);
1110 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
1111 }
1112
1113 struct lp_build_loop_state loop_state;
1114 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1115
1116 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1117 loop_state.counter, "");
1118 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
1119
1120 offset = LLVMBuildExtractElement(gallivm->builder, offset,
1121 loop_state.counter, "");
1122
1123 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, ssbo_ptr,
1124 &offset, 1, "");
1125
1126 struct lp_build_if_state ifthen;
1127 LLVMValueRef cond, temp_res;
1128 LLVMValueRef scalar;
1129 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1130 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1131 lp_build_if(&ifthen, gallivm, cond);
1132
1133 if (nir_op == nir_intrinsic_ssbo_atomic_comp_swap || nir_op == nir_intrinsic_shared_atomic_comp_swap) {
1134 LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
1135 loop_state.counter, "");
1136 cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
1137 scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
1138 cas_src_ptr,
1139 LLVMAtomicOrderingSequentiallyConsistent,
1140 LLVMAtomicOrderingSequentiallyConsistent,
1141 false);
1142 scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
1143 } else {
1144 LLVMAtomicRMWBinOp op;
1145
1146 switch (nir_op) {
1147 case nir_intrinsic_shared_atomic_add:
1148 case nir_intrinsic_ssbo_atomic_add:
1149 op = LLVMAtomicRMWBinOpAdd;
1150 break;
1151 case nir_intrinsic_shared_atomic_exchange:
1152 case nir_intrinsic_ssbo_atomic_exchange:
1153 op = LLVMAtomicRMWBinOpXchg;
1154 break;
1155 case nir_intrinsic_shared_atomic_and:
1156 case nir_intrinsic_ssbo_atomic_and:
1157 op = LLVMAtomicRMWBinOpAnd;
1158 break;
1159 case nir_intrinsic_shared_atomic_or:
1160 case nir_intrinsic_ssbo_atomic_or:
1161 op = LLVMAtomicRMWBinOpOr;
1162 break;
1163 case nir_intrinsic_shared_atomic_xor:
1164 case nir_intrinsic_ssbo_atomic_xor:
1165 op = LLVMAtomicRMWBinOpXor;
1166 break;
1167 case nir_intrinsic_shared_atomic_umin:
1168 case nir_intrinsic_ssbo_atomic_umin:
1169 op = LLVMAtomicRMWBinOpUMin;
1170 break;
1171 case nir_intrinsic_shared_atomic_umax:
1172 case nir_intrinsic_ssbo_atomic_umax:
1173 op = LLVMAtomicRMWBinOpUMax;
1174 break;
1175 case nir_intrinsic_ssbo_atomic_imin:
1176 case nir_intrinsic_shared_atomic_imin:
1177 op = LLVMAtomicRMWBinOpMin;
1178 break;
1179 case nir_intrinsic_ssbo_atomic_imax:
1180 case nir_intrinsic_shared_atomic_imax:
1181 op = LLVMAtomicRMWBinOpMax;
1182 break;
1183 default:
1184 unreachable("unknown atomic op");
1185 }
1186 scalar = LLVMBuildAtomicRMW(builder, op,
1187 scalar_ptr, value_ptr,
1188 LLVMAtomicOrderingSequentiallyConsistent,
1189 false);
1190 }
1191 temp_res = LLVMBuildLoad(builder, atom_res, "");
1192 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1193 LLVMBuildStore(builder, temp_res, atom_res);
1194 lp_build_else(&ifthen);
1195 temp_res = LLVMBuildLoad(builder, atom_res, "");
1196 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
1197 LLVMBuildStore(builder, temp_res, atom_res);
1198 lp_build_endif(&ifthen);
1199
1200 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1201 NULL, LLVMIntUGE);
1202 *result = LLVMBuildLoad(builder, atom_res, "");
1203 }
1204
1205 static void emit_barrier(struct lp_build_nir_context *bld_base)
1206 {
1207 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1208 struct gallivm_state * gallivm = bld_base->base.gallivm;
1209
1210 LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
1211
1212 lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
1213 LLVMPositionBuilderAtEnd(gallivm->builder, resume);
1214 }
1215
1216 static LLVMValueRef emit_get_buffer_size(struct lp_build_nir_context *bld_base,
1217 LLVMValueRef index)
1218 {
1219 struct gallivm_state *gallivm = bld_base->base.gallivm;
1220 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1221 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1222 struct lp_build_context *bld_broad = &bld_base->uint_bld;
1223 LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr,
1224 LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
1225 return lp_build_broadcast_scalar(bld_broad, size_ptr);
1226 }
1227
1228 static void emit_image_op(struct lp_build_nir_context *bld_base,
1229 struct lp_img_params *params)
1230 {
1231 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1232 struct gallivm_state *gallivm = bld_base->base.gallivm;
1233
1234 params->type = bld_base->base.type;
1235 params->context_ptr = bld->context_ptr;
1236 params->thread_data_ptr = bld->thread_data_ptr;
1237 params->exec_mask = mask_vec(bld_base);
1238
1239 if (params->image_index_offset)
1240 params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
1241 lp_build_const_int32(gallivm, 0), "");
1242
1243 bld->image->emit_op(bld->image,
1244 bld->bld_base.base.gallivm,
1245 params);
1246
1247 }
1248
1249 static void emit_image_size(struct lp_build_nir_context *bld_base,
1250 struct lp_sampler_size_query_params *params)
1251 {
1252 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1253 struct gallivm_state *gallivm = bld_base->base.gallivm;
1254
1255 params->int_type = bld_base->int_bld.type;
1256 params->context_ptr = bld->context_ptr;
1257
1258 if (params->texture_unit_offset)
1259 params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
1260 lp_build_const_int32(gallivm, 0), "");
1261 bld->image->emit_size_query(bld->image,
1262 bld->bld_base.base.gallivm,
1263 params);
1264
1265 }
1266
1267 static void init_var_slots(struct lp_build_nir_context *bld_base,
1268 nir_variable *var, unsigned sc)
1269 {
1270 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1271 unsigned slots = glsl_count_attribute_slots(var->type, false) * 4;
1272
1273 if (!bld->outputs)
1274 return;
1275 for (unsigned comp = sc; comp < slots + sc; comp++) {
1276 unsigned this_loc = var->data.driver_location + (comp / 4);
1277 unsigned this_chan = comp % 4;
1278
1279 if (!bld->outputs[this_loc][this_chan])
1280 bld->outputs[this_loc][this_chan] = lp_build_alloca(bld_base->base.gallivm,
1281 bld_base->base.vec_type, "output");
1282 }
1283 }
1284
1285 static void emit_var_decl(struct lp_build_nir_context *bld_base,
1286 nir_variable *var)
1287 {
1288 unsigned sc = var->data.location_frac;
1289 switch (var->data.mode) {
1290 case nir_var_shader_out: {
1291 if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
1292 if (var->data.location == FRAG_RESULT_STENCIL)
1293 sc = 1;
1294 else if (var->data.location == FRAG_RESULT_DEPTH)
1295 sc = 2;
1296 }
1297 init_var_slots(bld_base, var, sc);
1298 break;
1299 }
1300 default:
1301 break;
1302 }
1303 }
1304
1305 static void emit_tex(struct lp_build_nir_context *bld_base,
1306 struct lp_sampler_params *params)
1307 {
1308 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1309 struct gallivm_state *gallivm = bld_base->base.gallivm;
1310
1311 params->type = bld_base->base.type;
1312 params->context_ptr = bld->context_ptr;
1313 params->thread_data_ptr = bld->thread_data_ptr;
1314
1315 if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
1316 /* this is horrible but this can be dynamic */
1317 LLVMValueRef coords[5];
1318 LLVMValueRef *orig_texel_ptr;
1319 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1320 LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
1321 LLVMGetUndef(bld_base->base.vec_type),
1322 LLVMGetUndef(bld_base->base.vec_type),
1323 LLVMGetUndef(bld_base->base.vec_type) };
1324 LLVMValueRef texel[4], orig_offset;
1325 unsigned i;
1326 orig_texel_ptr = params->texel;
1327
1328 for (i = 0; i < 5; i++) {
1329 coords[i] = params->coords[i];
1330 }
1331 orig_offset = params->texture_index_offset;
1332
1333 for (unsigned v = 0; v < uint_bld->type.length; v++) {
1334 LLVMValueRef idx = lp_build_const_int32(gallivm, v);
1335 LLVMValueRef new_coords[5];
1336 for (i = 0; i < 5; i++) {
1337 new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
1338 coords[i], idx, "");
1339 }
1340 params->coords = new_coords;
1341 params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
1342 orig_offset,
1343 idx, "");
1344 params->type = lp_elem_type(bld_base->base.type);
1345
1346 params->texel = texel;
1347 bld->sampler->emit_tex_sample(bld->sampler,
1348 gallivm,
1349 params);
1350
1351 for (i = 0; i < 4; i++) {
1352 result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
1353 }
1354 }
1355 for (i = 0; i < 4; i++) {
1356 orig_texel_ptr[i] = result[i];
1357 }
1358 return;
1359 }
1360
1361 if (params->texture_index_offset)
1362 params->texture_index_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
1363 params->texture_index_offset,
1364 lp_build_const_int32(bld_base->base.gallivm, 0), "");
1365
1366 params->type = bld_base->base.type;
1367 bld->sampler->emit_tex_sample(bld->sampler,
1368 bld->bld_base.base.gallivm,
1369 params);
1370 }
1371
1372 static void emit_tex_size(struct lp_build_nir_context *bld_base,
1373 struct lp_sampler_size_query_params *params)
1374 {
1375 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1376
1377 params->int_type = bld_base->int_bld.type;
1378 params->context_ptr = bld->context_ptr;
1379
1380 if (params->texture_unit_offset)
1381 params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
1382 params->texture_unit_offset,
1383 lp_build_const_int32(bld_base->base.gallivm, 0), "");
1384 bld->sampler->emit_size_query(bld->sampler,
1385 bld->bld_base.base.gallivm,
1386 params);
1387 }
1388
1389 static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
1390 nir_intrinsic_instr *instr,
1391 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1392 {
1393 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1394 struct gallivm_state *gallivm = bld_base->base.gallivm;
1395 switch (instr->intrinsic) {
1396 case nir_intrinsic_load_instance_id:
1397 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1398 break;
1399 case nir_intrinsic_load_base_instance:
1400 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1401 break;
1402 case nir_intrinsic_load_base_vertex:
1403 result[0] = bld->system_values.basevertex;
1404 break;
1405 case nir_intrinsic_load_vertex_id:
1406 result[0] = bld->system_values.vertex_id;
1407 break;
1408 case nir_intrinsic_load_primitive_id:
1409 result[0] = bld->system_values.prim_id;
1410 break;
1411 case nir_intrinsic_load_work_group_id:
1412 for (unsigned i = 0; i < 3; i++)
1413 result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_id, lp_build_const_int32(gallivm, i), ""));
1414 break;
1415 case nir_intrinsic_load_local_invocation_id:
1416 for (unsigned i = 0; i < 3; i++)
1417 result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, i, "");
1418 break;
1419 case nir_intrinsic_load_num_work_groups:
1420 for (unsigned i = 0; i < 3; i++)
1421 result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.grid_size, lp_build_const_int32(gallivm, i), ""));
1422 break;
1423 case nir_intrinsic_load_invocation_id:
1424 if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
1425 result[0] = bld->system_values.invocation_id;
1426 else
1427 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1428 break;
1429 case nir_intrinsic_load_front_face:
1430 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1431 break;
1432 case nir_intrinsic_load_draw_id:
1433 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1434 break;
1435 default:
1436 break;
1437 case nir_intrinsic_load_local_group_size:
1438 for (unsigned i = 0; i < 3; i++)
1439 result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, i), ""));
1440 break;
1441 case nir_intrinsic_load_work_dim:
1442 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
1443 break;
1444 case nir_intrinsic_load_tess_coord:
1445 for (unsigned i = 0; i < 3; i++) {
1446 result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
1447 }
1448 break;
1449 case nir_intrinsic_load_tess_level_outer:
1450 for (unsigned i = 0; i < 4; i++)
1451 result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
1452 break;
1453 case nir_intrinsic_load_tess_level_inner:
1454 for (unsigned i = 0; i < 2; i++)
1455 result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
1456 break;
1457 case nir_intrinsic_load_patch_vertices_in:
1458 result[0] = bld->system_values.vertices_in;
1459 break;
1460 case nir_intrinsic_load_sample_id:
1461 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
1462 break;
1463 case nir_intrinsic_load_sample_pos:
1464 for (unsigned i = 0; i < 2; i++) {
1465 LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
1466 idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
1467 LLVMValueRef val = lp_build_array_get(gallivm, bld->system_values.sample_pos, idx);
1468 result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
1469 }
1470 break;
1471 case nir_intrinsic_load_sample_mask_in:
1472 result[0] = bld->system_values.sample_mask_in;
1473 break;
1474 }
1475 }
1476
1477 static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
1478 LLVMValueRef *dst)
1479 {
1480 struct gallivm_state *gallivm = bld_base->base.gallivm;
1481 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1482 *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), lp_build_const_int_vec(gallivm, uint_bld->type, -1));
1483 }
1484
1485 static void bgnloop(struct lp_build_nir_context *bld_base)
1486 {
1487 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1488 lp_exec_bgnloop(&bld->exec_mask, true);
1489 }
1490
1491 static void endloop(struct lp_build_nir_context *bld_base)
1492 {
1493 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1494 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
1495 }
1496
1497 static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
1498 {
1499 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1500 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1501 lp_exec_mask_cond_push(&bld->exec_mask, LLVMBuildBitCast(builder, cond, bld_base->base.int_vec_type, ""));
1502 }
1503
1504 static void else_stmt(struct lp_build_nir_context *bld_base)
1505 {
1506 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1507 lp_exec_mask_cond_invert(&bld->exec_mask);
1508 }
1509
1510 static void endif_stmt(struct lp_build_nir_context *bld_base)
1511 {
1512 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1513 lp_exec_mask_cond_pop(&bld->exec_mask);
1514 }
1515
1516 static void break_stmt(struct lp_build_nir_context *bld_base)
1517 {
1518 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1519
1520 lp_exec_break(&bld->exec_mask, NULL, false);
1521 }
1522
1523 static void continue_stmt(struct lp_build_nir_context *bld_base)
1524 {
1525 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1526 lp_exec_continue(&bld->exec_mask);
1527 }
1528
1529 static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
1530 {
1531 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1532 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1533 LLVMValueRef mask;
1534
1535 if (!cond) {
1536 if (bld->exec_mask.has_mask) {
1537 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1538 } else {
1539 mask = LLVMConstNull(bld->bld_base.base.int_vec_type);
1540 }
1541 } else {
1542 mask = LLVMBuildNot(builder, cond, "");
1543 if (bld->exec_mask.has_mask) {
1544 LLVMValueRef invmask;
1545 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1546 mask = LLVMBuildOr(builder, mask, invmask, "");
1547 }
1548 }
1549 lp_build_mask_update(bld->mask, mask);
1550 }
1551
1552 static void
1553 increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,
1554 LLVMValueRef ptr,
1555 LLVMValueRef mask)
1556 {
1557 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1558 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
1559
1560 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
1561
1562 LLVMBuildStore(builder, current_vec, ptr);
1563 }
1564
1565 static void
1566 clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,
1567 LLVMValueRef ptr,
1568 LLVMValueRef mask)
1569 {
1570 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1571 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
1572
1573 current_vec = lp_build_select(&bld_base->uint_bld,
1574 mask,
1575 bld_base->uint_bld.zero,
1576 current_vec);
1577
1578 LLVMBuildStore(builder, current_vec, ptr);
1579 }
1580
1581 static LLVMValueRef
1582 clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,
1583 LLVMValueRef current_mask_vec,
1584 LLVMValueRef total_emitted_vertices_vec)
1585 {
1586 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1587 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
1588 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
1589 total_emitted_vertices_vec,
1590 bld->max_output_vertices_vec);
1591
1592 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
1593 }
1594
1595 static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)
1596 {
1597 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1598 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1599
1600 if (stream_id >= bld->gs_vertex_streams)
1601 return;
1602 assert(bld->gs_iface->emit_vertex);
1603 LLVMValueRef total_emitted_vertices_vec =
1604 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
1605 LLVMValueRef mask = mask_vec(bld_base);
1606 mask = clamp_mask_to_max_output_vertices(bld, mask,
1607 total_emitted_vertices_vec);
1608 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
1609 bld->outputs,
1610 total_emitted_vertices_vec,
1611 mask,
1612 lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));
1613
1614 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
1615 mask);
1616 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id],
1617 mask);
1618 }
1619
1620 static void
1621 end_primitive_masked(struct lp_build_nir_context * bld_base,
1622 LLVMValueRef mask, uint32_t stream_id)
1623 {
1624 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1625 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1626
1627 if (stream_id >= bld->gs_vertex_streams)
1628 return;
1629 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1630 LLVMValueRef emitted_vertices_vec =
1631 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr[stream_id], "");
1632 LLVMValueRef emitted_prims_vec =
1633 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr[stream_id], "");
1634 LLVMValueRef total_emitted_vertices_vec =
1635 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
1636
1637 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
1638 PIPE_FUNC_NOTEQUAL,
1639 emitted_vertices_vec,
1640 uint_bld->zero);
1641 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
1642 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
1643 total_emitted_vertices_vec,
1644 emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
1645 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
1646 mask);
1647 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
1648 mask);
1649 }
1650
1651 static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
1652 {
1653 ASSERTED struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1654
1655 assert(bld->gs_iface->end_primitive);
1656
1657 LLVMValueRef mask = mask_vec(bld_base);
1658 end_primitive_masked(bld_base, mask, stream_id);
1659 }
1660
1661 static void
1662 emit_prologue(struct lp_build_nir_soa_context *bld)
1663 {
1664 struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
1665 if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
1666 uint32_t num_inputs = util_bitcount64(bld->bld_base.shader->info.inputs_read);
1667 unsigned index, chan;
1668 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
1669 LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4);
1670 bld->inputs_array = lp_build_array_alloca(gallivm,
1671 vec_type, array_size,
1672 "input_array");
1673
1674 for (index = 0; index < num_inputs; ++index) {
1675 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
1676 LLVMValueRef lindex =
1677 lp_build_const_int32(gallivm, index * 4 + chan);
1678 LLVMValueRef input_ptr =
1679 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
1680 &lindex, 1, "");
1681 LLVMValueRef value = bld->inputs[index][chan];
1682 if (value)
1683 LLVMBuildStore(gallivm->builder, value, input_ptr);
1684 }
1685 }
1686 }
1687 }
1688
1689 static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])
1690 {
1691 struct gallivm_state * gallivm = bld_base->base.gallivm;
1692 LLVMBuilderRef builder = gallivm->builder;
1693
1694 LLVMValueRef exec_mask = mask_vec(bld_base);
1695 struct lp_build_loop_state loop_state;
1696
1697 LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
1698
1699 LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
1700 LLVMValueRef init_val = NULL;
1701 if (instr->intrinsic == nir_intrinsic_vote_ieq) {
1702 /* for equal we unfortunately have to loop and find the first valid one. */
1703 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1704 LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
1705
1706 struct lp_build_if_state ifthen;
1707 lp_build_if(&ifthen, gallivm, if_cond);
1708 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
1709 loop_state.counter, "");
1710 LLVMBuildStore(builder, value_ptr, res_store);
1711 lp_build_endif(&ifthen);
1712 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
1713 NULL, LLVMIntUGE);
1714 init_val = LLVMBuildLoad(builder, res_store, "");
1715 } else {
1716 LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
1717 }
1718
1719 LLVMValueRef res;
1720 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1721 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
1722 loop_state.counter, "");
1723 struct lp_build_if_state ifthen;
1724 LLVMValueRef if_cond;
1725 if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
1726
1727 lp_build_if(&ifthen, gallivm, if_cond);
1728 res = LLVMBuildLoad(builder, res_store, "");
1729
1730 if (instr->intrinsic == nir_intrinsic_vote_ieq) {
1731 LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
1732 tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
1733 res = LLVMBuildOr(builder, res, tmp, "");
1734 } else if (instr->intrinsic == nir_intrinsic_vote_any)
1735 res = LLVMBuildOr(builder, res, value_ptr, "");
1736 else
1737 res = LLVMBuildAnd(builder, res, value_ptr, "");
1738 LLVMBuildStore(builder, res, res_store);
1739 lp_build_endif(&ifthen);
1740 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
1741 NULL, LLVMIntUGE);
1742 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));
1743 }
1744
1745 static void
1746 emit_interp_at(struct lp_build_nir_context *bld_base,
1747 unsigned num_components,
1748 nir_variable *var,
1749 bool centroid,
1750 bool sample,
1751 unsigned const_index,
1752 LLVMValueRef indir_index,
1753 LLVMValueRef offsets[2],
1754 LLVMValueRef dst[4])
1755 {
1756 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1757
1758 for (unsigned i = 0; i < num_components; i++) {
1759 dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
1760 const_index + var->data.driver_location, i + var->data.location_frac,
1761 centroid, sample, indir_index, offsets);
1762 }
1763 }
1764
1765 void lp_build_nir_soa(struct gallivm_state *gallivm,
1766 struct nir_shader *shader,
1767 const struct lp_build_tgsi_params *params,
1768 LLVMValueRef (*outputs)[4])
1769 {
1770 struct lp_build_nir_soa_context bld;
1771 struct lp_type type = params->type;
1772 struct lp_type res_type;
1773
1774 assert(type.length <= LP_MAX_VECTOR_LENGTH);
1775 memset(&res_type, 0, sizeof res_type);
1776 res_type.width = type.width;
1777 res_type.length = type.length;
1778 res_type.sign = 1;
1779
1780 /* Setup build context */
1781 memset(&bld, 0, sizeof bld);
1782 lp_build_context_init(&bld.bld_base.base, gallivm, type);
1783 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
1784 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
1785 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
1786 lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type)));
1787 {
1788 struct lp_type dbl_type;
1789 dbl_type = type;
1790 dbl_type.width *= 2;
1791 lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
1792 }
1793 {
1794 struct lp_type uint64_type;
1795 uint64_type = lp_uint_type(type);
1796 uint64_type.width *= 2;
1797 lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
1798 }
1799 {
1800 struct lp_type int64_type;
1801 int64_type = lp_int_type(type);
1802 int64_type.width *= 2;
1803 lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
1804 }
1805 {
1806 struct lp_type uint16_type;
1807 uint16_type = lp_uint_type(type);
1808 uint16_type.width /= 2;
1809 lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);
1810 }
1811 {
1812 struct lp_type int16_type;
1813 int16_type = lp_int_type(type);
1814 int16_type.width /= 2;
1815 lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);
1816 }
1817 {
1818 struct lp_type uint8_type;
1819 uint8_type = lp_uint_type(type);
1820 uint8_type.width /= 4;
1821 lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);
1822 }
1823 {
1824 struct lp_type int8_type;
1825 int8_type = lp_int_type(type);
1826 int8_type.width /= 4;
1827 lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);
1828 }
1829 bld.bld_base.load_var = emit_load_var;
1830 bld.bld_base.store_var = emit_store_var;
1831 bld.bld_base.load_reg = emit_load_reg;
1832 bld.bld_base.store_reg = emit_store_reg;
1833 bld.bld_base.emit_var_decl = emit_var_decl;
1834 bld.bld_base.load_ubo = emit_load_ubo;
1835 bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
1836 bld.bld_base.load_global = emit_load_global;
1837 bld.bld_base.store_global = emit_store_global;
1838 bld.bld_base.atomic_global = emit_atomic_global;
1839 bld.bld_base.tex = emit_tex;
1840 bld.bld_base.tex_size = emit_tex_size;
1841 bld.bld_base.bgnloop = bgnloop;
1842 bld.bld_base.endloop = endloop;
1843 bld.bld_base.if_cond = if_cond;
1844 bld.bld_base.else_stmt = else_stmt;
1845 bld.bld_base.endif_stmt = endif_stmt;
1846 bld.bld_base.break_stmt = break_stmt;
1847 bld.bld_base.continue_stmt = continue_stmt;
1848 bld.bld_base.sysval_intrin = emit_sysval_intrin;
1849 bld.bld_base.discard = discard;
1850 bld.bld_base.emit_vertex = emit_vertex;
1851 bld.bld_base.end_primitive = end_primitive;
1852 bld.bld_base.load_mem = emit_load_mem;
1853 bld.bld_base.store_mem = emit_store_mem;
1854 bld.bld_base.get_buffer_size = emit_get_buffer_size;
1855 bld.bld_base.atomic_mem = emit_atomic_mem;
1856 bld.bld_base.barrier = emit_barrier;
1857 bld.bld_base.image_op = emit_image_op;
1858 bld.bld_base.image_size = emit_image_size;
1859 bld.bld_base.vote = emit_vote;
1860 bld.bld_base.helper_invocation = emit_helper_invocation;
1861 bld.bld_base.interp_at = emit_interp_at;
1862
1863 bld.mask = params->mask;
1864 bld.inputs = params->inputs;
1865 bld.outputs = outputs;
1866 bld.consts_ptr = params->consts_ptr;
1867 bld.const_sizes_ptr = params->const_sizes_ptr;
1868 bld.ssbo_ptr = params->ssbo_ptr;
1869 bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
1870 bld.sampler = params->sampler;
1871 // bld.bld_base.info = params->info;
1872
1873 bld.context_ptr = params->context_ptr;
1874 bld.thread_data_ptr = params->thread_data_ptr;
1875 bld.image = params->image;
1876 bld.shared_ptr = params->shared_ptr;
1877 bld.coro = params->coro;
1878 bld.kernel_args_ptr = params->kernel_args;
1879 bld.indirects = 0;
1880 if (params->info->indirect_files & (1 << TGSI_FILE_INPUT))
1881 bld.indirects |= nir_var_shader_in;
1882
1883 bld.gs_iface = params->gs_iface;
1884 bld.tcs_iface = params->tcs_iface;
1885 bld.tes_iface = params->tes_iface;
1886 bld.fs_iface = params->fs_iface;
1887 if (bld.gs_iface) {
1888 struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;
1889
1890 bld.gs_vertex_streams = params->gs_vertex_streams;
1891 bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
1892 shader->info.gs.vertices_out);
1893 for (int i = 0; i < params->gs_vertex_streams; i++) {
1894 bld.emitted_prims_vec_ptr[i] =
1895 lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
1896 bld.emitted_vertices_vec_ptr[i] =
1897 lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
1898 bld.total_emitted_vertices_vec_ptr[i] =
1899 lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
1900 }
1901 }
1902 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
1903
1904 bld.system_values = *params->system_values;
1905
1906 bld.bld_base.shader = shader;
1907
1908 emit_prologue(&bld);
1909 lp_build_nir_llvm(&bld.bld_base, shader);
1910
1911 if (bld.gs_iface) {
1912 LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
1913 LLVMValueRef total_emitted_vertices_vec;
1914 LLVMValueRef emitted_prims_vec;
1915
1916 for (int i = 0; i < params->gs_vertex_streams; i++) {
1917 end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);
1918
1919 total_emitted_vertices_vec =
1920 LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr[i], "");
1921
1922 emitted_prims_vec =
1923 LLVMBuildLoad(builder, bld.emitted_prims_vec_ptr[i], "");
1924 bld.gs_iface->gs_epilogue(bld.gs_iface,
1925 total_emitted_vertices_vec,
1926 emitted_prims_vec, i);
1927 }
1928 }
1929 lp_exec_mask_fini(&bld.exec_mask);
1930 }