1 /**************************************************************************
3 * Copyright 2019 Red Hat.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 **************************************************************************/
26 #include "lp_bld_nir.h"
27 #include "lp_bld_init.h"
28 #include "lp_bld_flow.h"
29 #include "lp_bld_logic.h"
30 #include "lp_bld_gather.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_struct.h"
33 #include "lp_bld_arit.h"
34 #include "lp_bld_bitarit.h"
35 #include "lp_bld_coro.h"
36 #include "lp_bld_printf.h"
37 #include "util/u_math.h"
/*
 * Combine the execution mask, if there is one, with the current mask.
 */
42 mask_vec(struct lp_build_nir_context
*bld_base
)
44 struct lp_build_nir_soa_context
* bld
= (struct lp_build_nir_soa_context
*)bld_base
;
45 LLVMBuilderRef builder
= bld
->bld_base
.base
.gallivm
->builder
;
46 struct lp_exec_mask
*exec_mask
= &bld
->exec_mask
;
47 LLVMValueRef bld_mask
= bld
->mask
? lp_build_mask_value(bld
->mask
) : NULL
;
48 if (!exec_mask
->has_mask
) {
52 return exec_mask
->exec_mask
;
53 return LLVMBuildAnd(builder
, lp_build_mask_value(bld
->mask
),
54 exec_mask
->exec_mask
, "");
59 struct lp_build_nir_context
* bld_base
,
63 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
64 LLVMBuilderRef builder
= gallivm
->builder
;
67 LLVMValueRef shuffles
[2 * (LP_MAX_VECTOR_WIDTH
/32)];
68 int len
= bld_base
->base
.type
.length
* 2;
69 assert(len
<= (2 * (LP_MAX_VECTOR_WIDTH
/32)));
71 for (i
= 0; i
< bld_base
->base
.type
.length
* 2; i
+=2) {
72 shuffles
[i
] = lp_build_const_int32(gallivm
, i
/ 2);
73 shuffles
[i
+ 1] = lp_build_const_int32(gallivm
, i
/ 2 + bld_base
->base
.type
.length
);
75 res
= LLVMBuildShuffleVector(builder
, input
, input2
, LLVMConstVector(shuffles
, len
), "");
77 return LLVMBuildBitCast(builder
, res
, bld_base
->dbl_bld
.vec_type
, "");
81 emit_store_64bit_split(struct lp_build_nir_context
*bld_base
,
83 LLVMValueRef split_values
[2])
85 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
86 LLVMBuilderRef builder
= gallivm
->builder
;
88 LLVMValueRef shuffles
[LP_MAX_VECTOR_WIDTH
/32];
89 LLVMValueRef shuffles2
[LP_MAX_VECTOR_WIDTH
/32];
90 int len
= bld_base
->base
.type
.length
* 2;
92 value
= LLVMBuildBitCast(gallivm
->builder
, value
, LLVMVectorType(LLVMFloatTypeInContext(gallivm
->context
), len
), "");
93 for (i
= 0; i
< bld_base
->base
.type
.length
; i
++) {
94 shuffles
[i
] = lp_build_const_int32(gallivm
, i
* 2);
95 shuffles2
[i
] = lp_build_const_int32(gallivm
, (i
* 2) + 1);
98 split_values
[0] = LLVMBuildShuffleVector(builder
, value
,
99 LLVMGetUndef(LLVMTypeOf(value
)),
100 LLVMConstVector(shuffles
,
101 bld_base
->base
.type
.length
),
103 split_values
[1] = LLVMBuildShuffleVector(builder
, value
,
104 LLVMGetUndef(LLVMTypeOf(value
)),
105 LLVMConstVector(shuffles2
,
106 bld_base
->base
.type
.length
),
111 emit_store_64bit_chan(struct lp_build_nir_context
*bld_base
,
112 LLVMValueRef chan_ptr
,
113 LLVMValueRef chan_ptr2
,
116 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
117 struct lp_build_context
*float_bld
= &bld_base
->base
;
118 LLVMValueRef split_vals
[2];
120 emit_store_64bit_split(bld_base
, value
, split_vals
);
122 lp_exec_mask_store(&bld
->exec_mask
, float_bld
, split_vals
[0], chan_ptr
);
123 lp_exec_mask_store(&bld
->exec_mask
, float_bld
, split_vals
[1], chan_ptr2
);
127 get_soa_array_offsets(struct lp_build_context
*uint_bld
,
128 LLVMValueRef indirect_index
,
131 bool need_perelement_offset
)
133 struct gallivm_state
*gallivm
= uint_bld
->gallivm
;
134 LLVMValueRef chan_vec
=
135 lp_build_const_int_vec(uint_bld
->gallivm
, uint_bld
->type
, chan_index
);
136 LLVMValueRef length_vec
=
137 lp_build_const_int_vec(gallivm
, uint_bld
->type
, uint_bld
->type
.length
);
138 LLVMValueRef index_vec
;
140 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
141 index_vec
= lp_build_mul(uint_bld
, indirect_index
, lp_build_const_int_vec(uint_bld
->gallivm
, uint_bld
->type
, num_components
));
142 index_vec
= lp_build_add(uint_bld
, index_vec
, chan_vec
);
143 index_vec
= lp_build_mul(uint_bld
, index_vec
, length_vec
);
145 if (need_perelement_offset
) {
146 LLVMValueRef pixel_offsets
;
148 /* build pixel offset vector: {0, 1, 2, 3, ...} */
149 pixel_offsets
= uint_bld
->undef
;
150 for (i
= 0; i
< uint_bld
->type
.length
; i
++) {
151 LLVMValueRef ii
= lp_build_const_int32(gallivm
, i
);
152 pixel_offsets
= LLVMBuildInsertElement(gallivm
->builder
, pixel_offsets
,
155 index_vec
= lp_build_add(uint_bld
, index_vec
, pixel_offsets
);
161 build_gather(struct lp_build_nir_context
*bld_base
,
162 struct lp_build_context
*bld
,
163 LLVMValueRef base_ptr
,
164 LLVMValueRef indexes
,
165 LLVMValueRef overflow_mask
,
166 LLVMValueRef indexes2
)
168 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
169 LLVMBuilderRef builder
= gallivm
->builder
;
170 struct lp_build_context
*uint_bld
= &bld_base
->uint_bld
;
175 res
= LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm
->context
), bld_base
->base
.type
.length
* 2));
179 * overflow_mask is a vector telling us which channels
180 * in the vector overflowed. We use the overflow behavior for
181 * constant buffers which is defined as:
182 * Out of bounds access to constant buffer returns 0 in all
183 * components. Out of bounds behavior is always with respect
184 * to the size of the buffer bound at that slot.
189 * We avoid per-element control flow here (also due to llvm going crazy,
190 * though I suspect it's better anyway since overflow is likely rare).
191 * Note that since we still fetch from buffers even if num_elements was
192 * zero (in this case we'll fetch from index zero) the jit func callers
193 * MUST provide valid fake constant buffers of size 4x32 (the values do
194 * not matter), otherwise we'd still need (not per element though)
197 indexes
= lp_build_select(uint_bld
, overflow_mask
, uint_bld
->zero
, indexes
);
199 indexes2
= lp_build_select(uint_bld
, overflow_mask
, uint_bld
->zero
, indexes2
);
203 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
205 for (i
= 0; i
< bld
->type
.length
* (indexes2
? 2 : 1); i
++) {
208 LLVMValueRef scalar_ptr
, scalar
;
210 di
= lp_build_const_int32(gallivm
, i
);
212 si
= lp_build_const_int32(gallivm
, i
>> 1);
216 if (indexes2
&& (i
& 1)) {
217 index
= LLVMBuildExtractElement(builder
,
220 index
= LLVMBuildExtractElement(builder
,
223 scalar_ptr
= LLVMBuildGEP(builder
, base_ptr
,
224 &index
, 1, "gather_ptr");
225 scalar
= LLVMBuildLoad(builder
, scalar_ptr
, "");
227 res
= LLVMBuildInsertElement(builder
, res
, scalar
, di
, "");
232 res
= LLVMBuildBitCast(builder
, res
, bld_base
->dbl_bld
.vec_type
, "");
233 overflow_mask
= LLVMBuildSExt(builder
, overflow_mask
,
234 bld_base
->dbl_bld
.int_vec_type
, "");
235 res
= lp_build_select(&bld_base
->dbl_bld
, overflow_mask
,
236 bld_base
->dbl_bld
.zero
, res
);
238 res
= lp_build_select(bld
, overflow_mask
, bld
->zero
, res
);
245 * Scatter/store vector.
248 emit_mask_scatter(struct lp_build_nir_soa_context
*bld
,
249 LLVMValueRef base_ptr
,
250 LLVMValueRef indexes
,
252 struct lp_exec_mask
*mask
)
254 struct gallivm_state
*gallivm
= bld
->bld_base
.base
.gallivm
;
255 LLVMBuilderRef builder
= gallivm
->builder
;
257 LLVMValueRef pred
= mask
->has_mask
? mask
->exec_mask
: NULL
;
260 * Loop over elements of index_vec, store scalar value.
262 for (i
= 0; i
< bld
->bld_base
.base
.type
.length
; i
++) {
263 LLVMValueRef ii
= lp_build_const_int32(gallivm
, i
);
264 LLVMValueRef index
= LLVMBuildExtractElement(builder
, indexes
, ii
, "");
265 LLVMValueRef scalar_ptr
= LLVMBuildGEP(builder
, base_ptr
, &index
, 1, "scatter_ptr");
266 LLVMValueRef val
= LLVMBuildExtractElement(builder
, values
, ii
, "scatter_val");
267 LLVMValueRef scalar_pred
= pred
?
268 LLVMBuildExtractElement(builder
, pred
, ii
, "scatter_pred") : NULL
;
271 lp_build_printf(gallivm
, "scatter %d: val %f at %d %p\n",
272 ii
, val
, index
, scalar_ptr
);
275 LLVMValueRef real_val
, dst_val
;
276 dst_val
= LLVMBuildLoad(builder
, scalar_ptr
, "");
277 real_val
= lp_build_select(&bld
->uint_elem_bld
, scalar_pred
, val
, dst_val
);
278 LLVMBuildStore(builder
, real_val
, scalar_ptr
);
281 LLVMBuildStore(builder
, val
, scalar_ptr
);
286 static void emit_load_var(struct lp_build_nir_context
*bld_base
,
287 nir_variable_mode deref_mode
,
288 unsigned num_components
,
291 unsigned vertex_index
,
292 LLVMValueRef indir_vertex_index
,
293 unsigned const_index
,
294 LLVMValueRef indir_index
,
295 LLVMValueRef result
[NIR_MAX_VEC_COMPONENTS
])
297 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
298 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
299 int dmul
= bit_size
== 64 ? 2 : 1;
300 switch (deref_mode
) {
301 case nir_var_shader_in
:
302 for (unsigned i
= 0; i
< num_components
; i
++) {
303 int idx
= (i
* dmul
) + var
->data
.location_frac
;
305 LLVMValueRef vertex_index_val
= lp_build_const_int32(gallivm
, vertex_index
);
306 LLVMValueRef attrib_index_val
= lp_build_const_int32(gallivm
, const_index
+ var
->data
.driver_location
);
307 LLVMValueRef swizzle_index_val
= lp_build_const_int32(gallivm
, idx
);
308 LLVMValueRef result2
;
309 result
[i
] = bld
->gs_iface
->fetch_input(bld
->gs_iface
, &bld_base
->base
,
310 false, vertex_index_val
, 0, attrib_index_val
, swizzle_index_val
);
311 if (bit_size
== 64) {
312 LLVMValueRef swizzle_index_val
= lp_build_const_int32(gallivm
, idx
+ 1);
313 result2
= bld
->gs_iface
->fetch_input(bld
->gs_iface
, &bld_base
->base
,
314 false, vertex_index_val
, 0, attrib_index_val
, swizzle_index_val
);
315 result
[i
] = emit_fetch_64bit(bld_base
, result
[i
], result2
);
317 } else if (bld
->tes_iface
) {
318 LLVMValueRef vertex_index_val
= lp_build_const_int32(gallivm
, vertex_index
);
319 LLVMValueRef attrib_index_val
;
320 LLVMValueRef swizzle_index_val
= lp_build_const_int32(gallivm
, idx
);
321 LLVMValueRef result2
;
324 attrib_index_val
= lp_build_add(&bld_base
->uint_bld
, indir_index
, lp_build_const_int_vec(gallivm
, bld_base
->uint_bld
.type
, var
->data
.driver_location
));
326 attrib_index_val
= lp_build_const_int32(gallivm
, const_index
+ var
->data
.driver_location
);
327 if (var
->data
.patch
) {
328 result
[i
] = bld
->tes_iface
->fetch_patch_input(bld
->tes_iface
, &bld_base
->base
,
329 indir_index
? true : false, attrib_index_val
, swizzle_index_val
);
330 if (bit_size
== 64) {
331 LLVMValueRef swizzle_index_val
= lp_build_const_int32(gallivm
, idx
+ 1);
332 result2
= bld
->tes_iface
->fetch_patch_input(bld
->tes_iface
, &bld_base
->base
,
333 indir_index
? true : false, attrib_index_val
, swizzle_index_val
);
334 result
[i
] = emit_fetch_64bit(bld_base
, result
[i
], result2
);
338 result
[i
] = bld
->tes_iface
->fetch_vertex_input(bld
->tes_iface
, &bld_base
->base
,
339 indir_vertex_index
? true : false,
340 indir_vertex_index
? indir_vertex_index
: vertex_index_val
,
341 indir_index
? true : false, attrib_index_val
, swizzle_index_val
);
342 if (bit_size
== 64) {
343 LLVMValueRef swizzle_index_val
= lp_build_const_int32(gallivm
, idx
+ 1);
344 result2
= bld
->tes_iface
->fetch_vertex_input(bld
->tes_iface
, &bld_base
->base
,
345 indir_vertex_index
? true : false,
346 indir_vertex_index
? indir_vertex_index
: vertex_index_val
,
347 indir_index
? true : false, attrib_index_val
, swizzle_index_val
);
348 result
[i
] = emit_fetch_64bit(bld_base
, result
[i
], result2
);
351 } else if (bld
->tcs_iface
) {
352 LLVMValueRef vertex_index_val
= lp_build_const_int32(gallivm
, vertex_index
);
353 LLVMValueRef attrib_index_val
;
354 LLVMValueRef swizzle_index_val
= lp_build_const_int32(gallivm
, idx
);
357 attrib_index_val
= lp_build_add(&bld_base
->uint_bld
, indir_index
, lp_build_const_int_vec(gallivm
, bld_base
->uint_bld
.type
, var
->data
.driver_location
));
359 attrib_index_val
= lp_build_const_int32(gallivm
, const_index
+ var
->data
.driver_location
);
360 result
[i
] = bld
->tcs_iface
->emit_fetch_input(bld
->tcs_iface
, &bld_base
->base
,
361 indir_vertex_index
? true : false, indir_vertex_index
? indir_vertex_index
: vertex_index_val
,
362 indir_index
? true : false, attrib_index_val
, swizzle_index_val
);
363 if (bit_size
== 64) {
364 LLVMValueRef swizzle_index_val
= lp_build_const_int32(gallivm
, idx
+ 1);
365 LLVMValueRef result2
= bld
->tcs_iface
->emit_fetch_input(bld
->tcs_iface
, &bld_base
->base
,
366 indir_vertex_index
? true : false, indir_vertex_index
? indir_vertex_index
: vertex_index_val
,
367 indir_index
? true : false, attrib_index_val
, swizzle_index_val
);
368 result
[i
] = emit_fetch_64bit(bld_base
, result
[i
], result2
);
372 LLVMValueRef attrib_index_val
= lp_build_add(&bld_base
->uint_bld
, indir_index
, lp_build_const_int_vec(gallivm
, bld_base
->uint_bld
.type
, var
->data
.driver_location
));
373 LLVMValueRef index_vec
= get_soa_array_offsets(&bld_base
->uint_bld
,
374 attrib_index_val
, 4, idx
,
376 LLVMValueRef index_vec2
= NULL
;
377 LLVMTypeRef fptr_type
;
378 LLVMValueRef inputs_array
;
379 fptr_type
= LLVMPointerType(LLVMFloatTypeInContext(gallivm
->context
), 0);
380 inputs_array
= LLVMBuildBitCast(gallivm
->builder
, bld
->inputs_array
, fptr_type
, "");
383 index_vec2
= get_soa_array_offsets(&bld_base
->uint_bld
,
384 indir_index
, 4, idx
+ 1, TRUE
);
386 /* Gather values from the input register array */
387 result
[i
] = build_gather(bld_base
, &bld_base
->base
, inputs_array
, index_vec
, NULL
, index_vec2
);
389 if (bld
->indirects
& nir_var_shader_in
) {
390 LLVMValueRef lindex
= lp_build_const_int32(gallivm
,
391 var
->data
.driver_location
* 4 + idx
);
392 LLVMValueRef input_ptr
= lp_build_pointer_get(gallivm
->builder
,
393 bld
->inputs_array
, lindex
);
394 if (bit_size
== 64) {
395 LLVMValueRef lindex2
= lp_build_const_int32(gallivm
,
396 var
->data
.driver_location
* 4 + (idx
+ 1));
397 LLVMValueRef input_ptr2
= lp_build_pointer_get(gallivm
->builder
,
398 bld
->inputs_array
, lindex2
);
399 result
[i
] = emit_fetch_64bit(bld_base
, input_ptr
, input_ptr2
);
401 result
[i
] = input_ptr
;
404 if (bit_size
== 64) {
406 tmp
[0] = bld
->inputs
[var
->data
.driver_location
+ const_index
][idx
];
407 tmp
[1] = bld
->inputs
[var
->data
.driver_location
+ const_index
][idx
+ 1];
408 result
[i
] = emit_fetch_64bit(bld_base
, tmp
[0], tmp
[1]);
410 result
[i
] = bld
->inputs
[var
->data
.driver_location
+ const_index
][idx
];
417 case nir_var_shader_out
:
418 for (unsigned i
= 0; i
< num_components
; i
++) {
419 int idx
= (i
* dmul
) + var
->data
.location_frac
;
420 if (bld
->tcs_iface
) {
421 LLVMValueRef vertex_index_val
= lp_build_const_int32(gallivm
, vertex_index
);
422 LLVMValueRef attrib_index_val
;
423 LLVMValueRef swizzle_index_val
= lp_build_const_int32(gallivm
, idx
);
426 attrib_index_val
= lp_build_add(&bld_base
->uint_bld
, indir_index
, lp_build_const_int_vec(gallivm
, bld_base
->uint_bld
.type
, var
->data
.driver_location
));
428 attrib_index_val
= lp_build_const_int32(gallivm
, const_index
+ var
->data
.driver_location
);
430 result
[i
] = bld
->tcs_iface
->emit_fetch_output(bld
->tcs_iface
, &bld_base
->base
,
431 indir_vertex_index
? true : false, indir_vertex_index
? indir_vertex_index
: vertex_index_val
,
432 indir_index
? true : false, attrib_index_val
, swizzle_index_val
, 0);
433 if (bit_size
== 64) {
434 LLVMValueRef swizzle_index_val
= lp_build_const_int32(gallivm
, idx
+ 1);
435 LLVMValueRef result2
= bld
->tcs_iface
->emit_fetch_output(bld
->tcs_iface
, &bld_base
->base
,
436 indir_vertex_index
? true : false, indir_vertex_index
? indir_vertex_index
: vertex_index_val
,
437 indir_index
? true : false, attrib_index_val
, swizzle_index_val
, 0);
438 result
[i
] = emit_fetch_64bit(bld_base
, result
[i
], result2
);
448 static void emit_store_chan(struct lp_build_nir_context
*bld_base
,
449 nir_variable_mode deref_mode
,
451 unsigned location
, unsigned comp
,
455 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
456 LLVMBuilderRef builder
= bld
->bld_base
.base
.gallivm
->builder
;
457 struct lp_build_context
*float_bld
= &bld_base
->base
;
459 if (bit_size
== 64) {
466 emit_store_64bit_chan(bld_base
, bld
->outputs
[location
][chan
],
467 bld
->outputs
[location
][chan
+ 1], dst
);
469 dst
= LLVMBuildBitCast(builder
, dst
, float_bld
->vec_type
, "");
470 lp_exec_mask_store(&bld
->exec_mask
, float_bld
, dst
,
471 bld
->outputs
[location
][chan
+ comp
]);
475 static void emit_store_tcs_chan(struct lp_build_nir_context
*bld_base
,
478 unsigned const_index
,
479 LLVMValueRef indir_vertex_index
,
480 LLVMValueRef indir_index
,
483 LLVMValueRef chan_val
)
485 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
486 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
487 LLVMBuilderRef builder
= bld
->bld_base
.base
.gallivm
->builder
;
488 unsigned swizzle
= chan
;
489 if (bit_size
== 64) {
498 LLVMValueRef attrib_index_val
;
499 LLVMValueRef swizzle_index_val
= lp_build_const_int32(gallivm
, swizzle
);
502 attrib_index_val
= lp_build_add(&bld_base
->uint_bld
, indir_index
, lp_build_const_int_vec(gallivm
, bld_base
->uint_bld
.type
, location
));
504 attrib_index_val
= lp_build_const_int32(gallivm
, const_index
+ location
);
505 if (bit_size
== 64) {
506 LLVMValueRef split_vals
[2];
507 LLVMValueRef swizzle_index_val2
= lp_build_const_int32(gallivm
, swizzle
+ 1);
508 emit_store_64bit_split(bld_base
, chan_val
, split_vals
);
509 bld
->tcs_iface
->emit_store_output(bld
->tcs_iface
, &bld_base
->base
, 0,
510 indir_vertex_index
? true : false,
512 indir_index
? true : false,
513 attrib_index_val
, swizzle_index_val
,
514 split_vals
[0], mask_vec(bld_base
));
515 bld
->tcs_iface
->emit_store_output(bld
->tcs_iface
, &bld_base
->base
, 0,
516 indir_vertex_index
? true : false,
518 indir_index
? true : false,
519 attrib_index_val
, swizzle_index_val2
,
520 split_vals
[1], mask_vec(bld_base
));
522 chan_val
= LLVMBuildBitCast(builder
, chan_val
, bld_base
->base
.vec_type
, "");
523 bld
->tcs_iface
->emit_store_output(bld
->tcs_iface
, &bld_base
->base
, 0,
524 indir_vertex_index
? true : false,
526 indir_index
? true : false,
527 attrib_index_val
, swizzle_index_val
,
528 chan_val
, mask_vec(bld_base
));
532 static void emit_store_var(struct lp_build_nir_context
*bld_base
,
533 nir_variable_mode deref_mode
,
534 unsigned num_components
,
538 LLVMValueRef indir_vertex_index
,
539 unsigned const_index
,
540 LLVMValueRef indir_index
,
543 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
544 LLVMBuilderRef builder
= bld
->bld_base
.base
.gallivm
->builder
;
545 switch (deref_mode
) {
546 case nir_var_shader_out
: {
547 unsigned location
= var
->data
.driver_location
;
548 unsigned comp
= var
->data
.location_frac
;
549 if (bld_base
->shader
->info
.stage
== MESA_SHADER_FRAGMENT
) {
550 if (var
->data
.location
== FRAG_RESULT_STENCIL
)
552 else if (var
->data
.location
== FRAG_RESULT_DEPTH
)
556 for (unsigned chan
= 0; chan
< num_components
; chan
++) {
557 if (writemask
& (1u << chan
)) {
558 LLVMValueRef chan_val
= (num_components
== 1) ? dst
: LLVMBuildExtractValue(builder
, dst
, chan
, "");
559 if (bld
->tcs_iface
) {
560 emit_store_tcs_chan(bld_base
, bit_size
, location
, const_index
, indir_vertex_index
, indir_index
, comp
, chan
, chan_val
);
562 emit_store_chan(bld_base
, deref_mode
, bit_size
, location
+ const_index
, comp
, chan
, chan_val
);
572 static LLVMValueRef
emit_load_reg(struct lp_build_nir_context
*bld_base
,
573 struct lp_build_context
*reg_bld
,
574 const nir_reg_src
*reg
,
575 LLVMValueRef indir_src
,
576 LLVMValueRef reg_storage
)
578 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
579 LLVMBuilderRef builder
= gallivm
->builder
;
580 int nc
= reg
->reg
->num_components
;
581 LLVMValueRef vals
[NIR_MAX_VEC_COMPONENTS
] = { NULL
};
582 struct lp_build_context
*uint_bld
= &bld_base
->uint_bld
;
583 if (reg
->reg
->num_array_elems
) {
584 LLVMValueRef indirect_val
= lp_build_const_int_vec(gallivm
, uint_bld
->type
, reg
->base_offset
);
586 LLVMValueRef max_index
= lp_build_const_int_vec(gallivm
, uint_bld
->type
, reg
->reg
->num_array_elems
- 1);
587 indirect_val
= LLVMBuildAdd(builder
, indirect_val
, indir_src
, "");
588 indirect_val
= lp_build_min(uint_bld
, indirect_val
, max_index
);
590 reg_storage
= LLVMBuildBitCast(builder
, reg_storage
, LLVMPointerType(reg_bld
->elem_type
, 0), "");
591 for (unsigned i
= 0; i
< nc
; i
++) {
592 LLVMValueRef indirect_offset
= get_soa_array_offsets(uint_bld
, indirect_val
, nc
, i
, TRUE
);
593 vals
[i
] = build_gather(bld_base
, reg_bld
, reg_storage
, indirect_offset
, NULL
, NULL
);
596 for (unsigned i
= 0; i
< nc
; i
++) {
597 LLVMValueRef this_storage
= nc
== 1 ? reg_storage
: lp_build_array_get_ptr(gallivm
, reg_storage
,
598 lp_build_const_int32(gallivm
, i
));
599 vals
[i
] = LLVMBuildLoad(builder
, this_storage
, "");
602 return nc
== 1 ? vals
[0] : lp_nir_array_build_gather_values(builder
, vals
, nc
);
605 static void emit_store_reg(struct lp_build_nir_context
*bld_base
,
606 struct lp_build_context
*reg_bld
,
607 const nir_reg_dest
*reg
,
609 LLVMValueRef indir_src
,
610 LLVMValueRef reg_storage
,
611 LLVMValueRef dst
[NIR_MAX_VEC_COMPONENTS
])
613 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
614 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
615 LLVMBuilderRef builder
= gallivm
->builder
;
616 struct lp_build_context
*uint_bld
= &bld_base
->uint_bld
;
617 int nc
= reg
->reg
->num_components
;
618 if (reg
->reg
->num_array_elems
> 0) {
619 LLVMValueRef indirect_val
= lp_build_const_int_vec(gallivm
, uint_bld
->type
, reg
->base_offset
);
621 LLVMValueRef max_index
= lp_build_const_int_vec(gallivm
, uint_bld
->type
, reg
->reg
->num_array_elems
- 1);
622 indirect_val
= LLVMBuildAdd(builder
, indirect_val
, indir_src
, "");
623 indirect_val
= lp_build_min(uint_bld
, indirect_val
, max_index
);
625 reg_storage
= LLVMBuildBitCast(builder
, reg_storage
, LLVMPointerType(reg_bld
->elem_type
, 0), "");
626 for (unsigned i
= 0; i
< nc
; i
++) {
627 if (!(writemask
& (1 << i
)))
629 LLVMValueRef indirect_offset
= get_soa_array_offsets(uint_bld
, indirect_val
, nc
, i
, TRUE
);
630 dst
[i
] = LLVMBuildBitCast(builder
, dst
[i
], reg_bld
->vec_type
, "");
631 emit_mask_scatter(bld
, reg_storage
, indirect_offset
, dst
[i
], &bld
->exec_mask
);
636 for (unsigned i
= 0; i
< nc
; i
++) {
637 LLVMValueRef this_storage
= nc
== 1 ? reg_storage
: lp_build_array_get_ptr(gallivm
, reg_storage
,
638 lp_build_const_int32(gallivm
, i
));
639 dst
[i
] = LLVMBuildBitCast(builder
, dst
[i
], reg_bld
->vec_type
, "");
640 lp_exec_mask_store(&bld
->exec_mask
, reg_bld
, dst
[i
], this_storage
);
644 static void emit_load_kernel_arg(struct lp_build_nir_context
*bld_base
,
647 unsigned offset_bit_size
,
648 bool offset_is_uniform
,
650 LLVMValueRef result
[NIR_MAX_VEC_COMPONENTS
])
652 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
653 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
654 LLVMBuilderRef builder
= gallivm
->builder
;
655 struct lp_build_context
*bld_broad
= get_int_bld(bld_base
, true, bit_size
);
656 LLVMValueRef kernel_args_ptr
= bld
->kernel_args_ptr
;
657 unsigned size_shift
= 0;
658 struct lp_build_context
*bld_offset
= get_int_bld(bld_base
, true, offset_bit_size
);
661 else if (bit_size
== 32)
663 else if (bit_size
== 64)
666 offset
= lp_build_shr(bld_offset
, offset
, lp_build_const_int_vec(gallivm
, bld_offset
->type
, size_shift
));
668 LLVMTypeRef ptr_type
= LLVMPointerType(bld_broad
->elem_type
, 0);
669 kernel_args_ptr
= LLVMBuildBitCast(builder
, kernel_args_ptr
, ptr_type
, "");
671 if (offset_is_uniform
) {
672 offset
= LLVMBuildExtractElement(builder
, offset
, lp_build_const_int32(gallivm
, 0), "");
674 for (unsigned c
= 0; c
< nc
; c
++) {
675 LLVMValueRef this_offset
= LLVMBuildAdd(builder
, offset
, offset_bit_size
== 64 ? lp_build_const_int64(gallivm
, c
) : lp_build_const_int32(gallivm
, c
), "");
677 LLVMValueRef scalar
= lp_build_pointer_get(builder
, kernel_args_ptr
, this_offset
);
678 result
[c
] = lp_build_broadcast_scalar(bld_broad
, scalar
);
683 static LLVMValueRef
global_addr_to_ptr(struct gallivm_state
*gallivm
, LLVMValueRef addr_ptr
, unsigned bit_size
)
685 LLVMBuilderRef builder
= gallivm
->builder
;
688 addr_ptr
= LLVMBuildIntToPtr(builder
, addr_ptr
, LLVMPointerType(LLVMInt8TypeInContext(gallivm
->context
), 0), "");
691 addr_ptr
= LLVMBuildIntToPtr(builder
, addr_ptr
, LLVMPointerType(LLVMInt16TypeInContext(gallivm
->context
), 0), "");
695 addr_ptr
= LLVMBuildIntToPtr(builder
, addr_ptr
, LLVMPointerType(LLVMInt32TypeInContext(gallivm
->context
), 0), "");
698 addr_ptr
= LLVMBuildIntToPtr(builder
, addr_ptr
, LLVMPointerType(LLVMInt64TypeInContext(gallivm
->context
), 0), "");
704 static void emit_load_global(struct lp_build_nir_context
*bld_base
,
707 unsigned addr_bit_size
,
709 LLVMValueRef outval
[NIR_MAX_VEC_COMPONENTS
])
711 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
712 LLVMBuilderRef builder
= gallivm
->builder
;
713 struct lp_build_context
*uint_bld
= &bld_base
->uint_bld
;
714 struct lp_build_context
*res_bld
;
716 res_bld
= get_int_bld(bld_base
, true, bit_size
);
718 for (unsigned c
= 0; c
< nc
; c
++) {
719 LLVMValueRef result
= lp_build_alloca(gallivm
, res_bld
->vec_type
, "");
721 struct lp_build_loop_state loop_state
;
722 lp_build_loop_begin(&loop_state
, gallivm
, lp_build_const_int32(gallivm
, 0));
724 LLVMValueRef addr_ptr
= LLVMBuildExtractElement(gallivm
->builder
, addr
,
725 loop_state
.counter
, "");
726 addr_ptr
= global_addr_to_ptr(gallivm
, addr_ptr
, bit_size
);
728 LLVMValueRef value_ptr
= lp_build_pointer_get(builder
, addr_ptr
, lp_build_const_int32(gallivm
, c
));
730 LLVMValueRef temp_res
;
731 temp_res
= LLVMBuildLoad(builder
, result
, "");
732 temp_res
= LLVMBuildInsertElement(builder
, temp_res
, value_ptr
, loop_state
.counter
, "");
733 LLVMBuildStore(builder
, temp_res
, result
);
734 lp_build_loop_end_cond(&loop_state
, lp_build_const_int32(gallivm
, uint_bld
->type
.length
),
736 outval
[c
] = LLVMBuildLoad(builder
, result
, "");
740 static void emit_store_global(struct lp_build_nir_context
*bld_base
,
742 unsigned nc
, unsigned bit_size
,
743 unsigned addr_bit_size
,
747 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
748 LLVMBuilderRef builder
= gallivm
->builder
;
749 struct lp_build_context
*uint_bld
= &bld_base
->uint_bld
;
751 for (unsigned c
= 0; c
< nc
; c
++) {
752 if (!(writemask
& (1u << c
)))
754 LLVMValueRef val
= (nc
== 1) ? dst
: LLVMBuildExtractValue(builder
, dst
, c
, "");
756 LLVMValueRef exec_mask
= mask_vec(bld_base
);
757 struct lp_build_loop_state loop_state
;
758 lp_build_loop_begin(&loop_state
, gallivm
, lp_build_const_int32(gallivm
, 0));
759 LLVMValueRef value_ptr
= LLVMBuildExtractElement(gallivm
->builder
, val
,
760 loop_state
.counter
, "");
762 LLVMValueRef addr_ptr
= LLVMBuildExtractElement(gallivm
->builder
, addr
,
763 loop_state
.counter
, "");
764 addr_ptr
= global_addr_to_ptr(gallivm
, addr_ptr
, bit_size
);
767 value_ptr
= LLVMBuildBitCast(builder
, value_ptr
, LLVMInt32TypeInContext(gallivm
->context
), "");
770 value_ptr
= LLVMBuildBitCast(builder
, value_ptr
, LLVMInt64TypeInContext(gallivm
->context
), "");
775 struct lp_build_if_state ifthen
;
777 LLVMValueRef cond
= LLVMBuildICmp(gallivm
->builder
, LLVMIntNE
, exec_mask
, uint_bld
->zero
, "");
778 cond
= LLVMBuildExtractElement(gallivm
->builder
, cond
, loop_state
.counter
, "");
779 lp_build_if(&ifthen
, gallivm
, cond
);
780 lp_build_pointer_set(builder
, addr_ptr
, lp_build_const_int32(gallivm
, c
), value_ptr
);
781 lp_build_endif(&ifthen
);
782 lp_build_loop_end_cond(&loop_state
, lp_build_const_int32(gallivm
, uint_bld
->type
.length
),
787 static void emit_atomic_global(struct lp_build_nir_context
*bld_base
,
788 nir_intrinsic_op nir_op
,
789 unsigned addr_bit_size
,
791 LLVMValueRef val
, LLVMValueRef val2
,
792 LLVMValueRef
*result
)
794 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
795 LLVMBuilderRef builder
= gallivm
->builder
;
796 struct lp_build_context
*uint_bld
= &bld_base
->uint_bld
;
798 LLVMValueRef atom_res
= lp_build_alloca(gallivm
,
799 uint_bld
->vec_type
, "");
800 LLVMValueRef exec_mask
= mask_vec(bld_base
);
801 struct lp_build_loop_state loop_state
;
802 lp_build_loop_begin(&loop_state
, gallivm
, lp_build_const_int32(gallivm
, 0));
804 LLVMValueRef value_ptr
= LLVMBuildExtractElement(gallivm
->builder
, val
,
805 loop_state
.counter
, "");
807 LLVMValueRef addr_ptr
= LLVMBuildExtractElement(gallivm
->builder
, addr
,
808 loop_state
.counter
, "");
809 addr_ptr
= global_addr_to_ptr(gallivm
, addr_ptr
, 32);
810 struct lp_build_if_state ifthen
;
811 LLVMValueRef cond
, temp_res
;
813 cond
= LLVMBuildICmp(gallivm
->builder
, LLVMIntNE
, exec_mask
, uint_bld
->zero
, "");
814 cond
= LLVMBuildExtractElement(gallivm
->builder
, cond
, loop_state
.counter
, "");
815 lp_build_if(&ifthen
, gallivm
, cond
);
817 if (nir_op
== nir_intrinsic_global_atomic_comp_swap
) {
818 LLVMValueRef cas_src_ptr
= LLVMBuildExtractElement(gallivm
->builder
, val2
,
819 loop_state
.counter
, "");
820 cas_src_ptr
= LLVMBuildBitCast(gallivm
->builder
, cas_src_ptr
, uint_bld
->elem_type
, "");
821 scalar
= LLVMBuildAtomicCmpXchg(builder
, addr_ptr
, value_ptr
,
823 LLVMAtomicOrderingSequentiallyConsistent
,
824 LLVMAtomicOrderingSequentiallyConsistent
,
826 scalar
= LLVMBuildExtractValue(gallivm
->builder
, scalar
, 0, "");
828 LLVMAtomicRMWBinOp op
;
830 case nir_intrinsic_global_atomic_add
:
831 op
= LLVMAtomicRMWBinOpAdd
;
833 case nir_intrinsic_global_atomic_exchange
:
834 op
= LLVMAtomicRMWBinOpXchg
;
836 case nir_intrinsic_global_atomic_and
:
837 op
= LLVMAtomicRMWBinOpAnd
;
839 case nir_intrinsic_global_atomic_or
:
840 op
= LLVMAtomicRMWBinOpOr
;
842 case nir_intrinsic_global_atomic_xor
:
843 op
= LLVMAtomicRMWBinOpXor
;
845 case nir_intrinsic_global_atomic_umin
:
846 op
= LLVMAtomicRMWBinOpUMin
;
848 case nir_intrinsic_global_atomic_umax
:
849 op
= LLVMAtomicRMWBinOpUMax
;
851 case nir_intrinsic_global_atomic_imin
:
852 op
= LLVMAtomicRMWBinOpMin
;
854 case nir_intrinsic_global_atomic_imax
:
855 op
= LLVMAtomicRMWBinOpMax
;
858 unreachable("unknown atomic op");
861 scalar
= LLVMBuildAtomicRMW(builder
, op
,
863 LLVMAtomicOrderingSequentiallyConsistent
,
866 temp_res
= LLVMBuildLoad(builder
, atom_res
, "");
867 temp_res
= LLVMBuildInsertElement(builder
, temp_res
, scalar
, loop_state
.counter
, "");
868 LLVMBuildStore(builder
, temp_res
, atom_res
);
869 lp_build_else(&ifthen
);
870 temp_res
= LLVMBuildLoad(builder
, atom_res
, "");
871 temp_res
= LLVMBuildInsertElement(builder
, temp_res
, lp_build_const_int32(gallivm
, 0), loop_state
.counter
, "");
872 LLVMBuildStore(builder
, temp_res
, atom_res
);
873 lp_build_endif(&ifthen
);
874 lp_build_loop_end_cond(&loop_state
, lp_build_const_int32(gallivm
, uint_bld
->type
.length
),
876 *result
= LLVMBuildLoad(builder
, atom_res
, "");
879 static void emit_load_ubo(struct lp_build_nir_context
*bld_base
,
882 bool offset_is_uniform
,
885 LLVMValueRef result
[NIR_MAX_VEC_COMPONENTS
])
887 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
888 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
889 LLVMBuilderRef builder
= gallivm
->builder
;
890 struct lp_build_context
*uint_bld
= &bld_base
->uint_bld
;
891 struct lp_build_context
*bld_broad
= bit_size
== 64 ? &bld_base
->dbl_bld
: &bld_base
->base
;
892 LLVMValueRef consts_ptr
= lp_build_array_get(gallivm
, bld
->consts_ptr
, index
);
893 unsigned size_shift
= 0;
896 else if (bit_size
== 64)
899 offset
= lp_build_shr(uint_bld
, offset
, lp_build_const_int_vec(gallivm
, uint_bld
->type
, size_shift
));
900 if (bit_size
== 64) {
901 LLVMTypeRef dptr_type
= LLVMPointerType(bld_base
->dbl_bld
.elem_type
, 0);
902 consts_ptr
= LLVMBuildBitCast(builder
, consts_ptr
, dptr_type
, "");
905 if (offset_is_uniform
) {
906 offset
= LLVMBuildExtractElement(builder
, offset
, lp_build_const_int32(gallivm
, 0), "");
908 for (unsigned c
= 0; c
< nc
; c
++) {
909 LLVMValueRef this_offset
= LLVMBuildAdd(builder
, offset
, lp_build_const_int32(gallivm
, c
), "");
911 LLVMValueRef scalar
= lp_build_pointer_get(builder
, consts_ptr
, this_offset
);
912 result
[c
] = lp_build_broadcast_scalar(bld_broad
, scalar
);
915 LLVMValueRef overflow_mask
;
916 LLVMValueRef num_consts
= lp_build_array_get(gallivm
, bld
->const_sizes_ptr
, index
);
918 num_consts
= LLVMBuildShl(gallivm
->builder
, num_consts
, lp_build_const_int32(gallivm
, 4), "");
919 num_consts
= lp_build_broadcast_scalar(uint_bld
, num_consts
);
920 for (unsigned c
= 0; c
< nc
; c
++) {
921 LLVMValueRef this_offset
= lp_build_add(uint_bld
, offset
, lp_build_const_int_vec(gallivm
, uint_bld
->type
, c
));
922 overflow_mask
= lp_build_compare(gallivm
, uint_bld
->type
, PIPE_FUNC_GEQUAL
,
923 this_offset
, num_consts
);
925 result
[c
] = build_gather(bld_base
, bld_broad
, consts_ptr
, this_offset
, overflow_mask
, NULL
);
931 static void emit_load_mem(struct lp_build_nir_context
*bld_base
,
936 LLVMValueRef outval
[NIR_MAX_VEC_COMPONENTS
])
938 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
939 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
940 LLVMBuilderRef builder
= bld
->bld_base
.base
.gallivm
->builder
;
941 LLVMValueRef ssbo_ptr
= NULL
;
942 struct lp_build_context
*uint_bld
= &bld_base
->uint_bld
;
943 struct lp_build_context
*uint64_bld
= &bld_base
->uint64_bld
;
944 LLVMValueRef ssbo_limit
= NULL
;
947 LLVMValueRef ssbo_size_ptr
= lp_build_array_get(gallivm
, bld
->ssbo_sizes_ptr
, LLVMBuildExtractElement(builder
, index
, lp_build_const_int32(gallivm
, 0), ""));
948 ssbo_limit
= LLVMBuildAShr(gallivm
->builder
, ssbo_size_ptr
, lp_build_const_int32(gallivm
, bit_size
== 64 ? 3 : 2), "");
949 ssbo_limit
= lp_build_broadcast_scalar(uint_bld
, ssbo_limit
);
951 ssbo_ptr
= lp_build_array_get(gallivm
, bld
->ssbo_ptr
, LLVMBuildExtractElement(builder
, index
, lp_build_const_int32(gallivm
, 0), ""));
953 ssbo_ptr
= bld
->shared_ptr
;
955 offset
= LLVMBuildAShr(gallivm
->builder
, offset
, lp_build_const_int_vec(gallivm
, uint_bld
->type
, bit_size
== 64 ? 3 : 2), "");
956 for (unsigned c
= 0; c
< nc
; c
++) {
957 LLVMValueRef loop_index
= lp_build_add(uint_bld
, offset
, lp_build_const_int_vec(gallivm
, uint_bld
->type
, c
));
958 LLVMValueRef exec_mask
= mask_vec(bld_base
);
961 LLVMValueRef ssbo_oob_cmp
= lp_build_cmp(uint_bld
, PIPE_FUNC_LESS
, loop_index
, ssbo_limit
);
962 exec_mask
= LLVMBuildAnd(builder
, exec_mask
, ssbo_oob_cmp
, "");
965 LLVMValueRef result
= lp_build_alloca(gallivm
, bit_size
== 64 ? uint64_bld
->vec_type
: uint_bld
->vec_type
, "");
966 struct lp_build_loop_state loop_state
;
967 lp_build_loop_begin(&loop_state
, gallivm
, lp_build_const_int32(gallivm
, 0));
969 struct lp_build_if_state ifthen
;
970 LLVMValueRef cond
, temp_res
;
972 loop_index
= LLVMBuildExtractElement(gallivm
->builder
, loop_index
,
973 loop_state
.counter
, "");
975 cond
= LLVMBuildICmp(gallivm
->builder
, LLVMIntNE
, exec_mask
, uint_bld
->zero
, "");
976 cond
= LLVMBuildExtractElement(gallivm
->builder
, cond
, loop_state
.counter
, "");
978 lp_build_if(&ifthen
, gallivm
, cond
);
980 if (bit_size
== 64) {
981 LLVMValueRef ssbo_ptr2
= LLVMBuildBitCast(builder
, ssbo_ptr
, LLVMPointerType(uint64_bld
->elem_type
, 0), "");
982 scalar
= lp_build_pointer_get(builder
, ssbo_ptr2
, loop_index
);
984 scalar
= lp_build_pointer_get(builder
, ssbo_ptr
, loop_index
);
986 temp_res
= LLVMBuildLoad(builder
, result
, "");
987 temp_res
= LLVMBuildInsertElement(builder
, temp_res
, scalar
, loop_state
.counter
, "");
988 LLVMBuildStore(builder
, temp_res
, result
);
989 lp_build_else(&ifthen
);
990 temp_res
= LLVMBuildLoad(builder
, result
, "");
993 zero
= LLVMConstInt(LLVMInt64TypeInContext(gallivm
->context
), 0, 0);
995 zero
= lp_build_const_int32(gallivm
, 0);
996 temp_res
= LLVMBuildInsertElement(builder
, temp_res
, zero
, loop_state
.counter
, "");
997 LLVMBuildStore(builder
, temp_res
, result
);
998 lp_build_endif(&ifthen
);
999 lp_build_loop_end_cond(&loop_state
, lp_build_const_int32(gallivm
, uint_bld
->type
.length
),
1001 outval
[c
] = LLVMBuildLoad(gallivm
->builder
, result
, "");
1005 static void emit_store_mem(struct lp_build_nir_context
*bld_base
,
1010 LLVMValueRef offset
,
1013 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1014 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1015 LLVMBuilderRef builder
= bld
->bld_base
.base
.gallivm
->builder
;
1016 LLVMValueRef ssbo_ptr
;
1017 struct lp_build_context
*uint_bld
= &bld_base
->uint_bld
;
1018 LLVMValueRef ssbo_limit
= NULL
;
1021 LLVMValueRef ssbo_size_ptr
= lp_build_array_get(gallivm
, bld
->ssbo_sizes_ptr
, LLVMBuildExtractElement(builder
, index
, lp_build_const_int32(gallivm
, 0), ""));
1022 ssbo_limit
= LLVMBuildAShr(gallivm
->builder
, ssbo_size_ptr
, lp_build_const_int32(gallivm
, bit_size
== 64 ? 3 : 2), "");
1023 ssbo_limit
= lp_build_broadcast_scalar(uint_bld
, ssbo_limit
);
1024 ssbo_ptr
= lp_build_array_get(gallivm
, bld
->ssbo_ptr
, LLVMBuildExtractElement(builder
, index
, lp_build_const_int32(gallivm
, 0), ""));
1026 ssbo_ptr
= bld
->shared_ptr
;
1028 offset
= lp_build_shr_imm(uint_bld
, offset
, bit_size
== 64 ? 3 : 2);
1029 for (unsigned c
= 0; c
< nc
; c
++) {
1030 if (!(writemask
& (1u << c
)))
1032 LLVMValueRef loop_index
= lp_build_add(uint_bld
, offset
, lp_build_const_int_vec(gallivm
, uint_bld
->type
, c
));
1033 LLVMValueRef val
= (nc
== 1) ? dst
: LLVMBuildExtractValue(builder
, dst
, c
, "");
1035 LLVMValueRef exec_mask
= mask_vec(bld_base
);
1037 LLVMValueRef ssbo_oob_cmp
= lp_build_cmp(uint_bld
, PIPE_FUNC_LESS
, loop_index
, ssbo_limit
);
1038 exec_mask
= LLVMBuildAnd(builder
, exec_mask
, ssbo_oob_cmp
, "");
1041 struct lp_build_loop_state loop_state
;
1042 lp_build_loop_begin(&loop_state
, gallivm
, lp_build_const_int32(gallivm
, 0));
1043 LLVMValueRef value_ptr
= LLVMBuildExtractElement(gallivm
->builder
, val
,
1044 loop_state
.counter
, "");
1046 value_ptr
= LLVMBuildBitCast(gallivm
->builder
, value_ptr
, bld_base
->uint64_bld
.elem_type
, "");
1048 value_ptr
= LLVMBuildBitCast(gallivm
->builder
, value_ptr
, uint_bld
->elem_type
, "");
1049 struct lp_build_if_state ifthen
;
1052 loop_index
= LLVMBuildExtractElement(gallivm
->builder
, loop_index
,
1053 loop_state
.counter
, "");
1054 cond
= LLVMBuildICmp(gallivm
->builder
, LLVMIntNE
, exec_mask
, uint_bld
->zero
, "");
1055 cond
= LLVMBuildExtractElement(gallivm
->builder
, cond
, loop_state
.counter
, "");
1056 lp_build_if(&ifthen
, gallivm
, cond
);
1057 if (bit_size
== 64) {
1058 LLVMValueRef ssbo_ptr2
= LLVMBuildBitCast(builder
, ssbo_ptr
, LLVMPointerType(bld_base
->uint64_bld
.elem_type
, 0), "");
1059 lp_build_pointer_set(builder
, ssbo_ptr2
, loop_index
, value_ptr
);
1061 lp_build_pointer_set(builder
, ssbo_ptr
, loop_index
, value_ptr
);
1062 lp_build_endif(&ifthen
);
1063 lp_build_loop_end_cond(&loop_state
, lp_build_const_int32(gallivm
, uint_bld
->type
.length
),
1068 static void emit_atomic_mem(struct lp_build_nir_context
*bld_base
,
1069 nir_intrinsic_op nir_op
,
1070 LLVMValueRef index
, LLVMValueRef offset
,
1071 LLVMValueRef val
, LLVMValueRef val2
,
1072 LLVMValueRef
*result
)
1074 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1075 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1076 LLVMBuilderRef builder
= bld
->bld_base
.base
.gallivm
->builder
;
1077 LLVMValueRef ssbo_ptr
;
1078 struct lp_build_context
*uint_bld
= &bld_base
->uint_bld
;
1079 LLVMValueRef ssbo_limit
= NULL
;
1082 LLVMValueRef ssbo_size_ptr
= lp_build_array_get(gallivm
, bld
->ssbo_sizes_ptr
, LLVMBuildExtractElement(builder
, index
, lp_build_const_int32(gallivm
, 0), ""));
1083 ssbo_limit
= LLVMBuildAShr(gallivm
->builder
, ssbo_size_ptr
, lp_build_const_int32(gallivm
, 2), "");
1084 ssbo_limit
= lp_build_broadcast_scalar(uint_bld
, ssbo_limit
);
1085 ssbo_ptr
= lp_build_array_get(gallivm
, bld
->ssbo_ptr
, LLVMBuildExtractElement(builder
, index
, lp_build_const_int32(gallivm
, 0), ""));
1087 ssbo_ptr
= bld
->shared_ptr
;
1089 offset
= lp_build_shr_imm(uint_bld
, offset
, 2);
1090 LLVMValueRef atom_res
= lp_build_alloca(gallivm
,
1091 uint_bld
->vec_type
, "");
1093 LLVMValueRef exec_mask
= mask_vec(bld_base
);
1095 LLVMValueRef ssbo_oob_cmp
= lp_build_cmp(uint_bld
, PIPE_FUNC_LESS
, offset
, ssbo_limit
);
1096 exec_mask
= LLVMBuildAnd(builder
, exec_mask
, ssbo_oob_cmp
, "");
1099 struct lp_build_loop_state loop_state
;
1100 lp_build_loop_begin(&loop_state
, gallivm
, lp_build_const_int32(gallivm
, 0));
1102 LLVMValueRef value_ptr
= LLVMBuildExtractElement(gallivm
->builder
, val
,
1103 loop_state
.counter
, "");
1104 value_ptr
= LLVMBuildBitCast(gallivm
->builder
, value_ptr
, uint_bld
->elem_type
, "");
1106 offset
= LLVMBuildExtractElement(gallivm
->builder
, offset
,
1107 loop_state
.counter
, "");
1109 LLVMValueRef scalar_ptr
= LLVMBuildGEP(builder
, ssbo_ptr
,
1112 struct lp_build_if_state ifthen
;
1113 LLVMValueRef cond
, temp_res
;
1114 LLVMValueRef scalar
;
1115 cond
= LLVMBuildICmp(gallivm
->builder
, LLVMIntNE
, exec_mask
, uint_bld
->zero
, "");
1116 cond
= LLVMBuildExtractElement(gallivm
->builder
, cond
, loop_state
.counter
, "");
1117 lp_build_if(&ifthen
, gallivm
, cond
);
1119 if (nir_op
== nir_intrinsic_ssbo_atomic_comp_swap
|| nir_op
== nir_intrinsic_shared_atomic_comp_swap
) {
1120 LLVMValueRef cas_src_ptr
= LLVMBuildExtractElement(gallivm
->builder
, val2
,
1121 loop_state
.counter
, "");
1122 cas_src_ptr
= LLVMBuildBitCast(gallivm
->builder
, cas_src_ptr
, uint_bld
->elem_type
, "");
1123 scalar
= LLVMBuildAtomicCmpXchg(builder
, scalar_ptr
, value_ptr
,
1125 LLVMAtomicOrderingSequentiallyConsistent
,
1126 LLVMAtomicOrderingSequentiallyConsistent
,
1128 scalar
= LLVMBuildExtractValue(gallivm
->builder
, scalar
, 0, "");
1130 LLVMAtomicRMWBinOp op
;
1133 case nir_intrinsic_shared_atomic_add
:
1134 case nir_intrinsic_ssbo_atomic_add
:
1135 op
= LLVMAtomicRMWBinOpAdd
;
1137 case nir_intrinsic_shared_atomic_exchange
:
1138 case nir_intrinsic_ssbo_atomic_exchange
:
1139 op
= LLVMAtomicRMWBinOpXchg
;
1141 case nir_intrinsic_shared_atomic_and
:
1142 case nir_intrinsic_ssbo_atomic_and
:
1143 op
= LLVMAtomicRMWBinOpAnd
;
1145 case nir_intrinsic_shared_atomic_or
:
1146 case nir_intrinsic_ssbo_atomic_or
:
1147 op
= LLVMAtomicRMWBinOpOr
;
1149 case nir_intrinsic_shared_atomic_xor
:
1150 case nir_intrinsic_ssbo_atomic_xor
:
1151 op
= LLVMAtomicRMWBinOpXor
;
1153 case nir_intrinsic_shared_atomic_umin
:
1154 case nir_intrinsic_ssbo_atomic_umin
:
1155 op
= LLVMAtomicRMWBinOpUMin
;
1157 case nir_intrinsic_shared_atomic_umax
:
1158 case nir_intrinsic_ssbo_atomic_umax
:
1159 op
= LLVMAtomicRMWBinOpUMax
;
1161 case nir_intrinsic_ssbo_atomic_imin
:
1162 case nir_intrinsic_shared_atomic_imin
:
1163 op
= LLVMAtomicRMWBinOpMin
;
1165 case nir_intrinsic_ssbo_atomic_imax
:
1166 case nir_intrinsic_shared_atomic_imax
:
1167 op
= LLVMAtomicRMWBinOpMax
;
1170 unreachable("unknown atomic op");
1172 scalar
= LLVMBuildAtomicRMW(builder
, op
,
1173 scalar_ptr
, value_ptr
,
1174 LLVMAtomicOrderingSequentiallyConsistent
,
1177 temp_res
= LLVMBuildLoad(builder
, atom_res
, "");
1178 temp_res
= LLVMBuildInsertElement(builder
, temp_res
, scalar
, loop_state
.counter
, "");
1179 LLVMBuildStore(builder
, temp_res
, atom_res
);
1180 lp_build_else(&ifthen
);
1181 temp_res
= LLVMBuildLoad(builder
, atom_res
, "");
1182 temp_res
= LLVMBuildInsertElement(builder
, temp_res
, lp_build_const_int32(gallivm
, 0), loop_state
.counter
, "");
1183 LLVMBuildStore(builder
, temp_res
, atom_res
);
1184 lp_build_endif(&ifthen
);
1186 lp_build_loop_end_cond(&loop_state
, lp_build_const_int32(gallivm
, uint_bld
->type
.length
),
1188 *result
= LLVMBuildLoad(builder
, atom_res
, "");
1191 static void emit_barrier(struct lp_build_nir_context
*bld_base
)
1193 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1194 struct gallivm_state
* gallivm
= bld_base
->base
.gallivm
;
1196 LLVMBasicBlockRef resume
= lp_build_insert_new_block(gallivm
, "resume");
1198 lp_build_coro_suspend_switch(gallivm
, bld
->coro
, resume
, false);
1199 LLVMPositionBuilderAtEnd(gallivm
->builder
, resume
);
1202 static LLVMValueRef
emit_get_buffer_size(struct lp_build_nir_context
*bld_base
,
1205 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1206 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1207 LLVMBuilderRef builder
= bld
->bld_base
.base
.gallivm
->builder
;
1208 struct lp_build_context
*bld_broad
= &bld_base
->uint_bld
;
1209 LLVMValueRef size_ptr
= lp_build_array_get(bld_base
->base
.gallivm
, bld
->ssbo_sizes_ptr
,
1210 LLVMBuildExtractElement(builder
, index
, lp_build_const_int32(gallivm
, 0), ""));
1211 return lp_build_broadcast_scalar(bld_broad
, size_ptr
);
1214 static void emit_image_op(struct lp_build_nir_context
*bld_base
,
1215 struct lp_img_params
*params
)
1217 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1218 params
->type
= bld_base
->base
.type
;
1219 params
->context_ptr
= bld
->context_ptr
;
1220 params
->thread_data_ptr
= bld
->thread_data_ptr
;
1221 params
->exec_mask
= mask_vec(bld_base
);
1222 bld
->image
->emit_op(bld
->image
,
1223 bld
->bld_base
.base
.gallivm
,
1228 static void emit_image_size(struct lp_build_nir_context
*bld_base
,
1229 struct lp_sampler_size_query_params
*params
)
1231 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1233 params
->int_type
= bld_base
->int_bld
.type
;
1234 params
->context_ptr
= bld
->context_ptr
;
1236 bld
->image
->emit_size_query(bld
->image
,
1237 bld
->bld_base
.base
.gallivm
,
1242 static void init_var_slots(struct lp_build_nir_context
*bld_base
,
1243 nir_variable
*var
, unsigned sc
)
1245 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1246 unsigned slots
= glsl_count_attribute_slots(var
->type
, false) * 4;
1250 for (unsigned comp
= sc
; comp
< slots
+ sc
; comp
++) {
1251 unsigned this_loc
= var
->data
.driver_location
+ (comp
/ 4);
1252 unsigned this_chan
= comp
% 4;
1254 if (!bld
->outputs
[this_loc
][this_chan
])
1255 bld
->outputs
[this_loc
][this_chan
] = lp_build_alloca(bld_base
->base
.gallivm
,
1256 bld_base
->base
.vec_type
, "output");
1260 static void emit_var_decl(struct lp_build_nir_context
*bld_base
,
1263 unsigned sc
= var
->data
.location_frac
;
1264 switch (var
->data
.mode
) {
1265 case nir_var_shader_out
: {
1266 if (bld_base
->shader
->info
.stage
== MESA_SHADER_FRAGMENT
) {
1267 if (var
->data
.location
== FRAG_RESULT_STENCIL
)
1269 else if (var
->data
.location
== FRAG_RESULT_DEPTH
)
1272 init_var_slots(bld_base
, var
, sc
);
1280 static void emit_tex(struct lp_build_nir_context
*bld_base
,
1281 struct lp_sampler_params
*params
)
1283 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1285 params
->type
= bld_base
->base
.type
;
1286 params
->context_ptr
= bld
->context_ptr
;
1287 params
->thread_data_ptr
= bld
->thread_data_ptr
;
1289 bld
->sampler
->emit_tex_sample(bld
->sampler
,
1290 bld
->bld_base
.base
.gallivm
,
1294 static void emit_tex_size(struct lp_build_nir_context
*bld_base
,
1295 struct lp_sampler_size_query_params
*params
)
1297 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1299 params
->int_type
= bld_base
->int_bld
.type
;
1300 params
->context_ptr
= bld
->context_ptr
;
1302 bld
->sampler
->emit_size_query(bld
->sampler
,
1303 bld
->bld_base
.base
.gallivm
,
1307 static void emit_sysval_intrin(struct lp_build_nir_context
*bld_base
,
1308 nir_intrinsic_instr
*instr
,
1309 LLVMValueRef result
[NIR_MAX_VEC_COMPONENTS
])
1311 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1312 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1313 switch (instr
->intrinsic
) {
1314 case nir_intrinsic_load_instance_id
:
1315 result
[0] = lp_build_broadcast_scalar(&bld_base
->uint_bld
, bld
->system_values
.instance_id
);
1317 case nir_intrinsic_load_base_instance
:
1318 result
[0] = lp_build_broadcast_scalar(&bld_base
->uint_bld
, bld
->system_values
.base_instance
);
1320 case nir_intrinsic_load_base_vertex
:
1321 result
[0] = bld
->system_values
.basevertex
;
1323 case nir_intrinsic_load_vertex_id
:
1324 result
[0] = bld
->system_values
.vertex_id
;
1326 case nir_intrinsic_load_primitive_id
:
1327 result
[0] = bld
->system_values
.prim_id
;
1329 case nir_intrinsic_load_work_group_id
:
1330 for (unsigned i
= 0; i
< 3; i
++)
1331 result
[i
] = lp_build_broadcast_scalar(&bld_base
->uint_bld
, LLVMBuildExtractElement(gallivm
->builder
, bld
->system_values
.block_id
, lp_build_const_int32(gallivm
, i
), ""));
1333 case nir_intrinsic_load_local_invocation_id
:
1334 for (unsigned i
= 0; i
< 3; i
++)
1335 result
[i
] = LLVMBuildExtractValue(gallivm
->builder
, bld
->system_values
.thread_id
, i
, "");
1337 case nir_intrinsic_load_num_work_groups
:
1338 for (unsigned i
= 0; i
< 3; i
++)
1339 result
[i
] = lp_build_broadcast_scalar(&bld_base
->uint_bld
, LLVMBuildExtractElement(gallivm
->builder
, bld
->system_values
.grid_size
, lp_build_const_int32(gallivm
, i
), ""));
1341 case nir_intrinsic_load_invocation_id
:
1342 if (bld_base
->shader
->info
.stage
== MESA_SHADER_TESS_CTRL
)
1343 result
[0] = bld
->system_values
.invocation_id
;
1345 result
[0] = lp_build_broadcast_scalar(&bld_base
->uint_bld
, bld
->system_values
.invocation_id
);
1347 case nir_intrinsic_load_front_face
:
1348 result
[0] = lp_build_broadcast_scalar(&bld_base
->uint_bld
, bld
->system_values
.front_facing
);
1350 case nir_intrinsic_load_draw_id
:
1351 result
[0] = lp_build_broadcast_scalar(&bld_base
->uint_bld
, bld
->system_values
.draw_id
);
1355 case nir_intrinsic_load_local_group_size
:
1356 for (unsigned i
= 0; i
< 3; i
++)
1357 result
[i
] = lp_build_broadcast_scalar(&bld_base
->uint_bld
, LLVMBuildExtractElement(gallivm
->builder
, bld
->system_values
.block_size
, lp_build_const_int32(gallivm
, i
), ""));
1359 case nir_intrinsic_load_work_dim
:
1360 result
[0] = lp_build_broadcast_scalar(&bld_base
->uint_bld
, bld
->system_values
.work_dim
);
1362 case nir_intrinsic_load_tess_coord
:
1363 for (unsigned i
= 0; i
< 3; i
++) {
1364 result
[i
] = LLVMBuildExtractValue(gallivm
->builder
, bld
->system_values
.tess_coord
, i
, "");
1367 case nir_intrinsic_load_tess_level_outer
:
1368 for (unsigned i
= 0; i
< 4; i
++)
1369 result
[i
] = lp_build_broadcast_scalar(&bld_base
->base
, LLVMBuildExtractValue(gallivm
->builder
, bld
->system_values
.tess_outer
, i
, ""));
1371 case nir_intrinsic_load_tess_level_inner
:
1372 for (unsigned i
= 0; i
< 2; i
++)
1373 result
[i
] = lp_build_broadcast_scalar(&bld_base
->base
, LLVMBuildExtractValue(gallivm
->builder
, bld
->system_values
.tess_inner
, i
, ""));
1375 case nir_intrinsic_load_patch_vertices_in
:
1376 result
[0] = bld
->system_values
.vertices_in
;
1378 case nir_intrinsic_load_sample_id
:
1379 result
[0] = lp_build_broadcast_scalar(&bld_base
->uint_bld
, bld
->system_values
.sample_id
);
1381 case nir_intrinsic_load_sample_pos
:
1382 for (unsigned i
= 0; i
< 2; i
++) {
1383 LLVMValueRef idx
= LLVMBuildMul(gallivm
->builder
, bld
->system_values
.sample_id
, lp_build_const_int32(gallivm
, 2), "");
1384 idx
= LLVMBuildAdd(gallivm
->builder
, idx
, lp_build_const_int32(gallivm
, i
), "");
1385 LLVMValueRef val
= lp_build_array_get(gallivm
, bld
->system_values
.sample_pos
, idx
);
1386 result
[i
] = lp_build_broadcast_scalar(&bld_base
->base
, val
);
1389 case nir_intrinsic_load_sample_mask_in
:
1390 result
[0] = bld
->system_values
.sample_mask_in
;
1395 static void emit_helper_invocation(struct lp_build_nir_context
*bld_base
,
1398 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1399 struct lp_build_context
*uint_bld
= &bld_base
->uint_bld
;
1400 *dst
= lp_build_cmp(uint_bld
, PIPE_FUNC_NOTEQUAL
, mask_vec(bld_base
), lp_build_const_int_vec(gallivm
, uint_bld
->type
, -1));
1403 static void bgnloop(struct lp_build_nir_context
*bld_base
)
1405 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1406 lp_exec_bgnloop(&bld
->exec_mask
, true);
1409 static void endloop(struct lp_build_nir_context
*bld_base
)
1411 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1412 lp_exec_endloop(bld_base
->base
.gallivm
, &bld
->exec_mask
);
1415 static void if_cond(struct lp_build_nir_context
*bld_base
, LLVMValueRef cond
)
1417 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
1418 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1419 lp_exec_mask_cond_push(&bld
->exec_mask
, LLVMBuildBitCast(builder
, cond
, bld_base
->base
.int_vec_type
, ""));
1422 static void else_stmt(struct lp_build_nir_context
*bld_base
)
1424 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1425 lp_exec_mask_cond_invert(&bld
->exec_mask
);
1428 static void endif_stmt(struct lp_build_nir_context
*bld_base
)
1430 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1431 lp_exec_mask_cond_pop(&bld
->exec_mask
);
1434 static void break_stmt(struct lp_build_nir_context
*bld_base
)
1436 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1438 lp_exec_break(&bld
->exec_mask
, NULL
, false);
1441 static void continue_stmt(struct lp_build_nir_context
*bld_base
)
1443 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1444 lp_exec_continue(&bld
->exec_mask
);
1447 static void discard(struct lp_build_nir_context
*bld_base
, LLVMValueRef cond
)
1449 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1450 LLVMBuilderRef builder
= bld
->bld_base
.base
.gallivm
->builder
;
1454 if (bld
->exec_mask
.has_mask
) {
1455 mask
= LLVMBuildNot(builder
, bld
->exec_mask
.exec_mask
, "kilp");
1457 mask
= LLVMConstNull(bld
->bld_base
.base
.int_vec_type
);
1460 mask
= LLVMBuildNot(builder
, cond
, "");
1461 if (bld
->exec_mask
.has_mask
) {
1462 LLVMValueRef invmask
;
1463 invmask
= LLVMBuildNot(builder
, bld
->exec_mask
.exec_mask
, "kilp");
1464 mask
= LLVMBuildOr(builder
, mask
, invmask
, "");
1467 lp_build_mask_update(bld
->mask
, mask
);
1471 increment_vec_ptr_by_mask(struct lp_build_nir_context
* bld_base
,
1475 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
1476 LLVMValueRef current_vec
= LLVMBuildLoad(builder
, ptr
, "");
1478 current_vec
= LLVMBuildSub(builder
, current_vec
, mask
, "");
1480 LLVMBuildStore(builder
, current_vec
, ptr
);
1484 clear_uint_vec_ptr_from_mask(struct lp_build_nir_context
* bld_base
,
1488 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
1489 LLVMValueRef current_vec
= LLVMBuildLoad(builder
, ptr
, "");
1491 current_vec
= lp_build_select(&bld_base
->uint_bld
,
1493 bld_base
->uint_bld
.zero
,
1496 LLVMBuildStore(builder
, current_vec
, ptr
);
1500 clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context
* bld
,
1501 LLVMValueRef current_mask_vec
,
1502 LLVMValueRef total_emitted_vertices_vec
)
1504 LLVMBuilderRef builder
= bld
->bld_base
.base
.gallivm
->builder
;
1505 struct lp_build_context
*int_bld
= &bld
->bld_base
.int_bld
;
1506 LLVMValueRef max_mask
= lp_build_cmp(int_bld
, PIPE_FUNC_LESS
,
1507 total_emitted_vertices_vec
,
1508 bld
->max_output_vertices_vec
);
1510 return LLVMBuildAnd(builder
, current_mask_vec
, max_mask
, "");
1513 static void emit_vertex(struct lp_build_nir_context
*bld_base
, uint32_t stream_id
)
1515 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1516 LLVMBuilderRef builder
= bld
->bld_base
.base
.gallivm
->builder
;
1518 assert(bld
->gs_iface
->emit_vertex
);
1519 LLVMValueRef total_emitted_vertices_vec
=
1520 LLVMBuildLoad(builder
, bld
->total_emitted_vertices_vec_ptr
[stream_id
], "");
1521 LLVMValueRef mask
= mask_vec(bld_base
);
1522 mask
= clamp_mask_to_max_output_vertices(bld
, mask
,
1523 total_emitted_vertices_vec
);
1524 bld
->gs_iface
->emit_vertex(bld
->gs_iface
, &bld
->bld_base
.base
,
1526 total_emitted_vertices_vec
,
1527 lp_build_const_int_vec(bld
->bld_base
.base
.gallivm
, bld
->bld_base
.base
.type
, stream_id
));
1529 increment_vec_ptr_by_mask(bld_base
, bld
->emitted_vertices_vec_ptr
[stream_id
],
1531 increment_vec_ptr_by_mask(bld_base
, bld
->total_emitted_vertices_vec_ptr
[stream_id
],
1536 end_primitive_masked(struct lp_build_nir_context
* bld_base
,
1537 LLVMValueRef mask
, uint32_t stream_id
)
1539 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1540 LLVMBuilderRef builder
= bld
->bld_base
.base
.gallivm
->builder
;
1542 struct lp_build_context
*uint_bld
= &bld_base
->uint_bld
;
1543 LLVMValueRef emitted_vertices_vec
=
1544 LLVMBuildLoad(builder
, bld
->emitted_vertices_vec_ptr
[stream_id
], "");
1545 LLVMValueRef emitted_prims_vec
=
1546 LLVMBuildLoad(builder
, bld
->emitted_prims_vec_ptr
[stream_id
], "");
1547 LLVMValueRef total_emitted_vertices_vec
=
1548 LLVMBuildLoad(builder
, bld
->total_emitted_vertices_vec_ptr
[stream_id
], "");
1550 LLVMValueRef emitted_mask
= lp_build_cmp(uint_bld
,
1552 emitted_vertices_vec
,
1554 mask
= LLVMBuildAnd(builder
, mask
, emitted_mask
, "");
1556 bld
->gs_iface
->end_primitive(bld
->gs_iface
, &bld
->bld_base
.base
,
1557 total_emitted_vertices_vec
,
1558 emitted_vertices_vec
, emitted_prims_vec
, mask_vec(bld_base
));
1559 increment_vec_ptr_by_mask(bld_base
, bld
->emitted_prims_vec_ptr
[stream_id
],
1561 clear_uint_vec_ptr_from_mask(bld_base
, bld
->emitted_vertices_vec_ptr
[stream_id
],
1565 static void end_primitive(struct lp_build_nir_context
*bld_base
, uint32_t stream_id
)
1567 ASSERTED
struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1569 assert(bld
->gs_iface
->end_primitive
);
1571 LLVMValueRef mask
= mask_vec(bld_base
);
1572 end_primitive_masked(bld_base
, mask
, stream_id
);
1576 emit_prologue(struct lp_build_nir_soa_context
*bld
)
1578 struct gallivm_state
* gallivm
= bld
->bld_base
.base
.gallivm
;
1579 if (bld
->indirects
& nir_var_shader_in
&& !bld
->gs_iface
&& !bld
->tcs_iface
&& !bld
->tes_iface
) {
1580 uint32_t num_inputs
= util_bitcount64(bld
->bld_base
.shader
->info
.inputs_read
);
1581 unsigned index
, chan
;
1582 LLVMTypeRef vec_type
= bld
->bld_base
.base
.vec_type
;
1583 LLVMValueRef array_size
= lp_build_const_int32(gallivm
, num_inputs
* 4);
1584 bld
->inputs_array
= lp_build_array_alloca(gallivm
,
1585 vec_type
, array_size
,
1588 for (index
= 0; index
< num_inputs
; ++index
) {
1589 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; ++chan
) {
1590 LLVMValueRef lindex
=
1591 lp_build_const_int32(gallivm
, index
* 4 + chan
);
1592 LLVMValueRef input_ptr
=
1593 LLVMBuildGEP(gallivm
->builder
, bld
->inputs_array
,
1595 LLVMValueRef value
= bld
->inputs
[index
][chan
];
1597 LLVMBuildStore(gallivm
->builder
, value
, input_ptr
);
1603 static void emit_vote(struct lp_build_nir_context
*bld_base
, LLVMValueRef src
, nir_intrinsic_instr
*instr
, LLVMValueRef result
[4])
1605 struct gallivm_state
* gallivm
= bld_base
->base
.gallivm
;
1606 LLVMBuilderRef builder
= gallivm
->builder
;
1608 LLVMValueRef exec_mask
= mask_vec(bld_base
);
1609 struct lp_build_loop_state loop_state
;
1611 LLVMValueRef outer_cond
= LLVMBuildICmp(builder
, LLVMIntNE
, exec_mask
, bld_base
->uint_bld
.zero
, "");
1613 LLVMValueRef res_store
= lp_build_alloca(gallivm
, bld_base
->int_bld
.elem_type
, "");
1614 LLVMValueRef init_val
= NULL
;
1615 if (instr
->intrinsic
== nir_intrinsic_vote_ieq
) {
1616 /* for equal we unfortunately have to loop and find the first valid one. */
1617 lp_build_loop_begin(&loop_state
, gallivm
, lp_build_const_int32(gallivm
, 0));
1618 LLVMValueRef if_cond
= LLVMBuildExtractElement(gallivm
->builder
, outer_cond
, loop_state
.counter
, "");
1620 struct lp_build_if_state ifthen
;
1621 lp_build_if(&ifthen
, gallivm
, if_cond
);
1622 LLVMValueRef value_ptr
= LLVMBuildExtractElement(gallivm
->builder
, src
,
1623 loop_state
.counter
, "");
1624 LLVMBuildStore(builder
, value_ptr
, res_store
);
1625 lp_build_endif(&ifthen
);
1626 lp_build_loop_end_cond(&loop_state
, lp_build_const_int32(gallivm
, bld_base
->uint_bld
.type
.length
),
1628 init_val
= LLVMBuildLoad(builder
, res_store
, "");
1630 LLVMBuildStore(builder
, lp_build_const_int32(gallivm
, instr
->intrinsic
== nir_intrinsic_vote_any
? 0 : -1), res_store
);
1634 lp_build_loop_begin(&loop_state
, gallivm
, lp_build_const_int32(gallivm
, 0));
1635 LLVMValueRef value_ptr
= LLVMBuildExtractElement(gallivm
->builder
, src
,
1636 loop_state
.counter
, "");
1637 struct lp_build_if_state ifthen
;
1638 LLVMValueRef if_cond
;
1639 if_cond
= LLVMBuildExtractElement(gallivm
->builder
, outer_cond
, loop_state
.counter
, "");
1641 lp_build_if(&ifthen
, gallivm
, if_cond
);
1642 res
= LLVMBuildLoad(builder
, res_store
, "");
1644 if (instr
->intrinsic
== nir_intrinsic_vote_ieq
) {
1645 LLVMValueRef tmp
= LLVMBuildICmp(builder
, LLVMIntEQ
, init_val
, value_ptr
, "");
1646 tmp
= LLVMBuildSExt(builder
, tmp
, bld_base
->uint_bld
.elem_type
, "");
1647 res
= LLVMBuildOr(builder
, res
, tmp
, "");
1648 } else if (instr
->intrinsic
== nir_intrinsic_vote_any
)
1649 res
= LLVMBuildOr(builder
, res
, value_ptr
, "");
1651 res
= LLVMBuildAnd(builder
, res
, value_ptr
, "");
1652 LLVMBuildStore(builder
, res
, res_store
);
1653 lp_build_endif(&ifthen
);
1654 lp_build_loop_end_cond(&loop_state
, lp_build_const_int32(gallivm
, bld_base
->uint_bld
.type
.length
),
1656 result
[0] = lp_build_broadcast_scalar(&bld_base
->uint_bld
, LLVMBuildLoad(builder
, res_store
, ""));
1660 emit_interp_at(struct lp_build_nir_context
*bld_base
,
1661 unsigned num_components
,
1665 unsigned const_index
,
1666 LLVMValueRef indir_index
,
1667 LLVMValueRef offsets
[2],
1668 LLVMValueRef dst
[4])
1670 struct lp_build_nir_soa_context
*bld
= (struct lp_build_nir_soa_context
*)bld_base
;
1672 for (unsigned i
= 0; i
< num_components
; i
++) {
1673 dst
[i
] = bld
->fs_iface
->interp_fn(bld
->fs_iface
, &bld_base
->base
,
1674 const_index
+ var
->data
.driver_location
, i
+ var
->data
.location_frac
,
1675 centroid
, sample
, indir_index
, offsets
);
1679 void lp_build_nir_soa(struct gallivm_state
*gallivm
,
1680 struct nir_shader
*shader
,
1681 const struct lp_build_tgsi_params
*params
,
1682 LLVMValueRef (*outputs
)[4])
1684 struct lp_build_nir_soa_context bld
;
1685 struct lp_type type
= params
->type
;
1686 struct lp_type res_type
;
1688 assert(type
.length
<= LP_MAX_VECTOR_LENGTH
);
1689 memset(&res_type
, 0, sizeof res_type
);
1690 res_type
.width
= type
.width
;
1691 res_type
.length
= type
.length
;
1694 /* Setup build context */
1695 memset(&bld
, 0, sizeof bld
);
1696 lp_build_context_init(&bld
.bld_base
.base
, gallivm
, type
);
1697 lp_build_context_init(&bld
.bld_base
.uint_bld
, gallivm
, lp_uint_type(type
));
1698 lp_build_context_init(&bld
.bld_base
.int_bld
, gallivm
, lp_int_type(type
));
1699 lp_build_context_init(&bld
.elem_bld
, gallivm
, lp_elem_type(type
));
1700 lp_build_context_init(&bld
.uint_elem_bld
, gallivm
, lp_elem_type(lp_uint_type(type
)));
1702 struct lp_type dbl_type
;
1704 dbl_type
.width
*= 2;
1705 lp_build_context_init(&bld
.bld_base
.dbl_bld
, gallivm
, dbl_type
);
1708 struct lp_type uint64_type
;
1709 uint64_type
= lp_uint_type(type
);
1710 uint64_type
.width
*= 2;
1711 lp_build_context_init(&bld
.bld_base
.uint64_bld
, gallivm
, uint64_type
);
1714 struct lp_type int64_type
;
1715 int64_type
= lp_int_type(type
);
1716 int64_type
.width
*= 2;
1717 lp_build_context_init(&bld
.bld_base
.int64_bld
, gallivm
, int64_type
);
1720 struct lp_type uint16_type
;
1721 uint16_type
= lp_uint_type(type
);
1722 uint16_type
.width
/= 2;
1723 lp_build_context_init(&bld
.bld_base
.uint16_bld
, gallivm
, uint16_type
);
1726 struct lp_type int16_type
;
1727 int16_type
= lp_int_type(type
);
1728 int16_type
.width
/= 2;
1729 lp_build_context_init(&bld
.bld_base
.int16_bld
, gallivm
, int16_type
);
1732 struct lp_type uint8_type
;
1733 uint8_type
= lp_uint_type(type
);
1734 uint8_type
.width
/= 4;
1735 lp_build_context_init(&bld
.bld_base
.uint8_bld
, gallivm
, uint8_type
);
1738 struct lp_type int8_type
;
1739 int8_type
= lp_int_type(type
);
1740 int8_type
.width
/= 4;
1741 lp_build_context_init(&bld
.bld_base
.int8_bld
, gallivm
, int8_type
);
1743 bld
.bld_base
.load_var
= emit_load_var
;
1744 bld
.bld_base
.store_var
= emit_store_var
;
1745 bld
.bld_base
.load_reg
= emit_load_reg
;
1746 bld
.bld_base
.store_reg
= emit_store_reg
;
1747 bld
.bld_base
.emit_var_decl
= emit_var_decl
;
1748 bld
.bld_base
.load_ubo
= emit_load_ubo
;
1749 bld
.bld_base
.load_kernel_arg
= emit_load_kernel_arg
;
1750 bld
.bld_base
.load_global
= emit_load_global
;
1751 bld
.bld_base
.store_global
= emit_store_global
;
1752 bld
.bld_base
.atomic_global
= emit_atomic_global
;
1753 bld
.bld_base
.tex
= emit_tex
;
1754 bld
.bld_base
.tex_size
= emit_tex_size
;
1755 bld
.bld_base
.bgnloop
= bgnloop
;
1756 bld
.bld_base
.endloop
= endloop
;
1757 bld
.bld_base
.if_cond
= if_cond
;
1758 bld
.bld_base
.else_stmt
= else_stmt
;
1759 bld
.bld_base
.endif_stmt
= endif_stmt
;
1760 bld
.bld_base
.break_stmt
= break_stmt
;
1761 bld
.bld_base
.continue_stmt
= continue_stmt
;
1762 bld
.bld_base
.sysval_intrin
= emit_sysval_intrin
;
1763 bld
.bld_base
.discard
= discard
;
1764 bld
.bld_base
.emit_vertex
= emit_vertex
;
1765 bld
.bld_base
.end_primitive
= end_primitive
;
1766 bld
.bld_base
.load_mem
= emit_load_mem
;
1767 bld
.bld_base
.store_mem
= emit_store_mem
;
1768 bld
.bld_base
.get_buffer_size
= emit_get_buffer_size
;
1769 bld
.bld_base
.atomic_mem
= emit_atomic_mem
;
1770 bld
.bld_base
.barrier
= emit_barrier
;
1771 bld
.bld_base
.image_op
= emit_image_op
;
1772 bld
.bld_base
.image_size
= emit_image_size
;
1773 bld
.bld_base
.vote
= emit_vote
;
1774 bld
.bld_base
.helper_invocation
= emit_helper_invocation
;
1775 bld
.bld_base
.interp_at
= emit_interp_at
;
1777 bld
.mask
= params
->mask
;
1778 bld
.inputs
= params
->inputs
;
1779 bld
.outputs
= outputs
;
1780 bld
.consts_ptr
= params
->consts_ptr
;
1781 bld
.const_sizes_ptr
= params
->const_sizes_ptr
;
1782 bld
.ssbo_ptr
= params
->ssbo_ptr
;
1783 bld
.ssbo_sizes_ptr
= params
->ssbo_sizes_ptr
;
1784 bld
.sampler
= params
->sampler
;
1785 // bld.bld_base.info = params->info;
1787 bld
.context_ptr
= params
->context_ptr
;
1788 bld
.thread_data_ptr
= params
->thread_data_ptr
;
1789 bld
.image
= params
->image
;
1790 bld
.shared_ptr
= params
->shared_ptr
;
1791 bld
.coro
= params
->coro
;
1792 bld
.kernel_args_ptr
= params
->kernel_args
;
1794 if (params
->info
->indirect_files
& (1 << TGSI_FILE_INPUT
))
1795 bld
.indirects
|= nir_var_shader_in
;
1797 bld
.gs_iface
= params
->gs_iface
;
1798 bld
.tcs_iface
= params
->tcs_iface
;
1799 bld
.tes_iface
= params
->tes_iface
;
1800 bld
.fs_iface
= params
->fs_iface
;
1802 struct lp_build_context
*uint_bld
= &bld
.bld_base
.uint_bld
;
1804 bld
.max_output_vertices_vec
= lp_build_const_int_vec(gallivm
, bld
.bld_base
.int_bld
.type
,
1805 shader
->info
.gs
.vertices_out
);
1806 for (int i
= 0; i
< PIPE_MAX_VERTEX_STREAMS
; i
++) {
1807 bld
.emitted_prims_vec_ptr
[i
] =
1808 lp_build_alloca(gallivm
, uint_bld
->vec_type
, "emitted_prims_ptr");
1809 bld
.emitted_vertices_vec_ptr
[i
] =
1810 lp_build_alloca(gallivm
, uint_bld
->vec_type
, "emitted_vertices_ptr");
1811 bld
.total_emitted_vertices_vec_ptr
[i
] =
1812 lp_build_alloca(gallivm
, uint_bld
->vec_type
, "total_emitted_vertices_ptr");
1815 lp_exec_mask_init(&bld
.exec_mask
, &bld
.bld_base
.int_bld
);
1817 bld
.system_values
= *params
->system_values
;
1819 bld
.bld_base
.shader
= shader
;
1821 emit_prologue(&bld
);
1822 lp_build_nir_llvm(&bld
.bld_base
, shader
);
1825 LLVMBuilderRef builder
= bld
.bld_base
.base
.gallivm
->builder
;
1826 LLVMValueRef total_emitted_vertices_vec
;
1827 LLVMValueRef emitted_prims_vec
;
1829 end_primitive_masked(&bld
.bld_base
, lp_build_mask_value(bld
.mask
), 0);
1830 for (int i
= 0; i
< PIPE_MAX_VERTEX_STREAMS
; i
++) {
1831 total_emitted_vertices_vec
=
1832 LLVMBuildLoad(builder
, bld
.total_emitted_vertices_vec_ptr
[i
], "");
1835 LLVMBuildLoad(builder
, bld
.emitted_prims_vec_ptr
[i
], "");
1836 bld
.gs_iface
->gs_epilogue(bld
.gs_iface
,
1837 total_emitted_vertices_vec
,
1838 emitted_prims_vec
, i
);
1841 lp_exec_mask_fini(&bld
.exec_mask
);