gallivm: add base instance sysval support
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "util/u_prim.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_info.h"
48 #include "tgsi/tgsi_parse.h"
49 #include "tgsi/tgsi_util.h"
50 #include "tgsi/tgsi_scan.h"
51 #include "tgsi/tgsi_strings.h"
52 #include "lp_bld_tgsi_action.h"
53 #include "lp_bld_type.h"
54 #include "lp_bld_const.h"
55 #include "lp_bld_arit.h"
56 #include "lp_bld_bitarit.h"
57 #include "lp_bld_gather.h"
58 #include "lp_bld_init.h"
59 #include "lp_bld_logic.h"
60 #include "lp_bld_misc.h"
61 #include "lp_bld_swizzle.h"
62 #include "lp_bld_flow.h"
63 #include "lp_bld_coro.h"
64 #include "lp_bld_quad.h"
65 #include "lp_bld_tgsi.h"
66 #include "lp_bld_limits.h"
67 #include "lp_bld_debug.h"
68 #include "lp_bld_printf.h"
69 #include "lp_bld_sample.h"
70 #include "lp_bld_struct.h"
71
72 #define DUMP_GS_EMITS 0
73
74 /*
75 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
76 * instruction.
77 *
78 * TODO:
79 * - take execution masks in consideration
80 * - debug control-flow instructions
81 */
82 #define DEBUG_EXECUTION 0
83
84
85 /*
86 * Emit code to print a register value.
87 */
88 static void
89 emit_dump_reg(struct gallivm_state *gallivm,
90 unsigned file,
91 unsigned index,
92 unsigned chan,
93 LLVMValueRef value)
94 {
95 char buf[32];
96
97 snprintf(buf, sizeof buf, " %s[%u].%c = ",
98 tgsi_file_name(file),
99 index, "xyzw"[chan]);
100
101 lp_build_print_value(gallivm, buf, value);
102 }
103
104 static inline struct function_ctx *
105 func_ctx(struct lp_exec_mask *mask)
106 {
107 assert(mask->function_stack_size > 0);
108 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
109 return &mask->function_stack[mask->function_stack_size - 1];
110 }
111
112 /*
113 * combine the execution mask if there is one with the current mask.
114 */
115 static LLVMValueRef
116 mask_vec(struct lp_build_tgsi_context *bld_base)
117 {
118 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
119 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
120 struct lp_exec_mask *exec_mask = &bld->exec_mask;
121 LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
122 if (!exec_mask->has_mask) {
123 return bld_mask;
124 }
125 if (!bld_mask)
126 return exec_mask->exec_mask;
127 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
128 exec_mask->exec_mask, "");
129 }
130
131 static void lp_exec_tgsi_break(struct lp_exec_mask *mask,
132 struct lp_build_tgsi_context * bld_base)
133 {
134 enum tgsi_opcode opcode =
135 bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
136 bool break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
137 opcode == TGSI_OPCODE_CASE);
138 lp_exec_break(mask, &bld_base->pc, break_always);
139 }
140
141 static void lp_exec_switch(struct lp_exec_mask *mask,
142 LLVMValueRef switchval)
143 {
144 struct function_ctx *ctx = func_ctx(mask);
145
146 if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
147 ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
148 ctx->switch_stack_size++;
149 return;
150 }
151
152 ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
153 ctx->break_type;
154 ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
155
156 ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
157 ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
158 ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
159 ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
160 ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
161 ctx->switch_stack_size++;
162
163 mask->switch_mask = LLVMConstNull(mask->int_vec_type);
164 ctx->switch_val = switchval;
165 ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
166 ctx->switch_in_default = false;
167 ctx->switch_pc = 0;
168
169 lp_exec_mask_update(mask);
170 }
171
172 static void lp_exec_endswitch(struct lp_exec_mask *mask,
173 struct lp_build_tgsi_context * bld_base)
174 {
175 LLVMBuilderRef builder = mask->bld->gallivm->builder;
176 struct function_ctx *ctx = func_ctx(mask);
177
178 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
179 ctx->switch_stack_size--;
180 return;
181 }
182
183 /* check if there's deferred default if so do it now */
184 if (ctx->switch_pc && !ctx->switch_in_default) {
185 LLVMValueRef prevmask, defaultmask;
186 unsigned tmp_pc;
187 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
188 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
189 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
190 ctx->switch_in_default = true;
191
192 lp_exec_mask_update(mask);
193
194 assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
195 TGSI_OPCODE_DEFAULT);
196
197 tmp_pc = bld_base->pc;
198 bld_base->pc = ctx->switch_pc;
199 /*
200 * re-purpose switch_pc to point to here again, since we stop execution of
201 * the deferred default after next break.
202 */
203 ctx->switch_pc = tmp_pc - 1;
204
205 return;
206 }
207
208 else if (ctx->switch_pc && ctx->switch_in_default) {
209 assert(bld_base->pc == ctx->switch_pc + 1);
210 }
211
212 ctx->switch_stack_size--;
213 mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
214 ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
215 ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
216 ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
217 ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
218
219 ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
220
221 lp_exec_mask_update(mask);
222 }
223
224 static void lp_exec_case(struct lp_exec_mask *mask,
225 LLVMValueRef caseval)
226 {
227 LLVMBuilderRef builder = mask->bld->gallivm->builder;
228 struct function_ctx *ctx = func_ctx(mask);
229
230 LLVMValueRef casemask, prevmask;
231
232 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
233 return;
234 }
235
236 /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
237 if (!ctx->switch_in_default) {
238 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
239 casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
240 ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
241 ctx->switch_mask_default, "sw_default_mask");
242 casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
243 mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
244
245 lp_exec_mask_update(mask);
246 }
247 }
248
249 /*
250 * Analyse default statement in a switch.
251 * \return true if default is last statement, false otherwise
252 * \param default_pc_start contains pc of instruction to jump to
253 * if default wasn't last but there's no
254 * fallthrough into default.
255 */
256 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
257 struct lp_build_tgsi_context * bld_base,
258 int *default_pc_start)
259 {
260 unsigned pc = bld_base->pc;
261 struct function_ctx *ctx = func_ctx(mask);
262 int curr_switch_stack = ctx->switch_stack_size;
263
264 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
265 return false;
266 }
267
268 /* skip over case statements which are together with default */
269 while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
270 pc++;
271 }
272
273 while (pc != ~0u && pc < bld_base->num_instructions) {
274 enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
275 switch (opcode) {
276 case TGSI_OPCODE_CASE:
277 if (curr_switch_stack == ctx->switch_stack_size) {
278 *default_pc_start = pc - 1;
279 return false;
280 }
281 break;
282 case TGSI_OPCODE_SWITCH:
283 curr_switch_stack++;
284 break;
285 case TGSI_OPCODE_ENDSWITCH:
286 if (curr_switch_stack == ctx->switch_stack_size) {
287 *default_pc_start = pc - 1;
288 return true;
289 }
290 curr_switch_stack--;
291 break;
292 default:
293 ; /* nothing */
294 }
295 pc++;
296 }
297 /* should never arrive here */
298 assert(0);
299 return true;
300 }
301
302 static void lp_exec_default(struct lp_exec_mask *mask,
303 struct lp_build_tgsi_context * bld_base)
304 {
305 LLVMBuilderRef builder = mask->bld->gallivm->builder;
306 struct function_ctx *ctx = func_ctx(mask);
307
308 int default_exec_pc;
309 boolean default_is_last;
310
311 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
312 return;
313 }
314
315 /*
316 * This is a messy opcode, because it may not be always at the end and
317 * there can be fallthrough in and out of it.
318 */
319
320 default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
321 /*
322 * If it is last statement in switch (note that case statements appearing
323 * "at the same time" as default don't change that) everything is just fine,
324 * update switch mask and go on. This means we can handle default with
325 * fallthrough INTO it without overhead, if it is last.
326 */
327 if (default_is_last) {
328 LLVMValueRef prevmask, defaultmask;
329 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
330 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
331 defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
332 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
333 ctx->switch_in_default = true;
334
335 lp_exec_mask_update(mask);
336 }
337 else {
338 /*
339 * Technically, "case" immediately before default isn't really a
340 * fallthrough, however we still have to count them as such as we
341 * already have updated the masks.
342 * If that happens in practice could add a switch optimizer pass
343 * which just gets rid of all case statements appearing together with
344 * default (or could do switch analysis at switch start time instead).
345 */
346 enum tgsi_opcode opcode =
347 bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
348 boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
349 opcode != TGSI_OPCODE_SWITCH);
350 /*
351 * If it is not last statement and there was no fallthrough into it,
352 * we record the PC and continue execution at next case (again, those
353 * case encountered at the same time don't count). At endswitch
354 * time, we update switchmask, and go back executing the code we skipped
355 * until the next break (possibly re-executing some code with changed mask
356 * if there was a fallthrough out of default).
357 * Finally, if it is not last statement and there was a fallthrough into it,
358 * do the same as with the former case, except instead of skipping the code
359 * just execute it without updating the mask, then go back and re-execute.
360 */
361 ctx->switch_pc = bld_base->pc;
362 if (!ft_into) {
363 bld_base->pc = default_exec_pc;
364 }
365 }
366 }
367
368
369 static void lp_exec_mask_call(struct lp_exec_mask *mask,
370 int func,
371 int *pc)
372 {
373 if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
374 return;
375 }
376
377 lp_exec_mask_function_init(mask, mask->function_stack_size);
378 mask->function_stack[mask->function_stack_size].pc = *pc;
379 mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
380 mask->function_stack_size++;
381 *pc = func;
382 }
383
384 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
385 {
386 LLVMBuilderRef builder = mask->bld->gallivm->builder;
387 struct function_ctx *ctx = func_ctx(mask);
388 LLVMValueRef exec_mask;
389
390 if (ctx->cond_stack_size == 0 &&
391 ctx->loop_stack_size == 0 &&
392 ctx->switch_stack_size == 0 &&
393 mask->function_stack_size == 1) {
394 /* returning from main() */
395 *pc = -1;
396 return;
397 }
398
399 if (mask->function_stack_size == 1) {
400 /*
401 * This requires special handling since we need to ensure
402 * we don't drop the mask even if we have no call stack
403 * (e.g. after a ret in a if clause after the endif)
404 */
405 mask->ret_in_main = TRUE;
406 }
407
408 exec_mask = LLVMBuildNot(builder,
409 mask->exec_mask,
410 "ret");
411
412 mask->ret_mask = LLVMBuildAnd(builder,
413 mask->ret_mask,
414 exec_mask, "ret_full");
415
416 lp_exec_mask_update(mask);
417 }
418
/**
 * Hook for the beginning of a subroutine.  The exec mask needs no update
 * here, so the function is deliberately a no-op.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
   /* nothing to do */
}
422
423 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
424 {
425 struct function_ctx *ctx;
426
427 assert(mask->function_stack_size > 1);
428 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
429
430 ctx = func_ctx(mask);
431 mask->function_stack_size--;
432
433 *pc = ctx->pc;
434 mask->ret_mask = ctx->ret_mask;
435
436 lp_exec_mask_update(mask);
437 }
438
439
440 static LLVMValueRef
441 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
442 unsigned file,
443 int index,
444 unsigned chan)
445 {
446 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
447 LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
448 LLVMValueRef var_of_array;
449
450 switch (file) {
451 case TGSI_FILE_TEMPORARY:
452 array_of_vars = bld->temps;
453 var_of_array = bld->temps_array;
454 break;
455 case TGSI_FILE_OUTPUT:
456 array_of_vars = bld->outputs;
457 var_of_array = bld->outputs_array;
458 break;
459 default:
460 assert(0);
461 return NULL;
462 }
463
464 assert(chan < 4);
465
466 if (bld->indirect_files & (1 << file)) {
467 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
468 if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
469 LLVMValueRef gep[2];
470 gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
471 gep[1] = lindex;
472 return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
473 } else {
474 return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
475 }
476 }
477 else {
478 assert(index <= bld->bld_base.info->file_max[file]);
479 return array_of_vars[index][chan];
480 }
481 }
482
483
484 /**
485 * Return pointer to a temporary register channel (src or dest).
486 * Note that indirect addressing cannot be handled here.
487 * \param index which temporary register
488 * \param chan which channel of the temp register.
489 */
490 LLVMValueRef
491 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
492 unsigned index,
493 unsigned chan)
494 {
495 return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
496 }
497
498 /**
499 * Return pointer to a output register channel (src or dest).
500 * Note that indirect addressing cannot be handled here.
501 * \param index which output register
502 * \param chan which channel of the output register.
503 */
504 LLVMValueRef
505 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
506 unsigned index,
507 unsigned chan)
508 {
509 return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
510 }
511
512 /*
513 * If we have indirect addressing in outputs copy our alloca array
514 * to the outputs slots specified by the caller to make sure
515 * our outputs are delivered consistently via the same interface.
516 */
517 static void
518 gather_outputs(struct lp_build_tgsi_soa_context * bld)
519 {
520 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
521 unsigned index, chan;
522 assert(bld->bld_base.info->num_outputs <=
523 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
524 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
525 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
526 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
527 }
528 }
529 }
530 }
531
532 /**
533 * Gather vector.
534 * XXX the lp_build_gather() function should be capable of doing this
535 * with a little work.
536 */
537 static LLVMValueRef
538 build_gather(struct lp_build_tgsi_context *bld_base,
539 LLVMValueRef base_ptr,
540 LLVMValueRef indexes,
541 LLVMValueRef overflow_mask,
542 LLVMValueRef indexes2)
543 {
544 struct gallivm_state *gallivm = bld_base->base.gallivm;
545 LLVMBuilderRef builder = gallivm->builder;
546 struct lp_build_context *uint_bld = &bld_base->uint_bld;
547 struct lp_build_context *bld = &bld_base->base;
548 LLVMValueRef res;
549 unsigned i;
550
551 if (indexes2)
552 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
553 else
554 res = bld->undef;
555 /*
556 * overflow_mask is a vector telling us which channels
557 * in the vector overflowed. We use the overflow behavior for
558 * constant buffers which is defined as:
559 * Out of bounds access to constant buffer returns 0 in all
560 * components. Out of bounds behavior is always with respect
561 * to the size of the buffer bound at that slot.
562 */
563
564 if (overflow_mask) {
565 /*
566 * We avoid per-element control flow here (also due to llvm going crazy,
567 * though I suspect it's better anyway since overflow is likely rare).
568 * Note that since we still fetch from buffers even if num_elements was
569 * zero (in this case we'll fetch from index zero) the jit func callers
570 * MUST provide valid fake constant buffers of size 4x32 (the values do
571 * not matter), otherwise we'd still need (not per element though)
572 * control flow.
573 */
574 indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
575 if (indexes2)
576 indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
577 }
578
579 /*
580 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
581 */
582 for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
583 LLVMValueRef si, di;
584 LLVMValueRef index;
585 LLVMValueRef scalar_ptr, scalar;
586
587 di = lp_build_const_int32(bld->gallivm, i);
588 if (indexes2)
589 si = lp_build_const_int32(bld->gallivm, i >> 1);
590 else
591 si = di;
592
593 if (indexes2 && (i & 1)) {
594 index = LLVMBuildExtractElement(builder,
595 indexes2, si, "");
596 } else {
597 index = LLVMBuildExtractElement(builder,
598 indexes, si, "");
599 }
600 scalar_ptr = LLVMBuildGEP(builder, base_ptr,
601 &index, 1, "gather_ptr");
602 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
603
604 res = LLVMBuildInsertElement(builder, res, scalar, di, "");
605 }
606
607 if (overflow_mask) {
608 if (indexes2) {
609 res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
610 overflow_mask = LLVMBuildSExt(builder, overflow_mask,
611 bld_base->dbl_bld.int_vec_type, "");
612 res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
613 bld_base->dbl_bld.zero, res);
614 } else
615 res = lp_build_select(bld, overflow_mask, bld->zero, res);
616 }
617
618 return res;
619 }
620
621
622 /**
623 * Scatter/store vector.
624 */
625 static void
626 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
627 LLVMValueRef base_ptr,
628 LLVMValueRef indexes,
629 LLVMValueRef values,
630 struct lp_exec_mask *mask)
631 {
632 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
633 LLVMBuilderRef builder = gallivm->builder;
634 unsigned i;
635 LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
636
637 /*
638 * Loop over elements of index_vec, store scalar value.
639 */
640 for (i = 0; i < bld->bld_base.base.type.length; i++) {
641 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
642 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
643 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
644 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
645 LLVMValueRef scalar_pred = pred ?
646 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
647
648 if (0)
649 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
650 ii, val, index, scalar_ptr);
651
652 if (scalar_pred) {
653 LLVMValueRef real_val, dst_val;
654 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
655 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
656 LLVMBuildStore(builder, real_val, scalar_ptr);
657 }
658 else {
659 LLVMBuildStore(builder, val, scalar_ptr);
660 }
661 }
662 }
663
664
665 /**
666 * Read the current value of the ADDR register, convert the floats to
667 * ints, add the base index and return the vector of offsets.
668 * The offsets will be used to index into the constant buffer or
669 * temporary register file.
670 */
671 static LLVMValueRef
672 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
673 unsigned reg_file, unsigned reg_index,
674 const struct tgsi_ind_register *indirect_reg,
675 int index_limit)
676 {
677 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
678 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
679 /* always use X component of address register */
680 unsigned swizzle = indirect_reg->Swizzle;
681 LLVMValueRef base;
682 LLVMValueRef rel;
683 LLVMValueRef max_index;
684 LLVMValueRef index;
685
686 assert(bld->indirect_files & (1 << reg_file));
687
688 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
689
690 assert(swizzle < 4);
691 switch (indirect_reg->File) {
692 case TGSI_FILE_ADDRESS:
693 rel = LLVMBuildLoad(builder,
694 bld->addr[indirect_reg->Index][swizzle],
695 "load addr reg");
696 /* ADDR LLVM values already have LLVM integer type. */
697 break;
698 case TGSI_FILE_TEMPORARY:
699 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
700 rel = LLVMBuildLoad(builder, rel, "load temp reg");
701 /* TEMP LLVM values always have LLVM float type, but for indirection, the
702 * value actually stored is expected to be an integer */
703 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
704 break;
705 default:
706 assert(0);
707 rel = uint_bld->zero;
708 }
709
710 index = lp_build_add(uint_bld, base, rel);
711
712 /*
713 * emit_fetch_constant handles constant buffer overflow so this code
714 * is pointless for them.
715 * Furthermore the D3D10 spec in section 6.5 says:
716 * If the constant buffer bound to a slot is larger than the size
717 * declared in the shader for that slot, implementations are allowed
718 * to return incorrect data (not necessarily 0) for indices that are
719 * larger than the declared size but smaller than the buffer size.
720 */
721 if (reg_file != TGSI_FILE_CONSTANT) {
722 assert(index_limit >= 0);
723 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
724 uint_bld->type, index_limit);
725
726 assert(!uint_bld->type.sign);
727 index = lp_build_min(uint_bld, index, max_index);
728 }
729
730 return index;
731 }
732
733 static struct lp_build_context *
734 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
735 enum tgsi_opcode_type stype)
736 {
737 struct lp_build_context *bld_fetch;
738
739 switch (stype) {
740 case TGSI_TYPE_FLOAT:
741 case TGSI_TYPE_UNTYPED:
742 bld_fetch = &bld_base->base;
743 break;
744 case TGSI_TYPE_UNSIGNED:
745 bld_fetch = &bld_base->uint_bld;
746 break;
747 case TGSI_TYPE_SIGNED:
748 bld_fetch = &bld_base->int_bld;
749 break;
750 case TGSI_TYPE_DOUBLE:
751 bld_fetch = &bld_base->dbl_bld;
752 break;
753 case TGSI_TYPE_UNSIGNED64:
754 bld_fetch = &bld_base->uint64_bld;
755 break;
756 case TGSI_TYPE_SIGNED64:
757 bld_fetch = &bld_base->int64_bld;
758 break;
759 case TGSI_TYPE_VOID:
760 default:
761 assert(0);
762 bld_fetch = NULL;
763 break;
764 }
765 return bld_fetch;
766 }
767
768 static LLVMValueRef
769 get_soa_array_offsets(struct lp_build_context *uint_bld,
770 LLVMValueRef indirect_index,
771 unsigned chan_index,
772 boolean need_perelement_offset)
773 {
774 struct gallivm_state *gallivm = uint_bld->gallivm;
775 LLVMValueRef chan_vec =
776 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
777 LLVMValueRef length_vec =
778 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
779 LLVMValueRef index_vec;
780
781 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
782 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
783 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
784 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
785
786 if (need_perelement_offset) {
787 LLVMValueRef pixel_offsets;
788 unsigned i;
789 /* build pixel offset vector: {0, 1, 2, 3, ...} */
790 pixel_offsets = uint_bld->undef;
791 for (i = 0; i < uint_bld->type.length; i++) {
792 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
793 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
794 ii, ii, "");
795 }
796 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
797 }
798 return index_vec;
799 }
800
801 static LLVMValueRef
802 emit_fetch_constant(
803 struct lp_build_tgsi_context * bld_base,
804 const struct tgsi_full_src_register * reg,
805 enum tgsi_opcode_type stype,
806 unsigned swizzle_in)
807 {
808 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
809 struct gallivm_state *gallivm = bld_base->base.gallivm;
810 LLVMBuilderRef builder = gallivm->builder;
811 struct lp_build_context *uint_bld = &bld_base->uint_bld;
812 unsigned dimension = 0;
813 LLVMValueRef consts_ptr;
814 LLVMValueRef num_consts;
815 LLVMValueRef res;
816 unsigned swizzle = swizzle_in & 0xffff;
817
818 /* XXX: Handle fetching xyzw components as a vector */
819 assert(swizzle != ~0u);
820
821 if (reg->Register.Dimension) {
822 assert(!reg->Dimension.Indirect);
823 dimension = reg->Dimension.Index;
824 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
825 }
826
827 consts_ptr = bld->consts[dimension];
828 num_consts = bld->consts_sizes[dimension];
829
830 if (reg->Register.Indirect) {
831 LLVMValueRef indirect_index;
832 LLVMValueRef swizzle_vec =
833 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
834 LLVMValueRef index_vec; /* index into the const buffer */
835 LLVMValueRef overflow_mask;
836 LLVMValueRef index_vec2 = NULL;
837
838 indirect_index = get_indirect_index(bld,
839 reg->Register.File,
840 reg->Register.Index,
841 &reg->Indirect,
842 bld->bld_base.info->file_max[reg->Register.File]);
843
844 /* All fetches are from the same constant buffer, so
845 * we need to propagate the size to a vector to do a
846 * vector comparison */
847 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
848 /* Construct a boolean vector telling us which channels
849 * overflow the bound constant buffer */
850 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
851 indirect_index, num_consts);
852
853 /* index_vec = indirect_index * 4 + swizzle */
854 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
855 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
856
857 if (tgsi_type_is_64bit(stype)) {
858 LLVMValueRef swizzle_vec2;
859 swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
860 index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
861 index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
862 }
863 /* Gather values from the constant buffer */
864 res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
865 }
866 else {
867 LLVMValueRef index; /* index into the const buffer */
868 LLVMValueRef scalar, scalar_ptr;
869 struct lp_build_context *bld_broad = &bld_base->base;
870 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
871
872 scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
873 &index, 1, "");
874
875 if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
876
877 LLVMValueRef scalar2, scalar2_ptr;
878 LLVMValueRef shuffles[2];
879 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));
880
881 scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
882 &index, 1, "");
883
884 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
885 scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
886 shuffles[0] = lp_build_const_int32(gallivm, 0);
887 shuffles[1] = lp_build_const_int32(gallivm, 1);
888
889 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
890 res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
891 res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
892 } else {
893 if (stype == TGSI_TYPE_DOUBLE) {
894 LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
895 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
896 bld_broad = &bld_base->dbl_bld;
897 } else if (stype == TGSI_TYPE_UNSIGNED64) {
898 LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
899 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
900 bld_broad = &bld_base->uint64_bld;
901 } else if (stype == TGSI_TYPE_SIGNED64) {
902 LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
903 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
904 bld_broad = &bld_base->int64_bld;
905 }
906 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
907 res = lp_build_broadcast_scalar(bld_broad, scalar);
908 }
909
910 }
911
912 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
913 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
914 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
915 }
916
917 return res;
918 }
919
920 /**
921 * Fetch 64-bit values from two separate channels.
922 * 64-bit values are stored split across two channels, like xy and zw.
923 * This function creates a set of vec_length*2 floats,
924 * extracts the values from the two channels,
925 * puts them in the correct place, then casts to vec_length 64-bits.
926 */
927 static LLVMValueRef
928 emit_fetch_64bit(
929 struct lp_build_tgsi_context * bld_base,
930 enum tgsi_opcode_type stype,
931 LLVMValueRef input,
932 LLVMValueRef input2)
933 {
934 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
935 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
936 LLVMBuilderRef builder = gallivm->builder;
937 LLVMValueRef res;
938 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
939 int i;
940 LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
941 int len = bld_base->base.type.length * 2;
942 assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
943
944 for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
945 shuffles[i] = lp_build_const_int32(gallivm, i / 2);
946 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
947 }
948 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
949
950 return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
951 }
952
953 static LLVMValueRef
954 emit_fetch_immediate(
955 struct lp_build_tgsi_context * bld_base,
956 const struct tgsi_full_src_register * reg,
957 enum tgsi_opcode_type stype,
958 unsigned swizzle_in)
959 {
960 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
961 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
962 LLVMBuilderRef builder = gallivm->builder;
963 LLVMValueRef res = NULL;
964 unsigned swizzle = swizzle_in & 0xffff;
965
966 if (bld->use_immediates_array || reg->Register.Indirect) {
967 LLVMValueRef imms_array;
968 LLVMTypeRef fptr_type;
969
970 /* cast imms_array pointer to float* */
971 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
972 imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
973
974 if (reg->Register.Indirect) {
975 LLVMValueRef indirect_index;
976 LLVMValueRef index_vec; /* index into the immediate register array */
977 LLVMValueRef index_vec2 = NULL;
978 indirect_index = get_indirect_index(bld,
979 reg->Register.File,
980 reg->Register.Index,
981 &reg->Indirect,
982 bld->bld_base.info->file_max[reg->Register.File]);
983 /*
984 * Unlike for other reg classes, adding pixel offsets is unnecessary -
985 * immediates are stored as full vectors (FIXME??? - might be better
986 * to store them the same as constants) but all elements are the same
987 * in any case.
988 */
989 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
990 indirect_index,
991 swizzle,
992 FALSE);
993 if (tgsi_type_is_64bit(stype))
994 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
995 indirect_index,
996 swizzle_in >> 16,
997 FALSE);
998 /* Gather values from the immediate register array */
999 res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
1000 } else {
1001 LLVMValueRef gep[2];
1002 gep[0] = lp_build_const_int32(gallivm, 0);
1003 gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1004 LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
1005 bld->imms_array, gep, 2, "");
1006 res = LLVMBuildLoad(builder, imms_ptr, "");
1007
1008 if (tgsi_type_is_64bit(stype)) {
1009 LLVMValueRef imms_ptr2;
1010 LLVMValueRef res2;
1011 gep[1] = lp_build_const_int32(gallivm,
1012 reg->Register.Index * 4 + (swizzle_in >> 16));
1013 imms_ptr2 = LLVMBuildGEP(builder,
1014 bld->imms_array, gep, 2, "");
1015 res2 = LLVMBuildLoad(builder, imms_ptr2, "");
1016 res = emit_fetch_64bit(bld_base, stype, res, res2);
1017 }
1018 }
1019 }
1020 else {
1021 res = bld->immediates[reg->Register.Index][swizzle];
1022 if (tgsi_type_is_64bit(stype))
1023 res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
1024 }
1025
1026 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1027 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1028 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1029 }
1030 return res;
1031 }
1032
1033 static LLVMValueRef
1034 emit_fetch_input(
1035 struct lp_build_tgsi_context * bld_base,
1036 const struct tgsi_full_src_register * reg,
1037 enum tgsi_opcode_type stype,
1038 unsigned swizzle_in)
1039 {
1040 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1041 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1042 LLVMBuilderRef builder = gallivm->builder;
1043 LLVMValueRef res;
1044 unsigned swizzle = swizzle_in & 0xffff;
1045
1046 if (reg->Register.Indirect) {
1047 LLVMValueRef indirect_index;
1048 LLVMValueRef index_vec; /* index into the input reg array */
1049 LLVMValueRef index_vec2 = NULL;
1050 LLVMValueRef inputs_array;
1051 LLVMTypeRef fptr_type;
1052
1053 indirect_index = get_indirect_index(bld,
1054 reg->Register.File,
1055 reg->Register.Index,
1056 &reg->Indirect,
1057 bld->bld_base.info->file_max[reg->Register.File]);
1058
1059 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1060 indirect_index,
1061 swizzle,
1062 TRUE);
1063 if (tgsi_type_is_64bit(stype)) {
1064 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1065 indirect_index,
1066 swizzle_in >> 16,
1067 TRUE);
1068 }
1069 /* cast inputs_array pointer to float* */
1070 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1071 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1072
1073 /* Gather values from the input register array */
1074 res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
1075 } else {
1076 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1077 LLVMValueRef lindex = lp_build_const_int32(gallivm,
1078 reg->Register.Index * 4 + swizzle);
1079 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1080 bld->inputs_array, &lindex, 1, "");
1081
1082 res = LLVMBuildLoad(builder, input_ptr, "");
1083 if (tgsi_type_is_64bit(stype)) {
1084 LLVMValueRef lindex1;
1085 LLVMValueRef input_ptr2;
1086 LLVMValueRef res2;
1087
1088 lindex1 = lp_build_const_int32(gallivm,
1089 reg->Register.Index * 4 + (swizzle_in >> 16));
1090 input_ptr2 = LLVMBuildGEP(builder,
1091 bld->inputs_array, &lindex1, 1, "");
1092 res2 = LLVMBuildLoad(builder, input_ptr2, "");
1093 res = emit_fetch_64bit(bld_base, stype, res, res2);
1094 }
1095 }
1096 else {
1097 res = bld->inputs[reg->Register.Index][swizzle];
1098 if (tgsi_type_is_64bit(stype))
1099 res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
1100 }
1101 }
1102
1103 assert(res);
1104
1105 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1106 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1107 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1108 }
1109
1110 return res;
1111 }
1112
1113
1114 static LLVMValueRef
1115 emit_fetch_gs_input(
1116 struct lp_build_tgsi_context * bld_base,
1117 const struct tgsi_full_src_register * reg,
1118 enum tgsi_opcode_type stype,
1119 unsigned swizzle_in)
1120 {
1121 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1122 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1123 const struct tgsi_shader_info *info = bld->bld_base.info;
1124 LLVMBuilderRef builder = gallivm->builder;
1125 LLVMValueRef attrib_index = NULL;
1126 LLVMValueRef vertex_index = NULL;
1127 unsigned swizzle = swizzle_in & 0xffff;
1128 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1129 LLVMValueRef res;
1130
1131 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1132 /* This is really a system value not a regular input */
1133 assert(!reg->Register.Indirect);
1134 assert(!reg->Dimension.Indirect);
1135 res = bld->system_values.prim_id;
1136 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1137 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1138 }
1139 return res;
1140 }
1141
1142 if (reg->Register.Indirect) {
1143 /*
1144 * XXX: this is possibly not quite the right value, since file_max may be
1145 * larger than the max attrib index, due to it being the max of declared
1146 * inputs AND the max vertices per prim (which is 6 for tri adj).
1147 * It should however be safe to use (since we always allocate
1148 * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1149 */
1150 int index_limit = info->file_max[reg->Register.File];
1151 attrib_index = get_indirect_index(bld,
1152 reg->Register.File,
1153 reg->Register.Index,
1154 &reg->Indirect,
1155 index_limit);
1156 } else {
1157 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1158 }
1159
1160 if (reg->Dimension.Indirect) {
1161 /*
1162 * A fixed 6 should do as well (which is what we allocate).
1163 */
1164 int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
1165 vertex_index = get_indirect_index(bld,
1166 reg->Register.File,
1167 reg->Dimension.Index,
1168 &reg->DimIndirect,
1169 index_limit);
1170 } else {
1171 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1172 }
1173
1174 res = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1175 reg->Dimension.Indirect,
1176 vertex_index,
1177 reg->Register.Indirect,
1178 attrib_index,
1179 swizzle_index);
1180
1181 assert(res);
1182 if (tgsi_type_is_64bit(stype)) {
1183 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1184 LLVMValueRef res2;
1185 res2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1186 reg->Dimension.Indirect,
1187 vertex_index,
1188 reg->Register.Indirect,
1189 attrib_index,
1190 swizzle_index);
1191 assert(res2);
1192 res = emit_fetch_64bit(bld_base, stype, res, res2);
1193 } else if (stype == TGSI_TYPE_UNSIGNED) {
1194 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1195 } else if (stype == TGSI_TYPE_SIGNED) {
1196 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1197 }
1198
1199 return res;
1200 }
1201
1202 static LLVMValueRef
1203 emit_fetch_temporary(
1204 struct lp_build_tgsi_context * bld_base,
1205 const struct tgsi_full_src_register * reg,
1206 enum tgsi_opcode_type stype,
1207 unsigned swizzle_in)
1208 {
1209 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1210 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1211 LLVMBuilderRef builder = gallivm->builder;
1212 LLVMValueRef res;
1213 unsigned swizzle = swizzle_in & 0xffff;
1214
1215 if (reg->Register.Indirect) {
1216 LLVMValueRef indirect_index;
1217 LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */
1218 LLVMValueRef temps_array;
1219 LLVMTypeRef fptr_type;
1220
1221 indirect_index = get_indirect_index(bld,
1222 reg->Register.File,
1223 reg->Register.Index,
1224 &reg->Indirect,
1225 bld->bld_base.info->file_max[reg->Register.File]);
1226
1227 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1228 indirect_index,
1229 swizzle,
1230 TRUE);
1231 if (tgsi_type_is_64bit(stype)) {
1232 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1233 indirect_index,
1234 swizzle_in >> 16,
1235 TRUE);
1236 }
1237
1238 /* cast temps_array pointer to float* */
1239 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1240 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1241
1242 /* Gather values from the temporary register array */
1243 res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
1244 }
1245 else {
1246 LLVMValueRef temp_ptr;
1247 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1248 res = LLVMBuildLoad(builder, temp_ptr, "");
1249
1250 if (tgsi_type_is_64bit(stype)) {
1251 LLVMValueRef temp_ptr2, res2;
1252
1253 temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
1254 res2 = LLVMBuildLoad(builder, temp_ptr2, "");
1255 res = emit_fetch_64bit(bld_base, stype, res, res2);
1256 }
1257 }
1258
1259 if (stype == TGSI_TYPE_SIGNED ||
1260 stype == TGSI_TYPE_UNSIGNED ||
1261 stype == TGSI_TYPE_DOUBLE ||
1262 stype == TGSI_TYPE_SIGNED64 ||
1263 stype == TGSI_TYPE_UNSIGNED64) {
1264 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1265 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1266 }
1267
1268 return res;
1269 }
1270
1271 static LLVMValueRef
1272 emit_fetch_system_value(
1273 struct lp_build_tgsi_context * bld_base,
1274 const struct tgsi_full_src_register * reg,
1275 enum tgsi_opcode_type stype,
1276 unsigned swizzle_in)
1277 {
1278 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1279 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1280 const struct tgsi_shader_info *info = bld->bld_base.info;
1281 LLVMBuilderRef builder = gallivm->builder;
1282 LLVMValueRef res;
1283 enum tgsi_opcode_type atype; // Actual type of the value
1284 unsigned swizzle = swizzle_in & 0xffff;
1285
1286 assert(!reg->Register.Indirect);
1287
1288 switch (info->system_value_semantic_name[reg->Register.Index]) {
1289 case TGSI_SEMANTIC_INSTANCEID:
1290 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1291 atype = TGSI_TYPE_UNSIGNED;
1292 break;
1293
1294 case TGSI_SEMANTIC_VERTEXID:
1295 res = bld->system_values.vertex_id;
1296 atype = TGSI_TYPE_UNSIGNED;
1297 break;
1298
1299 case TGSI_SEMANTIC_VERTEXID_NOBASE:
1300 res = bld->system_values.vertex_id_nobase;
1301 atype = TGSI_TYPE_UNSIGNED;
1302 break;
1303
1304 case TGSI_SEMANTIC_BASEVERTEX:
1305 res = bld->system_values.basevertex;
1306 atype = TGSI_TYPE_UNSIGNED;
1307 break;
1308
1309 case TGSI_SEMANTIC_BASEINSTANCE:
1310 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1311 atype = TGSI_TYPE_UNSIGNED;
1312 break;
1313
1314 case TGSI_SEMANTIC_PRIMID:
1315 res = bld->system_values.prim_id;
1316 atype = TGSI_TYPE_UNSIGNED;
1317 break;
1318
1319 case TGSI_SEMANTIC_INVOCATIONID:
1320 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1321 atype = TGSI_TYPE_UNSIGNED;
1322 break;
1323
1324 case TGSI_SEMANTIC_HELPER_INVOCATION:
1325 res = LLVMBuildNot(gallivm->builder, lp_build_mask_value(bld->mask), "");
1326 atype = TGSI_TYPE_UNSIGNED;
1327 break;
1328
1329 case TGSI_SEMANTIC_THREAD_ID:
1330 res = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, swizzle, "");
1331 atype = TGSI_TYPE_UNSIGNED;
1332 break;
1333
1334 case TGSI_SEMANTIC_BLOCK_ID:
1335 res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.block_id, lp_build_const_int32(gallivm, swizzle));
1336 atype = TGSI_TYPE_UNSIGNED;
1337 break;
1338
1339 case TGSI_SEMANTIC_GRID_SIZE:
1340 res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.grid_size, lp_build_const_int32(gallivm, swizzle));
1341 atype = TGSI_TYPE_UNSIGNED;
1342 break;
1343
1344 case TGSI_SEMANTIC_FACE:
1345 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1346 break;
1347
1348 default:
1349 assert(!"unexpected semantic in emit_fetch_system_value");
1350 res = bld_base->base.zero;
1351 atype = TGSI_TYPE_FLOAT;
1352 break;
1353 }
1354
1355 if (atype != stype) {
1356 if (stype == TGSI_TYPE_FLOAT) {
1357 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1358 } else if (stype == TGSI_TYPE_UNSIGNED) {
1359 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1360 } else if (stype == TGSI_TYPE_SIGNED) {
1361 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1362 }
1363 }
1364
1365 return res;
1366 }
1367
1368 /**
1369 * Register fetch with derivatives.
1370 */
1371 static void
1372 emit_fetch_deriv(
1373 struct lp_build_tgsi_soa_context *bld,
1374 LLVMValueRef src,
1375 LLVMValueRef *res,
1376 LLVMValueRef *ddx,
1377 LLVMValueRef *ddy)
1378 {
1379 if (res)
1380 *res = src;
1381
1382 /* TODO: use interpolation coeffs for inputs */
1383
1384 if (ddx)
1385 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1386
1387 if (ddy)
1388 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1389 }
1390
1391 /**
1392 * store an array of vec-length 64-bit into two arrays of vec_length floats
1393 * i.e.
1394 * value is d0, d1, d2, d3 etc.
1395 * each 64-bit has high and low pieces x, y
1396 * so gets stored into the separate channels as:
1397 * chan_ptr = d0.x, d1.x, d2.x, d3.x
1398 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1399 */
1400 static void
1401 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1402 LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1403 LLVMValueRef value)
1404 {
1405 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1406 struct gallivm_state *gallivm = bld_base->base.gallivm;
1407 LLVMBuilderRef builder = gallivm->builder;
1408 struct lp_build_context *float_bld = &bld_base->base;
1409 unsigned i;
1410 LLVMValueRef temp, temp2;
1411 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1412 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1413
1414 for (i = 0; i < bld_base->base.type.length; i++) {
1415 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1416 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1417 }
1418
1419 temp = LLVMBuildShuffleVector(builder, value,
1420 LLVMGetUndef(LLVMTypeOf(value)),
1421 LLVMConstVector(shuffles,
1422 bld_base->base.type.length),
1423 "");
1424 temp2 = LLVMBuildShuffleVector(builder, value,
1425 LLVMGetUndef(LLVMTypeOf(value)),
1426 LLVMConstVector(shuffles2,
1427 bld_base->base.type.length),
1428 "");
1429
1430 lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1431 lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1432 }
1433
1434 /**
1435 * Register store.
1436 */
1437 static void
1438 emit_store_chan(
1439 struct lp_build_tgsi_context *bld_base,
1440 const struct tgsi_full_instruction *inst,
1441 unsigned index,
1442 unsigned chan_index,
1443 LLVMValueRef value)
1444 {
1445 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1446 struct gallivm_state *gallivm = bld_base->base.gallivm;
1447 LLVMBuilderRef builder = gallivm->builder;
1448 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1449 struct lp_build_context *float_bld = &bld_base->base;
1450 struct lp_build_context *int_bld = &bld_base->int_bld;
1451 LLVMValueRef indirect_index = NULL;
1452 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1453
1454 /*
1455 * Apply saturation.
1456 *
1457 * It is always assumed to be float.
1458 */
1459 if (inst->Instruction.Saturate) {
1460 assert(dtype == TGSI_TYPE_FLOAT ||
1461 dtype == TGSI_TYPE_UNTYPED);
1462 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1463 value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1464 }
1465
1466 if (reg->Register.Indirect) {
1467 /*
1468 * Currently the mesa/st doesn't generate indirect stores
1469 * to 64-bit values, it normally uses MOV to do indirect stores.
1470 */
1471 assert(!tgsi_type_is_64bit(dtype));
1472 indirect_index = get_indirect_index(bld,
1473 reg->Register.File,
1474 reg->Register.Index,
1475 &reg->Indirect,
1476 bld->bld_base.info->file_max[reg->Register.File]);
1477 } else {
1478 assert(reg->Register.Index <=
1479 bld_base->info->file_max[reg->Register.File]);
1480 }
1481
1482 if (DEBUG_EXECUTION) {
1483 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1484 }
1485
1486 switch( reg->Register.File ) {
1487 case TGSI_FILE_OUTPUT:
1488 /* Outputs are always stored as floats */
1489 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1490
1491 if (reg->Register.Indirect) {
1492 LLVMValueRef index_vec; /* indexes into the output registers */
1493 LLVMValueRef outputs_array;
1494 LLVMTypeRef fptr_type;
1495
1496 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1497 indirect_index,
1498 chan_index,
1499 TRUE);
1500
1501 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1502 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1503
1504 /* Scatter store values into output registers */
1505 emit_mask_scatter(bld, outputs_array, index_vec, value,
1506 &bld->exec_mask);
1507 }
1508 else {
1509 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1510 chan_index);
1511
1512 if (tgsi_type_is_64bit(dtype)) {
1513 LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1514 chan_index + 1);
1515 emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1516 value);
1517 } else
1518 lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1519 }
1520 break;
1521
1522 case TGSI_FILE_TEMPORARY:
1523 /* Temporaries are always stored as floats */
1524 if (!tgsi_type_is_64bit(dtype))
1525 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1526 else
1527 value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1528
1529 if (reg->Register.Indirect) {
1530 LLVMValueRef index_vec; /* indexes into the temp registers */
1531 LLVMValueRef temps_array;
1532 LLVMTypeRef fptr_type;
1533
1534 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1535 indirect_index,
1536 chan_index,
1537 TRUE);
1538
1539 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1540 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1541
1542 /* Scatter store values into temp registers */
1543 emit_mask_scatter(bld, temps_array, index_vec, value,
1544 &bld->exec_mask);
1545 }
1546 else {
1547 LLVMValueRef temp_ptr;
1548 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1549
1550 if (tgsi_type_is_64bit(dtype)) {
1551 LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1552 reg->Register.Index,
1553 chan_index + 1);
1554 emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1555 value);
1556 }
1557 else
1558 lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1559 }
1560 break;
1561
1562 case TGSI_FILE_ADDRESS:
1563 assert(dtype == TGSI_TYPE_SIGNED);
1564 assert(LLVMTypeOf(value) == int_bld->vec_type);
1565 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1566 lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1567 bld->addr[reg->Register.Index][chan_index]);
1568 break;
1569
1570 default:
1571 assert( 0 );
1572 }
1573
1574 (void)dtype;
1575 }
1576
1577 /*
1578 * Called at the beginning of the translation of each TGSI instruction, to
1579 * emit some debug code.
1580 */
1581 static void
1582 emit_debug(
1583 struct lp_build_tgsi_context * bld_base,
1584 const struct tgsi_full_instruction * inst,
1585 const struct tgsi_opcode_info * info)
1586
1587 {
1588 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1589
1590 if (DEBUG_EXECUTION) {
1591 /*
1592 * Dump the TGSI instruction.
1593 */
1594
1595 struct gallivm_state *gallivm = bld_base->base.gallivm;
1596 char buf[512];
1597 buf[0] = '$';
1598 buf[1] = ' ';
1599 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1600 lp_build_printf(gallivm, buf);
1601
1602 /* Dump the execution mask.
1603 */
1604 if (bld->exec_mask.has_mask) {
1605 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
1606 }
1607 }
1608 }
1609
1610 static void
1611 emit_store(
1612 struct lp_build_tgsi_context * bld_base,
1613 const struct tgsi_full_instruction * inst,
1614 const struct tgsi_opcode_info * info,
1615 unsigned index,
1616 LLVMValueRef dst[4])
1617
1618 {
1619 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1620
1621 unsigned writemask = inst->Dst[index].Register.WriteMask;
1622 while (writemask) {
1623 unsigned chan_index = u_bit_scan(&writemask);
1624 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1625 continue;
1626 emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1627 }
1628 }
1629
1630 static unsigned
1631 tgsi_to_pipe_tex_target(unsigned tgsi_target)
1632 {
1633 switch (tgsi_target) {
1634 case TGSI_TEXTURE_BUFFER:
1635 return PIPE_BUFFER;
1636 case TGSI_TEXTURE_1D:
1637 case TGSI_TEXTURE_SHADOW1D:
1638 return PIPE_TEXTURE_1D;
1639 case TGSI_TEXTURE_2D:
1640 case TGSI_TEXTURE_SHADOW2D:
1641 case TGSI_TEXTURE_2D_MSAA:
1642 return PIPE_TEXTURE_2D;
1643 case TGSI_TEXTURE_3D:
1644 return PIPE_TEXTURE_3D;
1645 case TGSI_TEXTURE_CUBE:
1646 case TGSI_TEXTURE_SHADOWCUBE:
1647 return PIPE_TEXTURE_CUBE;
1648 case TGSI_TEXTURE_RECT:
1649 case TGSI_TEXTURE_SHADOWRECT:
1650 return PIPE_TEXTURE_RECT;
1651 case TGSI_TEXTURE_1D_ARRAY:
1652 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1653 return PIPE_TEXTURE_1D_ARRAY;
1654 case TGSI_TEXTURE_2D_ARRAY:
1655 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1656 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1657 return PIPE_TEXTURE_2D_ARRAY;
1658 case TGSI_TEXTURE_CUBE_ARRAY:
1659 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1660 return PIPE_TEXTURE_CUBE_ARRAY;
1661 default:
1662 assert(0);
1663 return PIPE_BUFFER;
1664 }
1665 }
1666
1667
1668 static enum lp_sampler_lod_property
1669 lp_build_lod_property(
1670 struct lp_build_tgsi_context *bld_base,
1671 const struct tgsi_full_instruction *inst,
1672 unsigned src_op)
1673 {
1674 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
1675 enum lp_sampler_lod_property lod_property;
1676
1677 /*
1678 * Not much we can do here. We could try catching inputs declared
1679 * with constant interpolation but not sure it's worth it - since for
1680 * TEX opcodes as well as FETCH/LD the lod comes from same reg as
1681 * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
1682 * like the constant/immediate recognition below.
1683 * What seems to be of more value would be to recognize temps holding
1684 * broadcasted scalars but no way we can do it.
1685 * Tried asking llvm but without any success (using LLVMIsConstant
1686 * even though this isn't exactly what we'd need), even as simple as
1687 * IMM[0] UINT32 (0,-1,0,0)
1688 * MOV TEMP[0] IMM[0].yyyy
1689 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
1690 * doesn't work.
1691 * This means there's ZERO chance this will ever catch a scalar lod
1692 * with traditional tex opcodes as well as texel fetches, since the lod
1693 * comes from the same reg as coords (except some test shaders using
1694 * constant coords maybe).
1695 * There's at least hope for sample opcodes as well as size queries.
1696 */
1697 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ ||
1698 reg->Register.File == TGSI_FILE_CONSTANT ||
1699 reg->Register.File == TGSI_FILE_IMMEDIATE) {
1700 lod_property = LP_SAMPLER_LOD_SCALAR;
1701 }
1702 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
1703 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
1704 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1705 }
1706 else {
1707 lod_property = LP_SAMPLER_LOD_PER_QUAD;
1708 }
1709 }
1710 else {
1711 /* never use scalar (per-quad) lod the results are just too wrong. */
1712 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1713 }
1714 return lod_property;
1715 }
1716
1717
1718 /**
1719 * High-level instruction translators.
1720 */
1721
1722 static void
1723 emit_tex( struct lp_build_tgsi_soa_context *bld,
1724 const struct tgsi_full_instruction *inst,
1725 enum lp_build_tex_modifier modifier,
1726 LLVMValueRef *texel,
1727 unsigned sampler_reg,
1728 enum lp_sampler_op_type sampler_op)
1729 {
1730 unsigned unit = inst->Src[sampler_reg].Register.Index;
1731 LLVMValueRef oow = NULL;
1732 LLVMValueRef lod = NULL;
1733 LLVMValueRef coords[5];
1734 LLVMValueRef offsets[3] = { NULL };
1735 struct lp_derivatives derivs;
1736 struct lp_sampler_params params;
1737 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
1738 unsigned num_derivs, num_offsets, i;
1739 unsigned shadow_coord = 0;
1740 unsigned layer_coord = 0;
1741 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
1742
1743 memset(&params, 0, sizeof(params));
1744
1745 if (!bld->sampler) {
1746 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1747 for (i = 0; i < 4; i++) {
1748 texel[i] = bld->bld_base.base.undef;
1749 }
1750 return;
1751 }
1752
1753 switch (inst->Texture.Texture) {
1754 case TGSI_TEXTURE_1D_ARRAY:
1755 layer_coord = 1;
1756 /* fallthrough */
1757 case TGSI_TEXTURE_1D:
1758 num_offsets = 1;
1759 num_derivs = 1;
1760 break;
1761 case TGSI_TEXTURE_2D_ARRAY:
1762 layer_coord = 2;
1763 /* fallthrough */
1764 case TGSI_TEXTURE_2D:
1765 case TGSI_TEXTURE_RECT:
1766 num_offsets = 2;
1767 num_derivs = 2;
1768 break;
1769 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1770 layer_coord = 1;
1771 /* fallthrough */
1772 case TGSI_TEXTURE_SHADOW1D:
1773 shadow_coord = 2;
1774 num_offsets = 1;
1775 num_derivs = 1;
1776 break;
1777 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1778 layer_coord = 2;
1779 shadow_coord = 3;
1780 num_offsets = 2;
1781 num_derivs = 2;
1782 break;
1783 case TGSI_TEXTURE_SHADOW2D:
1784 case TGSI_TEXTURE_SHADOWRECT:
1785 shadow_coord = 2;
1786 num_offsets = 2;
1787 num_derivs = 2;
1788 break;
1789 case TGSI_TEXTURE_CUBE:
1790 num_offsets = 2;
1791 num_derivs = 3;
1792 break;
1793 case TGSI_TEXTURE_3D:
1794 num_offsets = 3;
1795 num_derivs = 3;
1796 break;
1797 case TGSI_TEXTURE_SHADOWCUBE:
1798 shadow_coord = 3;
1799 num_offsets = 2;
1800 num_derivs = 3;
1801 break;
1802 case TGSI_TEXTURE_CUBE_ARRAY:
1803 num_offsets = 2;
1804 num_derivs = 3;
1805 layer_coord = 3;
1806 break;
1807 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1808 num_offsets = 2;
1809 num_derivs = 3;
1810 layer_coord = 3;
1811 shadow_coord = 4; /* shadow coord special different reg */
1812 break;
1813 case TGSI_TEXTURE_2D_MSAA:
1814 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1815 default:
1816 assert(0);
1817 return;
1818 }
1819
1820 /* Note lod and especially projected are illegal in a LOT of cases */
1821 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
1822 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1823 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
1824 lod = bld->bld_base.base.zero;
1825 } else if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
1826 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
1827 /* note that shadow cube array with bias/explicit lod does not exist */
1828 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
1829 }
1830 else {
1831 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
1832 }
1833 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1834 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
1835 }
1836 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1837 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
1838 }
1839 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
1840 }
1841
1842 if (sampler_op == LP_SAMPLER_OP_GATHER) {
1843 uint32_t comp_val = inst->Src[sampler_reg].Register.SwizzleX;
1844 sample_key |= (comp_val << LP_SAMPLER_GATHER_COMP_SHIFT);
1845 }
1846 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1847 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
1848 oow = lp_build_rcp(&bld->bld_base.base, oow);
1849 }
1850
1851 for (i = 0; i < num_derivs; i++) {
1852 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
1853 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1854 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
1855 }
1856 for (i = num_derivs; i < 5; i++) {
1857 coords[i] = bld->bld_base.base.undef;
1858 }
1859
1860 /* Layer coord always goes into 3rd slot, except for cube map arrays */
1861 if (layer_coord) {
1862 if (layer_coord == 3) {
1863 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
1864 }
1865 else {
1866 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
1867 }
1868 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1869 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
1870 }
1871 /* Shadow coord occupies always 5th slot. */
1872 if (shadow_coord) {
1873 sample_key |= LP_SAMPLER_SHADOW;
1874 if (shadow_coord == 4) {
1875 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
1876 }
1877 else {
1878 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
1879 }
1880 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1881 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
1882 }
1883
1884 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1885 unsigned dim;
1886 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
1887 for (dim = 0; dim < num_derivs; ++dim) {
1888 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
1889 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
1890 }
1891 params.derivs = &derivs;
1892 /*
1893 * could also check all src regs if constant but I doubt such
1894 * cases exist in practice.
1895 */
1896 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
1897 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
1898 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1899 }
1900 else {
1901 lod_property = LP_SAMPLER_LOD_PER_QUAD;
1902 }
1903 }
1904 else {
1905 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1906 }
1907 }
1908 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
1909
1910 /* we don't handle the 4 offset version of tg4 */
1911 if (inst->Texture.NumOffsets == 1) {
1912 unsigned dim;
1913 sample_key |= LP_SAMPLER_OFFSETS;
1914 for (dim = 0; dim < num_offsets; dim++) {
1915 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
1916 }
1917 }
1918
1919 params.type = bld->bld_base.base.type;
1920 params.sample_key = sample_key;
1921 params.texture_index = unit;
1922 params.sampler_index = unit;
1923 params.context_ptr = bld->context_ptr;
1924 params.thread_data_ptr = bld->thread_data_ptr;
1925 params.coords = coords;
1926 params.offsets = offsets;
1927 params.lod = lod;
1928 params.texel = texel;
1929
1930 bld->sampler->emit_tex_sample(bld->sampler,
1931 bld->bld_base.base.gallivm,
1932 &params);
1933 }
1934
1935 static void
1936 emit_sample(struct lp_build_tgsi_soa_context *bld,
1937 const struct tgsi_full_instruction *inst,
1938 enum lp_build_tex_modifier modifier,
1939 boolean compare,
1940 enum lp_sampler_op_type sample_type,
1941 LLVMValueRef *texel)
1942 {
1943 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1944 unsigned texture_unit, sampler_unit;
1945 LLVMValueRef lod = NULL;
1946 LLVMValueRef coords[5];
1947 LLVMValueRef offsets[3] = { NULL };
1948 struct lp_derivatives derivs;
1949 struct lp_sampler_params params;
1950 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
1951
1952 unsigned num_offsets, num_derivs, i;
1953 unsigned layer_coord = 0;
1954 unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
1955
1956 memset(&params, 0, sizeof(params));
1957
1958 if (!bld->sampler) {
1959 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1960 for (i = 0; i < 4; i++) {
1961 texel[i] = bld->bld_base.base.undef;
1962 }
1963 return;
1964 }
1965
1966 /*
1967 * unlike old-style tex opcodes the texture/sampler indices
1968 * always come from src1 and src2 respectively.
1969 */
1970 texture_unit = inst->Src[1].Register.Index;
1971 sampler_unit = inst->Src[2].Register.Index;
1972
1973 /*
1974 * Note inst->Texture.Texture will contain the number of offsets,
1975 * however the target information is NOT there and comes from the
1976 * declared sampler views instead.
1977 */
1978 switch (bld->sv[texture_unit].Resource) {
1979 case TGSI_TEXTURE_1D:
1980 num_offsets = 1;
1981 num_derivs = 1;
1982 break;
1983 case TGSI_TEXTURE_1D_ARRAY:
1984 layer_coord = 1;
1985 num_offsets = 1;
1986 num_derivs = 1;
1987 break;
1988 case TGSI_TEXTURE_2D:
1989 case TGSI_TEXTURE_RECT:
1990 num_offsets = 2;
1991 num_derivs = 2;
1992 break;
1993 case TGSI_TEXTURE_2D_ARRAY:
1994 layer_coord = 2;
1995 num_offsets = 2;
1996 num_derivs = 2;
1997 break;
1998 case TGSI_TEXTURE_CUBE:
1999 num_offsets = 2;
2000 num_derivs = 3;
2001 break;
2002 case TGSI_TEXTURE_3D:
2003 num_offsets = 3;
2004 num_derivs = 3;
2005 break;
2006 case TGSI_TEXTURE_CUBE_ARRAY:
2007 layer_coord = 3;
2008 num_offsets = 2;
2009 num_derivs = 3;
2010 break;
2011 default:
2012 assert(0);
2013 return;
2014 }
2015
2016 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2017 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2018 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2019 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2020 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2021 }
2022 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2023 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2024 }
2025 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2026 }
2027 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2028 /* XXX might be better to explicitly pass the level zero information */
2029 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2030 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2031 }
2032
2033 for (i = 0; i < num_derivs; i++) {
2034 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2035 }
2036 for (i = num_derivs; i < 5; i++) {
2037 coords[i] = bld->bld_base.base.undef;
2038 }
2039
2040 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2041 if (layer_coord) {
2042 if (layer_coord == 3)
2043 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2044 else
2045 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2046 }
2047 /* Shadow coord occupies always 5th slot. */
2048 if (compare) {
2049 sample_key |= LP_SAMPLER_SHADOW;
2050 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2051 }
2052
2053 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2054 unsigned dim;
2055 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2056 for (dim = 0; dim < num_derivs; ++dim) {
2057 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2058 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2059 }
2060 params.derivs = &derivs;
2061 /*
2062 * could also check all src regs if constant but I doubt such
2063 * cases exist in practice.
2064 */
2065 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2066 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2067 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2068 }
2069 else {
2070 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2071 }
2072 }
2073 else {
2074 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2075 }
2076 }
2077
2078 /* some advanced gather instructions (txgo) would require 4 offsets */
2079 if (inst->Texture.NumOffsets == 1) {
2080 unsigned dim;
2081 sample_key |= LP_SAMPLER_OFFSETS;
2082 for (dim = 0; dim < num_offsets; dim++) {
2083 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2084 }
2085 }
2086 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2087
2088 params.type = bld->bld_base.base.type;
2089 params.sample_key = sample_key;
2090 params.texture_index = texture_unit;
2091 params.sampler_index = sampler_unit;
2092 params.context_ptr = bld->context_ptr;
2093 params.thread_data_ptr = bld->thread_data_ptr;
2094 params.coords = coords;
2095 params.offsets = offsets;
2096 params.lod = lod;
2097 params.texel = texel;
2098
2099 bld->sampler->emit_tex_sample(bld->sampler,
2100 bld->bld_base.base.gallivm,
2101 &params);
2102
2103 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2104 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2105 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2106 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2107 unsigned char swizzles[4];
2108 swizzles[0] = inst->Src[1].Register.SwizzleX;
2109 swizzles[1] = inst->Src[1].Register.SwizzleY;
2110 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2111 swizzles[3] = inst->Src[1].Register.SwizzleW;
2112
2113 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2114 }
2115 }
2116
2117 static void
2118 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2119 const struct tgsi_full_instruction *inst,
2120 LLVMValueRef *texel,
2121 boolean is_samplei)
2122 {
2123 unsigned unit, target;
2124 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2125 LLVMValueRef explicit_lod = NULL;
2126 LLVMValueRef coords[5];
2127 LLVMValueRef offsets[3] = { NULL };
2128 struct lp_sampler_params params;
2129 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2130 unsigned dims, i;
2131 unsigned layer_coord = 0;
2132 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2133
2134 memset(&params, 0, sizeof(params));
2135
2136 if (!bld->sampler) {
2137 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2138 for (i = 0; i < 4; i++) {
2139 texel[i] = coord_undef;
2140 }
2141 return;
2142 }
2143
2144 unit = inst->Src[1].Register.Index;
2145
2146 if (is_samplei) {
2147 target = bld->sv[unit].Resource;
2148 }
2149 else {
2150 target = inst->Texture.Texture;
2151 }
2152
2153 switch (target) {
2154 case TGSI_TEXTURE_1D:
2155 case TGSI_TEXTURE_BUFFER:
2156 dims = 1;
2157 break;
2158 case TGSI_TEXTURE_1D_ARRAY:
2159 layer_coord = 1;
2160 dims = 1;
2161 break;
2162 case TGSI_TEXTURE_2D:
2163 case TGSI_TEXTURE_RECT:
2164 case TGSI_TEXTURE_2D_MSAA:
2165 dims = 2;
2166 break;
2167 case TGSI_TEXTURE_2D_ARRAY:
2168 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2169 layer_coord = 2;
2170 dims = 2;
2171 break;
2172 case TGSI_TEXTURE_3D:
2173 dims = 3;
2174 break;
2175 default:
2176 assert(0);
2177 return;
2178 }
2179
2180 /* always have lod except for buffers and msaa targets ? */
2181 if (target != TGSI_TEXTURE_BUFFER &&
2182 target != TGSI_TEXTURE_2D_MSAA &&
2183 target != TGSI_TEXTURE_2D_ARRAY_MSAA &&
2184 inst->Instruction.Opcode != TGSI_OPCODE_TXF_LZ) {
2185 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2186 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2187 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2188 }
2189 /*
2190 * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2191 * would be the sample index.
2192 */
2193
2194 for (i = 0; i < dims; i++) {
2195 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2196 }
2197 /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
2198 for (i = dims; i < 5; i++) {
2199 coords[i] = coord_undef;
2200 }
2201 if (layer_coord)
2202 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2203
2204 if (inst->Texture.NumOffsets == 1) {
2205 unsigned dim;
2206 sample_key |= LP_SAMPLER_OFFSETS;
2207 for (dim = 0; dim < dims; dim++) {
2208 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2209 }
2210 }
2211 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2212
2213 params.type = bld->bld_base.base.type;
2214 params.sample_key = sample_key;
2215 params.texture_index = unit;
2216 /*
2217 * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
2218 * and trigger some assertions with d3d10 where the sampler view number
2219 * can exceed this.
2220 */
2221 params.sampler_index = 0;
2222 params.context_ptr = bld->context_ptr;
2223 params.thread_data_ptr = bld->thread_data_ptr;
2224 params.coords = coords;
2225 params.offsets = offsets;
2226 params.derivs = NULL;
2227 params.lod = explicit_lod;
2228 params.texel = texel;
2229
2230 bld->sampler->emit_tex_sample(bld->sampler,
2231 bld->bld_base.base.gallivm,
2232 &params);
2233
2234 if (is_samplei &&
2235 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2236 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2237 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2238 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2239 unsigned char swizzles[4];
2240 swizzles[0] = inst->Src[1].Register.SwizzleX;
2241 swizzles[1] = inst->Src[1].Register.SwizzleY;
2242 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2243 swizzles[3] = inst->Src[1].Register.SwizzleW;
2244
2245 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2246 }
2247 }
2248
2249 static void
2250 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2251 const struct tgsi_full_instruction *inst,
2252 LLVMValueRef *sizes_out,
2253 boolean is_sviewinfo)
2254 {
2255 LLVMValueRef explicit_lod;
2256 enum lp_sampler_lod_property lod_property;
2257 unsigned has_lod;
2258 unsigned i;
2259 unsigned unit = inst->Src[1].Register.Index;
2260 unsigned target, pipe_target;
2261 struct lp_sampler_size_query_params params;
2262
2263 if (is_sviewinfo) {
2264 target = bld->sv[unit].Resource;
2265 }
2266 else {
2267 target = inst->Texture.Texture;
2268 }
2269 switch (target) {
2270 case TGSI_TEXTURE_BUFFER:
2271 case TGSI_TEXTURE_RECT:
2272 case TGSI_TEXTURE_SHADOWRECT:
2273 has_lod = 0;
2274 break;
2275 default:
2276 has_lod = 1;
2277 break;
2278 }
2279
2280 if (!bld->sampler) {
2281 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2282 for (i = 0; i < 4; i++)
2283 sizes_out[i] = bld->bld_base.int_bld.undef;
2284 return;
2285 }
2286
2287 if (has_lod) {
2288 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2289 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2290 }
2291 else {
2292 explicit_lod = NULL;
2293 lod_property = LP_SAMPLER_LOD_SCALAR;
2294 }
2295
2296
2297 pipe_target = tgsi_to_pipe_tex_target(target);
2298
2299 params.int_type = bld->bld_base.int_bld.type;
2300 params.texture_unit = unit;
2301 params.target = pipe_target;
2302 params.context_ptr = bld->context_ptr;
2303 params.is_sviewinfo = TRUE;
2304 params.lod_property = lod_property;
2305 params.explicit_lod = explicit_lod;
2306 params.sizes_out = sizes_out;
2307
2308 bld->sampler->emit_size_query(bld->sampler,
2309 bld->bld_base.base.gallivm,
2310 &params);
2311 }
2312
2313 static boolean
2314 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2315 int pc)
2316 {
2317 unsigned i;
2318
2319 for (i = 0; i < 5; i++) {
2320 enum tgsi_opcode opcode;
2321
2322 if (pc + i >= bld->bld_base.info->num_instructions)
2323 return TRUE;
2324
2325 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2326
2327 if (opcode == TGSI_OPCODE_END)
2328 return TRUE;
2329
2330 if (opcode == TGSI_OPCODE_TEX ||
2331 opcode == TGSI_OPCODE_TXP ||
2332 opcode == TGSI_OPCODE_TXD ||
2333 opcode == TGSI_OPCODE_TXB ||
2334 opcode == TGSI_OPCODE_TXL ||
2335 opcode == TGSI_OPCODE_TXF ||
2336 opcode == TGSI_OPCODE_TXQ ||
2337 opcode == TGSI_OPCODE_TEX2 ||
2338 opcode == TGSI_OPCODE_TXB2 ||
2339 opcode == TGSI_OPCODE_TXL2 ||
2340 opcode == TGSI_OPCODE_SAMPLE ||
2341 opcode == TGSI_OPCODE_SAMPLE_B ||
2342 opcode == TGSI_OPCODE_SAMPLE_C ||
2343 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2344 opcode == TGSI_OPCODE_SAMPLE_D ||
2345 opcode == TGSI_OPCODE_SAMPLE_I ||
2346 opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2347 opcode == TGSI_OPCODE_SAMPLE_L ||
2348 opcode == TGSI_OPCODE_SVIEWINFO ||
2349 opcode == TGSI_OPCODE_CAL ||
2350 opcode == TGSI_OPCODE_IF ||
2351 opcode == TGSI_OPCODE_UIF ||
2352 opcode == TGSI_OPCODE_BGNLOOP ||
2353 opcode == TGSI_OPCODE_SWITCH)
2354 return FALSE;
2355 }
2356
2357 return TRUE;
2358 }
2359
2360
2361
2362 /**
2363 * Kill fragment if any of the src register values are negative.
2364 */
2365 static void
2366 emit_kill_if(
2367 struct lp_build_tgsi_soa_context *bld,
2368 const struct tgsi_full_instruction *inst,
2369 int pc)
2370 {
2371 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2372 const struct tgsi_full_src_register *reg = &inst->Src[0];
2373 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2374 LLVMValueRef mask;
2375 unsigned chan_index;
2376
2377 memset(&terms, 0, sizeof terms);
2378
2379 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2380 unsigned swizzle;
2381
2382 /* Unswizzle channel */
2383 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2384
2385 /* Check if the component has not been already tested. */
2386 assert(swizzle < TGSI_NUM_CHANNELS);
2387 if( !terms[swizzle] )
2388 /* TODO: change the comparison operator instead of setting the sign */
2389 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2390 }
2391
2392 mask = NULL;
2393 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2394 if(terms[chan_index]) {
2395 LLVMValueRef chan_mask;
2396
2397 /*
2398 * If term < 0 then mask = 0 else mask = ~0.
2399 */
2400 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2401
2402 if(mask)
2403 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2404 else
2405 mask = chan_mask;
2406 }
2407 }
2408
2409 if (bld->exec_mask.has_mask) {
2410 LLVMValueRef invmask;
2411 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2412 mask = LLVMBuildOr(builder, mask, invmask, "");
2413 }
2414
2415 lp_build_mask_update(bld->mask, mask);
2416 if (!near_end_of_shader(bld, pc))
2417 lp_build_mask_check(bld->mask);
2418 }
2419
2420
2421 /**
2422 * Unconditional fragment kill.
2423 * The only predication is the execution mask which will apply if
2424 * we're inside a loop or conditional.
2425 */
2426 static void
2427 emit_kill(struct lp_build_tgsi_soa_context *bld,
2428 int pc)
2429 {
2430 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2431 LLVMValueRef mask;
2432
2433 /* For those channels which are "alive", disable fragment shader
2434 * execution.
2435 */
2436 if (bld->exec_mask.has_mask) {
2437 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2438 }
2439 else {
2440 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2441 mask = zero;
2442 }
2443
2444 lp_build_mask_update(bld->mask, mask);
2445
2446 if (!near_end_of_shader(bld, pc))
2447 lp_build_mask_check(bld->mask);
2448 }
2449
2450
2451 /**
2452 * Emit code which will dump the value of all the temporary registers
2453 * to stdout.
2454 */
2455 static void
2456 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2457 unsigned file)
2458 {
2459 const struct tgsi_shader_info *info = bld->bld_base.info;
2460 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2461 LLVMBuilderRef builder = gallivm->builder;
2462 LLVMValueRef reg_ptr;
2463 int index;
2464 int max_index = info->file_max[file];
2465
2466 /*
2467 * Some register files, particularly constants, can be very large,
2468 * and dumping everything could make this unusably slow.
2469 */
2470 max_index = MIN2(max_index, 32);
2471
2472 for (index = 0; index <= max_index; index++) {
2473 LLVMValueRef res;
2474 unsigned mask;
2475 int chan;
2476
2477 if (index < 8 * sizeof(unsigned) &&
2478 (info->file_mask[file] & (1u << index)) == 0) {
2479 /* This was not declared.*/
2480 continue;
2481 }
2482
2483 if (file == TGSI_FILE_INPUT) {
2484 mask = info->input_usage_mask[index];
2485 } else {
2486 mask = TGSI_WRITEMASK_XYZW;
2487 }
2488
2489 for (chan = 0; chan < 4; chan++) {
2490 if ((mask & (1 << chan)) == 0) {
2491 /* This channel is not used.*/
2492 continue;
2493 }
2494
2495 if (file == TGSI_FILE_CONSTANT) {
2496 struct tgsi_full_src_register reg;
2497 memset(&reg, 0, sizeof reg);
2498 reg.Register.File = file;
2499 reg.Register.Index = index;
2500 reg.Register.SwizzleX = 0;
2501 reg.Register.SwizzleY = 1;
2502 reg.Register.SwizzleZ = 2;
2503 reg.Register.SwizzleW = 3;
2504
2505 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2506 if (!res) {
2507 continue;
2508 }
2509 } else if (file == TGSI_FILE_INPUT) {
2510 res = bld->inputs[index][chan];
2511 if (!res) {
2512 continue;
2513 }
2514 } else if (file == TGSI_FILE_TEMPORARY) {
2515 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2516 assert(reg_ptr);
2517 res = LLVMBuildLoad(builder, reg_ptr, "");
2518 } else if (file == TGSI_FILE_OUTPUT) {
2519 reg_ptr = lp_get_output_ptr(bld, index, chan);
2520 assert(reg_ptr);
2521 res = LLVMBuildLoad(builder, reg_ptr, "");
2522 } else {
2523 assert(0);
2524 continue;
2525 }
2526
2527 emit_dump_reg(gallivm, file, index, chan, res);
2528 }
2529 }
2530 }
2531
2532
2533
2534 void
2535 lp_emit_declaration_soa(
2536 struct lp_build_tgsi_context *bld_base,
2537 const struct tgsi_full_declaration *decl)
2538 {
2539 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2540 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2541 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2542 const unsigned first = decl->Range.First;
2543 const unsigned last = decl->Range.Last;
2544 unsigned idx, i;
2545
2546 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2547
2548 switch (decl->Declaration.File) {
2549 case TGSI_FILE_TEMPORARY:
2550 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2551 assert(last < LP_MAX_INLINED_TEMPS);
2552 for (idx = first; idx <= last; ++idx) {
2553 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2554 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2555 }
2556 }
2557 break;
2558
2559 case TGSI_FILE_OUTPUT:
2560 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2561 for (idx = first; idx <= last; ++idx) {
2562 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2563 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2564 vec_type, "output");
2565 }
2566 }
2567 break;
2568
2569 case TGSI_FILE_ADDRESS:
2570 /* ADDR registers are only allocated with an integer LLVM IR type,
2571 * as they are guaranteed to always have integers.
2572 * XXX: Not sure if this exception is worthwhile (or the whole idea of
2573 * an ADDR register for that matter).
2574 */
2575 assert(last < LP_MAX_TGSI_ADDRS);
2576 for (idx = first; idx <= last; ++idx) {
2577 assert(idx < LP_MAX_TGSI_ADDRS);
2578 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2579 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2580 }
2581 break;
2582
2583 case TGSI_FILE_SAMPLER_VIEW:
2584 /*
2585 * The target stored here MUST match whatever there actually
2586 * is in the set sampler views (what about return type?).
2587 */
2588 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2589 for (idx = first; idx <= last; ++idx) {
2590 bld->sv[idx] = decl->SamplerView;
2591 }
2592 break;
2593
2594 case TGSI_FILE_CONSTANT:
2595 {
2596 /*
2597 * We could trivially fetch the per-buffer pointer when fetching the
2598 * constant, relying on llvm to figure out it's always the same pointer
2599 * anyway. However, doing so results in a huge (more than factor of 10)
2600 * slowdown in llvm compilation times for some (but not all) shaders
2601 * (more specifically, the IR optimization spends way more time in
2602 * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
2603 */
2604 unsigned idx2D = decl->Dim.Index2D;
2605 LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2606 assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2607 bld->consts[idx2D] =
2608 lp_build_array_get(gallivm, bld->consts_ptr, index2D);
2609 bld->consts_sizes[idx2D] =
2610 lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
2611 }
2612 break;
2613 case TGSI_FILE_BUFFER:
2614 {
2615 unsigned idx = decl->Range.First;
2616 LLVMValueRef index = lp_build_const_int32(gallivm, idx);
2617 assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
2618 bld->ssbos[idx] =
2619 lp_build_array_get(gallivm, bld->ssbo_ptr, index);
2620 bld->ssbo_sizes[idx] =
2621 lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index);
2622
2623 }
2624 break;
2625 case TGSI_FILE_MEMORY:
2626 break;
2627 default:
2628 /* don't need to declare other vars */
2629 break;
2630 }
2631 }
2632
2633
2634 void lp_emit_immediate_soa(
2635 struct lp_build_tgsi_context *bld_base,
2636 const struct tgsi_full_immediate *imm)
2637 {
2638 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2639 struct gallivm_state * gallivm = bld_base->base.gallivm;
2640 LLVMValueRef imms[4];
2641 unsigned i;
2642 const uint size = imm->Immediate.NrTokens - 1;
2643 assert(size <= 4);
2644 switch (imm->Immediate.DataType) {
2645 case TGSI_IMM_FLOAT32:
2646 for( i = 0; i < size; ++i )
2647 imms[i] =
2648 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2649
2650 break;
2651 case TGSI_IMM_FLOAT64:
2652 case TGSI_IMM_UINT64:
2653 case TGSI_IMM_INT64:
2654 case TGSI_IMM_UINT32:
2655 for( i = 0; i < size; ++i ) {
2656 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
2657 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2658 }
2659
2660 break;
2661 case TGSI_IMM_INT32:
2662 for( i = 0; i < size; ++i ) {
2663 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
2664 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2665 }
2666
2667 break;
2668 }
2669 for( i = size; i < 4; ++i )
2670 imms[i] = bld_base->base.undef;
2671
2672 if (bld->use_immediates_array) {
2673 unsigned index = bld->num_immediates;
2674 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2675 LLVMBuilderRef builder = gallivm->builder;
2676 LLVMValueRef gep[2];
2677 gep[0] = lp_build_const_int32(gallivm, 0);
2678
2679 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
2680 for (i = 0; i < 4; ++i ) {
2681 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
2682 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
2683 bld->imms_array, gep, 2, "");
2684 LLVMBuildStore(builder, imms[i], imm_ptr);
2685 }
2686 } else {
2687 /* simply copy the immediate values into the next immediates[] slot */
2688 unsigned i;
2689 assert(imm->Immediate.NrTokens - 1 <= 4);
2690 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
2691
2692 for(i = 0; i < 4; ++i )
2693 bld->immediates[bld->num_immediates][i] = imms[i];
2694
2695 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
2696 unsigned index = bld->num_immediates;
2697 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2698 LLVMBuilderRef builder = gallivm->builder;
2699 LLVMValueRef gep[2];
2700 gep[0] = lp_build_const_int32(gallivm, 0);
2701 for (i = 0; i < 4; ++i ) {
2702 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
2703 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
2704 bld->imms_array, gep, 2, "");
2705 LLVMBuildStore(builder,
2706 bld->immediates[index][i],
2707 imm_ptr);
2708 }
2709 }
2710 }
2711
2712 bld->num_immediates++;
2713 }
2714
2715 static void
2716 ddx_emit(
2717 const struct lp_build_tgsi_action * action,
2718 struct lp_build_tgsi_context * bld_base,
2719 struct lp_build_emit_data * emit_data)
2720 {
2721 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2722
2723 emit_fetch_deriv(bld, emit_data->args[0], NULL,
2724 &emit_data->output[emit_data->chan], NULL);
2725 }
2726
2727 static void
2728 ddy_emit(
2729 const struct lp_build_tgsi_action * action,
2730 struct lp_build_tgsi_context * bld_base,
2731 struct lp_build_emit_data * emit_data)
2732 {
2733 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2734
2735 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
2736 &emit_data->output[emit_data->chan]);
2737 }
2738
2739 static void
2740 kill_emit(
2741 const struct lp_build_tgsi_action * action,
2742 struct lp_build_tgsi_context * bld_base,
2743 struct lp_build_emit_data * emit_data)
2744 {
2745 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2746
2747 emit_kill(bld, bld_base->pc - 1);
2748 }
2749
2750 static void
2751 kill_if_emit(
2752 const struct lp_build_tgsi_action * action,
2753 struct lp_build_tgsi_context * bld_base,
2754 struct lp_build_emit_data * emit_data)
2755 {
2756 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2757
2758 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
2759 }
2760
2761 static void
2762 tex_emit(
2763 const struct lp_build_tgsi_action * action,
2764 struct lp_build_tgsi_context * bld_base,
2765 struct lp_build_emit_data * emit_data)
2766 {
2767 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2768
2769 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2770 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
2771 }
2772
2773 static void
2774 tex2_emit(
2775 const struct lp_build_tgsi_action * action,
2776 struct lp_build_tgsi_context * bld_base,
2777 struct lp_build_emit_data * emit_data)
2778 {
2779 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2780
2781 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2782 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
2783 }
2784
2785 static void
2786 txb_emit(
2787 const struct lp_build_tgsi_action * action,
2788 struct lp_build_tgsi_context * bld_base,
2789 struct lp_build_emit_data * emit_data)
2790 {
2791 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2792
2793 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2794 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
2795 }
2796
2797 static void
2798 txb2_emit(
2799 const struct lp_build_tgsi_action * action,
2800 struct lp_build_tgsi_context * bld_base,
2801 struct lp_build_emit_data * emit_data)
2802 {
2803 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2804
2805 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2806 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
2807 }
2808
2809 static void
2810 txd_emit(
2811 const struct lp_build_tgsi_action * action,
2812 struct lp_build_tgsi_context * bld_base,
2813 struct lp_build_emit_data * emit_data)
2814 {
2815 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2816
2817 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2818 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
2819 }
2820
2821 static void
2822 txl_emit(
2823 const struct lp_build_tgsi_action * action,
2824 struct lp_build_tgsi_context * bld_base,
2825 struct lp_build_emit_data * emit_data)
2826 {
2827 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2828
2829 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2830 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
2831 }
2832
2833 static void
2834 txl2_emit(
2835 const struct lp_build_tgsi_action * action,
2836 struct lp_build_tgsi_context * bld_base,
2837 struct lp_build_emit_data * emit_data)
2838 {
2839 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2840
2841 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2842 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
2843 }
2844
2845 static void
2846 txp_emit(
2847 const struct lp_build_tgsi_action * action,
2848 struct lp_build_tgsi_context * bld_base,
2849 struct lp_build_emit_data * emit_data)
2850 {
2851 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2852
2853 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
2854 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
2855 }
2856
2857 static void
2858 tg4_emit(
2859 const struct lp_build_tgsi_action * action,
2860 struct lp_build_tgsi_context * bld_base,
2861 struct lp_build_emit_data * emit_data)
2862 {
2863 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2864
2865 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2866 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
2867 }
2868
2869 static void
2870 lodq_emit(
2871 const struct lp_build_tgsi_action * action,
2872 struct lp_build_tgsi_context * bld_base,
2873 struct lp_build_emit_data * emit_data)
2874 {
2875 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2876
2877 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2878 emit_data->output, 1, LP_SAMPLER_OP_LODQ);
2879 }
2880
2881 static void
2882 txq_emit(
2883 const struct lp_build_tgsi_action * action,
2884 struct lp_build_tgsi_context * bld_base,
2885 struct lp_build_emit_data * emit_data)
2886 {
2887 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2888
2889 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
2890 }
2891
2892 static void
2893 txf_emit(
2894 const struct lp_build_tgsi_action * action,
2895 struct lp_build_tgsi_context * bld_base,
2896 struct lp_build_emit_data * emit_data)
2897 {
2898 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2899
2900 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
2901 }
2902
2903 static void
2904 sample_i_emit(
2905 const struct lp_build_tgsi_action * action,
2906 struct lp_build_tgsi_context * bld_base,
2907 struct lp_build_emit_data * emit_data)
2908 {
2909 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2910
2911 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
2912 }
2913
2914 static void
2915 sample_emit(
2916 const struct lp_build_tgsi_action * action,
2917 struct lp_build_tgsi_context * bld_base,
2918 struct lp_build_emit_data * emit_data)
2919 {
2920 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2921
2922 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2923 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
2924 }
2925
2926 static void
2927 sample_b_emit(
2928 const struct lp_build_tgsi_action * action,
2929 struct lp_build_tgsi_context * bld_base,
2930 struct lp_build_emit_data * emit_data)
2931 {
2932 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2933
2934 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2935 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
2936 }
2937
2938 static void
2939 sample_c_emit(
2940 const struct lp_build_tgsi_action * action,
2941 struct lp_build_tgsi_context * bld_base,
2942 struct lp_build_emit_data * emit_data)
2943 {
2944 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2945
2946 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2947 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
2948 }
2949
2950 static void
2951 sample_c_lz_emit(
2952 const struct lp_build_tgsi_action * action,
2953 struct lp_build_tgsi_context * bld_base,
2954 struct lp_build_emit_data * emit_data)
2955 {
2956 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2957
2958 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
2959 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
2960 }
2961
2962 static void
2963 sample_d_emit(
2964 const struct lp_build_tgsi_action * action,
2965 struct lp_build_tgsi_context * bld_base,
2966 struct lp_build_emit_data * emit_data)
2967 {
2968 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2969
2970 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2971 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
2972 }
2973
2974 static void
2975 sample_l_emit(
2976 const struct lp_build_tgsi_action * action,
2977 struct lp_build_tgsi_context * bld_base,
2978 struct lp_build_emit_data * emit_data)
2979 {
2980 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2981
2982 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2983 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
2984 }
2985
2986 static void
2987 gather4_emit(
2988 const struct lp_build_tgsi_action * action,
2989 struct lp_build_tgsi_context * bld_base,
2990 struct lp_build_emit_data * emit_data)
2991 {
2992 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2993
2994 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2995 FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
2996 }
2997
2998 static void
2999 sviewinfo_emit(
3000 const struct lp_build_tgsi_action * action,
3001 struct lp_build_tgsi_context * bld_base,
3002 struct lp_build_emit_data * emit_data)
3003 {
3004 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3005
3006 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3007 }
3008
3009 static void
3010 lod_emit(
3011 const struct lp_build_tgsi_action * action,
3012 struct lp_build_tgsi_context * bld_base,
3013 struct lp_build_emit_data * emit_data)
3014 {
3015 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3016
3017 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3018 FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3019 }
3020
3021 static void target_to_dims_layer(unsigned target,
3022 unsigned *dims,
3023 unsigned *layer_coord)
3024 {
3025 *layer_coord = 0;
3026 switch (target) {
3027 case TGSI_TEXTURE_1D:
3028 case TGSI_TEXTURE_BUFFER:
3029 *dims = 1;
3030 break;
3031 case TGSI_TEXTURE_1D_ARRAY:
3032 *layer_coord = 1;
3033 *dims = 1;
3034 break;
3035 case TGSI_TEXTURE_2D:
3036 case TGSI_TEXTURE_RECT:
3037 *dims = 2;
3038 break;
3039 case TGSI_TEXTURE_2D_ARRAY:
3040 *layer_coord = 2;
3041 *dims = 2;
3042 break;
3043 case TGSI_TEXTURE_3D:
3044 case TGSI_TEXTURE_CUBE:
3045 case TGSI_TEXTURE_CUBE_ARRAY:
3046 *dims = 3;
3047 break;
3048 default:
3049 assert(0);
3050 return;
3051 }
3052 }
3053
3054 static void
3055 img_load_emit(
3056 const struct lp_build_tgsi_action * action,
3057 struct lp_build_tgsi_context * bld_base,
3058 struct lp_build_emit_data * emit_data)
3059 {
3060 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3061 struct lp_img_params params;
3062 LLVMValueRef coords[5];
3063 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3064 unsigned dims;
3065 unsigned target = emit_data->inst->Memory.Texture;
3066 unsigned layer_coord;
3067
3068 target_to_dims_layer(target, &dims, &layer_coord);
3069
3070 for (unsigned i = 0; i < dims; i++) {
3071 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3072 }
3073 for (unsigned i = dims; i < 5; i++) {
3074 coords[i] = coord_undef;
3075 }
3076 if (layer_coord)
3077 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3078
3079 memset(&params, 0, sizeof(params));
3080
3081 params.type = bld->bld_base.base.type;
3082 params.context_ptr = bld->context_ptr;
3083 params.thread_data_ptr = bld->thread_data_ptr;
3084 params.coords = coords;
3085 params.outdata = emit_data->output;
3086 params.target = tgsi_to_pipe_tex_target(target);
3087 params.image_index = emit_data->inst->Src[0].Register.Index;
3088 params.img_op = LP_IMG_LOAD;
3089 bld->image->emit_op(bld->image,
3090 bld->bld_base.base.gallivm,
3091 &params);
3092 }
3093
3094 static void
3095 load_emit(
3096 const struct lp_build_tgsi_action * action,
3097 struct lp_build_tgsi_context * bld_base,
3098 struct lp_build_emit_data * emit_data)
3099 {
3100 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3101 struct gallivm_state * gallivm = bld_base->base.gallivm;
3102 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3103 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3104 unsigned buf = bufreg->Register.Index;
3105 assert(bufreg->Register.File == TGSI_FILE_BUFFER ||
3106 bufreg->Register.File == TGSI_FILE_IMAGE ||
3107 bufreg->Register.File == TGSI_FILE_MEMORY ||
3108 bufreg->Register.File == TGSI_FILE_CONSTBUF);
3109 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3110 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3111
3112 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3113 img_load_emit(action, bld_base, emit_data);
3114 } else if (bufreg->Register.File == TGSI_FILE_CONSTBUF) {
3115 LLVMValueRef consts_ptr = bld->consts[buf];
3116 LLVMValueRef num_consts = bld->consts_sizes[buf];
3117
3118 LLVMValueRef indirect_index;
3119 LLVMValueRef overflow_mask;
3120
3121 indirect_index = lp_build_emit_fetch(bld_base, emit_data->inst, 1, 0);
3122 indirect_index = lp_build_shr_imm(uint_bld, indirect_index, 4);
3123
3124 /* All fetches are from the same constant buffer, so
3125 * we need to propagate the size to a vector to do a
3126 * vector comparison */
3127 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
3128
3129 /* Gather values from the constant buffer */
3130 unsigned chan_index;
3131 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3132 /* Construct a boolean vector telling us which channels
3133 * overflow the bound constant buffer */
3134 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
3135 indirect_index, num_consts);
3136
3137 /* index_vec = indirect_index * 4 */
3138 LLVMValueRef index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
3139 index_vec = lp_build_add(uint_bld, index_vec,
3140 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3141
3142 emit_data->output[chan_index] = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, NULL);
3143 }
3144 } else if (0) {
3145 /* for indirect support with ARB_gpu_shader5 */
3146 } else {
3147 LLVMValueRef index;
3148 LLVMValueRef scalar, scalar_ptr;
3149 unsigned chan_index;
3150
3151 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3152 index = lp_build_shr_imm(uint_bld, index, 2);
3153
3154 scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3155
3156 LLVMValueRef ssbo_limit;
3157
3158 if (!is_shared) {
3159 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3160 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3161 }
3162
3163 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3164 LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3165
3166 LLVMValueRef exec_mask = mask_vec(bld_base);
3167 if (!is_shared) {
3168 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3169 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3170 }
3171
3172 LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
3173 struct lp_build_loop_state loop_state;
3174 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3175
3176 struct lp_build_if_state ifthen;
3177 LLVMValueRef cond, temp_res;
3178
3179 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3180 loop_state.counter, "");
3181
3182 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3183 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3184
3185 lp_build_if(&ifthen, gallivm, cond);
3186 scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index);
3187
3188 temp_res = LLVMBuildLoad(builder, result, "");
3189 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3190 LLVMBuildStore(builder, temp_res, result);
3191 lp_build_else(&ifthen);
3192 temp_res = LLVMBuildLoad(builder, result, "");
3193 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3194 LLVMBuildStore(builder, temp_res, result);
3195 lp_build_endif(&ifthen);
3196 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3197 NULL, LLVMIntUGE);
3198 emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, "");
3199 }
3200 }
3201 }
3202
3203 static void
3204 img_store_emit(
3205 const struct lp_build_tgsi_action * action,
3206 struct lp_build_tgsi_context * bld_base,
3207 struct lp_build_emit_data * emit_data)
3208 {
3209 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3210 struct lp_img_params params;
3211 LLVMValueRef coords[5];
3212 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3213 unsigned dims;
3214 unsigned target = emit_data->inst->Memory.Texture;
3215 unsigned layer_coord;
3216
3217 target_to_dims_layer(target, &dims, &layer_coord);
3218 for (unsigned i = 0; i < dims; i++) {
3219 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i);
3220 }
3221 for (unsigned i = dims; i < 5; i++) {
3222 coords[i] = coord_undef;
3223 }
3224 if (layer_coord)
3225 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
3226 memset(&params, 0, sizeof(params));
3227
3228 params.type = bld->bld_base.base.type;
3229 params.context_ptr = bld->context_ptr;
3230 params.thread_data_ptr = bld->thread_data_ptr;
3231 params.coords = coords;
3232 params.outdata = NULL;
3233 params.exec_mask = mask_vec(bld_base);
3234 params.target = tgsi_to_pipe_tex_target(target);
3235 params.image_index = emit_data->inst->Dst[0].Register.Index;
3236 params.img_op = LP_IMG_STORE;
3237 for (unsigned i = 0; i < 4; i++)
3238 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3239
3240 bld->image->emit_op(bld->image,
3241 bld->bld_base.base.gallivm,
3242 &params);
3243 }
3244
3245 static void
3246 store_emit(
3247 const struct lp_build_tgsi_action * action,
3248 struct lp_build_tgsi_context * bld_base,
3249 struct lp_build_emit_data * emit_data)
3250 {
3251 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3252 struct gallivm_state * gallivm = bld_base->base.gallivm;
3253 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3254 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3255 const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
3256 unsigned buf = bufreg->Register.Index;
3257 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3258 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3259
3260 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3261 img_store_emit(action, bld_base, emit_data);
3262 } else if (0) {
3263
3264 } else {
3265 LLVMValueRef index; /* index into the const buffer */
3266 LLVMValueRef scalar_ptr;
3267 LLVMValueRef value;
3268 unsigned chan_index;
3269
3270 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
3271 index = lp_build_shr_imm(uint_bld, index, 2);
3272
3273 scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3274
3275 LLVMValueRef ssbo_limit;
3276
3277 if (!is_shared) {
3278 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3279 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3280 }
3281
3282 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3283 LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3284
3285 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);
3286
3287 LLVMValueRef exec_mask = mask_vec(bld_base);
3288 if (!is_shared) {
3289 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3290 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3291 }
3292
3293 struct lp_build_loop_state loop_state;
3294 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3295
3296 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3297 loop_state.counter, "");
3298 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3299
3300 struct lp_build_if_state ifthen;
3301 LLVMValueRef cond;
3302
3303 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3304 loop_state.counter, "");
3305
3306 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3307 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3308 lp_build_if(&ifthen, gallivm, cond);
3309
3310 lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);
3311
3312 lp_build_endif(&ifthen);
3313 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3314 NULL, LLVMIntUGE);
3315 }
3316 }
3317 }
3318
3319 static void
3320 resq_emit(
3321 const struct lp_build_tgsi_action * action,
3322 struct lp_build_tgsi_context * bld_base,
3323 struct lp_build_emit_data * emit_data)
3324 {
3325 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3326 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3327 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3328
3329 unsigned buf = bufreg->Register.Index;
3330 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
3331
3332 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3333 unsigned target = emit_data->inst->Memory.Texture;
3334 struct lp_sampler_size_query_params params = { 0 };
3335 params.int_type = bld->bld_base.int_bld.type;
3336 params.texture_unit = buf;
3337 params.target = tgsi_to_pipe_tex_target(target);
3338 params.context_ptr = bld->context_ptr;
3339 params.sizes_out = emit_data->output;
3340
3341 bld->image->emit_size_query(bld->image,
3342 bld->bld_base.base.gallivm,
3343 &params);
3344 } else {
3345 LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
3346
3347 emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
3348 }
3349 }
3350
3351 static void
3352 img_atomic_emit(
3353 const struct lp_build_tgsi_action * action,
3354 struct lp_build_tgsi_context * bld_base,
3355 struct lp_build_emit_data * emit_data,
3356 LLVMAtomicRMWBinOp op)
3357 {
3358 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3359 struct lp_img_params params;
3360 LLVMValueRef coords[5];
3361 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3362 unsigned dims;
3363 unsigned layer_coord;
3364 unsigned target = emit_data->inst->Memory.Texture;
3365
3366 target_to_dims_layer(target, &dims, &layer_coord);
3367
3368 for (unsigned i = 0; i < dims; i++) {
3369 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3370 }
3371 for (unsigned i = dims; i < 5; i++) {
3372 coords[i] = coord_undef;
3373 }
3374 if (layer_coord)
3375 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3376 memset(&params, 0, sizeof(params));
3377
3378 params.type = bld->bld_base.base.type;
3379 params.context_ptr = bld->context_ptr;
3380 params.thread_data_ptr = bld->thread_data_ptr;
3381 params.exec_mask = mask_vec(bld_base);
3382 params.image_index = emit_data->inst->Src[0].Register.Index;
3383 params.coords = coords;
3384 params.target = tgsi_to_pipe_tex_target(target);
3385 params.op = op;
3386 params.outdata = emit_data->output;
3387 params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
3388
3389 for (unsigned i = 0; i < 4; i++)
3390 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
3391 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3392 for (unsigned i = 0; i < 4; i++)
3393 params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
3394 }
3395 bld->image->emit_op(bld->image,
3396 bld->bld_base.base.gallivm,
3397 &params);
3398 }
3399
3400 static void
3401 atomic_emit(
3402 const struct lp_build_tgsi_action * action,
3403 struct lp_build_tgsi_context * bld_base,
3404 struct lp_build_emit_data * emit_data)
3405 {
3406 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3407 struct gallivm_state * gallivm = bld_base->base.gallivm;
3408 LLVMBuilderRef builder = gallivm->builder;
3409 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3410 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3411
3412 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3413 unsigned buf = bufreg->Register.Index;
3414 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3415
3416 LLVMAtomicRMWBinOp op;
3417 switch (emit_data->inst->Instruction.Opcode) {
3418 case TGSI_OPCODE_ATOMUADD:
3419 op = LLVMAtomicRMWBinOpAdd;
3420 break;
3421 case TGSI_OPCODE_ATOMXCHG:
3422 op = LLVMAtomicRMWBinOpXchg;
3423 break;
3424 case TGSI_OPCODE_ATOMAND:
3425 op = LLVMAtomicRMWBinOpAnd;
3426 break;
3427 case TGSI_OPCODE_ATOMOR:
3428 op = LLVMAtomicRMWBinOpOr;
3429 break;
3430 case TGSI_OPCODE_ATOMXOR:
3431 op = LLVMAtomicRMWBinOpXor;
3432 break;
3433 case TGSI_OPCODE_ATOMUMIN:
3434 op = LLVMAtomicRMWBinOpUMin;
3435 break;
3436 case TGSI_OPCODE_ATOMUMAX:
3437 op = LLVMAtomicRMWBinOpUMax;
3438 break;
3439 case TGSI_OPCODE_ATOMIMIN:
3440 op = LLVMAtomicRMWBinOpMin;
3441 break;
3442 case TGSI_OPCODE_ATOMIMAX:
3443 op = LLVMAtomicRMWBinOpMax;
3444 break;
3445 case TGSI_OPCODE_ATOMCAS:
3446 break;
3447 default:
3448 assert(0);
3449 return;
3450 }
3451
3452 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3453 img_atomic_emit(action, bld_base, emit_data, op);
3454 } else if (0) {
3455 } else {
3456 LLVMValueRef index; /* index into the const buffer */
3457 LLVMValueRef scalar, scalar_ptr;
3458 LLVMValueRef value;
3459
3460 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3461 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);
3462
3463 index = lp_build_shr_imm(uint_bld, index, 2);
3464
3465 if (!is_shared) {
3466 index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
3467 scalar_ptr = bld->ssbos[buf];
3468 } else
3469 scalar_ptr = bld->shared_ptr;
3470
3471 LLVMValueRef atom_res = lp_build_alloca(gallivm,
3472 uint_bld->vec_type, "");
3473
3474 LLVMValueRef ssbo_limit;
3475 if (!is_shared) {
3476 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3477 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3478 }
3479
3480 LLVMValueRef exec_mask = mask_vec(bld_base);
3481
3482 if (!is_shared) {
3483 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
3484 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3485 }
3486
3487 struct lp_build_loop_state loop_state;
3488 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3489
3490 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3491 loop_state.counter, "");
3492 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3493
3494 index = LLVMBuildExtractElement(gallivm->builder, index,
3495 loop_state.counter, "");
3496
3497 scalar_ptr = LLVMBuildGEP(builder, scalar_ptr,
3498 &index, 1, "");
3499
3500 struct lp_build_if_state ifthen;
3501 LLVMValueRef cond, temp_res;
3502
3503 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3504 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3505 lp_build_if(&ifthen, gallivm, cond);
3506
3507 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3508 LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
3509 LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
3510 loop_state.counter, "");
3511 cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
3512 scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
3513 cas_src_ptr,
3514 LLVMAtomicOrderingSequentiallyConsistent,
3515 LLVMAtomicOrderingSequentiallyConsistent,
3516 false);
3517 scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
3518 } else {
3519 scalar = LLVMBuildAtomicRMW(builder, op,
3520 scalar_ptr, value_ptr,
3521 LLVMAtomicOrderingSequentiallyConsistent,
3522 false);
3523 }
3524 temp_res = LLVMBuildLoad(builder, atom_res, "");
3525 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3526 LLVMBuildStore(builder, temp_res, atom_res);
3527 lp_build_else(&ifthen);
3528 temp_res = LLVMBuildLoad(builder, atom_res, "");
3529 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3530 LLVMBuildStore(builder, temp_res, atom_res);
3531 lp_build_endif(&ifthen);
3532
3533 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3534 NULL, LLVMIntUGE);
3535 emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, "");
3536 }
3537 }
3538
3539 static void
3540 barrier_emit(
3541 const struct lp_build_tgsi_action * action,
3542 struct lp_build_tgsi_context * bld_base,
3543 struct lp_build_emit_data * emit_data)
3544 {
3545 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3546 struct gallivm_state * gallivm = bld_base->base.gallivm;
3547
3548 LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
3549
3550 lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
3551 LLVMPositionBuilderAtEnd(gallivm->builder, resume);
3552 }
3553
3554 static void
3555 membar_emit(
3556 const struct lp_build_tgsi_action * action,
3557 struct lp_build_tgsi_context * bld_base,
3558 struct lp_build_emit_data * emit_data)
3559 {
3560 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3561 LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, false, "");
3562 }
3563
3564 static void
3565 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3566 LLVMValueRef ptr,
3567 LLVMValueRef mask)
3568 {
3569 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3570 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3571
3572 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3573
3574 LLVMBuildStore(builder, current_vec, ptr);
3575 }
3576
3577 static void
3578 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3579 LLVMValueRef ptr,
3580 LLVMValueRef mask)
3581 {
3582 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3583 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3584
3585 current_vec = lp_build_select(&bld_base->uint_bld,
3586 mask,
3587 bld_base->uint_bld.zero,
3588 current_vec);
3589
3590 LLVMBuildStore(builder, current_vec, ptr);
3591 }
3592
3593 static LLVMValueRef
3594 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3595 LLVMValueRef current_mask_vec,
3596 LLVMValueRef total_emitted_vertices_vec)
3597 {
3598 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3599 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3600 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3601 total_emitted_vertices_vec,
3602 bld->max_output_vertices_vec);
3603
3604 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3605 }
3606
3607 static void
3608 emit_vertex(
3609 const struct lp_build_tgsi_action * action,
3610 struct lp_build_tgsi_context * bld_base,
3611 struct lp_build_emit_data * emit_data)
3612 {
3613 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3614 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3615
3616 if (bld->gs_iface->emit_vertex) {
3617 uint32_t imms_idx = emit_data->inst->Src[0].Register.SwizzleX;
3618 LLVMValueRef stream_id = bld->immediates[0][imms_idx];
3619 LLVMValueRef mask = mask_vec(bld_base);
3620 LLVMValueRef total_emitted_vertices_vec =
3621 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3622 mask = clamp_mask_to_max_output_vertices(bld, mask,
3623 total_emitted_vertices_vec);
3624 gather_outputs(bld);
3625 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
3626 bld->outputs,
3627 total_emitted_vertices_vec,
3628 stream_id);
3629 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3630 mask);
3631 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3632 mask);
3633 #if DUMP_GS_EMITS
3634 lp_build_print_value(bld->bld_base.base.gallivm,
3635 " +++ emit vertex masked ones = ",
3636 mask);
3637 lp_build_print_value(bld->bld_base.base.gallivm,
3638 " +++ emit vertex emitted = ",
3639 total_emitted_vertices_vec);
3640 #endif
3641 }
3642 }
3643
3644
3645 static void
3646 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
3647 LLVMValueRef mask)
3648 {
3649 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3650 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3651
3652 if (bld->gs_iface->end_primitive) {
3653 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3654 LLVMValueRef emitted_vertices_vec =
3655 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
3656 LLVMValueRef emitted_prims_vec =
3657 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3658 LLVMValueRef total_emitted_vertices_vec =
3659 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3660 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3661 emitted_vertices_vec,
3662 uint_bld->zero);
3663 /* We need to combine the current execution mask with the mask
3664 telling us which, if any, execution slots actually have
3665 unemitted primitives, this way we make sure that end_primitives
3666 executes only on the paths that have unflushed vertices */
3667 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
3668
3669 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
3670 total_emitted_vertices_vec,
3671 emitted_vertices_vec,
3672 emitted_prims_vec,
3673 mask_vec(bld_base));
3674
3675 #if DUMP_GS_EMITS
3676 lp_build_print_value(bld->bld_base.base.gallivm,
3677 " +++ end prim masked ones = ",
3678 mask);
3679 lp_build_print_value(bld->bld_base.base.gallivm,
3680 " +++ end prim emitted verts1 = ",
3681 emitted_vertices_vec);
3682 lp_build_print_value(bld->bld_base.base.gallivm,
3683 " +++ end prim emitted prims1 = ",
3684 LLVMBuildLoad(builder,
3685 bld->emitted_prims_vec_ptr, ""));
3686 #endif
3687 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
3688 mask);
3689 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
3690 mask);
3691 #if DUMP_GS_EMITS
3692 lp_build_print_value(bld->bld_base.base.gallivm,
3693 " +++ end prim emitted verts2 = ",
3694 LLVMBuildLoad(builder,
3695 bld->emitted_vertices_vec_ptr, ""));
3696 #endif
3697 }
3698
3699 }
3700
3701 static void
3702 end_primitive(
3703 const struct lp_build_tgsi_action * action,
3704 struct lp_build_tgsi_context * bld_base,
3705 struct lp_build_emit_data * emit_data)
3706 {
3707 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3708
3709 if (bld->gs_iface->end_primitive) {
3710 LLVMValueRef mask = mask_vec(bld_base);
3711 end_primitive_masked(bld_base, mask);
3712 }
3713 }
3714
3715 static void
3716 cal_emit(
3717 const struct lp_build_tgsi_action * action,
3718 struct lp_build_tgsi_context * bld_base,
3719 struct lp_build_emit_data * emit_data)
3720 {
3721 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3722
3723 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
3724 &bld_base->pc);
3725 }
3726
3727 static void
3728 ret_emit(
3729 const struct lp_build_tgsi_action * action,
3730 struct lp_build_tgsi_context * bld_base,
3731 struct lp_build_emit_data * emit_data)
3732 {
3733 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3734
3735 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
3736 }
3737
3738 static void
3739 brk_emit(
3740 const struct lp_build_tgsi_action * action,
3741 struct lp_build_tgsi_context * bld_base,
3742 struct lp_build_emit_data * emit_data)
3743 {
3744 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3745
3746 lp_exec_tgsi_break(&bld->exec_mask, bld_base);
3747 }
3748
3749 static void
3750 if_emit(
3751 const struct lp_build_tgsi_action * action,
3752 struct lp_build_tgsi_context * bld_base,
3753 struct lp_build_emit_data * emit_data)
3754 {
3755 LLVMValueRef tmp;
3756 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3757
3758 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
3759 emit_data->args[0], bld->bld_base.base.zero);
3760 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3761 }
3762
3763 static void
3764 uif_emit(
3765 const struct lp_build_tgsi_action * action,
3766 struct lp_build_tgsi_context * bld_base,
3767 struct lp_build_emit_data * emit_data)
3768 {
3769 LLVMValueRef tmp;
3770 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3771 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3772
3773 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3774 emit_data->args[0], uint_bld->zero);
3775 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3776 }
3777
3778 static void
3779 case_emit(
3780 const struct lp_build_tgsi_action * action,
3781 struct lp_build_tgsi_context * bld_base,
3782 struct lp_build_emit_data * emit_data)
3783 {
3784 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3785
3786 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
3787 }
3788
3789 static void
3790 default_emit(
3791 const struct lp_build_tgsi_action * action,
3792 struct lp_build_tgsi_context * bld_base,
3793 struct lp_build_emit_data * emit_data)
3794 {
3795 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3796
3797 lp_exec_default(&bld->exec_mask, bld_base);
3798 }
3799
3800 static void
3801 switch_emit(
3802 const struct lp_build_tgsi_action * action,
3803 struct lp_build_tgsi_context * bld_base,
3804 struct lp_build_emit_data * emit_data)
3805 {
3806 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3807
3808 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
3809 }
3810
3811 static void
3812 endswitch_emit(
3813 const struct lp_build_tgsi_action * action,
3814 struct lp_build_tgsi_context * bld_base,
3815 struct lp_build_emit_data * emit_data)
3816 {
3817 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3818
3819 lp_exec_endswitch(&bld->exec_mask, bld_base);
3820 }
3821
3822 static void
3823 bgnloop_emit(
3824 const struct lp_build_tgsi_action * action,
3825 struct lp_build_tgsi_context * bld_base,
3826 struct lp_build_emit_data * emit_data)
3827 {
3828 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3829
3830 lp_exec_bgnloop(&bld->exec_mask, true);
3831 }
3832
3833 static void
3834 bgnsub_emit(
3835 const struct lp_build_tgsi_action * action,
3836 struct lp_build_tgsi_context * bld_base,
3837 struct lp_build_emit_data * emit_data)
3838 {
3839 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3840
3841 lp_exec_mask_bgnsub(&bld->exec_mask);
3842 }
3843
3844 static void
3845 else_emit(
3846 const struct lp_build_tgsi_action * action,
3847 struct lp_build_tgsi_context * bld_base,
3848 struct lp_build_emit_data * emit_data)
3849 {
3850 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3851
3852 lp_exec_mask_cond_invert(&bld->exec_mask);
3853 }
3854
3855 static void
3856 endif_emit(
3857 const struct lp_build_tgsi_action * action,
3858 struct lp_build_tgsi_context * bld_base,
3859 struct lp_build_emit_data * emit_data)
3860 {
3861 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3862
3863 lp_exec_mask_cond_pop(&bld->exec_mask);
3864 }
3865
3866 static void
3867 endloop_emit(
3868 const struct lp_build_tgsi_action * action,
3869 struct lp_build_tgsi_context * bld_base,
3870 struct lp_build_emit_data * emit_data)
3871 {
3872 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3873
3874 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
3875 }
3876
3877 static void
3878 endsub_emit(
3879 const struct lp_build_tgsi_action * action,
3880 struct lp_build_tgsi_context * bld_base,
3881 struct lp_build_emit_data * emit_data)
3882 {
3883 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3884
3885 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
3886 }
3887
3888 static void
3889 cont_emit(
3890 const struct lp_build_tgsi_action * action,
3891 struct lp_build_tgsi_context * bld_base,
3892 struct lp_build_emit_data * emit_data)
3893 {
3894 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3895
3896 lp_exec_continue(&bld->exec_mask);
3897 }
3898
3899 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
3900 {
3901 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3902 struct gallivm_state * gallivm = bld_base->base.gallivm;
3903
3904 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
3905 unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
3906 bld->temps_array = lp_build_alloca_undef(gallivm,
3907 LLVMArrayType(bld_base->base.vec_type, array_size),
3908 "temp_array");
3909 }
3910
3911 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
3912 LLVMValueRef array_size =
3913 lp_build_const_int32(gallivm,
3914 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
3915 bld->outputs_array = lp_build_array_alloca(gallivm,
3916 bld_base->base.vec_type, array_size,
3917 "output_array");
3918 }
3919
3920 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3921 unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
3922 bld->imms_array = lp_build_alloca_undef(gallivm,
3923 LLVMArrayType(bld_base->base.vec_type, array_size),
3924 "imms_array");
3925 }
3926
3927 /* If we have indirect addressing in inputs we need to copy them into
3928 * our alloca array to be able to iterate over them */
3929 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
3930 unsigned index, chan;
3931 LLVMTypeRef vec_type = bld_base->base.vec_type;
3932 LLVMValueRef array_size = lp_build_const_int32(gallivm,
3933 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
3934 bld->inputs_array = lp_build_array_alloca(gallivm,
3935 vec_type, array_size,
3936 "input_array");
3937
3938 assert(bld_base->info->num_inputs
3939 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
3940
3941 for (index = 0; index < bld_base->info->num_inputs; ++index) {
3942 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
3943 LLVMValueRef lindex =
3944 lp_build_const_int32(gallivm, index * 4 + chan);
3945 LLVMValueRef input_ptr =
3946 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
3947 &lindex, 1, "");
3948 LLVMValueRef value = bld->inputs[index][chan];
3949 if (value)
3950 LLVMBuildStore(gallivm->builder, value, input_ptr);
3951 }
3952 }
3953 }
3954
3955 if (bld->gs_iface) {
3956 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
3957 bld->emitted_prims_vec_ptr =
3958 lp_build_alloca(gallivm,
3959 uint_bld->vec_type,
3960 "emitted_prims_ptr");
3961 bld->emitted_vertices_vec_ptr =
3962 lp_build_alloca(gallivm,
3963 uint_bld->vec_type,
3964 "emitted_vertices_ptr");
3965 bld->total_emitted_vertices_vec_ptr =
3966 lp_build_alloca(gallivm,
3967 uint_bld->vec_type,
3968 "total_emitted_vertices_ptr");
3969
3970 LLVMBuildStore(gallivm->builder, uint_bld->zero,
3971 bld->emitted_prims_vec_ptr);
3972 LLVMBuildStore(gallivm->builder, uint_bld->zero,
3973 bld->emitted_vertices_vec_ptr);
3974 LLVMBuildStore(gallivm->builder, uint_bld->zero,
3975 bld->total_emitted_vertices_vec_ptr);
3976 }
3977
3978 if (DEBUG_EXECUTION) {
3979 lp_build_printf(gallivm, "\n");
3980 emit_dump_file(bld, TGSI_FILE_CONSTANT);
3981 if (!bld->gs_iface)
3982 emit_dump_file(bld, TGSI_FILE_INPUT);
3983 }
3984 }
3985
3986 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
3987 {
3988 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3989 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3990
3991 if (DEBUG_EXECUTION) {
3992 /* for debugging */
3993 if (0) {
3994 emit_dump_file(bld, TGSI_FILE_TEMPORARY);
3995 }
3996 emit_dump_file(bld, TGSI_FILE_OUTPUT);
3997 lp_build_printf(bld_base->base.gallivm, "\n");
3998 }
3999
4000 /* If we have indirect addressing in outputs we need to copy our alloca array
4001 * to the outputs slots specified by the caller */
4002 if (bld->gs_iface) {
4003 LLVMValueRef total_emitted_vertices_vec;
4004 LLVMValueRef emitted_prims_vec;
4005 /* implicit end_primitives, needed in case there are any unflushed
4006 vertices in the cache. Note must not call end_primitive here
4007 since the exec_mask is not valid at this point. */
4008 end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
4009
4010 total_emitted_vertices_vec =
4011 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
4012 emitted_prims_vec =
4013 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
4014
4015 bld->gs_iface->gs_epilogue(bld->gs_iface,
4016 total_emitted_vertices_vec,
4017 emitted_prims_vec);
4018 } else {
4019 gather_outputs(bld);
4020 }
4021 }
4022
4023 void
4024 lp_build_tgsi_soa(struct gallivm_state *gallivm,
4025 const struct tgsi_token *tokens,
4026 const struct lp_build_tgsi_params *params,
4027 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
4028 {
4029 struct lp_build_tgsi_soa_context bld;
4030 struct lp_type type = params->type;
4031 struct lp_type res_type;
4032
4033 assert(type.length <= LP_MAX_VECTOR_LENGTH);
4034 memset(&res_type, 0, sizeof res_type);
4035 res_type.width = type.width;
4036 res_type.length = type.length;
4037 res_type.sign = 1;
4038
4039 /* Setup build context */
4040 memset(&bld, 0, sizeof bld);
4041 lp_build_context_init(&bld.bld_base.base, gallivm, type);
4042 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
4043 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
4044 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
4045 {
4046 struct lp_type dbl_type;
4047 dbl_type = type;
4048 dbl_type.width *= 2;
4049 lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
4050 }
4051 {
4052 struct lp_type uint64_type;
4053 uint64_type = lp_uint_type(type);
4054 uint64_type.width *= 2;
4055 lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
4056 }
4057 {
4058 struct lp_type int64_type;
4059 int64_type = lp_int_type(type);
4060 int64_type.width *= 2;
4061 lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
4062 }
4063 bld.mask = params->mask;
4064 bld.inputs = params->inputs;
4065 bld.outputs = outputs;
4066 bld.consts_ptr = params->consts_ptr;
4067 bld.const_sizes_ptr = params->const_sizes_ptr;
4068 bld.ssbo_ptr = params->ssbo_ptr;
4069 bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
4070 bld.sampler = params->sampler;
4071 bld.bld_base.info = params->info;
4072 bld.indirect_files = params->info->indirect_files;
4073 bld.context_ptr = params->context_ptr;
4074 bld.thread_data_ptr = params->thread_data_ptr;
4075 bld.image = params->image;
4076 bld.shared_ptr = params->shared_ptr;
4077 bld.coro = params->coro;
4078
4079 /*
4080 * If the number of temporaries is rather large then we just
4081 * allocate them as an array right from the start and treat
4082 * like indirect temporaries.
4083 */
4084 if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
4085 bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
4086 }
4087 /*
4088 * For performance reason immediates are always backed in a static
4089 * array, but if their number is too great, we have to use just
4090 * a dynamically allocated array.
4091 */
4092 bld.use_immediates_array =
4093 (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
4094 if (bld.use_immediates_array) {
4095 bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
4096 }
4097
4098
4099 bld.bld_base.soa = TRUE;
4100 bld.bld_base.emit_debug = emit_debug;
4101 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
4102 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
4103 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
4104 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
4105 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
4106 bld.bld_base.emit_store = emit_store;
4107
4108 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
4109 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
4110
4111 bld.bld_base.emit_prologue = emit_prologue;
4112 bld.bld_base.emit_epilogue = emit_epilogue;
4113
4114 /* Set opcode actions */
4115 lp_set_default_actions_cpu(&bld.bld_base);
4116
4117 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
4118 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
4119 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
4120 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
4121 bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
4122 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
4123 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
4124 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
4125 bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
4126 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
4127 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
4128 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
4129 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
4130 bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
4131 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
4132 bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
4133 bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
4134 bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
4135 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
4136 bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
4137 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
4138 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
4139 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
4140 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
4141 bld.bld_base.op_actions[TGSI_OPCODE_TEX_LZ].emit = txl_emit;
4142 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
4143 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
4144 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
4145 bld.bld_base.op_actions[TGSI_OPCODE_TXF_LZ].emit = txf_emit;
4146 bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
4147 bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
4148 bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
4149 bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
4150 bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
4151 /* DX10 sampling ops */
4152 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
4153 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
4154 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
4155 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
4156 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
4157 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
4158 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
4159 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
4160 bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
4161 bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
4162 bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
4163
4164 bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
4165 bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
4166 bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
4167
4168 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
4169 bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
4170 bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
4171 bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
4172 bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
4173 bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
4174 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
4175 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
4176 bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
4177 bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;
4178
4179 bld.bld_base.op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
4180 bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit;
4181
4182 if (params->gs_iface) {
4183 /* There's no specific value for this because it should always
4184 * be set, but apps using ext_geometry_shader4 quite often
4185 * were forgetting so we're using MAX_VERTEX_VARYING from
4186 * that spec even though we could debug_assert if it's not
4187 * set, but that's a lot uglier. */
4188 uint max_output_vertices;
4189
4190 /* inputs are always indirect with gs */
4191 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4192 bld.gs_iface = params->gs_iface;
4193 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
4194 bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
4195 bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
4196
4197 max_output_vertices =
4198 params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
4199 if (!max_output_vertices)
4200 max_output_vertices = 32;
4201
4202 bld.max_output_vertices_vec =
4203 lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
4204 max_output_vertices);
4205 }
4206
4207 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
4208
4209 bld.system_values = *params->system_values;
4210
4211 lp_build_tgsi_llvm(&bld.bld_base, tokens);
4212
4213 if (0) {
4214 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
4215 LLVMValueRef function = LLVMGetBasicBlockParent(block);
4216 debug_printf("11111111111111111111111111111 \n");
4217 tgsi_dump(tokens, 0);
4218 lp_debug_dump_value(function);
4219 debug_printf("2222222222222222222222222222 \n");
4220 }
4221
4222 if (0) {
4223 LLVMModuleRef module = LLVMGetGlobalParent(
4224 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
4225 LLVMDumpModule(module);
4226
4227 }
4228 lp_exec_mask_fini(&bld.exec_mask);
4229 }