gallivm/nir: support passing image index into image code.
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "util/u_prim.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_info.h"
48 #include "tgsi/tgsi_parse.h"
49 #include "tgsi/tgsi_util.h"
50 #include "tgsi/tgsi_scan.h"
51 #include "tgsi/tgsi_strings.h"
52 #include "lp_bld_tgsi_action.h"
53 #include "lp_bld_type.h"
54 #include "lp_bld_const.h"
55 #include "lp_bld_arit.h"
56 #include "lp_bld_bitarit.h"
57 #include "lp_bld_gather.h"
58 #include "lp_bld_init.h"
59 #include "lp_bld_logic.h"
60 #include "lp_bld_misc.h"
61 #include "lp_bld_swizzle.h"
62 #include "lp_bld_flow.h"
63 #include "lp_bld_coro.h"
64 #include "lp_bld_quad.h"
65 #include "lp_bld_tgsi.h"
66 #include "lp_bld_limits.h"
67 #include "lp_bld_debug.h"
68 #include "lp_bld_printf.h"
69 #include "lp_bld_sample.h"
70 #include "lp_bld_struct.h"
71
72 #define DUMP_GS_EMITS 0
73
74 /*
75 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
76 * instruction.
77 *
78 * TODO:
 79  * - take execution masks into consideration
80 * - debug control-flow instructions
81 */
82 #define DEBUG_EXECUTION 0
83
84
85 /*
86 * Emit code to print a register value.
87 */
88 static void
89 emit_dump_reg(struct gallivm_state *gallivm,
90 unsigned file,
91 unsigned index,
92 unsigned chan,
93 LLVMValueRef value)
94 {
95 char buf[32];
96
97 snprintf(buf, sizeof buf, " %s[%u].%c = ",
98 tgsi_file_name(file),
99 index, "xyzw"[chan]);
100
101 lp_build_print_value(gallivm, buf, value);
102 }
103
104 static inline struct function_ctx *
105 func_ctx(struct lp_exec_mask *mask)
106 {
107 assert(mask->function_stack_size > 0);
108 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
109 return &mask->function_stack[mask->function_stack_size - 1];
110 }
111
112 /*
 113  * Combine the execution mask, if there is one, with the current mask.
114 */
115 static LLVMValueRef
116 mask_vec(struct lp_build_tgsi_context *bld_base)
117 {
118 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
119 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
120 struct lp_exec_mask *exec_mask = &bld->exec_mask;
121 LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
122 if (!exec_mask->has_mask) {
123 return bld_mask;
124 }
125 if (!bld_mask)
126 return exec_mask->exec_mask;
127 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
128 exec_mask->exec_mask, "");
129 }
130
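/*
 * Handle a TGSI BRK. A break immediately preceding CASE or ENDSWITCH
 * ends the current case for all channels that reached it, so it can be
 * treated as an unconditional break by lp_exec_break().
 */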
131 static void lp_exec_tgsi_break(struct lp_exec_mask *mask,
132 struct lp_build_tgsi_context * bld_base)
133 {
134 enum tgsi_opcode opcode =
135 bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
136 bool break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
137 opcode == TGSI_OPCODE_CASE);
138 lp_exec_break(mask, &bld_base->pc, break_always);
139 }
140
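/*
 * Enter a SWITCH statement: save the current switch state on the
 * per-function stack and start over with an all-zero switch mask;
 * channels are re-enabled as matching cases are encountered.
 */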
141 static void lp_exec_switch(struct lp_exec_mask *mask,
142 LLVMValueRef switchval)
143 {
144 struct function_ctx *ctx = func_ctx(mask);
145
146 if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
147 ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
148 ctx->switch_stack_size++;
149 return;
150 }
151
152 ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
153 ctx->break_type;
154 ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
155
156 ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
157 ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
158 ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
159 ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
160 ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
161 ctx->switch_stack_size++;
162
163 mask->switch_mask = LLVMConstNull(mask->int_vec_type);
164 ctx->switch_val = switchval;
165 ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
166 ctx->switch_in_default = false;
167 ctx->switch_pc = 0;
168
169 lp_exec_mask_update(mask);
170 }
171
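/*
 * Leave a SWITCH statement: execute a deferred default block if there is
 * one, then restore the saved switch state and recompute the mask.
 */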
172 static void lp_exec_endswitch(struct lp_exec_mask *mask,
173 struct lp_build_tgsi_context * bld_base)
174 {
175 LLVMBuilderRef builder = mask->bld->gallivm->builder;
176 struct function_ctx *ctx = func_ctx(mask);
177
178 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
179 ctx->switch_stack_size--;
180 return;
181 }
182
 183    /* check if there's a deferred default; if so, do it now */
184 if (ctx->switch_pc && !ctx->switch_in_default) {
185 LLVMValueRef prevmask, defaultmask;
186 unsigned tmp_pc;
187 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
188 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
189 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
190 ctx->switch_in_default = true;
191
192 lp_exec_mask_update(mask);
193
194 assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
195 TGSI_OPCODE_DEFAULT);
196
197 tmp_pc = bld_base->pc;
198 bld_base->pc = ctx->switch_pc;
199 /*
200 * re-purpose switch_pc to point to here again, since we stop execution of
 201        * the deferred default after the next break.
202 */
203 ctx->switch_pc = tmp_pc - 1;
204
205 return;
206 }
207
208 else if (ctx->switch_pc && ctx->switch_in_default) {
209 assert(bld_base->pc == ctx->switch_pc + 1);
210 }
211
212 ctx->switch_stack_size--;
213 mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
214 ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
215 ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
216 ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
217 ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
218
219 ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
220
221 lp_exec_mask_update(mask);
222 }
223
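/*
 * Handle a CASE label: enable the channels whose switch value equals
 * caseval (plus any channels falling through from the previous case) and
 * accumulate them into the default mask.
 */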
224 static void lp_exec_case(struct lp_exec_mask *mask,
225 LLVMValueRef caseval)
226 {
227 LLVMBuilderRef builder = mask->bld->gallivm->builder;
228 struct function_ctx *ctx = func_ctx(mask);
229
230 LLVMValueRef casemask, prevmask;
231
232 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
233 return;
234 }
235
 236    /* Skipping case mask evaluation while in default is NOT optional (not in all cases anyway). */
237 if (!ctx->switch_in_default) {
238 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
239 casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
240 ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
241 ctx->switch_mask_default, "sw_default_mask");
242 casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
243 mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
244
245 lp_exec_mask_update(mask);
246 }
247 }
248
249 /*
 250  * Analyse the default statement in a switch.
 251  * \return true if default is the last statement, false otherwise
 252  * \param default_pc_start contains the pc of the instruction to jump to
 253  *                         if default wasn't last but there's no
 254  *                         fallthrough into default.
255 */
256 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
257 struct lp_build_tgsi_context * bld_base,
258 int *default_pc_start)
259 {
260 unsigned pc = bld_base->pc;
261 struct function_ctx *ctx = func_ctx(mask);
262 int curr_switch_stack = ctx->switch_stack_size;
263
264 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
265 return false;
266 }
267
268 /* skip over case statements which are together with default */
269 while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
270 pc++;
271 }
272
273 while (pc != ~0u && pc < bld_base->num_instructions) {
274 enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
275 switch (opcode) {
276 case TGSI_OPCODE_CASE:
277 if (curr_switch_stack == ctx->switch_stack_size) {
278 *default_pc_start = pc - 1;
279 return false;
280 }
281 break;
282 case TGSI_OPCODE_SWITCH:
283 curr_switch_stack++;
284 break;
285 case TGSI_OPCODE_ENDSWITCH:
286 if (curr_switch_stack == ctx->switch_stack_size) {
287 *default_pc_start = pc - 1;
288 return true;
289 }
290 curr_switch_stack--;
291 break;
292 default:
293 ; /* nothing */
294 }
295 pc++;
296 }
297 /* should never arrive here */
298 assert(0);
299 return true;
300 }
301
302 static void lp_exec_default(struct lp_exec_mask *mask,
303 struct lp_build_tgsi_context * bld_base)
304 {
305 LLVMBuilderRef builder = mask->bld->gallivm->builder;
306 struct function_ctx *ctx = func_ctx(mask);
307
308 int default_exec_pc;
309 boolean default_is_last;
310
311 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
312 return;
313 }
314
315 /*
 316     * This is a messy opcode, because it may not always be at the end and
317 * there can be fallthrough in and out of it.
318 */
319
320 default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
321 /*
 322     * If it is the last statement in the switch (note that case statements
 323     * appearing "at the same time" as default don't change that), everything
 324     * is just fine: update the switch mask and go on. This means we can
 325     * handle default with fallthrough INTO it without overhead, if it is last.
326 */
327 if (default_is_last) {
328 LLVMValueRef prevmask, defaultmask;
329 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
330 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
331 defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
332 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
333 ctx->switch_in_default = true;
334
335 lp_exec_mask_update(mask);
336 }
337 else {
338 /*
 339        * Technically, a "case" immediately before default isn't really a
 340        * fallthrough, however we still have to count it as such since we
 341        * have already updated the masks.
 342        * If that happens in practice, we could add a switch optimizer pass
 343        * which just gets rid of all case statements appearing together with
 344        * default (or do switch analysis at switch start time instead).
345 */
346 enum tgsi_opcode opcode =
347 bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
348 boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
349 opcode != TGSI_OPCODE_SWITCH);
350 /*
351 * If it is not last statement and there was no fallthrough into it,
352 * we record the PC and continue execution at next case (again, those
 353        * cases encountered at the same time don't count). At endswitch
354 * time, we update switchmask, and go back executing the code we skipped
355 * until the next break (possibly re-executing some code with changed mask
356 * if there was a fallthrough out of default).
357 * Finally, if it is not last statement and there was a fallthrough into it,
358 * do the same as with the former case, except instead of skipping the code
359 * just execute it without updating the mask, then go back and re-execute.
360 */
361 ctx->switch_pc = bld_base->pc;
362 if (!ft_into) {
363 bld_base->pc = default_exec_pc;
364 }
365 }
366 }
367
368
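/*
 * Perform a subroutine call: push the return pc and return mask onto the
 * function stack and jump to the callee by rewriting *pc.
 */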
369 static void lp_exec_mask_call(struct lp_exec_mask *mask,
370 int func,
371 int *pc)
372 {
373 if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
374 return;
375 }
376
377 lp_exec_mask_function_init(mask, mask->function_stack_size);
378 mask->function_stack[mask->function_stack_size].pc = *pc;
379 mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
380 mask->function_stack_size++;
381 *pc = func;
382 }
383
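/*
 * Perform a return: when returning from main with no control flow active
 * just stop execution, otherwise mask out the returning channels in the
 * return mask.
 */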
384 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
385 {
386 LLVMBuilderRef builder = mask->bld->gallivm->builder;
387 struct function_ctx *ctx = func_ctx(mask);
388 LLVMValueRef exec_mask;
389
390 if (ctx->cond_stack_size == 0 &&
391 ctx->loop_stack_size == 0 &&
392 ctx->switch_stack_size == 0 &&
393 mask->function_stack_size == 1) {
394 /* returning from main() */
395 *pc = -1;
396 return;
397 }
398
399 if (mask->function_stack_size == 1) {
400 /*
401 * This requires special handling since we need to ensure
402 * we don't drop the mask even if we have no call stack
 403        * (e.g. after a ret in an if clause after the endif)
404 */
405 mask->ret_in_main = TRUE;
406 }
407
408 exec_mask = LLVMBuildNot(builder,
409 mask->exec_mask,
410 "ret");
411
412 mask->ret_mask = LLVMBuildAnd(builder,
413 mask->ret_mask,
414 exec_mask, "ret_full");
415
416 lp_exec_mask_update(mask);
417 }
418
419 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
420 {
421 }
422
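/* Leave a subroutine: pop the function stack, restoring pc and return mask. */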
423 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
424 {
425 struct function_ctx *ctx;
426
427 assert(mask->function_stack_size > 1);
428 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
429
430 ctx = func_ctx(mask);
431 mask->function_stack_size--;
432
433 *pc = ctx->pc;
434 mask->ret_mask = ctx->ret_mask;
435
436 lp_exec_mask_update(mask);
437 }
438
439
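/**
 * Get a pointer to the given channel of a TEMP or OUTPUT register, either
 * from the per-register allocas or, when the file uses indirect
 * addressing, from the flat backing array.
 */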
440 static LLVMValueRef
441 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
442 unsigned file,
443 int index,
444 unsigned chan)
445 {
446 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
447 LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
448 LLVMValueRef var_of_array;
449
450 switch (file) {
451 case TGSI_FILE_TEMPORARY:
452 array_of_vars = bld->temps;
453 var_of_array = bld->temps_array;
454 break;
455 case TGSI_FILE_OUTPUT:
456 array_of_vars = bld->outputs;
457 var_of_array = bld->outputs_array;
458 break;
459 default:
460 assert(0);
461 return NULL;
462 }
463
464 assert(chan < 4);
465
466 if (bld->indirect_files & (1 << file)) {
467 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
468 if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
469 LLVMValueRef gep[2];
470 gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
471 gep[1] = lindex;
472 return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
473 } else {
474 return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
475 }
476 }
477 else {
478 assert(index <= bld->bld_base.info->file_max[file]);
479 return array_of_vars[index][chan];
480 }
481 }
482
483
484 /**
485 * Return pointer to a temporary register channel (src or dest).
486 * Note that indirect addressing cannot be handled here.
487 * \param index which temporary register
488 * \param chan which channel of the temp register.
489 */
490 LLVMValueRef
491 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
492 unsigned index,
493 unsigned chan)
494 {
495 return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
496 }
497
498 /**
 499  * Return pointer to an output register channel (src or dest).
500 * Note that indirect addressing cannot be handled here.
501 * \param index which output register
502 * \param chan which channel of the output register.
503 */
504 LLVMValueRef
505 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
506 unsigned index,
507 unsigned chan)
508 {
509 return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
510 }
511
512 /*
 513  * If we have indirect addressing in outputs, copy our alloca array
 514  * to the output slots specified by the caller to make sure
 515  * our outputs are delivered consistently via the same interface.
516 */
517 static void
518 gather_outputs(struct lp_build_tgsi_soa_context * bld)
519 {
520 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
521 unsigned index, chan;
522 assert(bld->bld_base.info->num_outputs <=
523 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
524 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
525 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
526 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
527 }
528 }
529 }
530 }
531
532 /**
533 * Gather vector.
534 * XXX the lp_build_gather() function should be capable of doing this
535 * with a little work.
536 */
537 static LLVMValueRef
538 build_gather(struct lp_build_tgsi_context *bld_base,
539 LLVMValueRef base_ptr,
540 LLVMValueRef indexes,
541 LLVMValueRef overflow_mask,
542 LLVMValueRef indexes2)
543 {
544 struct gallivm_state *gallivm = bld_base->base.gallivm;
545 LLVMBuilderRef builder = gallivm->builder;
546 struct lp_build_context *uint_bld = &bld_base->uint_bld;
547 struct lp_build_context *bld = &bld_base->base;
548 LLVMValueRef res;
549 unsigned i;
550
551 if (indexes2)
552 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
553 else
554 res = bld->undef;
555 /*
556 * overflow_mask is a vector telling us which channels
557 * in the vector overflowed. We use the overflow behavior for
558 * constant buffers which is defined as:
559 * Out of bounds access to constant buffer returns 0 in all
560 * components. Out of bounds behavior is always with respect
561 * to the size of the buffer bound at that slot.
562 */
563
564 if (overflow_mask) {
565 /*
566 * We avoid per-element control flow here (also due to llvm going crazy,
567 * though I suspect it's better anyway since overflow is likely rare).
568 * Note that since we still fetch from buffers even if num_elements was
569 * zero (in this case we'll fetch from index zero) the jit func callers
570 * MUST provide valid fake constant buffers of size 4x32 (the values do
571 * not matter), otherwise we'd still need (not per element though)
572 * control flow.
573 */
574 indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
575 if (indexes2)
576 indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
577 }
578
579 /*
580 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
581 */
582 for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
583 LLVMValueRef si, di;
584 LLVMValueRef index;
585 LLVMValueRef scalar_ptr, scalar;
586
587 di = lp_build_const_int32(bld->gallivm, i);
588 if (indexes2)
589 si = lp_build_const_int32(bld->gallivm, i >> 1);
590 else
591 si = di;
592
593 if (indexes2 && (i & 1)) {
594 index = LLVMBuildExtractElement(builder,
595 indexes2, si, "");
596 } else {
597 index = LLVMBuildExtractElement(builder,
598 indexes, si, "");
599 }
600 scalar_ptr = LLVMBuildGEP(builder, base_ptr,
601 &index, 1, "gather_ptr");
602 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
603
604 res = LLVMBuildInsertElement(builder, res, scalar, di, "");
605 }
606
607 if (overflow_mask) {
608 if (indexes2) {
609 res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
610 overflow_mask = LLVMBuildSExt(builder, overflow_mask,
611 bld_base->dbl_bld.int_vec_type, "");
612 res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
613 bld_base->dbl_bld.zero, res);
614 } else
615 res = lp_build_select(bld, overflow_mask, bld->zero, res);
616 }
617
618 return res;
619 }
620
621
622 /**
623 * Scatter/store vector.
624 */
625 static void
626 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
627 LLVMValueRef base_ptr,
628 LLVMValueRef indexes,
629 LLVMValueRef values,
630 struct lp_exec_mask *mask)
631 {
632 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
633 LLVMBuilderRef builder = gallivm->builder;
634 unsigned i;
635 LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
636
637 /*
638 * Loop over elements of index_vec, store scalar value.
639 */
640 for (i = 0; i < bld->bld_base.base.type.length; i++) {
641 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
642 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
643 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
644 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
645 LLVMValueRef scalar_pred = pred ?
646 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
647
648 if (0)
649 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
650 ii, val, index, scalar_ptr);
651
652 if (scalar_pred) {
653 LLVMValueRef real_val, dst_val;
654 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
655 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
656 LLVMBuildStore(builder, real_val, scalar_ptr);
657 }
658 else {
659 LLVMBuildStore(builder, val, scalar_ptr);
660 }
661 }
662 }
663
664
665 /**
666 * Read the current value of the ADDR register, convert the floats to
667 * ints, add the base index and return the vector of offsets.
668 * The offsets will be used to index into the constant buffer or
669 * temporary register file.
670 */
671 static LLVMValueRef
672 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
673 unsigned reg_file, unsigned reg_index,
674 const struct tgsi_ind_register *indirect_reg,
675 int index_limit)
676 {
677 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
678 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
 679    /* use the swizzled component of the address register */
680 unsigned swizzle = indirect_reg->Swizzle;
681 LLVMValueRef base;
682 LLVMValueRef rel;
683 LLVMValueRef max_index;
684 LLVMValueRef index;
685
686 assert(bld->indirect_files & (1 << reg_file));
687
688 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
689
690 assert(swizzle < 4);
691 switch (indirect_reg->File) {
692 case TGSI_FILE_ADDRESS:
693 rel = LLVMBuildLoad(builder,
694 bld->addr[indirect_reg->Index][swizzle],
695 "load addr reg");
696 /* ADDR LLVM values already have LLVM integer type. */
697 break;
698 case TGSI_FILE_TEMPORARY:
699 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
700 rel = LLVMBuildLoad(builder, rel, "load temp reg");
701 /* TEMP LLVM values always have LLVM float type, but for indirection, the
702 * value actually stored is expected to be an integer */
703 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
704 break;
705 default:
706 assert(0);
707 rel = uint_bld->zero;
708 }
709
710 index = lp_build_add(uint_bld, base, rel);
711
712 /*
713 * emit_fetch_constant handles constant buffer overflow so this code
714 * is pointless for them.
715 * Furthermore the D3D10 spec in section 6.5 says:
716 * If the constant buffer bound to a slot is larger than the size
717 * declared in the shader for that slot, implementations are allowed
718 * to return incorrect data (not necessarily 0) for indices that are
719 * larger than the declared size but smaller than the buffer size.
720 */
721 if (reg_file != TGSI_FILE_CONSTANT) {
722 assert(index_limit >= 0);
723 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
724 uint_bld->type, index_limit);
725
726 assert(!uint_bld->type.sign);
727 index = lp_build_min(uint_bld, index, max_index);
728 }
729
730 return index;
731 }
732
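/**
 * Return the build context matching a TGSI value type, used for fetching
 * values of that type.
 */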
733 static struct lp_build_context *
734 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
735 enum tgsi_opcode_type stype)
736 {
737 struct lp_build_context *bld_fetch;
738
739 switch (stype) {
740 case TGSI_TYPE_FLOAT:
741 case TGSI_TYPE_UNTYPED:
742 bld_fetch = &bld_base->base;
743 break;
744 case TGSI_TYPE_UNSIGNED:
745 bld_fetch = &bld_base->uint_bld;
746 break;
747 case TGSI_TYPE_SIGNED:
748 bld_fetch = &bld_base->int_bld;
749 break;
750 case TGSI_TYPE_DOUBLE:
751 bld_fetch = &bld_base->dbl_bld;
752 break;
753 case TGSI_TYPE_UNSIGNED64:
754 bld_fetch = &bld_base->uint64_bld;
755 break;
756 case TGSI_TYPE_SIGNED64:
757 bld_fetch = &bld_base->int64_bld;
758 break;
759 case TGSI_TYPE_VOID:
760 default:
761 assert(0);
762 bld_fetch = NULL;
763 break;
764 }
765 return bld_fetch;
766 }
767
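/**
 * Compute per-element offsets into an SoA register array laid out as
 * reg0.x[length], reg0.y[length], ..., i.e.
 * (reg * 4 + chan) * length, optionally plus a {0, 1, 2, ...} lane offset.
 */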
768 static LLVMValueRef
769 get_soa_array_offsets(struct lp_build_context *uint_bld,
770 LLVMValueRef indirect_index,
771 unsigned chan_index,
772 boolean need_perelement_offset)
773 {
774 struct gallivm_state *gallivm = uint_bld->gallivm;
775 LLVMValueRef chan_vec =
776 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
777 LLVMValueRef length_vec =
778 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
779 LLVMValueRef index_vec;
780
781 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
782 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
783 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
784 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
785
786 if (need_perelement_offset) {
787 LLVMValueRef pixel_offsets;
788 unsigned i;
789 /* build pixel offset vector: {0, 1, 2, 3, ...} */
790 pixel_offsets = uint_bld->undef;
791 for (i = 0; i < uint_bld->type.length; i++) {
792 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
793 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
794 ii, ii, "");
795 }
796 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
797 }
798 return index_vec;
799 }
800
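/**
 * Fetch a constant-buffer operand: a masked gather for indirect
 * addressing (out-of-bounds elements read as zero), otherwise a scalar
 * load broadcast to the whole vector.
 */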
801 static LLVMValueRef
802 emit_fetch_constant(
803 struct lp_build_tgsi_context * bld_base,
804 const struct tgsi_full_src_register * reg,
805 enum tgsi_opcode_type stype,
806 unsigned swizzle_in)
807 {
808 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
809 struct gallivm_state *gallivm = bld_base->base.gallivm;
810 LLVMBuilderRef builder = gallivm->builder;
811 struct lp_build_context *uint_bld = &bld_base->uint_bld;
812 unsigned dimension = 0;
813 LLVMValueRef consts_ptr;
814 LLVMValueRef num_consts;
815 LLVMValueRef res;
816 unsigned swizzle = swizzle_in & 0xffff;
817
818 /* XXX: Handle fetching xyzw components as a vector */
819 assert(swizzle != ~0u);
820
821 if (reg->Register.Dimension) {
822 assert(!reg->Dimension.Indirect);
823 dimension = reg->Dimension.Index;
824 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
825 }
826
827 consts_ptr = bld->consts[dimension];
828 num_consts = bld->consts_sizes[dimension];
829
830 if (reg->Register.Indirect) {
831 LLVMValueRef indirect_index;
832 LLVMValueRef swizzle_vec =
833 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
834 LLVMValueRef index_vec; /* index into the const buffer */
835 LLVMValueRef overflow_mask;
836 LLVMValueRef index_vec2 = NULL;
837
838 indirect_index = get_indirect_index(bld,
839 reg->Register.File,
840 reg->Register.Index,
841 &reg->Indirect,
842 bld->bld_base.info->file_max[reg->Register.File]);
843
844 /* All fetches are from the same constant buffer, so
845 * we need to propagate the size to a vector to do a
846 * vector comparison */
847 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
848 /* Construct a boolean vector telling us which channels
849 * overflow the bound constant buffer */
850 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
851 indirect_index, num_consts);
852
853 /* index_vec = indirect_index * 4 + swizzle */
854 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
855 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
856
857 if (tgsi_type_is_64bit(stype)) {
858 LLVMValueRef swizzle_vec2;
859 swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
860 index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
861 index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
862 }
863 /* Gather values from the constant buffer */
864 res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
865 }
866 else {
867 LLVMValueRef index; /* index into the const buffer */
868 LLVMValueRef scalar, scalar_ptr;
869 struct lp_build_context *bld_broad = &bld_base->base;
870 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
871
872 scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
873 &index, 1, "");
874
875 if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
876
877 LLVMValueRef scalar2, scalar2_ptr;
878 LLVMValueRef shuffles[2];
879 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));
880
881 scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
882 &index, 1, "");
883
884 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
885 scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
886 shuffles[0] = lp_build_const_int32(gallivm, 0);
887 shuffles[1] = lp_build_const_int32(gallivm, 1);
888
889 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
890 res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
891 res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
892 } else {
893 if (stype == TGSI_TYPE_DOUBLE) {
894 LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
895 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
896 bld_broad = &bld_base->dbl_bld;
897 } else if (stype == TGSI_TYPE_UNSIGNED64) {
898 LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
899 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
900 bld_broad = &bld_base->uint64_bld;
901 } else if (stype == TGSI_TYPE_SIGNED64) {
902 LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
903 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
904 bld_broad = &bld_base->int64_bld;
905 }
906 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
907 res = lp_build_broadcast_scalar(bld_broad, scalar);
908 }
909
910 }
911
912 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
913 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
914 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
915 }
916
917 return res;
918 }
919
920 /**
921 * Fetch 64-bit values from two separate channels.
922 * 64-bit values are stored split across two channels, like xy and zw.
923 * This function creates a set of vec_length*2 floats,
924 * extracts the values from the two channels,
925 * puts them in the correct place, then casts to vec_length 64-bits.
926 */
927 static LLVMValueRef
928 emit_fetch_64bit(
929 struct lp_build_tgsi_context * bld_base,
930 enum tgsi_opcode_type stype,
931 LLVMValueRef input,
932 LLVMValueRef input2)
933 {
934 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
935 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
936 LLVMBuilderRef builder = gallivm->builder;
937 LLVMValueRef res;
938 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
939 int i;
940 LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
941 int len = bld_base->base.type.length * 2;
942 assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
943
944 for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
945 shuffles[i] = lp_build_const_int32(gallivm, i / 2);
946 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
947 }
948 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
949
950 return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
951 }
952
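/**
 * Fetch an immediate operand, either from the immediates array (used for
 * indirect addressing) or straight from the per-immediate values.
 */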
953 static LLVMValueRef
954 emit_fetch_immediate(
955 struct lp_build_tgsi_context * bld_base,
956 const struct tgsi_full_src_register * reg,
957 enum tgsi_opcode_type stype,
958 unsigned swizzle_in)
959 {
960 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
961 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
962 LLVMBuilderRef builder = gallivm->builder;
963 LLVMValueRef res = NULL;
964 unsigned swizzle = swizzle_in & 0xffff;
965
966 if (bld->use_immediates_array || reg->Register.Indirect) {
967 LLVMValueRef imms_array;
968 LLVMTypeRef fptr_type;
969
970 /* cast imms_array pointer to float* */
971 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
972 imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
973
974 if (reg->Register.Indirect) {
975 LLVMValueRef indirect_index;
976 LLVMValueRef index_vec; /* index into the immediate register array */
977 LLVMValueRef index_vec2 = NULL;
978 indirect_index = get_indirect_index(bld,
979 reg->Register.File,
980 reg->Register.Index,
981 &reg->Indirect,
982 bld->bld_base.info->file_max[reg->Register.File]);
983 /*
984 * Unlike for other reg classes, adding pixel offsets is unnecessary -
985 * immediates are stored as full vectors (FIXME??? - might be better
986 * to store them the same as constants) but all elements are the same
987 * in any case.
988 */
989 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
990 indirect_index,
991 swizzle,
992 FALSE);
993 if (tgsi_type_is_64bit(stype))
994 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
995 indirect_index,
996 swizzle_in >> 16,
997 FALSE);
998 /* Gather values from the immediate register array */
999 res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
1000 } else {
1001 LLVMValueRef gep[2];
1002 gep[0] = lp_build_const_int32(gallivm, 0);
1003 gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1004 LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
1005 bld->imms_array, gep, 2, "");
1006 res = LLVMBuildLoad(builder, imms_ptr, "");
1007
1008 if (tgsi_type_is_64bit(stype)) {
1009 LLVMValueRef imms_ptr2;
1010 LLVMValueRef res2;
1011 gep[1] = lp_build_const_int32(gallivm,
1012 reg->Register.Index * 4 + (swizzle_in >> 16));
1013 imms_ptr2 = LLVMBuildGEP(builder,
1014 bld->imms_array, gep, 2, "");
1015 res2 = LLVMBuildLoad(builder, imms_ptr2, "");
1016 res = emit_fetch_64bit(bld_base, stype, res, res2);
1017 }
1018 }
1019 }
1020 else {
1021 res = bld->immediates[reg->Register.Index][swizzle];
1022 if (tgsi_type_is_64bit(stype))
1023 res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
1024 }
1025
1026 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1027 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1028 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1029 }
1030 return res;
1031 }
1032
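/**
 * Fetch an input operand, gathering from the inputs array for indirect
 * addressing, otherwise reading the interpolated input value directly.
 */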
1033 static LLVMValueRef
1034 emit_fetch_input(
1035 struct lp_build_tgsi_context * bld_base,
1036 const struct tgsi_full_src_register * reg,
1037 enum tgsi_opcode_type stype,
1038 unsigned swizzle_in)
1039 {
1040 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1041 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1042 LLVMBuilderRef builder = gallivm->builder;
1043 LLVMValueRef res;
1044 unsigned swizzle = swizzle_in & 0xffff;
1045
1046 if (reg->Register.Indirect) {
1047 LLVMValueRef indirect_index;
1048 LLVMValueRef index_vec; /* index into the input reg array */
1049 LLVMValueRef index_vec2 = NULL;
1050 LLVMValueRef inputs_array;
1051 LLVMTypeRef fptr_type;
1052
1053 indirect_index = get_indirect_index(bld,
1054 reg->Register.File,
1055 reg->Register.Index,
1056 &reg->Indirect,
1057 bld->bld_base.info->file_max[reg->Register.File]);
1058
1059 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1060 indirect_index,
1061 swizzle,
1062 TRUE);
1063 if (tgsi_type_is_64bit(stype)) {
1064 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1065 indirect_index,
1066 swizzle_in >> 16,
1067 TRUE);
1068 }
1069 /* cast inputs_array pointer to float* */
1070 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1071 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1072
1073 /* Gather values from the input register array */
1074 res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
1075 } else {
1076 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1077 LLVMValueRef lindex = lp_build_const_int32(gallivm,
1078 reg->Register.Index * 4 + swizzle);
1079 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1080 bld->inputs_array, &lindex, 1, "");
1081
1082 res = LLVMBuildLoad(builder, input_ptr, "");
1083 if (tgsi_type_is_64bit(stype)) {
1084 LLVMValueRef lindex1;
1085 LLVMValueRef input_ptr2;
1086 LLVMValueRef res2;
1087
1088 lindex1 = lp_build_const_int32(gallivm,
1089 reg->Register.Index * 4 + (swizzle_in >> 16));
1090 input_ptr2 = LLVMBuildGEP(builder,
1091 bld->inputs_array, &lindex1, 1, "");
1092 res2 = LLVMBuildLoad(builder, input_ptr2, "");
1093 res = emit_fetch_64bit(bld_base, stype, res, res2);
1094 }
1095 }
1096 else {
1097 res = bld->inputs[reg->Register.Index][swizzle];
1098 if (tgsi_type_is_64bit(stype))
1099 res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
1100 }
1101 }
1102
1103 assert(res);
1104
1105 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1106 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1107 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1108 }
1109
1110 return res;
1111 }
1112
1113
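/**
 * Fetch a geometry shader input through the gs_iface callback, handling
 * PRIMID as a system value and indirect vertex/attribute indices.
 */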
1114 static LLVMValueRef
1115 emit_fetch_gs_input(
1116 struct lp_build_tgsi_context * bld_base,
1117 const struct tgsi_full_src_register * reg,
1118 enum tgsi_opcode_type stype,
1119 unsigned swizzle_in)
1120 {
1121 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1122 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1123 const struct tgsi_shader_info *info = bld->bld_base.info;
1124 LLVMBuilderRef builder = gallivm->builder;
1125 LLVMValueRef attrib_index = NULL;
1126 LLVMValueRef vertex_index = NULL;
1127 unsigned swizzle = swizzle_in & 0xffff;
1128 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1129 LLVMValueRef res;
1130
1131 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
 1132       /* This is really a system value, not a regular input */
1133 assert(!reg->Register.Indirect);
1134 assert(!reg->Dimension.Indirect);
1135 res = bld->system_values.prim_id;
1136 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1137 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1138 }
1139 return res;
1140 }
1141
1142 if (reg->Register.Indirect) {
1143 /*
1144 * XXX: this is possibly not quite the right value, since file_max may be
1145 * larger than the max attrib index, due to it being the max of declared
1146 * inputs AND the max vertices per prim (which is 6 for tri adj).
1147 * It should however be safe to use (since we always allocate
1148 * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1149 */
1150 int index_limit = info->file_max[reg->Register.File];
1151 attrib_index = get_indirect_index(bld,
1152 reg->Register.File,
1153 reg->Register.Index,
1154 &reg->Indirect,
1155 index_limit);
1156 } else {
1157 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1158 }
1159
1160 if (reg->Dimension.Indirect) {
1161 /*
1162 * A fixed 6 should do as well (which is what we allocate).
1163 */
1164 int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
1165 vertex_index = get_indirect_index(bld,
1166 reg->Register.File,
1167 reg->Dimension.Index,
1168 &reg->DimIndirect,
1169 index_limit);
1170 } else {
1171 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1172 }
1173
1174 res = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1175 reg->Dimension.Indirect,
1176 vertex_index,
1177 reg->Register.Indirect,
1178 attrib_index,
1179 swizzle_index);
1180
1181 assert(res);
1182 if (tgsi_type_is_64bit(stype)) {
1183 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1184 LLVMValueRef res2;
1185 res2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1186 reg->Dimension.Indirect,
1187 vertex_index,
1188 reg->Register.Indirect,
1189 attrib_index,
1190 swizzle_index);
1191 assert(res2);
1192 res = emit_fetch_64bit(bld_base, stype, res, res2);
1193 } else if (stype == TGSI_TYPE_UNSIGNED) {
1194 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1195 } else if (stype == TGSI_TYPE_SIGNED) {
1196 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1197 }
1198
1199 return res;
1200 }
1201
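/**
 * Fetch a tessellation control shader input, or one of its own outputs,
 * through the tcs_iface callbacks.
 */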
1202 static LLVMValueRef
1203 emit_fetch_tcs_input(
1204 struct lp_build_tgsi_context * bld_base,
1205 const struct tgsi_full_src_register * reg,
1206 enum tgsi_opcode_type stype,
1207 unsigned swizzle_in)
1208 {
1209 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1210 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1211 const struct tgsi_shader_info *info = bld->bld_base.info;
1212 LLVMBuilderRef builder = gallivm->builder;
1213 LLVMValueRef attrib_index = NULL;
1214 LLVMValueRef vertex_index = NULL;
1215 unsigned swizzle = swizzle_in & 0xffff;
1216 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1217 LLVMValueRef res;
1218
1219 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
 1220       /* This is really a system value, not a regular input */
1221 assert(!reg->Register.Indirect);
1222 assert(!reg->Dimension.Indirect);
1223 res = bld->system_values.prim_id;
1224 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1225 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1226 }
1227 return res;
1228 }
1229
1230 if (reg->Register.Indirect) {
1231 int index_limit = info->file_max[reg->Register.File];
1232 attrib_index = get_indirect_index(bld,
1233 reg->Register.File,
1234 reg->Register.Index,
1235 &reg->Indirect,
1236 index_limit);
1237 } else {
1238 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1239 }
1240
1241 if (reg->Dimension.Indirect) {
1242 vertex_index = get_indirect_index(bld,
1243 reg->Register.File,
1244 reg->Dimension.Index,
1245 &reg->DimIndirect,
1246 PIPE_MAX_SHADER_INPUTS);
1247 } else {
1248 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1249 }
1250
1251 // TCS can read from its own outputs
1252 if (reg->Register.File == TGSI_FILE_OUTPUT) {
1253 res = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1254 reg->Dimension.Indirect,
1255 vertex_index,
1256 reg->Register.Indirect,
1257 attrib_index,
1258 swizzle_index,
1259 bld_base->info->output_semantic_name[reg->Register.Index]);
1260 } else {
1261 res = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
1262 reg->Dimension.Indirect,
1263 vertex_index,
1264 reg->Register.Indirect,
1265 attrib_index,
1266 swizzle_index);
1267 }
1268
1269
1270 assert(res);
1271 if (tgsi_type_is_64bit(stype)) {
1272 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1273 LLVMValueRef res2;
1274 if (reg->Register.File == TGSI_FILE_OUTPUT) {
1275 res2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1276 reg->Dimension.Indirect,
1277 vertex_index,
1278 reg->Register.Indirect,
1279 attrib_index,
1280 swizzle_index,
1281 bld_base->info->output_semantic_name[reg->Register.Index]);
1282 } else {
1283 res2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
1284 reg->Dimension.Indirect,
1285 vertex_index,
1286 reg->Register.Indirect,
1287 attrib_index,
1288 swizzle_index);
1289 }
1290 assert(res2);
1291 res = emit_fetch_64bit(bld_base, stype, res, res2);
1292 } else if (stype == TGSI_TYPE_UNSIGNED) {
1293 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1294 } else if (stype == TGSI_TYPE_SIGNED) {
1295 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1296 }
1297
1298 return res;
1299 }
1300
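/**
 * Fetch a tessellation evaluation shader input through the tes_iface
 * callbacks, distinguishing per-patch from per-vertex attributes.
 */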
1301 static LLVMValueRef
1302 emit_fetch_tes_input(
1303 struct lp_build_tgsi_context * bld_base,
1304 const struct tgsi_full_src_register * reg,
1305 enum tgsi_opcode_type stype,
1306 unsigned swizzle_in)
1307 {
1308 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1309 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1310 const struct tgsi_shader_info *info = bld->bld_base.info;
1311 LLVMBuilderRef builder = gallivm->builder;
1312 LLVMValueRef attrib_index = NULL;
1313 LLVMValueRef vertex_index = NULL;
1314 unsigned swizzle = swizzle_in & 0xffff;
1315 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1316 LLVMValueRef res;
1317
1318 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
 1319       /* This is really a system value, not a regular input */
1320 assert(!reg->Register.Indirect);
1321 assert(!reg->Dimension.Indirect);
1322 res = bld->system_values.prim_id;
1323 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1324 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1325 }
1326 return res;
1327 }
1328
1329 if (reg->Register.Indirect) {
1330 int index_limit = info->file_max[reg->Register.File];
1331 attrib_index = get_indirect_index(bld,
1332 reg->Register.File,
1333 reg->Register.Index,
1334 &reg->Indirect,
1335 index_limit);
1336 } else {
1337 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1338 }
1339
1340 if (reg->Dimension.Indirect) {
1341 vertex_index = get_indirect_index(bld,
1342 reg->Register.File,
1343 reg->Dimension.Index,
1344 &reg->DimIndirect,
1345 PIPE_MAX_SHADER_INPUTS);
1346 } else {
1347 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1348 }
1349
1350 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
1351 res = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1352 reg->Register.Indirect,
1353 attrib_index,
1354 swizzle_index);
1355 } else {
1356 res = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1357 reg->Dimension.Indirect,
1358 vertex_index,
1359 reg->Register.Indirect,
1360 attrib_index,
1361 swizzle_index);
1362 }
1363
1364 assert(res);
1365 if (tgsi_type_is_64bit(stype)) {
1366 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1367 LLVMValueRef res2;
1368 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
1369 res2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1370 reg->Register.Indirect,
1371 attrib_index,
1372 swizzle_index);
1373 }
1374 else {
1375 res2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1376 reg->Dimension.Indirect,
1377 vertex_index,
1378 reg->Register.Indirect,
1379 attrib_index,
1380 swizzle_index);
1381 }
1382 assert(res2);
1383 res = emit_fetch_64bit(bld_base, stype, res, res2);
1384 } else if (stype == TGSI_TYPE_UNSIGNED) {
1385 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1386 } else if (stype == TGSI_TYPE_SIGNED) {
1387 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1388 }
1389
1390 return res;
1391 }
1392
1393
1394
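/**
 * Fetch a temporary register operand, gathering from the temps array for
 * indirect addressing, otherwise loading from the register's alloca.
 */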
1395 static LLVMValueRef
1396 emit_fetch_temporary(
1397 struct lp_build_tgsi_context * bld_base,
1398 const struct tgsi_full_src_register * reg,
1399 enum tgsi_opcode_type stype,
1400 unsigned swizzle_in)
1401 {
1402 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1403 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1404 LLVMBuilderRef builder = gallivm->builder;
1405 LLVMValueRef res;
1406 unsigned swizzle = swizzle_in & 0xffff;
1407
1408 if (reg->Register.Indirect) {
1409 LLVMValueRef indirect_index;
1410 LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */
1411 LLVMValueRef temps_array;
1412 LLVMTypeRef fptr_type;
1413
1414 indirect_index = get_indirect_index(bld,
1415 reg->Register.File,
1416 reg->Register.Index,
1417 &reg->Indirect,
1418 bld->bld_base.info->file_max[reg->Register.File]);
1419
1420 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1421 indirect_index,
1422 swizzle,
1423 TRUE);
1424 if (tgsi_type_is_64bit(stype)) {
1425 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1426 indirect_index,
1427 swizzle_in >> 16,
1428 TRUE);
1429 }
1430
1431 /* cast temps_array pointer to float* */
1432 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1433 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1434
1435 /* Gather values from the temporary register array */
1436 res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
1437 }
1438 else {
1439 LLVMValueRef temp_ptr;
1440 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1441 res = LLVMBuildLoad(builder, temp_ptr, "");
1442
1443 if (tgsi_type_is_64bit(stype)) {
1444 LLVMValueRef temp_ptr2, res2;
1445
1446 temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
1447 res2 = LLVMBuildLoad(builder, temp_ptr2, "");
1448 res = emit_fetch_64bit(bld_base, stype, res, res2);
1449 }
1450 }
1451
1452 if (stype == TGSI_TYPE_SIGNED ||
1453 stype == TGSI_TYPE_UNSIGNED ||
1454 stype == TGSI_TYPE_DOUBLE ||
1455 stype == TGSI_TYPE_SIGNED64 ||
1456 stype == TGSI_TYPE_UNSIGNED64) {
1457 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1458 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1459 }
1460
1461 return res;
1462 }
1463
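/**
 * Fetch a system value (instance id, vertex id, thread id, ...),
 * broadcasting scalars to vectors and bitcasting when the requested type
 * differs from the value's actual type.
 */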
1464 static LLVMValueRef
1465 emit_fetch_system_value(
1466 struct lp_build_tgsi_context * bld_base,
1467 const struct tgsi_full_src_register * reg,
1468 enum tgsi_opcode_type stype,
1469 unsigned swizzle_in)
1470 {
1471 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1472 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1473 const struct tgsi_shader_info *info = bld->bld_base.info;
1474 LLVMBuilderRef builder = gallivm->builder;
1475 LLVMValueRef res;
1476 enum tgsi_opcode_type atype; // Actual type of the value
1477 unsigned swizzle = swizzle_in & 0xffff;
1478
1479 assert(!reg->Register.Indirect);
1480
1481 switch (info->system_value_semantic_name[reg->Register.Index]) {
1482 case TGSI_SEMANTIC_INSTANCEID:
1483 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1484 atype = TGSI_TYPE_UNSIGNED;
1485 break;
1486
1487 case TGSI_SEMANTIC_VERTEXID:
1488 res = bld->system_values.vertex_id;
1489 atype = TGSI_TYPE_UNSIGNED;
1490 break;
1491
1492 case TGSI_SEMANTIC_VERTEXID_NOBASE:
1493 res = bld->system_values.vertex_id_nobase;
1494 atype = TGSI_TYPE_UNSIGNED;
1495 break;
1496
1497 case TGSI_SEMANTIC_BASEVERTEX:
1498 res = bld->system_values.basevertex;
1499 atype = TGSI_TYPE_UNSIGNED;
1500 break;
1501
1502 case TGSI_SEMANTIC_BASEINSTANCE:
1503 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1504 atype = TGSI_TYPE_UNSIGNED;
1505 break;
1506
1507 case TGSI_SEMANTIC_PRIMID:
1508 res = bld->system_values.prim_id;
1509 atype = TGSI_TYPE_UNSIGNED;
1510 break;
1511
1512 case TGSI_SEMANTIC_INVOCATIONID:
1513 if (info->processor == PIPE_SHADER_TESS_CTRL)
1514 res = bld->system_values.invocation_id;
1515 else
1516 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1517 atype = TGSI_TYPE_UNSIGNED;
1518 break;
1519
1520 case TGSI_SEMANTIC_HELPER_INVOCATION:
1521 res = LLVMBuildNot(gallivm->builder, lp_build_mask_value(bld->mask), "");
1522 atype = TGSI_TYPE_UNSIGNED;
1523 break;
1524
1525 case TGSI_SEMANTIC_THREAD_ID:
1526 res = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, swizzle, "");
1527 atype = TGSI_TYPE_UNSIGNED;
1528 break;
1529
1530 case TGSI_SEMANTIC_BLOCK_ID:
1531 res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.block_id, lp_build_const_int32(gallivm, swizzle));
1532 atype = TGSI_TYPE_UNSIGNED;
1533 break;
1534
1535 case TGSI_SEMANTIC_GRID_SIZE:
1536 res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.grid_size, lp_build_const_int32(gallivm, swizzle));
1537 atype = TGSI_TYPE_UNSIGNED;
1538 break;
1539
1540 case TGSI_SEMANTIC_TESSCOORD:
1541 {
1542 LLVMValueRef index[] = { lp_build_const_int32(gallivm, 0), lp_build_const_int32(gallivm, swizzle_in) };
1543 LLVMValueRef array_indexed = LLVMBuildGEP(gallivm->builder, bld->system_values.tess_coord, index, 2, "tess_coord_array_indexed");
1544 res = LLVMBuildLoad(builder, array_indexed, "tess_coord");
1545 }
1546 atype = TGSI_TYPE_FLOAT;
1547 break;
1548
1549 case TGSI_SEMANTIC_FACE:
1550 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1551 atype = TGSI_TYPE_UNSIGNED;
1552 break;
1553
1554 case TGSI_SEMANTIC_DRAWID:
1555 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1556 atype = TGSI_TYPE_UNSIGNED;
1557 break;
1558
1559 case TGSI_SEMANTIC_TESSOUTER:
1560 res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
1561 bld->system_values.tess_outer,
1562 lp_build_const_int32(gallivm, swizzle_in));
1563 atype = TGSI_TYPE_FLOAT;
1564 break;
1565
1566 case TGSI_SEMANTIC_TESSINNER:
1567 res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
1568 bld->system_values.tess_inner,
1569 lp_build_const_int32(gallivm, swizzle_in));
1570 atype = TGSI_TYPE_FLOAT;
1571 break;
1572
1573 case TGSI_SEMANTIC_VERTICESIN:
1574 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.vertices_in);
1575 atype = TGSI_TYPE_UNSIGNED;
1576 break;
1577
1578 default:
1579 assert(!"unexpected semantic in emit_fetch_system_value");
1580 res = bld_base->base.zero;
1581 atype = TGSI_TYPE_FLOAT;
1582 break;
1583 }
1584
1585 if (atype != stype) {
1586 if (stype == TGSI_TYPE_FLOAT) {
1587 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1588 } else if (stype == TGSI_TYPE_UNSIGNED) {
1589 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1590 } else if (stype == TGSI_TYPE_SIGNED) {
1591 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1592 }
1593 }
1594
1595 return res;
1596 }
1597
1598 /**
1599 * Register fetch with derivatives.
1600 */
1601 static void
1602 emit_fetch_deriv(
1603 struct lp_build_tgsi_soa_context *bld,
1604 LLVMValueRef src,
1605 LLVMValueRef *res,
1606 LLVMValueRef *ddx,
1607 LLVMValueRef *ddy)
1608 {
1609 if (res)
1610 *res = src;
1611
1612 /* TODO: use interpolation coeffs for inputs */
1613
1614 if (ddx)
1615 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1616
1617 if (ddy)
1618 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1619 }
1620
1621 /**
 1622  * Store an array of vec_length 64-bit values into two arrays of vec_length floats,
 1623  * i.e.
 1624  * value is d0, d1, d2, d3 etc.
 1625  * each 64-bit value has high and low pieces x, y,
 1626  * so it gets stored into the separate channels as:
1627 * chan_ptr = d0.x, d1.x, d2.x, d3.x
1628 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1629 */
1630 static void
1631 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1632 LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1633 LLVMValueRef value)
1634 {
1635 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1636 struct gallivm_state *gallivm = bld_base->base.gallivm;
1637 LLVMBuilderRef builder = gallivm->builder;
1638 struct lp_build_context *float_bld = &bld_base->base;
1639 unsigned i;
1640 LLVMValueRef temp, temp2;
1641 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1642 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1643
1644 for (i = 0; i < bld_base->base.type.length; i++) {
1645 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1646 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1647 }
1648
1649 temp = LLVMBuildShuffleVector(builder, value,
1650 LLVMGetUndef(LLVMTypeOf(value)),
1651 LLVMConstVector(shuffles,
1652 bld_base->base.type.length),
1653 "");
1654 temp2 = LLVMBuildShuffleVector(builder, value,
1655 LLVMGetUndef(LLVMTypeOf(value)),
1656 LLVMConstVector(shuffles2,
1657 bld_base->base.type.length),
1658 "");
1659
1660 lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1661 lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1662 }
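/*
 * Worked example for the shuffles above: with a 4-wide float type, value
 * is an 8-element float vector
 *    [d0.x, d0.y, d1.x, d1.y, d2.x, d2.y, d3.x, d3.y]
 * and the even/odd index vectors {0,2,4,6} and {1,3,5,7} yield
 *    temp  = [d0.x, d1.x, d2.x, d3.x]   -> stored at chan_ptr
 *    temp2 = [d0.y, d1.y, d2.y, d3.y]   -> stored at chan_ptr2
 */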
1663
1664 static void
1665 emit_store_output(struct lp_build_tgsi_context *bld_base,
1666 enum tgsi_opcode_type dtype,
1667 const struct tgsi_full_dst_register *reg,
1668 unsigned index,
1669 unsigned chan_index,
1670 LLVMValueRef indirect_index,
1671 LLVMValueRef value)
1672 {
1673 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1674 struct gallivm_state *gallivm = bld_base->base.gallivm;
1675 LLVMBuilderRef builder = gallivm->builder;
1676 struct lp_build_context *float_bld = &bld_base->base;
1677
1678 /* Outputs are always stored as floats */
1679 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1680
1681 if (reg->Register.Indirect) {
1682 LLVMValueRef index_vec; /* indexes into the output registers */
1683 LLVMValueRef outputs_array;
1684 LLVMTypeRef fptr_type;
1685
1686 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1687 indirect_index,
1688 chan_index,
1689 TRUE);
1690
1691 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1692 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1693
1694 /* Scatter store values into output registers */
1695 emit_mask_scatter(bld, outputs_array, index_vec, value,
1696 &bld->exec_mask);
1697 }
1698 else {
1699 assert(LLVMTypeOf(value) == float_bld->vec_type);
1700 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1701 chan_index);
1702
1703 if (tgsi_type_is_64bit(dtype)) {
1704 LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1705 chan_index + 1);
1706 emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1707 value);
1708 } else
1709 lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1710 }
1711 }
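/*
 * Rough sketch of the indirect path above (assuming the flat reg-major
 * SoA layout of outputs_array): get_soa_array_offsets() computes one
 * index per lane, approximately
 *    index_vec[lane] = (indirect_index[lane] * 4 + chan_index) * length + lane
 * so emit_mask_scatter() can write each lane to its own float slot.
 */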
1712
1713 static void
1714 emit_store_tcs_output(struct lp_build_tgsi_context *bld_base,
1715 enum tgsi_opcode_type dtype,
1716 const struct tgsi_full_dst_register *reg,
1717 unsigned index,
1718 unsigned chan_index,
1719 LLVMValueRef indirect_index,
1720 LLVMValueRef value)
1721 {
1722 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1723 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1724 const struct tgsi_shader_info *info = bld->bld_base.info;
1725 LLVMValueRef attrib_index = NULL;
1726 LLVMValueRef vertex_index = NULL;
1727 LLVMValueRef channel_index = NULL;
1728
1729 if (reg->Register.Indirect) {
1730 /*
1731 * XXX: this is possibly not quite the right value, since file_max may be
1732 * larger than the max attrib index, due to it being the max of declared
1733 * inputs AND the max vertices per prim (which is 6 for tri adj).
1734 * It should however be safe to use (since we always allocate
1735 * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1736 */
1737 int index_limit = info->file_max[reg->Register.File];
1738 attrib_index = get_indirect_index(bld,
1739 reg->Register.File,
1740 reg->Register.Index,
1741 &reg->Indirect,
1742 index_limit);
1743 } else {
1744 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1745 }
1746
1747 if (reg->Dimension.Indirect) {
1748 vertex_index = get_indirect_index(bld,
1749 reg->Register.File,
1750 reg->Dimension.Index,
1751 &reg->DimIndirect,
1752 PIPE_MAX_SHADER_OUTPUTS);
1753 } else {
1754 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1755 }
1756
1757 channel_index = lp_build_const_int32(gallivm, chan_index);
1758
1759 assert(bld->tcs_iface->emit_store_output);
1760 bld->tcs_iface->emit_store_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1761 bld_base->info->output_semantic_name[reg->Register.Index],
1762 reg->Dimension.Indirect,
1763 vertex_index,
1764 reg->Register.Indirect,
1765 attrib_index,
1766 channel_index,
1767 value,
1768 mask_vec(bld_base));
1769 }
1770
1771 static void
1772 emit_store_temp(struct lp_build_tgsi_context *bld_base,
1773 enum tgsi_opcode_type dtype,
1774 const struct tgsi_full_dst_register *reg,
1775 unsigned index,
1776 unsigned chan_index,
1777 LLVMValueRef indirect_index,
1778 LLVMValueRef value)
1779 {
1780 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1781 struct gallivm_state *gallivm = bld_base->base.gallivm;
1782 LLVMBuilderRef builder = gallivm->builder;
1783 struct lp_build_context *float_bld = &bld_base->base;
1784
1785 /* Temporaries are always stored as floats */
1786 if (!tgsi_type_is_64bit(dtype))
1787 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1788 else
1789 value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1790
1791 if (reg->Register.Indirect) {
1792 LLVMValueRef index_vec; /* indexes into the temp registers */
1793 LLVMValueRef temps_array;
1794 LLVMTypeRef fptr_type;
1795
1796 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1797 indirect_index,
1798 chan_index,
1799 TRUE);
1800
1801 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1802 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1803
1804 /* Scatter store values into temp registers */
1805 emit_mask_scatter(bld, temps_array, index_vec, value,
1806 &bld->exec_mask);
1807 }
1808 else {
1809 LLVMValueRef temp_ptr;
1810 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1811
1812 if (tgsi_type_is_64bit(dtype)) {
1813 LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1814 reg->Register.Index,
1815 chan_index + 1);
1816 emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1817 value);
1818 }
1819 else
1820 lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1821 }
1822 }
1823
1824 static void
1825 emit_store_address(struct lp_build_tgsi_context *bld_base,
1826 enum tgsi_opcode_type dtype,
1827 const struct tgsi_full_dst_register *reg,
1828 unsigned index,
1829 unsigned chan_index,
1830 LLVMValueRef indirect_index,
1831 LLVMValueRef value)
1832 {
1833 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1834 struct gallivm_state *gallivm = bld_base->base.gallivm;
1835 LLVMBuilderRef builder = gallivm->builder;
1836 struct lp_build_context *int_bld = &bld_base->int_bld;
1837
1838 assert(dtype == TGSI_TYPE_SIGNED);
1839 assert(LLVMTypeOf(value) == int_bld->vec_type);
1840 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1841 lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1842 bld->addr[reg->Register.Index][chan_index]);
1843 }
1844
1845 /**
1846 * Register store.
1847 */
1848 static void
1849 emit_store_chan(
1850 struct lp_build_tgsi_context *bld_base,
1851 const struct tgsi_full_instruction *inst,
1852 unsigned index,
1853 unsigned chan_index,
1854 LLVMValueRef value)
1855 {
1856 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1857 struct gallivm_state *gallivm = bld_base->base.gallivm;
1858 LLVMBuilderRef builder = gallivm->builder;
1859 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1860 struct lp_build_context *float_bld = &bld_base->base;
1861 LLVMValueRef indirect_index = NULL;
1862 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1863
1864 /*
1865 * Apply saturation.
1866 *
1867    * The value is always assumed to be float.
1868 */
1869 if (inst->Instruction.Saturate) {
1870 assert(dtype == TGSI_TYPE_FLOAT ||
1871 dtype == TGSI_TYPE_UNTYPED);
1872 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1873 value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1874 }
1875
1876 if (reg->Register.Indirect) {
1877 /*
1878       * Currently mesa/st doesn't generate indirect stores
1879       * to 64-bit values; it normally uses MOV to do indirect stores.
1880 */
1881 assert(!tgsi_type_is_64bit(dtype));
1882 indirect_index = get_indirect_index(bld,
1883 reg->Register.File,
1884 reg->Register.Index,
1885 &reg->Indirect,
1886 bld->bld_base.info->file_max[reg->Register.File]);
1887 } else {
1888 assert(reg->Register.Index <=
1889 bld_base->info->file_max[reg->Register.File]);
1890 }
1891
1892 if (DEBUG_EXECUTION) {
1893 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1894 }
1895
1896 assert(bld_base->emit_store_reg_funcs[reg->Register.File]);
1897 bld_base->emit_store_reg_funcs[reg->Register.File](bld_base,
1898 dtype,
1899 reg,
1900 index,
1901 chan_index,
1902 indirect_index,
1903 value);
1904
1905 (void)dtype;
1906 }
1907
1908 /*
1909 * Called at the beginning of the translation of each TGSI instruction, to
1910 * emit some debug code.
1911 */
1912 static void
1913 emit_debug(
1914 struct lp_build_tgsi_context * bld_base,
1915 const struct tgsi_full_instruction * inst,
1916 const struct tgsi_opcode_info * info)
1917
1918 {
1919 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1920
1921 if (DEBUG_EXECUTION) {
1922 /*
1923 * Dump the TGSI instruction.
1924 */
1925
1926 struct gallivm_state *gallivm = bld_base->base.gallivm;
1927 char buf[512];
1928 buf[0] = '$';
1929 buf[1] = ' ';
1930 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1931 lp_build_printf(gallivm, buf);
1932
1933 /* Dump the execution mask.
1934 */
1935 if (bld->exec_mask.has_mask) {
1936 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
1937 }
1938 }
1939 }
1940
1941 static void
1942 emit_store(
1943 struct lp_build_tgsi_context * bld_base,
1944 const struct tgsi_full_instruction * inst,
1945 const struct tgsi_opcode_info * info,
1946 unsigned index,
1947 LLVMValueRef dst[4])
1948
1949 {
1950 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1951
1952 unsigned writemask = inst->Dst[index].Register.WriteMask;
1953 while (writemask) {
1954 unsigned chan_index = u_bit_scan(&writemask);
1955 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1956 continue;
1957 emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1958 }
1959 }
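/*
 * Example: for a 64-bit dtype each value occupies two channels, so with
 * writemask xyzw only chan 0 (the xy pair) and chan 2 (the zw pair) are
 * emitted; channels 1 and 3 are skipped because emit_store_chan stores
 * both halves of a pair at once.
 */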
1960
1961 static unsigned
1962 tgsi_to_pipe_tex_target(unsigned tgsi_target)
1963 {
1964 switch (tgsi_target) {
1965 case TGSI_TEXTURE_BUFFER:
1966 return PIPE_BUFFER;
1967 case TGSI_TEXTURE_1D:
1968 case TGSI_TEXTURE_SHADOW1D:
1969 return PIPE_TEXTURE_1D;
1970 case TGSI_TEXTURE_2D:
1971 case TGSI_TEXTURE_SHADOW2D:
1972 case TGSI_TEXTURE_2D_MSAA:
1973 return PIPE_TEXTURE_2D;
1974 case TGSI_TEXTURE_3D:
1975 return PIPE_TEXTURE_3D;
1976 case TGSI_TEXTURE_CUBE:
1977 case TGSI_TEXTURE_SHADOWCUBE:
1978 return PIPE_TEXTURE_CUBE;
1979 case TGSI_TEXTURE_RECT:
1980 case TGSI_TEXTURE_SHADOWRECT:
1981 return PIPE_TEXTURE_RECT;
1982 case TGSI_TEXTURE_1D_ARRAY:
1983 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1984 return PIPE_TEXTURE_1D_ARRAY;
1985 case TGSI_TEXTURE_2D_ARRAY:
1986 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1987 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1988 return PIPE_TEXTURE_2D_ARRAY;
1989 case TGSI_TEXTURE_CUBE_ARRAY:
1990 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1991 return PIPE_TEXTURE_CUBE_ARRAY;
1992 default:
1993 assert(0);
1994 return PIPE_BUFFER;
1995 }
1996 }
1997
1998
1999 static enum lp_sampler_lod_property
2000 lp_build_lod_property(
2001 struct lp_build_tgsi_context *bld_base,
2002 const struct tgsi_full_instruction *inst,
2003 unsigned src_op)
2004 {
2005 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2006 enum lp_sampler_lod_property lod_property;
2007
2008 /*
2009 * Not much we can do here. We could try catching inputs declared
2010    * with constant interpolation, but it's probably not worth it - since for
2011    * TEX opcodes as well as FETCH/LD the lod comes from the same reg as
2012    * the coords, it could only work for SAMPLE/TXQ/SVIEWINFO (just
2013    * like the constant/immediate recognition below).
2014    * What would be of more value is recognizing temps holding
2015    * broadcasted scalars, but there's no way we can do that.
2016    * Tried asking llvm but without any success (using LLVMIsConstant,
2017    * even though this isn't exactly what we'd need); even something as simple as
2018 * IMM[0] UINT32 (0,-1,0,0)
2019 * MOV TEMP[0] IMM[0].yyyy
2020 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2021 * doesn't work.
2022 * This means there's ZERO chance this will ever catch a scalar lod
2023 * with traditional tex opcodes as well as texel fetches, since the lod
2024 * comes from the same reg as coords (except some test shaders using
2025 * constant coords maybe).
2026 * There's at least hope for sample opcodes as well as size queries.
2027 */
2028 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ ||
2029 reg->Register.File == TGSI_FILE_CONSTANT ||
2030 reg->Register.File == TGSI_FILE_IMMEDIATE) {
2031 lod_property = LP_SAMPLER_LOD_SCALAR;
2032 }
2033 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2034 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2035 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2036 }
2037 else {
2038 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2039 }
2040 }
2041 else {
2042       /* never use scalar (per-quad) lod; the results are just too wrong. */
2043 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2044 }
2045 return lod_property;
2046 }
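/*
 * Summary of the returned lod properties (see lp_bld_sample.h):
 *    LP_SAMPLER_LOD_SCALAR      - one lod shared by the whole vector
 *    LP_SAMPLER_LOD_PER_QUAD    - lod may differ per 2x2 quad
 *    LP_SAMPLER_LOD_PER_ELEMENT - lod may differ per lane
 * Cheaper properties allow the sampling code to take faster paths.
 */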
2047
2048
2049 /**
2050 * High-level instruction translators.
2051 */
2052
2053 static void
2054 emit_tex( struct lp_build_tgsi_soa_context *bld,
2055 const struct tgsi_full_instruction *inst,
2056 enum lp_build_tex_modifier modifier,
2057 LLVMValueRef *texel,
2058 unsigned sampler_reg,
2059 enum lp_sampler_op_type sampler_op)
2060 {
2061 unsigned unit = inst->Src[sampler_reg].Register.Index;
2062 LLVMValueRef oow = NULL;
2063 LLVMValueRef lod = NULL;
2064 LLVMValueRef coords[5];
2065 LLVMValueRef offsets[3] = { NULL };
2066 struct lp_derivatives derivs;
2067 struct lp_sampler_params params;
2068 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2069 unsigned num_derivs, num_offsets, i;
2070 unsigned shadow_coord = 0;
2071 unsigned layer_coord = 0;
2072 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2073
2074 memset(&params, 0, sizeof(params));
2075
2076 if (!bld->sampler) {
2077 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2078 for (i = 0; i < 4; i++) {
2079 texel[i] = bld->bld_base.base.undef;
2080 }
2081 return;
2082 }
2083
2084 switch (inst->Texture.Texture) {
2085 case TGSI_TEXTURE_1D_ARRAY:
2086 layer_coord = 1;
2087 /* fallthrough */
2088 case TGSI_TEXTURE_1D:
2089 num_offsets = 1;
2090 num_derivs = 1;
2091 break;
2092 case TGSI_TEXTURE_2D_ARRAY:
2093 layer_coord = 2;
2094 /* fallthrough */
2095 case TGSI_TEXTURE_2D:
2096 case TGSI_TEXTURE_RECT:
2097 num_offsets = 2;
2098 num_derivs = 2;
2099 break;
2100 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2101 layer_coord = 1;
2102 /* fallthrough */
2103 case TGSI_TEXTURE_SHADOW1D:
2104 shadow_coord = 2;
2105 num_offsets = 1;
2106 num_derivs = 1;
2107 break;
2108 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2109 layer_coord = 2;
2110 shadow_coord = 3;
2111 num_offsets = 2;
2112 num_derivs = 2;
2113 break;
2114 case TGSI_TEXTURE_SHADOW2D:
2115 case TGSI_TEXTURE_SHADOWRECT:
2116 shadow_coord = 2;
2117 num_offsets = 2;
2118 num_derivs = 2;
2119 break;
2120 case TGSI_TEXTURE_CUBE:
2121 num_offsets = 2;
2122 num_derivs = 3;
2123 break;
2124 case TGSI_TEXTURE_3D:
2125 num_offsets = 3;
2126 num_derivs = 3;
2127 break;
2128 case TGSI_TEXTURE_SHADOWCUBE:
2129 shadow_coord = 3;
2130 num_offsets = 2;
2131 num_derivs = 3;
2132 break;
2133 case TGSI_TEXTURE_CUBE_ARRAY:
2134 num_offsets = 2;
2135 num_derivs = 3;
2136 layer_coord = 3;
2137 break;
2138 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2139 num_offsets = 2;
2140 num_derivs = 3;
2141 layer_coord = 3;
2142       shadow_coord = 4; /* shadow coord comes from a separate reg */
2143 break;
2144 case TGSI_TEXTURE_2D_MSAA:
2145 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2146 default:
2147 assert(0);
2148 return;
2149 }
2150
2151 /* Note lod and especially projected are illegal in a LOT of cases */
2152 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2153 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2154 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
2155 lod = bld->bld_base.base.zero;
2156 } else if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2157 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2158 /* note that shadow cube array with bias/explicit lod does not exist */
2159 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2160 }
2161 else {
2162 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2163 }
2164 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2165 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2166 }
2167 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2168 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2169 }
2170 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2171 }
2172
2173 if (sampler_op == LP_SAMPLER_OP_GATHER) {
2174 uint32_t comp_val = inst->Src[sampler_reg].Register.SwizzleX;
2175 sample_key |= (comp_val << LP_SAMPLER_GATHER_COMP_SHIFT);
2176 }
2177 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2178 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2179 oow = lp_build_rcp(&bld->bld_base.base, oow);
2180 }
2181
2182 for (i = 0; i < num_derivs; i++) {
2183 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2184 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2185 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2186 }
2187 for (i = num_derivs; i < 5; i++) {
2188 coords[i] = bld->bld_base.base.undef;
2189 }
2190
2191 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2192 if (layer_coord) {
2193 if (layer_coord == 3) {
2194 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2195 }
2196 else {
2197 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2198 }
2199 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2200 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2201 }
2202    /* Shadow coord always occupies the 5th slot. */
2203 if (shadow_coord) {
2204 sample_key |= LP_SAMPLER_SHADOW;
2205 if (shadow_coord == 4) {
2206 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2207 }
2208 else {
2209 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2210 }
2211 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2212 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2213 }
2214
2215 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2216 unsigned dim;
2217 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2218 for (dim = 0; dim < num_derivs; ++dim) {
2219 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2220 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2221 }
2222 params.derivs = &derivs;
2223 /*
2224       * could also check whether all src regs are constant, but I doubt
2225       * such cases exist in practice.
2226 */
2227 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2228 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2229 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2230 }
2231 else {
2232 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2233 }
2234 }
2235 else {
2236 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2237 }
2238 }
2239 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2240
2241 /* we don't handle the 4 offset version of tg4 */
2242 if (inst->Texture.NumOffsets == 1) {
2243 unsigned dim;
2244 sample_key |= LP_SAMPLER_OFFSETS;
2245 for (dim = 0; dim < num_offsets; dim++) {
2246 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2247 }
2248 }
2249
2250 params.type = bld->bld_base.base.type;
2251 params.sample_key = sample_key;
2252 params.texture_index = unit;
2253 params.sampler_index = unit;
2254 params.context_ptr = bld->context_ptr;
2255 params.thread_data_ptr = bld->thread_data_ptr;
2256 params.coords = coords;
2257 params.offsets = offsets;
2258 params.lod = lod;
2259 params.texel = texel;
2260
2261 bld->sampler->emit_tex_sample(bld->sampler,
2262 bld->bld_base.base.gallivm,
2263 &params);
2264 }
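/*
 * Illustrative note on sample_key: it is a small bitfield packing all the
 * static sampling parameters assembled above, roughly
 *
 *    sample_key = (op_type      << LP_SAMPLER_OP_TYPE_SHIFT)      |
 *                 (lod_control  << LP_SAMPLER_LOD_CONTROL_SHIFT)  |
 *                 (lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT) |
 *                 flag bits (LP_SAMPLER_SHADOW, LP_SAMPLER_OFFSETS, ...);
 *
 * see lp_bld_sample.h for the authoritative layout. The sampler code can
 * presumably specialize and reuse generated code per distinct key.
 */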
2265
2266 static void
2267 emit_sample(struct lp_build_tgsi_soa_context *bld,
2268 const struct tgsi_full_instruction *inst,
2269 enum lp_build_tex_modifier modifier,
2270 boolean compare,
2271 enum lp_sampler_op_type sample_type,
2272 LLVMValueRef *texel)
2273 {
2274 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2275 unsigned texture_unit, sampler_unit;
2276 LLVMValueRef lod = NULL;
2277 LLVMValueRef coords[5];
2278 LLVMValueRef offsets[3] = { NULL };
2279 struct lp_derivatives derivs;
2280 struct lp_sampler_params params;
2281 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2282
2283 unsigned num_offsets, num_derivs, i;
2284 unsigned layer_coord = 0;
2285 unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2286
2287 memset(&params, 0, sizeof(params));
2288
2289 if (!bld->sampler) {
2290 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2291 for (i = 0; i < 4; i++) {
2292 texel[i] = bld->bld_base.base.undef;
2293 }
2294 return;
2295 }
2296
2297 /*
2298    * Unlike old-style tex opcodes, the texture/sampler indices
2299    * always come from src1 and src2 respectively.
2300 */
2301 texture_unit = inst->Src[1].Register.Index;
2302 sampler_unit = inst->Src[2].Register.Index;
2303
2304 /*
2305    * Note inst->Texture.Texture will contain the number of offsets;
2306    * however, the target information is NOT there and comes from the
2307 * declared sampler views instead.
2308 */
2309 switch (bld->sv[texture_unit].Resource) {
2310 case TGSI_TEXTURE_1D:
2311 num_offsets = 1;
2312 num_derivs = 1;
2313 break;
2314 case TGSI_TEXTURE_1D_ARRAY:
2315 layer_coord = 1;
2316 num_offsets = 1;
2317 num_derivs = 1;
2318 break;
2319 case TGSI_TEXTURE_2D:
2320 case TGSI_TEXTURE_RECT:
2321 num_offsets = 2;
2322 num_derivs = 2;
2323 break;
2324 case TGSI_TEXTURE_2D_ARRAY:
2325 layer_coord = 2;
2326 num_offsets = 2;
2327 num_derivs = 2;
2328 break;
2329 case TGSI_TEXTURE_CUBE:
2330 num_offsets = 2;
2331 num_derivs = 3;
2332 break;
2333 case TGSI_TEXTURE_3D:
2334 num_offsets = 3;
2335 num_derivs = 3;
2336 break;
2337 case TGSI_TEXTURE_CUBE_ARRAY:
2338 layer_coord = 3;
2339 num_offsets = 2;
2340 num_derivs = 3;
2341 break;
2342 default:
2343 assert(0);
2344 return;
2345 }
2346
2347 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2348 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2349 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2350 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2351 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2352 }
2353 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2354 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2355 }
2356 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2357 }
2358 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2359 /* XXX might be better to explicitly pass the level zero information */
2360 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2361 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2362 }
2363
2364 for (i = 0; i < num_derivs; i++) {
2365 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2366 }
2367 for (i = num_derivs; i < 5; i++) {
2368 coords[i] = bld->bld_base.base.undef;
2369 }
2370
2371 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2372 if (layer_coord) {
2373 if (layer_coord == 3)
2374 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2375 else
2376 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2377 }
2378    /* Shadow coord always occupies the 5th slot. */
2379 if (compare) {
2380 sample_key |= LP_SAMPLER_SHADOW;
2381 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2382 }
2383
2384 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2385 unsigned dim;
2386 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2387 for (dim = 0; dim < num_derivs; ++dim) {
2388 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2389 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2390 }
2391 params.derivs = &derivs;
2392 /*
2393       * could also check whether all src regs are constant, but I doubt
2394       * such cases exist in practice.
2395 */
2396 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2397 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2398 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2399 }
2400 else {
2401 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2402 }
2403 }
2404 else {
2405 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2406 }
2407 }
2408
2409 /* some advanced gather instructions (txgo) would require 4 offsets */
2410 if (inst->Texture.NumOffsets == 1) {
2411 unsigned dim;
2412 sample_key |= LP_SAMPLER_OFFSETS;
2413 for (dim = 0; dim < num_offsets; dim++) {
2414 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2415 }
2416 }
2417 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2418
2419 params.type = bld->bld_base.base.type;
2420 params.sample_key = sample_key;
2421 params.texture_index = texture_unit;
2422 params.sampler_index = sampler_unit;
2423 params.context_ptr = bld->context_ptr;
2424 params.thread_data_ptr = bld->thread_data_ptr;
2425 params.coords = coords;
2426 params.offsets = offsets;
2427 params.lod = lod;
2428 params.texel = texel;
2429
2430 bld->sampler->emit_tex_sample(bld->sampler,
2431 bld->bld_base.base.gallivm,
2432 &params);
2433
2434 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2435 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2436 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2437 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2438 unsigned char swizzles[4];
2439 swizzles[0] = inst->Src[1].Register.SwizzleX;
2440 swizzles[1] = inst->Src[1].Register.SwizzleY;
2441 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2442 swizzles[3] = inst->Src[1].Register.SwizzleW;
2443
2444 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2445 }
2446 }
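/*
 * Slot convention for coords[] as assembled by emit_tex/emit_sample:
 *    coords[0..2] - s, t, r texture coordinates
 *    coords[2]    - layer index for 1D/2D arrays
 *    coords[3]    - layer index for cube arrays
 *    coords[4]    - shadow comparison value
 * Unused slots are left as undef.
 */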
2447
2448 static void
2449 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2450 const struct tgsi_full_instruction *inst,
2451 LLVMValueRef *texel,
2452 boolean is_samplei)
2453 {
2454 unsigned unit, target;
2455 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2456 LLVMValueRef explicit_lod = NULL;
2457 LLVMValueRef coords[5];
2458 LLVMValueRef offsets[3] = { NULL };
2459 LLVMValueRef ms_index = NULL;
2460 struct lp_sampler_params params;
2461 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2462 unsigned dims, i;
2463 unsigned layer_coord = 0;
2464 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2465
2466 memset(&params, 0, sizeof(params));
2467
2468 if (!bld->sampler) {
2469 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2470 for (i = 0; i < 4; i++) {
2471 texel[i] = coord_undef;
2472 }
2473 return;
2474 }
2475
2476 unit = inst->Src[1].Register.Index;
2477
2478 if (is_samplei) {
2479 target = bld->sv[unit].Resource;
2480 }
2481 else {
2482 target = inst->Texture.Texture;
2483 }
2484
2485 switch (target) {
2486 case TGSI_TEXTURE_1D:
2487 case TGSI_TEXTURE_BUFFER:
2488 dims = 1;
2489 break;
2490 case TGSI_TEXTURE_1D_ARRAY:
2491 layer_coord = 1;
2492 dims = 1;
2493 break;
2494 case TGSI_TEXTURE_2D:
2495 case TGSI_TEXTURE_RECT:
2496 case TGSI_TEXTURE_2D_MSAA:
2497 dims = 2;
2498 break;
2499 case TGSI_TEXTURE_2D_ARRAY:
2500 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2501 layer_coord = 2;
2502 dims = 2;
2503 break;
2504 case TGSI_TEXTURE_3D:
2505 dims = 3;
2506 break;
2507 default:
2508 assert(0);
2509 return;
2510 }
2511
2512    /* always have an explicit lod, except for buffers, msaa targets and TXF_LZ */
2513 if (target != TGSI_TEXTURE_BUFFER &&
2514 target != TGSI_TEXTURE_2D_MSAA &&
2515 target != TGSI_TEXTURE_2D_ARRAY_MSAA &&
2516 inst->Instruction.Opcode != TGSI_OPCODE_TXF_LZ) {
2517 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2518 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2519 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2520 }
2521
2522 if (target == TGSI_TEXTURE_2D_MSAA ||
2523 target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
2524 sample_key |= LP_SAMPLER_FETCH_MS;
2525 ms_index = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2526 }
2527
2528 /*
2529    * XXX: src0.w is already used as the sample index above; for real msaa
2530    * support, SAMPLE_I_MS should take it from src2.x instead.
2531 */
2532
2533 for (i = 0; i < dims; i++) {
2534 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2535 }
2536 /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
2537 for (i = dims; i < 5; i++) {
2538 coords[i] = coord_undef;
2539 }
2540 if (layer_coord)
2541 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2542
2543 if (inst->Texture.NumOffsets == 1) {
2544 unsigned dim;
2545 sample_key |= LP_SAMPLER_OFFSETS;
2546 for (dim = 0; dim < dims; dim++) {
2547 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2548 }
2549 }
2550 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2551
2552 params.type = bld->bld_base.base.type;
2553 params.sample_key = sample_key;
2554 params.texture_index = unit;
2555 /*
2556    * The sampler is not actually used; set it to 0 so it won't exceed
2557    * PIPE_MAX_SAMPLERS and trigger assertions with d3d10, where the
2558    * sampler view number can exceed this.
2559 */
2560 params.sampler_index = 0;
2561 params.context_ptr = bld->context_ptr;
2562 params.thread_data_ptr = bld->thread_data_ptr;
2563 params.coords = coords;
2564 params.offsets = offsets;
2565 params.derivs = NULL;
2566 params.lod = explicit_lod;
2567 params.texel = texel;
2568 params.ms_index = ms_index;
2569
2570 bld->sampler->emit_tex_sample(bld->sampler,
2571 bld->bld_base.base.gallivm,
2572 &params);
2573
2574 if (is_samplei &&
2575 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2576 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2577 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2578 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2579 unsigned char swizzles[4];
2580 swizzles[0] = inst->Src[1].Register.SwizzleX;
2581 swizzles[1] = inst->Src[1].Register.SwizzleY;
2582 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2583 swizzles[3] = inst->Src[1].Register.SwizzleW;
2584
2585 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2586 }
2587 }
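/*
 * Illustrative note: TXF/SAMPLE_I fetch a single texel with integer
 * coordinates and no filtering; as the code above shows, the explicit
 * lod (or, for MSAA targets, the sample index) comes from src0.w,
 * conceptually something like
 *    TXF TEMP[0], TEMP[1], SAMP[0], 2D   ; .xy = texel coords, .w = lod
 */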
2588
2589 static void
2590 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2591 const struct tgsi_full_instruction *inst,
2592 LLVMValueRef *sizes_out,
2593 boolean is_sviewinfo)
2594 {
2595 LLVMValueRef explicit_lod;
2596 enum lp_sampler_lod_property lod_property;
2597 unsigned has_lod;
2598 unsigned i;
2599 unsigned unit = inst->Src[1].Register.Index;
2600 unsigned target, pipe_target;
2601 struct lp_sampler_size_query_params params;
2602
2603 if (is_sviewinfo) {
2604 target = bld->sv[unit].Resource;
2605 }
2606 else {
2607 target = inst->Texture.Texture;
2608 }
2609 switch (target) {
2610 case TGSI_TEXTURE_BUFFER:
2611 case TGSI_TEXTURE_RECT:
2612 case TGSI_TEXTURE_SHADOWRECT:
2613 has_lod = 0;
2614 break;
2615 default:
2616 has_lod = 1;
2617 break;
2618 }
2619
2620 if (!bld->sampler) {
2621 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2622 for (i = 0; i < 4; i++)
2623 sizes_out[i] = bld->bld_base.int_bld.undef;
2624 return;
2625 }
2626
2627 if (has_lod) {
2628 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2629 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2630 }
2631 else {
2632 explicit_lod = NULL;
2633 lod_property = LP_SAMPLER_LOD_SCALAR;
2634 }
2635
2636
2637 pipe_target = tgsi_to_pipe_tex_target(target);
2638
2639 params.int_type = bld->bld_base.int_bld.type;
2640 params.texture_unit = unit;
2641 params.target = pipe_target;
2642 params.context_ptr = bld->context_ptr;
2643 params.is_sviewinfo = TRUE;
2644 params.lod_property = lod_property;
2645 params.explicit_lod = explicit_lod;
2646 params.sizes_out = sizes_out;
2647 params.samples_only = false;
2648
2649 bld->sampler->emit_size_query(bld->sampler,
2650 bld->bld_base.base.gallivm,
2651 &params);
2652 }
2653
2654 static boolean
2655 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2656 int pc)
2657 {
2658 unsigned i;
2659
2660 for (i = 0; i < 5; i++) {
2661 enum tgsi_opcode opcode;
2662
2663 if (pc + i >= bld->bld_base.info->num_instructions)
2664 return TRUE;
2665
2666 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2667
2668 if (opcode == TGSI_OPCODE_END)
2669 return TRUE;
2670
2671 if (opcode == TGSI_OPCODE_TEX ||
2672 opcode == TGSI_OPCODE_TXP ||
2673 opcode == TGSI_OPCODE_TXD ||
2674 opcode == TGSI_OPCODE_TXB ||
2675 opcode == TGSI_OPCODE_TXL ||
2676 opcode == TGSI_OPCODE_TXF ||
2677 opcode == TGSI_OPCODE_TXQ ||
2678 opcode == TGSI_OPCODE_TEX2 ||
2679 opcode == TGSI_OPCODE_TXB2 ||
2680 opcode == TGSI_OPCODE_TXL2 ||
2681 opcode == TGSI_OPCODE_SAMPLE ||
2682 opcode == TGSI_OPCODE_SAMPLE_B ||
2683 opcode == TGSI_OPCODE_SAMPLE_C ||
2684 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2685 opcode == TGSI_OPCODE_SAMPLE_D ||
2686 opcode == TGSI_OPCODE_SAMPLE_I ||
2687 opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2688 opcode == TGSI_OPCODE_SAMPLE_L ||
2689 opcode == TGSI_OPCODE_SVIEWINFO ||
2690 opcode == TGSI_OPCODE_CAL ||
2691 opcode == TGSI_OPCODE_IF ||
2692 opcode == TGSI_OPCODE_UIF ||
2693 opcode == TGSI_OPCODE_BGNLOOP ||
2694 opcode == TGSI_OPCODE_SWITCH)
2695 return FALSE;
2696 }
2697
2698 return TRUE;
2699 }
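/*
 * Rationale: lp_build_mask_check() emits a conditional branch that skips
 * the remaining code once all lanes are dead. The 5-instruction lookahead
 * above avoids emitting that branch when so little (and such cheap) work
 * remains that the early exit cannot pay off.
 */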
2700
2701
2702
2703 /**
2704 * Kill fragment if any of the src register values are negative.
2705 */
2706 static void
2707 emit_kill_if(
2708 struct lp_build_tgsi_soa_context *bld,
2709 const struct tgsi_full_instruction *inst,
2710 int pc)
2711 {
2712 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2713 const struct tgsi_full_src_register *reg = &inst->Src[0];
2714 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2715 LLVMValueRef mask;
2716 unsigned chan_index;
2717
2718 memset(&terms, 0, sizeof terms);
2719
2720 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2721 unsigned swizzle;
2722
2723 /* Unswizzle channel */
2724 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2725
2726 /* Check if the component has not been already tested. */
2727 assert(swizzle < TGSI_NUM_CHANNELS);
2728 if( !terms[swizzle] )
2729 /* TODO: change the comparison operator instead of setting the sign */
2730 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2731 }
2732
2733 mask = NULL;
2734 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2735 if(terms[chan_index]) {
2736 LLVMValueRef chan_mask;
2737
2738 /*
2739 * If term < 0 then mask = 0 else mask = ~0.
2740 */
2741 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2742
2743 if(mask)
2744 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2745 else
2746 mask = chan_mask;
2747 }
2748 }
2749
2750 if (bld->exec_mask.has_mask) {
2751 LLVMValueRef invmask;
2752 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2753 mask = LLVMBuildOr(builder, mask, invmask, "");
2754 }
2755
2756 lp_build_mask_update(bld->mask, mask);
2757 if (!near_end_of_shader(bld, pc))
2758 lp_build_mask_check(bld->mask);
2759 }
2760
2761
2762 /**
2763 * Unconditional fragment kill.
2764 * The only predication is the execution mask which will apply if
2765 * we're inside a loop or conditional.
2766 */
2767 static void
2768 emit_kill(struct lp_build_tgsi_soa_context *bld,
2769 int pc)
2770 {
2771 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2772 LLVMValueRef mask;
2773
2774 /* For those channels which are "alive", disable fragment shader
2775 * execution.
2776 */
2777 if (bld->exec_mask.has_mask) {
2778 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2779 }
2780 else {
2781 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2782 mask = zero;
2783 }
2784
2785 lp_build_mask_update(bld->mask, mask);
2786
2787 if (!near_end_of_shader(bld, pc))
2788 lp_build_mask_check(bld->mask);
2789 }
2790
2791
2792 /**
2793 * Emit code which will dump the value of all the temporary registers
2794 * to stdout.
2795 */
2796 static void
2797 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2798 unsigned file)
2799 {
2800 const struct tgsi_shader_info *info = bld->bld_base.info;
2801 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2802 LLVMBuilderRef builder = gallivm->builder;
2803 LLVMValueRef reg_ptr;
2804 int index;
2805 int max_index = info->file_max[file];
2806
2807 /*
2808 * Some register files, particularly constants, can be very large,
2809 * and dumping everything could make this unusably slow.
2810 */
2811 max_index = MIN2(max_index, 32);
2812
2813 for (index = 0; index <= max_index; index++) {
2814 LLVMValueRef res;
2815 unsigned mask;
2816 int chan;
2817
2818 if (index < 8 * sizeof(unsigned) &&
2819 (info->file_mask[file] & (1u << index)) == 0) {
2820          /* This was not declared. */
2821 continue;
2822 }
2823
2824 if (file == TGSI_FILE_INPUT) {
2825 mask = info->input_usage_mask[index];
2826 } else {
2827 mask = TGSI_WRITEMASK_XYZW;
2828 }
2829
2830 for (chan = 0; chan < 4; chan++) {
2831 if ((mask & (1 << chan)) == 0) {
2832             /* This channel is not used. */
2833 continue;
2834 }
2835
2836 if (file == TGSI_FILE_CONSTANT) {
2837 struct tgsi_full_src_register reg;
2838 memset(&reg, 0, sizeof reg);
2839 reg.Register.File = file;
2840 reg.Register.Index = index;
2841 reg.Register.SwizzleX = 0;
2842 reg.Register.SwizzleY = 1;
2843 reg.Register.SwizzleZ = 2;
2844 reg.Register.SwizzleW = 3;
2845
2846 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2847 if (!res) {
2848 continue;
2849 }
2850 } else if (file == TGSI_FILE_INPUT) {
2851 res = bld->inputs[index][chan];
2852 if (!res) {
2853 continue;
2854 }
2855 } else if (file == TGSI_FILE_TEMPORARY) {
2856 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2857 assert(reg_ptr);
2858 res = LLVMBuildLoad(builder, reg_ptr, "");
2859 } else if (file == TGSI_FILE_OUTPUT) {
2860 reg_ptr = lp_get_output_ptr(bld, index, chan);
2861 assert(reg_ptr);
2862 res = LLVMBuildLoad(builder, reg_ptr, "");
2863 } else {
2864 assert(0);
2865 continue;
2866 }
2867
2868 emit_dump_reg(gallivm, file, index, chan, res);
2869 }
2870 }
2871 }
2872
2873
2874
2875 void
2876 lp_emit_declaration_soa(
2877 struct lp_build_tgsi_context *bld_base,
2878 const struct tgsi_full_declaration *decl)
2879 {
2880 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2881 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2882 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2883 const unsigned first = decl->Range.First;
2884 const unsigned last = decl->Range.Last;
2885 unsigned idx, i;
2886
2887 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2888
2889 switch (decl->Declaration.File) {
2890 case TGSI_FILE_TEMPORARY:
2891 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2892 assert(last < LP_MAX_INLINED_TEMPS);
2893 for (idx = first; idx <= last; ++idx) {
2894 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2895 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2896 }
2897 }
2898 break;
2899
2900 case TGSI_FILE_OUTPUT:
2901 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2902 for (idx = first; idx <= last; ++idx) {
2903 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2904 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2905 vec_type, "output");
2906 }
2907 }
2908 break;
2909
2910 case TGSI_FILE_ADDRESS:
2911 /* ADDR registers are only allocated with an integer LLVM IR type,
2912       * as they are guaranteed to always hold integers.
2913 * XXX: Not sure if this exception is worthwhile (or the whole idea of
2914 * an ADDR register for that matter).
2915 */
2916 assert(last < LP_MAX_TGSI_ADDRS);
2917 for (idx = first; idx <= last; ++idx) {
2918 assert(idx < LP_MAX_TGSI_ADDRS);
2919 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2920 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2921 }
2922 break;
2923
2924 case TGSI_FILE_SAMPLER_VIEW:
2925 /*
2926       * The target stored here MUST match whatever is actually in the
2927       * bound sampler views (what about the return type?).
2928 */
2929 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2930 for (idx = first; idx <= last; ++idx) {
2931 bld->sv[idx] = decl->SamplerView;
2932 }
2933 break;
2934
2935 case TGSI_FILE_CONSTANT:
2936 {
2937 /*
2938 * We could trivially fetch the per-buffer pointer when fetching the
2939 * constant, relying on llvm to figure out it's always the same pointer
2940 * anyway. However, doing so results in a huge (more than factor of 10)
2941 * slowdown in llvm compilation times for some (but not all) shaders
2942 * (more specifically, the IR optimization spends way more time in
2943 * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
2944 */
2945 unsigned idx2D = decl->Dim.Index2D;
2946 LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2947 assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2948 bld->consts[idx2D] =
2949 lp_build_array_get(gallivm, bld->consts_ptr, index2D);
2950 bld->consts_sizes[idx2D] =
2951 lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
2952 }
2953 break;
2954 case TGSI_FILE_BUFFER:
2955 {
2956 unsigned idx = decl->Range.First;
2957 LLVMValueRef index = lp_build_const_int32(gallivm, idx);
2958 assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
2959 bld->ssbos[idx] =
2960 lp_build_array_get(gallivm, bld->ssbo_ptr, index);
2961 bld->ssbo_sizes[idx] =
2962 lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index);
2963
2964 }
2965 break;
2966 case TGSI_FILE_MEMORY:
2967 break;
2968 default:
2969 /* don't need to declare other vars */
2970 break;
2971 }
2972 }
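/*
 * Note: temporaries and outputs get one alloca per register channel only
 * when the file is never indirectly addressed; otherwise (the matching
 * bld->indirect_files bit is set) they live in the flat temps_array /
 * outputs_array, which the scatter/gather paths in emit_store_temp and
 * emit_store_output index dynamically.
 */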
2973
2974
2975 void lp_emit_immediate_soa(
2976 struct lp_build_tgsi_context *bld_base,
2977 const struct tgsi_full_immediate *imm)
2978 {
2979 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2980 struct gallivm_state * gallivm = bld_base->base.gallivm;
2981 LLVMValueRef imms[4];
2982 unsigned i;
2983 const uint size = imm->Immediate.NrTokens - 1;
2984 assert(size <= 4);
2985 switch (imm->Immediate.DataType) {
2986 case TGSI_IMM_FLOAT32:
2987 for( i = 0; i < size; ++i )
2988 imms[i] =
2989 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2990
2991 break;
2992 case TGSI_IMM_FLOAT64:
2993 case TGSI_IMM_UINT64:
2994 case TGSI_IMM_INT64:
2995 case TGSI_IMM_UINT32:
2996 for( i = 0; i < size; ++i ) {
2997 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
2998 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2999 }
3000
3001 break;
3002 case TGSI_IMM_INT32:
3003 for( i = 0; i < size; ++i ) {
3004 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3005 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3006 }
3007
3008 break;
3009 }
3010 for( i = size; i < 4; ++i )
3011 imms[i] = bld_base->base.undef;
3012
3013 if (bld->use_immediates_array) {
3014 unsigned index = bld->num_immediates;
3015 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3016 LLVMBuilderRef builder = gallivm->builder;
3017 LLVMValueRef gep[2];
3018 gep[0] = lp_build_const_int32(gallivm, 0);
3019
3020 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3021 for (i = 0; i < 4; ++i ) {
3022 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3023 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3024 bld->imms_array, gep, 2, "");
3025 LLVMBuildStore(builder, imms[i], imm_ptr);
3026 }
3027 } else {
3028 /* simply copy the immediate values into the next immediates[] slot */
3029 unsigned i;
3030 assert(imm->Immediate.NrTokens - 1 <= 4);
3031 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3032
3033 for(i = 0; i < 4; ++i )
3034 bld->immediates[bld->num_immediates][i] = imms[i];
3035
3036 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3037 unsigned index = bld->num_immediates;
3038 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3039 LLVMBuilderRef builder = gallivm->builder;
3040 LLVMValueRef gep[2];
3041 gep[0] = lp_build_const_int32(gallivm, 0);
3042 for (i = 0; i < 4; ++i ) {
3043 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3044 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3045 bld->imms_array, gep, 2, "");
3046 LLVMBuildStore(builder,
3047 bld->immediates[index][i],
3048 imm_ptr);
3049 }
3050 }
3051 }
3052
3053 bld->num_immediates++;
3054 }
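/*
 * Example of the array layout used above: channel c of immediate n lands
 * at imms_array[n * 4 + c], e.g. the .z channel of IMM[2] is stored at
 * flat index 2 * 4 + 2 = 10.
 */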
3055
3056 static void
3057 ddx_emit(
3058 const struct lp_build_tgsi_action * action,
3059 struct lp_build_tgsi_context * bld_base,
3060 struct lp_build_emit_data * emit_data)
3061 {
3062 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3063
3064 emit_fetch_deriv(bld, emit_data->args[0], NULL,
3065 &emit_data->output[emit_data->chan], NULL);
3066 }
3067
3068 static void
3069 ddy_emit(
3070 const struct lp_build_tgsi_action * action,
3071 struct lp_build_tgsi_context * bld_base,
3072 struct lp_build_emit_data * emit_data)
3073 {
3074 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3075
3076 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3077 &emit_data->output[emit_data->chan]);
3078 }
3079
3080 static void
3081 kill_emit(
3082 const struct lp_build_tgsi_action * action,
3083 struct lp_build_tgsi_context * bld_base,
3084 struct lp_build_emit_data * emit_data)
3085 {
3086 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3087
3088 emit_kill(bld, bld_base->pc - 1);
3089 }
3090
3091 static void
3092 kill_if_emit(
3093 const struct lp_build_tgsi_action * action,
3094 struct lp_build_tgsi_context * bld_base,
3095 struct lp_build_emit_data * emit_data)
3096 {
3097 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3098
3099 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3100 }
3101
3102 static void
3103 tex_emit(
3104 const struct lp_build_tgsi_action * action,
3105 struct lp_build_tgsi_context * bld_base,
3106 struct lp_build_emit_data * emit_data)
3107 {
3108 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3109
3110 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3111 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3112 }
3113
3114 static void
3115 tex2_emit(
3116 const struct lp_build_tgsi_action * action,
3117 struct lp_build_tgsi_context * bld_base,
3118 struct lp_build_emit_data * emit_data)
3119 {
3120 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3121
3122 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3123 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3124 }
3125
3126 static void
3127 txb_emit(
3128 const struct lp_build_tgsi_action * action,
3129 struct lp_build_tgsi_context * bld_base,
3130 struct lp_build_emit_data * emit_data)
3131 {
3132 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3133
3134 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3135 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3136 }
3137
3138 static void
3139 txb2_emit(
3140 const struct lp_build_tgsi_action * action,
3141 struct lp_build_tgsi_context * bld_base,
3142 struct lp_build_emit_data * emit_data)
3143 {
3144 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3145
3146 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3147 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3148 }
3149
3150 static void
3151 txd_emit(
3152 const struct lp_build_tgsi_action * action,
3153 struct lp_build_tgsi_context * bld_base,
3154 struct lp_build_emit_data * emit_data)
3155 {
3156 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3157
3158 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3159 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3160 }
3161
3162 static void
3163 txl_emit(
3164 const struct lp_build_tgsi_action * action,
3165 struct lp_build_tgsi_context * bld_base,
3166 struct lp_build_emit_data * emit_data)
3167 {
3168 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3169
3170 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3171 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3172 }
3173
3174 static void
3175 txl2_emit(
3176 const struct lp_build_tgsi_action * action,
3177 struct lp_build_tgsi_context * bld_base,
3178 struct lp_build_emit_data * emit_data)
3179 {
3180 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3181
3182 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3183 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3184 }
3185
3186 static void
3187 txp_emit(
3188 const struct lp_build_tgsi_action * action,
3189 struct lp_build_tgsi_context * bld_base,
3190 struct lp_build_emit_data * emit_data)
3191 {
3192 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3193
3194 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3195 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3196 }
3197
3198 static void
3199 tg4_emit(
3200 const struct lp_build_tgsi_action * action,
3201 struct lp_build_tgsi_context * bld_base,
3202 struct lp_build_emit_data * emit_data)
3203 {
3204 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3205
3206 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3207 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3208 }
3209
3210 static void
3211 lodq_emit(
3212 const struct lp_build_tgsi_action * action,
3213 struct lp_build_tgsi_context * bld_base,
3214 struct lp_build_emit_data * emit_data)
3215 {
3216 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3217
3218 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3219 emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3220 }
3221
3222 static void
3223 txq_emit(
3224 const struct lp_build_tgsi_action * action,
3225 struct lp_build_tgsi_context * bld_base,
3226 struct lp_build_emit_data * emit_data)
3227 {
3228 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3229
3230 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3231 }
3232
3233 static void
3234 txf_emit(
3235 const struct lp_build_tgsi_action * action,
3236 struct lp_build_tgsi_context * bld_base,
3237 struct lp_build_emit_data * emit_data)
3238 {
3239 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3240
3241 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3242 }
3243
3244 static void
3245 sample_i_emit(
3246 const struct lp_build_tgsi_action * action,
3247 struct lp_build_tgsi_context * bld_base,
3248 struct lp_build_emit_data * emit_data)
3249 {
3250 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3251
3252 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3253 }
3254
3255 static void
3256 sample_emit(
3257 const struct lp_build_tgsi_action * action,
3258 struct lp_build_tgsi_context * bld_base,
3259 struct lp_build_emit_data * emit_data)
3260 {
3261 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3262
3263 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3264 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3265 }
3266
3267 static void
3268 sample_b_emit(
3269 const struct lp_build_tgsi_action * action,
3270 struct lp_build_tgsi_context * bld_base,
3271 struct lp_build_emit_data * emit_data)
3272 {
3273 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3274
3275 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3276 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3277 }
3278
3279 static void
3280 sample_c_emit(
3281 const struct lp_build_tgsi_action * action,
3282 struct lp_build_tgsi_context * bld_base,
3283 struct lp_build_emit_data * emit_data)
3284 {
3285 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3286
3287 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3288 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3289 }
3290
3291 static void
3292 sample_c_lz_emit(
3293 const struct lp_build_tgsi_action * action,
3294 struct lp_build_tgsi_context * bld_base,
3295 struct lp_build_emit_data * emit_data)
3296 {
3297 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3298
3299 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3300 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3301 }
3302
3303 static void
3304 sample_d_emit(
3305 const struct lp_build_tgsi_action * action,
3306 struct lp_build_tgsi_context * bld_base,
3307 struct lp_build_emit_data * emit_data)
3308 {
3309 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3310
3311 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3312 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3313 }
3314
3315 static void
3316 sample_l_emit(
3317 const struct lp_build_tgsi_action * action,
3318 struct lp_build_tgsi_context * bld_base,
3319 struct lp_build_emit_data * emit_data)
3320 {
3321 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3322
3323 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3324 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3325 }
3326
3327 static void
3328 gather4_emit(
3329 const struct lp_build_tgsi_action * action,
3330 struct lp_build_tgsi_context * bld_base,
3331 struct lp_build_emit_data * emit_data)
3332 {
3333 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3334
3335 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3336 FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
3337 }
3338
3339 static void
3340 sviewinfo_emit(
3341 const struct lp_build_tgsi_action * action,
3342 struct lp_build_tgsi_context * bld_base,
3343 struct lp_build_emit_data * emit_data)
3344 {
3345 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3346
3347 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3348 }
3349
3350 static void
3351 lod_emit(
3352 const struct lp_build_tgsi_action * action,
3353 struct lp_build_tgsi_context * bld_base,
3354 struct lp_build_emit_data * emit_data)
3355 {
3356 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3357
3358 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3359 FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3360 }
3361
3362 static void target_to_dims_layer(unsigned target,
3363 unsigned *dims,
3364 unsigned *layer_coord)
3365 {
3366 *layer_coord = 0;
3367 switch (target) {
3368 case TGSI_TEXTURE_1D:
3369 case TGSI_TEXTURE_BUFFER:
3370 *dims = 1;
3371 break;
3372 case TGSI_TEXTURE_1D_ARRAY:
3373 *layer_coord = 1;
3374 *dims = 1;
3375 break;
3376 case TGSI_TEXTURE_2D:
3377 case TGSI_TEXTURE_RECT:
3378 *dims = 2;
3379 break;
3380 case TGSI_TEXTURE_2D_ARRAY:
3381 *layer_coord = 2;
3382 *dims = 2;
3383 break;
3384 case TGSI_TEXTURE_3D:
3385 case TGSI_TEXTURE_CUBE:
3386 case TGSI_TEXTURE_CUBE_ARRAY:
3387 *dims = 3;
3388 break;
3389 default:
3390 assert(0);
3391 *dims = 0;
3392 return;
3393 }
3394 }
3395
3396 static void
3397 img_load_emit(
3398 const struct lp_build_tgsi_action * action,
3399 struct lp_build_tgsi_context * bld_base,
3400 struct lp_build_emit_data * emit_data)
3401 {
3402 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3403 struct lp_img_params params;
3404 LLVMValueRef coords[5];
3405 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3406 unsigned dims;
3407 unsigned target = emit_data->inst->Memory.Texture;
3408 unsigned layer_coord;
3409
3410 target_to_dims_layer(target, &dims, &layer_coord);
3411
3412 for (unsigned i = 0; i < dims; i++) {
3413 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3414 }
3415 for (unsigned i = dims; i < 5; i++) {
3416 coords[i] = coord_undef;
3417 }
3418 if (layer_coord)
3419 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3420
3421 memset(&params, 0, sizeof(params));
3422
3423 params.type = bld->bld_base.base.type;
3424 params.context_ptr = bld->context_ptr;
3425 params.thread_data_ptr = bld->thread_data_ptr;
3426 params.coords = coords;
3427 params.outdata = emit_data->output;
3428 params.target = tgsi_to_pipe_tex_target(target);
3429 params.image_index = emit_data->inst->Src[0].Register.Index;
3430 params.img_op = LP_IMG_LOAD;
3431 bld->image->emit_op(bld->image,
3432 bld->bld_base.base.gallivm,
3433 &params);
3434 }
3435
3436 static void
3437 load_emit(
3438 const struct lp_build_tgsi_action * action,
3439 struct lp_build_tgsi_context * bld_base,
3440 struct lp_build_emit_data * emit_data)
3441 {
3442 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3443 struct gallivm_state * gallivm = bld_base->base.gallivm;
3444 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3445 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3446 unsigned buf = bufreg->Register.Index;
3447 assert(bufreg->Register.File == TGSI_FILE_BUFFER ||
3448 bufreg->Register.File == TGSI_FILE_IMAGE ||
3449 bufreg->Register.File == TGSI_FILE_MEMORY ||
3450 bufreg->Register.File == TGSI_FILE_CONSTBUF);
3451 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3452 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3453
3454 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3455 img_load_emit(action, bld_base, emit_data);
3456 } else if (bufreg->Register.File == TGSI_FILE_CONSTBUF) {
3457 LLVMValueRef consts_ptr = bld->consts[buf];
3458 LLVMValueRef num_consts = bld->consts_sizes[buf];
3459
3460 LLVMValueRef indirect_index;
3461 LLVMValueRef overflow_mask;
3462
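/* The source address is a byte offset; constants are vec4s,
 * so shift right by 4 to get a vec4 slot index (16 bytes per
 * vec4). */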
3463 indirect_index = lp_build_emit_fetch(bld_base, emit_data->inst, 1, 0);
3464 indirect_index = lp_build_shr_imm(uint_bld, indirect_index, 4);
3465
3466 /* All lanes fetch from the same constant buffer, so
3467 * broadcast its size to a vector for the per-lane
3468 * bounds comparison below */
3469 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
3470
3471 /* Gather values from the constant buffer */
3472 unsigned chan_index;
3473 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3474 /* Construct a boolean vector telling us which lanes
3475 * index beyond the bound constant buffer */
3476 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
3477 indirect_index, num_consts);
3478
3479 /* index_vec = indirect_index * 4 */
3480 LLVMValueRef index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
3481 index_vec = lp_build_add(uint_bld, index_vec,
3482 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3483
3484 emit_data->output[chan_index] = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, NULL);
3485 }
3486 } else if (0) {
3487 /* placeholder for indirect resource indexing (ARB_gpu_shader5); not implemented */
3488 } else {
3489 LLVMValueRef index;
3490 LLVMValueRef scalar, scalar_ptr;
3491 unsigned chan_index;
3492
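/* Fetch the per-lane byte offset from Src[1] and convert it to
 * a 32-bit element index (4 bytes per dword). */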
3493 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3494 index = lp_build_shr_imm(uint_bld, index, 2);
3495
3496 scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3497
3498 LLVMValueRef ssbo_limit = NULL;
3499
3500 if (!is_shared) {
3501 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3502 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3503 }
3504
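/* Scalarize each enabled destination channel: loop over the SIMD
 * lanes and, for each active in-bounds lane, load one dword and
 * insert it into the result vector; inactive lanes read back 0. */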
3505 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3506 LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3507
3508 LLVMValueRef exec_mask = mask_vec(bld_base);
3509 if (!is_shared) {
3510 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3511 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3512 }
3513
3514 LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
3515 struct lp_build_loop_state loop_state;
3516 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3517
3518 struct lp_build_if_state ifthen;
3519 LLVMValueRef cond, temp_res;
3520
3521 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3522 loop_state.counter, "");
3523
3524 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3525 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3526
3527 lp_build_if(&ifthen, gallivm, cond);
3528 scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index);
3529
3530 temp_res = LLVMBuildLoad(builder, result, "");
3531 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3532 LLVMBuildStore(builder, temp_res, result);
3533 lp_build_else(&ifthen);
3534 temp_res = LLVMBuildLoad(builder, result, "");
3535 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3536 LLVMBuildStore(builder, temp_res, result);
3537 lp_build_endif(&ifthen);
3538 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3539 NULL, LLVMIntUGE);
3540 emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, "");
3541 }
3542 }
3543 }
3544
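/*
 * Emit an image STORE: coordinates come from Src[0], the data to
 * write from Src[1], and the image index from Dst[0].Register.Index.
 * The current execution mask is passed along so inactive lanes do
 * not write.
 */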
3545 static void
3546 img_store_emit(
3547 const struct lp_build_tgsi_action * action,
3548 struct lp_build_tgsi_context * bld_base,
3549 struct lp_build_emit_data * emit_data)
3550 {
3551 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3552 struct lp_img_params params;
3553 LLVMValueRef coords[5];
3554 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3555 unsigned dims;
3556 unsigned target = emit_data->inst->Memory.Texture;
3557 unsigned layer_coord;
3558
3559 target_to_dims_layer(target, &dims, &layer_coord);
3560 for (unsigned i = 0; i < dims; i++) {
3561 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i);
3562 }
3563 for (unsigned i = dims; i < 5; i++) {
3564 coords[i] = coord_undef;
3565 }
3566 if (layer_coord)
3567 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
3568 memset(&params, 0, sizeof(params));
3569
3570 params.type = bld->bld_base.base.type;
3571 params.context_ptr = bld->context_ptr;
3572 params.thread_data_ptr = bld->thread_data_ptr;
3573 params.coords = coords;
3574 params.outdata = NULL;
3575 params.exec_mask = mask_vec(bld_base);
3576 params.target = tgsi_to_pipe_tex_target(target);
3577 params.image_index = emit_data->inst->Dst[0].Register.Index;
3578 params.img_op = LP_IMG_STORE;
3579 for (unsigned i = 0; i < 4; i++)
3580 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3581
3582 bld->image->emit_op(bld->image,
3583 bld->bld_base.base.gallivm,
3584 &params);
3585 }
3586
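/*
 * Emit a TGSI STORE. Images go through img_store_emit(); SSBO and
 * shared memory writes are scalarized per lane, with out-of-bounds
 * and inactive lanes masked off.
 */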
3587 static void
3588 store_emit(
3589 const struct lp_build_tgsi_action * action,
3590 struct lp_build_tgsi_context * bld_base,
3591 struct lp_build_emit_data * emit_data)
3592 {
3593 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3594 struct gallivm_state * gallivm = bld_base->base.gallivm;
3595 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3596 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3597 const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
3598 unsigned buf = bufreg->Register.Index;
3599 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3600 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3601
3602 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3603 img_store_emit(action, bld_base, emit_data);
3604 } else if (0) {
3605 /* placeholder for indirect resource indexing (ARB_gpu_shader5); not implemented */
3606 } else {
3607 LLVMValueRef index; /* dword index into the ssbo or shared memory */
3608 LLVMValueRef scalar_ptr;
3609 LLVMValueRef value;
3610 unsigned chan_index;
3611
3612 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
3613 index = lp_build_shr_imm(uint_bld, index, 2);
3614
3615 scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3616
3617 LLVMValueRef ssbo_limit = NULL;
3618
3619 if (!is_shared) {
3620 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3621 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3622 }
3623
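/* For each enabled channel, loop over the SIMD lanes and store
 * one dword per active, in-bounds lane. */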
3624 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3625 LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3626
3627 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);
3628
3629 LLVMValueRef exec_mask = mask_vec(bld_base);
3630 if (!is_shared) {
3631 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3632 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3633 }
3634
3635 struct lp_build_loop_state loop_state;
3636 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3637
3638 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3639 loop_state.counter, "");
3640 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3641
3642 struct lp_build_if_state ifthen;
3643 LLVMValueRef cond;
3644
3645 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3646 loop_state.counter, "");
3647
3648 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3649 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3650 lp_build_if(&ifthen, gallivm, cond);
3651
3652 lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);
3653
3654 lp_build_endif(&ifthen);
3655 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3656 NULL, LLVMIntUGE);
3657 }
3658 }
3659 }
3660
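/*
 * Emit RESQ (resource size query): for images, forward to the image
 * backend's size query; for buffers, broadcast the stored SSBO size.
 */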
3661 static void
3662 resq_emit(
3663 const struct lp_build_tgsi_action * action,
3664 struct lp_build_tgsi_context * bld_base,
3665 struct lp_build_emit_data * emit_data)
3666 {
3667 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3668 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3669 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3670
3671 unsigned buf = bufreg->Register.Index;
3672 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
3673
3674 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3675 unsigned target = emit_data->inst->Memory.Texture;
3676 struct lp_sampler_size_query_params params = { 0 };
3677 params.int_type = bld->bld_base.int_bld.type;
3678 params.texture_unit = buf;
3679 params.target = tgsi_to_pipe_tex_target(target);
3680 params.context_ptr = bld->context_ptr;
3681 params.sizes_out = emit_data->output;
3682
3683 bld->image->emit_size_query(bld->image,
3684 bld->bld_base.base.gallivm,
3685 &params);
3686 } else {
3687 LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
3688
3689 emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
3690 }
3691 }
3692
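/*
 * Emit an image atomic: coordinates from Src[1], operand(s) from
 * Src[2] (plus Src[3] for the compare value of ATOMCAS), and the
 * image index from Src[0].Register.Index.
 */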
3693 static void
3694 img_atomic_emit(
3695 const struct lp_build_tgsi_action * action,
3696 struct lp_build_tgsi_context * bld_base,
3697 struct lp_build_emit_data * emit_data,
3698 LLVMAtomicRMWBinOp op)
3699 {
3700 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3701 struct lp_img_params params;
3702 LLVMValueRef coords[5];
3703 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3704 unsigned dims;
3705 unsigned layer_coord;
3706 unsigned target = emit_data->inst->Memory.Texture;
3707
3708 target_to_dims_layer(target, &dims, &layer_coord);
3709
3710 for (unsigned i = 0; i < dims; i++) {
3711 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3712 }
3713 for (unsigned i = dims; i < 5; i++) {
3714 coords[i] = coord_undef;
3715 }
3716 if (layer_coord)
3717 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3718 memset(&params, 0, sizeof(params));
3719
3720 params.type = bld->bld_base.base.type;
3721 params.context_ptr = bld->context_ptr;
3722 params.thread_data_ptr = bld->thread_data_ptr;
3723 params.exec_mask = mask_vec(bld_base);
3724 params.image_index = emit_data->inst->Src[0].Register.Index;
3725 params.coords = coords;
3726 params.target = tgsi_to_pipe_tex_target(target);
3727 params.op = op;
3728 params.outdata = emit_data->output;
3729 params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
3730
3731 for (unsigned i = 0; i < 4; i++)
3732 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
3733 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3734 for (unsigned i = 0; i < 4; i++)
3735 params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
3736 }
3737 bld->image->emit_op(bld->image,
3738 bld->bld_base.base.gallivm,
3739 &params);
3740 }
3741
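/*
 * Emit a TGSI atomic op. First map the TGSI opcode onto the
 * corresponding LLVM atomicrmw operation (ATOMCAS is handled
 * separately via cmpxchg), then dispatch on the resource file.
 */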
3742 static void
3743 atomic_emit(
3744 const struct lp_build_tgsi_action * action,
3745 struct lp_build_tgsi_context * bld_base,
3746 struct lp_build_emit_data * emit_data)
3747 {
3748 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3749 struct gallivm_state * gallivm = bld_base->base.gallivm;
3750 LLVMBuilderRef builder = gallivm->builder;
3751 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3752 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3753
3754 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3755 unsigned buf = bufreg->Register.Index;
3756 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3757
3758 LLVMAtomicRMWBinOp op = -1; /* invalid; only set for non-CAS opcodes below */
3759 switch (emit_data->inst->Instruction.Opcode) {
3760 case TGSI_OPCODE_ATOMUADD:
3761 op = LLVMAtomicRMWBinOpAdd;
3762 break;
3763 case TGSI_OPCODE_ATOMXCHG:
3764 op = LLVMAtomicRMWBinOpXchg;
3765 break;
3766 case TGSI_OPCODE_ATOMAND:
3767 op = LLVMAtomicRMWBinOpAnd;
3768 break;
3769 case TGSI_OPCODE_ATOMOR:
3770 op = LLVMAtomicRMWBinOpOr;
3771 break;
3772 case TGSI_OPCODE_ATOMXOR:
3773 op = LLVMAtomicRMWBinOpXor;
3774 break;
3775 case TGSI_OPCODE_ATOMUMIN:
3776 op = LLVMAtomicRMWBinOpUMin;
3777 break;
3778 case TGSI_OPCODE_ATOMUMAX:
3779 op = LLVMAtomicRMWBinOpUMax;
3780 break;
3781 case TGSI_OPCODE_ATOMIMIN:
3782 op = LLVMAtomicRMWBinOpMin;
3783 break;
3784 case TGSI_OPCODE_ATOMIMAX:
3785 op = LLVMAtomicRMWBinOpMax;
3786 break;
3787 case TGSI_OPCODE_ATOMCAS:
3788 break;
3789 default:
3790 assert(0);
3791 return;
3792 }
3793
3794 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3795 img_atomic_emit(action, bld_base, emit_data, op);
3796 } else if (0) { /* placeholder for indirect resource indexing; not implemented */
3797 } else {
3798 LLVMValueRef index; /* dword index into the ssbo or shared memory */
3799 LLVMValueRef scalar, scalar_ptr;
3800 LLVMValueRef value;
3801
3802 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3803 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);
3804
3805 index = lp_build_shr_imm(uint_bld, index, 2);
3806
3807 if (!is_shared) {
3808 index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
3809 scalar_ptr = bld->ssbos[buf];
3810 } else
3811 scalar_ptr = bld->shared_ptr;
3812
3813 LLVMValueRef atom_res = lp_build_alloca(gallivm,
3814 uint_bld->vec_type, "");
3815
3816 LLVMValueRef ssbo_limit;
3817 if (!is_shared) {
3818 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3819 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3820 }
3821
3822 LLVMValueRef exec_mask = mask_vec(bld_base);
3823
3824 if (!is_shared) {
3825 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
3826 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3827 }
3828
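/* Atomics must be performed one lane at a time: loop over the SIMD
 * lanes, and for each active in-bounds lane issue a scalar atomic
 * and gather the old value into atom_res; inactive lanes return 0. */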
3829 struct lp_build_loop_state loop_state;
3830 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3831
3832 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3833 loop_state.counter, "");
3834 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3835
3836 index = LLVMBuildExtractElement(gallivm->builder, index,
3837 loop_state.counter, "");
3838
3839 scalar_ptr = LLVMBuildGEP(builder, scalar_ptr,
3840 &index, 1, "");
3841
3842 struct lp_build_if_state ifthen;
3843 LLVMValueRef cond, temp_res;
3844
3845 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3846 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3847 lp_build_if(&ifthen, gallivm, cond);
3848
3849 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3850 LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
3851 LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
3852 loop_state.counter, "");
3853 cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
3854 scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
3855 cas_src_ptr,
3856 LLVMAtomicOrderingSequentiallyConsistent,
3857 LLVMAtomicOrderingSequentiallyConsistent,
3858 false);
3859 scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
3860 } else {
3861 scalar = LLVMBuildAtomicRMW(builder, op,
3862 scalar_ptr, value_ptr,
3863 LLVMAtomicOrderingSequentiallyConsistent,
3864 false);
3865 }
3866 temp_res = LLVMBuildLoad(builder, atom_res, "");
3867 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3868 LLVMBuildStore(builder, temp_res, atom_res);
3869 lp_build_else(&ifthen);
3870 temp_res = LLVMBuildLoad(builder, atom_res, "");
3871 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3872 LLVMBuildStore(builder, temp_res, atom_res);
3873 lp_build_endif(&ifthen);
3874
3875 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3876 NULL, LLVMIntUGE);
3877 emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, "");
3878 }
3879 }
3880
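/*
 * Emit BARRIER by suspending the coroutine: every invocation in the
 * work-group reaches this suspend point before any of them resumes.
 */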
3881 static void
3882 barrier_emit(
3883 const struct lp_build_tgsi_action * action,
3884 struct lp_build_tgsi_context * bld_base,
3885 struct lp_build_emit_data * emit_data)
3886 {
3887 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3888 struct gallivm_state * gallivm = bld_base->base.gallivm;
3889
3890 LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
3891
3892 lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
3893 LLVMPositionBuilderAtEnd(gallivm->builder, resume);
3894 }
3895
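/*
 * Emit MEMBAR as a sequentially-consistent LLVM fence.
 */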
3896 static void
3897 membar_emit(
3898 const struct lp_build_tgsi_action * action,
3899 struct lp_build_tgsi_context * bld_base,
3900 struct lp_build_emit_data * emit_data)
3901 {
3902 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3903 LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, false, "");
3904 }
3905
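/*
 * Add 1 to the lanes of *ptr selected by mask. Mask lanes are either
 * 0 or ~0 (i.e. -1), so subtracting the mask increments exactly the
 * active lanes.
 */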
3906 static void
3907 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3908 LLVMValueRef ptr,
3909 LLVMValueRef mask)
3910 {
3911 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3912 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3913
3914 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3915
3916 LLVMBuildStore(builder, current_vec, ptr);
3917 }
3918
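/*
 * Zero out the lanes of *ptr selected by mask, leaving the other
 * lanes untouched.
 */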
3919 static void
3920 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3921 LLVMValueRef ptr,
3922 LLVMValueRef mask)
3923 {
3924 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3925 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3926
3927 current_vec = lp_build_select(&bld_base->uint_bld,
3928 mask,
3929 bld_base->uint_bld.zero,
3930 current_vec);
3931
3932 LLVMBuildStore(builder, current_vec, ptr);
3933 }
3934
3935 static LLVMValueRef
3936 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3937 LLVMValueRef current_mask_vec,
3938 LLVMValueRef total_emitted_vertices_vec)
3939 {
3940 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3941 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3942 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3943 total_emitted_vertices_vec,
3944 bld->max_output_vertices_vec);
3945
3946 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3947 }
3948
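/*
 * Emit a GS vertex: gather the current outputs and pass them to the
 * GS interface, then bump the per-lane emitted-vertex counters for
 * the active lanes. The mask is clamped so lanes that already hit
 * max_output_vertices stop emitting.
 */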
3949 static void
3950 emit_vertex(
3951 const struct lp_build_tgsi_action * action,
3952 struct lp_build_tgsi_context * bld_base,
3953 struct lp_build_emit_data * emit_data)
3954 {
3955 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3956 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3957
3958 if (bld->gs_iface->emit_vertex) {
3959 LLVMValueRef stream_id = emit_fetch_immediate(bld_base, &emit_data->inst->Src[0],
3960 TGSI_TYPE_UNSIGNED,
3961 emit_data->inst->Src[0].Register.SwizzleX);
3962 LLVMValueRef mask = mask_vec(bld_base);
3963 LLVMValueRef total_emitted_vertices_vec =
3964 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3965
3966 mask = clamp_mask_to_max_output_vertices(bld, mask,
3967 total_emitted_vertices_vec);
3968 gather_outputs(bld);
3969 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
3970 bld->outputs,
3971 total_emitted_vertices_vec,
3972 stream_id);
3973 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3974 mask);
3975 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3976 mask);
3977 #if DUMP_GS_EMITS
3978 lp_build_print_value(bld->bld_base.base.gallivm,
3979 " +++ emit vertex masked ones = ",
3980 mask);
3981 lp_build_print_value(bld->bld_base.base.gallivm,
3982 " +++ emit vertex emitted = ",
3983 total_emitted_vertices_vec);
3984 #endif
3985 }
3986 }
3987
3988
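/*
 * Finish the current primitive for the lanes selected by mask, then
 * bump the emitted-primitive counters and reset the per-primitive
 * vertex counts for those lanes.
 */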
3989 static void
3990 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
3991 LLVMValueRef mask)
3992 {
3993 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3994 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3995
3996 if (bld->gs_iface->end_primitive) {
3997 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3998 LLVMValueRef emitted_vertices_vec =
3999 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
4000 LLVMValueRef emitted_prims_vec =
4001 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
4002 LLVMValueRef total_emitted_vertices_vec =
4003 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
4004 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
4005 emitted_vertices_vec,
4006 uint_bld->zero);
4007 /* We need to combine the current execution mask with the mask
4008 telling us which, if any, execution slots still have unflushed
4009 vertices. This way end_primitive executes only on the paths
4010 that actually have unemitted vertices */
4011 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
4012
4013 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
4014 total_emitted_vertices_vec,
4015 emitted_vertices_vec,
4016 emitted_prims_vec,
4017 mask_vec(bld_base));
4018
4019 #if DUMP_GS_EMITS
4020 lp_build_print_value(bld->bld_base.base.gallivm,
4021 " +++ end prim masked ones = ",
4022 mask);
4023 lp_build_print_value(bld->bld_base.base.gallivm,
4024 " +++ end prim emitted verts1 = ",
4025 emitted_vertices_vec);
4026 lp_build_print_value(bld->bld_base.base.gallivm,
4027 " +++ end prim emitted prims1 = ",
4028 LLVMBuildLoad(builder,
4029 bld->emitted_prims_vec_ptr, ""));
4030 #endif
4031 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
4032 mask);
4033 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
4034 mask);
4035 #if DUMP_GS_EMITS
4036 lp_build_print_value(bld->bld_base.base.gallivm,
4037 " +++ end prim emitted verts2 = ",
4038 LLVMBuildLoad(builder,
4039 bld->emitted_vertices_vec_ptr, ""));
4040 #endif
4041 }
4042
4043 }
4044
4045 static void
4046 end_primitive(
4047 const struct lp_build_tgsi_action * action,
4048 struct lp_build_tgsi_context * bld_base,
4049 struct lp_build_emit_data * emit_data)
4050 {
4051 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4052
4053 if (bld->gs_iface->end_primitive) {
4054 LLVMValueRef mask = mask_vec(bld_base);
4055 end_primitive_masked(bld_base, mask);
4056 }
4057 }
4058
4059 static void
4060 barrier_emit_tcs(
4061 const struct lp_build_tgsi_action * action,
4062 struct lp_build_tgsi_context * bld_base,
4063 struct lp_build_emit_data * emit_data)
4064 {
4065 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4066
4067 if (bld->tcs_iface->emit_barrier) {
4068 bld->tcs_iface->emit_barrier((struct lp_build_context*)bld_base);
4069 }
4070 }
4071
4072
4073 static void
4074 cal_emit(
4075 const struct lp_build_tgsi_action * action,
4076 struct lp_build_tgsi_context * bld_base,
4077 struct lp_build_emit_data * emit_data)
4078 {
4079 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4080
4081 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
4082 &bld_base->pc);
4083 }
4084
4085 static void
4086 ret_emit(
4087 const struct lp_build_tgsi_action * action,
4088 struct lp_build_tgsi_context * bld_base,
4089 struct lp_build_emit_data * emit_data)
4090 {
4091 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4092
4093 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
4094 }
4095
4096 static void
4097 brk_emit(
4098 const struct lp_build_tgsi_action * action,
4099 struct lp_build_tgsi_context * bld_base,
4100 struct lp_build_emit_data * emit_data)
4101 {
4102 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4103
4104 lp_exec_tgsi_break(&bld->exec_mask, bld_base);
4105 }
4106
4107 static void
4108 if_emit(
4109 const struct lp_build_tgsi_action * action,
4110 struct lp_build_tgsi_context * bld_base,
4111 struct lp_build_emit_data * emit_data)
4112 {
4113 LLVMValueRef tmp;
4114 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4115
4116 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
4117 emit_data->args[0], bld->bld_base.base.zero);
4118 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
4119 }
4120
4121 static void
4122 uif_emit(
4123 const struct lp_build_tgsi_action * action,
4124 struct lp_build_tgsi_context * bld_base,
4125 struct lp_build_emit_data * emit_data)
4126 {
4127 LLVMValueRef tmp;
4128 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4129 struct lp_build_context *uint_bld = &bld_base->uint_bld;
4130
4131 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
4132 emit_data->args[0], uint_bld->zero);
4133 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
4134 }
4135
4136 static void
4137 case_emit(
4138 const struct lp_build_tgsi_action * action,
4139 struct lp_build_tgsi_context * bld_base,
4140 struct lp_build_emit_data * emit_data)
4141 {
4142 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4143
4144 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
4145 }
4146
4147 static void
4148 default_emit(
4149 const struct lp_build_tgsi_action * action,
4150 struct lp_build_tgsi_context * bld_base,
4151 struct lp_build_emit_data * emit_data)
4152 {
4153 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4154
4155 lp_exec_default(&bld->exec_mask, bld_base);
4156 }
4157
4158 static void
4159 switch_emit(
4160 const struct lp_build_tgsi_action * action,
4161 struct lp_build_tgsi_context * bld_base,
4162 struct lp_build_emit_data * emit_data)
4163 {
4164 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4165
4166 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
4167 }
4168
4169 static void
4170 endswitch_emit(
4171 const struct lp_build_tgsi_action * action,
4172 struct lp_build_tgsi_context * bld_base,
4173 struct lp_build_emit_data * emit_data)
4174 {
4175 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4176
4177 lp_exec_endswitch(&bld->exec_mask, bld_base);
4178 }
4179
4180 static void
4181 bgnloop_emit(
4182 const struct lp_build_tgsi_action * action,
4183 struct lp_build_tgsi_context * bld_base,
4184 struct lp_build_emit_data * emit_data)
4185 {
4186 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4187
4188 lp_exec_bgnloop(&bld->exec_mask, true);
4189 }
4190
4191 static void
4192 bgnsub_emit(
4193 const struct lp_build_tgsi_action * action,
4194 struct lp_build_tgsi_context * bld_base,
4195 struct lp_build_emit_data * emit_data)
4196 {
4197 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4198
4199 lp_exec_mask_bgnsub(&bld->exec_mask);
4200 }
4201
4202 static void
4203 else_emit(
4204 const struct lp_build_tgsi_action * action,
4205 struct lp_build_tgsi_context * bld_base,
4206 struct lp_build_emit_data * emit_data)
4207 {
4208 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4209
4210 lp_exec_mask_cond_invert(&bld->exec_mask);
4211 }
4212
4213 static void
4214 endif_emit(
4215 const struct lp_build_tgsi_action * action,
4216 struct lp_build_tgsi_context * bld_base,
4217 struct lp_build_emit_data * emit_data)
4218 {
4219 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4220
4221 lp_exec_mask_cond_pop(&bld->exec_mask);
4222 }
4223
4224 static void
4225 endloop_emit(
4226 const struct lp_build_tgsi_action * action,
4227 struct lp_build_tgsi_context * bld_base,
4228 struct lp_build_emit_data * emit_data)
4229 {
4230 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4231
4232 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
4233 }
4234
4235 static void
4236 endsub_emit(
4237 const struct lp_build_tgsi_action * action,
4238 struct lp_build_tgsi_context * bld_base,
4239 struct lp_build_emit_data * emit_data)
4240 {
4241 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4242
4243 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
4244 }
4245
4246 static void
4247 cont_emit(
4248 const struct lp_build_tgsi_action * action,
4249 struct lp_build_tgsi_context * bld_base,
4250 struct lp_build_emit_data * emit_data)
4251 {
4252 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4253
4254 lp_exec_continue(&bld->exec_mask);
4255 }
4256
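/*
 * Shader prologue: allocate alloca-backed arrays for any register
 * files that are indirectly addressed, copy already-known inputs
 * into the input array, and zero the GS emit counters if needed.
 */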
4257 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
4258 {
4259 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4260 struct gallivm_state * gallivm = bld_base->base.gallivm;
4261
4262 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
4263 unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
4264 bld->temps_array = lp_build_alloca_undef(gallivm,
4265 LLVMArrayType(bld_base->base.vec_type, array_size),
4266 "temp_array");
4267 }
4268
4269 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
4270 LLVMValueRef array_size =
4271 lp_build_const_int32(gallivm,
4272 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
4273 bld->outputs_array = lp_build_array_alloca(gallivm,
4274 bld_base->base.vec_type, array_size,
4275 "output_array");
4276 }
4277
4278 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
4279 unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
4280 bld->imms_array = lp_build_alloca_undef(gallivm,
4281 LLVMArrayType(bld_base->base.vec_type, array_size),
4282 "imms_array");
4283 }
4284
4285 /* If we have indirect addressing of inputs, we need to copy them into
4286 * our alloca array so we can index them dynamically */
4287 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) &&
4288 !bld->gs_iface && !bld->tes_iface && !bld->tcs_iface) {
4289 unsigned index, chan;
4290 LLVMTypeRef vec_type = bld_base->base.vec_type;
4291 LLVMValueRef array_size = lp_build_const_int32(gallivm,
4292 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
4293 bld->inputs_array = lp_build_array_alloca(gallivm,
4294 vec_type, array_size,
4295 "input_array");
4296
4297 assert(bld_base->info->num_inputs
4298 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
4299
4300 for (index = 0; index < bld_base->info->num_inputs; ++index) {
4301 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
4302 LLVMValueRef lindex =
4303 lp_build_const_int32(gallivm, index * 4 + chan);
4304 LLVMValueRef input_ptr =
4305 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
4306 &lindex, 1, "");
4307 LLVMValueRef value = bld->inputs[index][chan];
4308 if (value)
4309 LLVMBuildStore(gallivm->builder, value, input_ptr);
4310 }
4311 }
4312 }
4313
4314 if (bld->gs_iface) {
4315 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
4316 bld->emitted_prims_vec_ptr =
4317 lp_build_alloca(gallivm,
4318 uint_bld->vec_type,
4319 "emitted_prims_ptr");
4320 bld->emitted_vertices_vec_ptr =
4321 lp_build_alloca(gallivm,
4322 uint_bld->vec_type,
4323 "emitted_vertices_ptr");
4324 bld->total_emitted_vertices_vec_ptr =
4325 lp_build_alloca(gallivm,
4326 uint_bld->vec_type,
4327 "total_emitted_vertices_ptr");
4328
4329 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4330 bld->emitted_prims_vec_ptr);
4331 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4332 bld->emitted_vertices_vec_ptr);
4333 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4334 bld->total_emitted_vertices_vec_ptr);
4335 }
4336
4337 if (DEBUG_EXECUTION) {
4338 lp_build_printf(gallivm, "\n");
4339 emit_dump_file(bld, TGSI_FILE_CONSTANT);
4340 if (!bld->gs_iface)
4341 emit_dump_file(bld, TGSI_FILE_INPUT);
4342 }
4343 }
4344
4345 static void emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base)
4346 {
4347 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4348
4349 if (bld->tcs_iface && bld->tcs_iface->emit_prologue) {
4350 bld->tcs_iface->emit_prologue((struct lp_build_context*)bld_base);
4351 }
4352 }
4353
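/*
 * Shader epilogue: optionally dump outputs for debugging, run the
 * TCS epilogue if present, flush any pending GS vertices, or gather
 * the outputs into the caller's slots.
 */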
4354 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
4355 {
4356 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4357 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
4358
4359 if (DEBUG_EXECUTION) {
4360 /* for debugging */
4361 if (0) {
4362 emit_dump_file(bld, TGSI_FILE_TEMPORARY);
4363 }
4364 emit_dump_file(bld, TGSI_FILE_OUTPUT);
4365 lp_build_printf(bld_base->base.gallivm, "\n");
4366 }
4367
4368 if (bld->tcs_iface && bld->tcs_iface->emit_epilogue) {
4369 bld->tcs_iface->emit_epilogue((struct lp_build_context*)bld_base);
4370 }
4371
4372 /* If we have indirect addressing of outputs, we need to copy our alloca
4373 * array back to the output slots specified by the caller */
4374 if (bld->gs_iface) {
4375 LLVMValueRef total_emitted_vertices_vec;
4376 LLVMValueRef emitted_prims_vec;
4377 /* Implicit end_primitive, needed in case there are any unflushed
4378 vertices in the cache. Note that we must not call end_primitive
4379 here, since the exec_mask is not valid at this point. */
4380 end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
4381
4382 total_emitted_vertices_vec =
4383 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
4384 emitted_prims_vec =
4385 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
4386
4387 bld->gs_iface->gs_epilogue(bld->gs_iface,
4388 total_emitted_vertices_vec,
4389 emitted_prims_vec, 0);
4390 } else {
4391 gather_outputs(bld);
4392 }
4393 }
4394
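/*
 * Main entry point: translate a TGSI token stream into LLVM IR in
 * SoA form. Sets up the build contexts and per-file fetch/store
 * hooks, registers the opcode actions, and then walks the tokens
 * via lp_build_tgsi_llvm().
 */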
4395 void
4396 lp_build_tgsi_soa(struct gallivm_state *gallivm,
4397 const struct tgsi_token *tokens,
4398 const struct lp_build_tgsi_params *params,
4399 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
4400 {
4401 struct lp_build_tgsi_soa_context bld;
4402 struct lp_type type = params->type;
4403 struct lp_type res_type;
4404
4405 assert(type.length <= LP_MAX_VECTOR_LENGTH);
4406 memset(&res_type, 0, sizeof res_type);
4407 res_type.width = type.width;
4408 res_type.length = type.length;
4409 res_type.sign = 1;
4410
4411 /* Setup build context */
4412 memset(&bld, 0, sizeof bld);
4413 lp_build_context_init(&bld.bld_base.base, gallivm, type);
4414 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
4415 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
4416 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
4417 {
4418 struct lp_type dbl_type;
4419 dbl_type = type;
4420 dbl_type.width *= 2;
4421 lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
4422 }
4423 {
4424 struct lp_type uint64_type;
4425 uint64_type = lp_uint_type(type);
4426 uint64_type.width *= 2;
4427 lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
4428 }
4429 {
4430 struct lp_type int64_type;
4431 int64_type = lp_int_type(type);
4432 int64_type.width *= 2;
4433 lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
4434 }
4435 bld.mask = params->mask;
4436 bld.inputs = params->inputs;
4437 bld.outputs = outputs;
4438 bld.consts_ptr = params->consts_ptr;
4439 bld.const_sizes_ptr = params->const_sizes_ptr;
4440 bld.ssbo_ptr = params->ssbo_ptr;
4441 bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
4442 bld.sampler = params->sampler;
4443 bld.bld_base.info = params->info;
4444 bld.indirect_files = params->info->indirect_files;
4445 bld.context_ptr = params->context_ptr;
4446 bld.thread_data_ptr = params->thread_data_ptr;
4447 bld.image = params->image;
4448 bld.shared_ptr = params->shared_ptr;
4449 bld.coro = params->coro;
4450
4451 /*
4452 * If the number of temporaries is rather large then we just
4453 * allocate them as an array right from the start and treat
4454 * them like indirect temporaries.
4455 */
4456 if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
4457 bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
4458 }
4459 /*
4460 * For performance reasons immediates are normally backed by a static
4461 * array, but if their number is too great, we have to fall back to
4462 * a dynamically allocated array.
4463 */
4464 bld.use_immediates_array =
4465 (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
4466 if (bld.use_immediates_array) {
4467 bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
4468 }
4469
4470
4471 bld.bld_base.soa = TRUE;
4472 bld.bld_base.emit_debug = emit_debug;
4473 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
4474 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
4475 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
4476 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
4477 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
4478
4479 bld.bld_base.emit_store = emit_store;
4480 bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_output;
4481 bld.bld_base.emit_store_reg_funcs[TGSI_FILE_TEMPORARY] = emit_store_temp;
4482 bld.bld_base.emit_store_reg_funcs[TGSI_FILE_ADDRESS] = emit_store_address;
4483
4484 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
4485 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
4486
4487 bld.bld_base.emit_prologue = emit_prologue;
4488 bld.bld_base.emit_prologue_post_decl = emit_prologue_post_decl;
4489 bld.bld_base.emit_epilogue = emit_epilogue;
4490
4491 /* Set opcode actions */
4492 lp_set_default_actions_cpu(&bld.bld_base);
4493
4494 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
4495 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
4496 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
4497 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
4498 bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
4499 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
4500 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
4501 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
4502 bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
4503 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
4504 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
4505 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
4506 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
4507 bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
4508 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
4509 bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
4510 bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
4511 bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
4512 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
4513 bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
4514 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
4515 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
4516 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
4517 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
4518 bld.bld_base.op_actions[TGSI_OPCODE_TEX_LZ].emit = txl_emit;
4519 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
4520 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
4521 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
4522 bld.bld_base.op_actions[TGSI_OPCODE_TXF_LZ].emit = txf_emit;
4523 bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
4524 bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
4525 bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
4526 bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
4527 bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
4528 /* DX10 sampling ops */
4529 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
4530 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
4531 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
4532 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
4533 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
4534 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
4535 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
4536 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
4537 bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
4538 bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
4539 bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
4540
4541 bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
4542 bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
4543 bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
4544
4545 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
4546 bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
4547 bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
4548 bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
4549 bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
4550 bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
4551 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
4552 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
4553 bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
4554 bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;
4555
4556 bld.bld_base.op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
4557 bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit;
4558
4559 if (params->gs_iface) {
4560 /* There's no default value for this because it should always be
4561 * set, but apps using ext_geometry_shader4 quite often forgot to
4562 * set it, so we fall back to MAX_VERTEX_VARYING from that spec.
4563 * We could debug_assert if it's not set instead, but that's a lot
4564 * uglier. */
4565 uint max_output_vertices;
4566
4567 /* inputs are always indirect with gs */
4568 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4569 bld.gs_iface = params->gs_iface;
4570 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
4571 bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
4572 bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
4573
4574 max_output_vertices =
4575 params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
4576 if (!max_output_vertices)
4577 max_output_vertices = 32;
4578
4579 bld.max_output_vertices_vec =
4580 lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
4581 max_output_vertices);
4582 }
4583
4584 if (params->tes_iface) {
4585 /* inputs are always indirect with tes */
4586 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4587 bld.tes_iface = params->tes_iface;
4588 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tes_input;
4589 }
4590
4591 if (params->tcs_iface) {
4592 bld.tcs_iface = params->tcs_iface;
4593 /* outputs and inputs are always indirect with tcs */
4594 bld.indirect_files |= (1 << TGSI_FILE_OUTPUT);
4595 bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_tcs_output;
4596 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4597 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tcs_input;
4598 bld.bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_tcs_input;
4599 bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit_tcs;
4600 }
4601
4602 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
4603
4604 bld.system_values = *params->system_values;
4605
4606 lp_build_tgsi_llvm(&bld.bld_base, tokens);
4607
4608 if (0) {
4609 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
4610 LLVMValueRef function = LLVMGetBasicBlockParent(block);
4611 debug_printf("--- begin TGSI + IR dump ---\n");
4612 tgsi_dump(tokens, 0);
4613 lp_debug_dump_value(function);
4614 debug_printf("--- end TGSI + IR dump ---\n");
4615 }
4616
4617 if (0) {
4618 LLVMModuleRef module = LLVMGetGlobalParent(
4619 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
4620 LLVMDumpModule(module);
4621
4622 }
4623 lp_exec_mask_fini(&bld.exec_mask);
4624 }