1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "util/u_prim.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_info.h"
48 #include "tgsi/tgsi_parse.h"
49 #include "tgsi/tgsi_util.h"
50 #include "tgsi/tgsi_scan.h"
51 #include "tgsi/tgsi_strings.h"
52 #include "lp_bld_tgsi_action.h"
53 #include "lp_bld_type.h"
54 #include "lp_bld_const.h"
55 #include "lp_bld_arit.h"
56 #include "lp_bld_bitarit.h"
57 #include "lp_bld_gather.h"
58 #include "lp_bld_init.h"
59 #include "lp_bld_logic.h"
60 #include "lp_bld_misc.h"
61 #include "lp_bld_swizzle.h"
62 #include "lp_bld_flow.h"
63 #include "lp_bld_coro.h"
64 #include "lp_bld_quad.h"
65 #include "lp_bld_tgsi.h"
66 #include "lp_bld_limits.h"
67 #include "lp_bld_debug.h"
68 #include "lp_bld_printf.h"
69 #include "lp_bld_sample.h"
70 #include "lp_bld_struct.h"
71
72 #define DUMP_GS_EMITS 0
73
74 /*
75 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
76 * instruction.
77 *
78 * TODO:
79 * - take execution masks into consideration
80 * - debug control-flow instructions
81 */
82 #define DEBUG_EXECUTION 0
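
/*
 * Usage note: setting DEBUG_EXECUTION to 1 makes the generated IR print
 * every register write while the compiled shader runs; emit_dump_reg()
 * below builds the lp_build_print_value() calls that do the printing.
 */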
83
84
85 /*
86 * Emit code to print a register value.
87 */
88 static void
89 emit_dump_reg(struct gallivm_state *gallivm,
90 unsigned file,
91 unsigned index,
92 unsigned chan,
93 LLVMValueRef value)
94 {
95 char buf[32];
96
97 snprintf(buf, sizeof buf, " %s[%u].%c = ",
98 tgsi_file_name(file),
99 index, "xyzw"[chan]);
100
101 lp_build_print_value(gallivm, buf, value);
102 }
103
104 static inline struct function_ctx *
105 func_ctx(struct lp_exec_mask *mask)
106 {
107 assert(mask->function_stack_size > 0);
108 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
109 return &mask->function_stack[mask->function_stack_size - 1];
110 }
111
112 /*
113 * Combine the execution mask, if there is one, with the current mask.
114 */
115 static LLVMValueRef
116 mask_vec(struct lp_build_tgsi_context *bld_base)
117 {
118 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
119 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
120 struct lp_exec_mask *exec_mask = &bld->exec_mask;
121 LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
122 if (!exec_mask->has_mask) {
123 return bld_mask;
124 }
125 if (!bld_mask)
126 return exec_mask->exec_mask;
127 return LLVMBuildAnd(builder, bld_mask,
128 exec_mask->exec_mask, "");
129 }
130
131 static void lp_exec_tgsi_break(struct lp_exec_mask *mask,
132 struct lp_build_tgsi_context * bld_base)
133 {
134 enum tgsi_opcode opcode =
135 bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
136 bool break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
137 opcode == TGSI_OPCODE_CASE);
138 lp_exec_break(mask, &bld_base->pc, break_always);
139 }
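
/*
 * Note: a BRK immediately followed by CASE or ENDSWITCH ends the current
 * case for every channel that reached it (no code remains in the case),
 * which is why break_always is passed to lp_exec_break() above.
 */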
140
141 static void lp_exec_switch(struct lp_exec_mask *mask,
142 LLVMValueRef switchval)
143 {
144 struct function_ctx *ctx = func_ctx(mask);
145
146 if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
147 ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
148 ctx->switch_stack_size++;
149 return;
150 }
151
152 ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
153 ctx->break_type;
154 ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
155
156 ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
157 ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
158 ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
159 ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
160 ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
161 ctx->switch_stack_size++;
162
163 mask->switch_mask = LLVMConstNull(mask->int_vec_type);
164 ctx->switch_val = switchval;
165 ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
166 ctx->switch_in_default = false;
167 ctx->switch_pc = 0;
168
169 lp_exec_mask_update(mask);
170 }
171
172 static void lp_exec_endswitch(struct lp_exec_mask *mask,
173 struct lp_build_tgsi_context * bld_base)
174 {
175 LLVMBuilderRef builder = mask->bld->gallivm->builder;
176 struct function_ctx *ctx = func_ctx(mask);
177
178 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
179 ctx->switch_stack_size--;
180 return;
181 }
182
183 /* Check if there's a deferred default; if so, execute it now. */
184 if (ctx->switch_pc && !ctx->switch_in_default) {
185 LLVMValueRef prevmask, defaultmask;
186 unsigned tmp_pc;
187 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
188 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
189 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
190 ctx->switch_in_default = true;
191
192 lp_exec_mask_update(mask);
193
194 assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
195 TGSI_OPCODE_DEFAULT);
196
197 tmp_pc = bld_base->pc;
198 bld_base->pc = ctx->switch_pc;
199 /*
200 * re-purpose switch_pc to point to here again, since we stop execution of
201 * the deferred default after the next break.
202 */
203 ctx->switch_pc = tmp_pc - 1;
204
205 return;
206 }
207
208 else if (ctx->switch_pc && ctx->switch_in_default) {
209 assert(bld_base->pc == ctx->switch_pc + 1);
210 }
211
212 ctx->switch_stack_size--;
213 mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
214 ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
215 ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
216 ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
217 ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
218
219 ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
220
221 lp_exec_mask_update(mask);
222 }
223
224 static void lp_exec_case(struct lp_exec_mask *mask,
225 LLVMValueRef caseval)
226 {
227 LLVMBuilderRef builder = mask->bld->gallivm->builder;
228 struct function_ctx *ctx = func_ctx(mask);
229
230 LLVMValueRef casemask, prevmask;
231
232 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
233 return;
234 }
235
236 /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
237 if (!ctx->switch_in_default) {
238 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
239 casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
240 ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
241 ctx->switch_mask_default, "sw_default_mask");
242 casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
243 mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
244
245 lp_exec_mask_update(mask);
246 }
247 }
248
249 /*
250 * Analyse the default statement in a switch.
251 * \return true if default is the last statement, false otherwise
252 * \param default_pc_start receives the pc of the instruction to jump to
253 * if default wasn't last but there's no
254 * fallthrough into it.
255 */
256 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
257 struct lp_build_tgsi_context * bld_base,
258 int *default_pc_start)
259 {
260 unsigned pc = bld_base->pc;
261 struct function_ctx *ctx = func_ctx(mask);
262 int curr_switch_stack = ctx->switch_stack_size;
263
264 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
265 return false;
266 }
267
268 /* skip over case statements that appear together with the default */
269 while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
270 pc++;
271 }
272
273 while (pc != ~0u && pc < bld_base->num_instructions) {
274 enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
275 switch (opcode) {
276 case TGSI_OPCODE_CASE:
277 if (curr_switch_stack == ctx->switch_stack_size) {
278 *default_pc_start = pc - 1;
279 return false;
280 }
281 break;
282 case TGSI_OPCODE_SWITCH:
283 curr_switch_stack++;
284 break;
285 case TGSI_OPCODE_ENDSWITCH:
286 if (curr_switch_stack == ctx->switch_stack_size) {
287 *default_pc_start = pc - 1;
288 return true;
289 }
290 curr_switch_stack--;
291 break;
292 default:
293 ; /* nothing */
294 }
295 pc++;
296 }
297 /* should never arrive here */
298 assert(0);
299 return true;
300 }
301
302 static void lp_exec_default(struct lp_exec_mask *mask,
303 struct lp_build_tgsi_context * bld_base)
304 {
305 LLVMBuilderRef builder = mask->bld->gallivm->builder;
306 struct function_ctx *ctx = func_ctx(mask);
307
308 int default_exec_pc;
309 boolean default_is_last;
310
311 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
312 return;
313 }
314
315 /*
316 * This is a messy opcode, because it may not always be at the end and
317 * there can be fallthrough in and out of it.
318 */
319
320 default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
321 /*
322 * If it is the last statement in the switch (note that case statements
323 * appearing "at the same time" as default don't change that), everything is
324 * fine: update the switch mask and go on. This means we can handle default with
325 * fallthrough INTO it without overhead, if it is last.
326 */
327 if (default_is_last) {
328 LLVMValueRef prevmask, defaultmask;
329 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
330 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
331 defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
332 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
333 ctx->switch_in_default = true;
334
335 lp_exec_mask_update(mask);
336 }
337 else {
338 /*
339 * Technically, a "case" immediately before default isn't really a
340 * fallthrough, however we still have to count it as such since we
341 * have already updated the masks.
342 * If that happens in practice we could add a switch optimizer pass
343 * which just gets rid of all case statements appearing together with
344 * default (or do the switch analysis at switch start time instead).
345 */
346 enum tgsi_opcode opcode =
347 bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
348 boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
349 opcode != TGSI_OPCODE_SWITCH);
350 /*
351 * If it is not the last statement and there was no fallthrough into it,
352 * we record the pc and continue execution at the next case (again, cases
353 * encountered at the same time don't count). At endswitch time we update
354 * the switch mask and go back to execute the skipped code until the next
355 * break (possibly re-executing some code with a changed mask if there was
356 * a fallthrough out of default). Finally, if it is not the last statement
357 * and there was a fallthrough into it, we do the same as in the former
358 * case, except instead of skipping the code we execute it without updating
359 * the mask, then go back and re-execute (see the example after this function).
360 */
361 ctx->switch_pc = bld_base->pc;
362 if (!ft_into) {
363 bld_base->pc = default_exec_pc;
364 }
365 }
366 }
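
/*
 * Worked example for the deferred-default handling above (an illustrative
 * TGSI sketch, not taken from a real shader):
 *
 *    SWITCH s
 *    CASE 0
 *    BRK
 *    DEFAULT     <- not last, no fallthrough in: record pc, skip ahead
 *    ...body...
 *    BRK
 *    CASE 1      <- first pass resumes here
 *    BRK
 *    ENDSWITCH   <- deferred default found: jump back, run ...body... with
 *                   the default mask, return here after its BRK
 */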
367
368
369 static void lp_exec_mask_call(struct lp_exec_mask *mask,
370 int func,
371 int *pc)
372 {
373 if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
374 return;
375 }
376
377 lp_exec_mask_function_init(mask, mask->function_stack_size);
378 mask->function_stack[mask->function_stack_size].pc = *pc;
379 mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
380 mask->function_stack_size++;
381 *pc = func;
382 }
383
384 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
385 {
386 LLVMBuilderRef builder = mask->bld->gallivm->builder;
387 struct function_ctx *ctx = func_ctx(mask);
388 LLVMValueRef exec_mask;
389
390 if (ctx->cond_stack_size == 0 &&
391 ctx->loop_stack_size == 0 &&
392 ctx->switch_stack_size == 0 &&
393 mask->function_stack_size == 1) {
394 /* returning from main() */
395 *pc = -1;
396 return;
397 }
398
399 if (mask->function_stack_size == 1) {
400 /*
401 * This requires special handling since we need to ensure
402 * we don't drop the mask even if we have no call stack
403 * (e.g. after a ret in an if clause, the mask must survive past the endif)
404 */
405 mask->ret_in_main = TRUE;
406 }
407
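/*
 * Clear the channels that execute this ret from ret_mask; the
 * lp_exec_mask_update() below then keeps them inactive for the
 * remainder of the function.
 */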
408 exec_mask = LLVMBuildNot(builder,
409 mask->exec_mask,
410 "ret");
411
412 mask->ret_mask = LLVMBuildAnd(builder,
413 mask->ret_mask,
414 exec_mask, "ret_full");
415
416 lp_exec_mask_update(mask);
417 }
418
419 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
420 {
421 }
422
423 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
424 {
425 struct function_ctx *ctx;
426
427 assert(mask->function_stack_size > 1);
428 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
429
430 ctx = func_ctx(mask);
431 mask->function_stack_size--;
432
433 *pc = ctx->pc;
434 mask->ret_mask = ctx->ret_mask;
435
436 lp_exec_mask_update(mask);
437 }
438
439
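/*
 * Return a pointer to the given channel of a TEMP or OUTPUT register.
 * With indirect addressing the whole file lives in one flat alloca array
 * indexed as index * 4 + chan (e.g. TEMP[2].z -> element 10); otherwise
 * each register channel has its own variable.
 */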
440 static LLVMValueRef
441 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
442 unsigned file,
443 int index,
444 unsigned chan)
445 {
446 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
447 LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
448 LLVMValueRef var_of_array;
449
450 switch (file) {
451 case TGSI_FILE_TEMPORARY:
452 array_of_vars = bld->temps;
453 var_of_array = bld->temps_array;
454 break;
455 case TGSI_FILE_OUTPUT:
456 array_of_vars = bld->outputs;
457 var_of_array = bld->outputs_array;
458 break;
459 default:
460 assert(0);
461 return NULL;
462 }
463
464 assert(chan < 4);
465
466 if (bld->indirect_files & (1 << file)) {
467 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
468 if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
469 LLVMValueRef gep[2];
470 gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
471 gep[1] = lindex;
472 return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
473 } else {
474 return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
475 }
476 }
477 else {
478 assert(index <= bld->bld_base.info->file_max[file]);
479 return array_of_vars[index][chan];
480 }
481 }
482
483
484 /**
485 * Return pointer to a temporary register channel (src or dest).
486 * Note that indirect addressing cannot be handled here.
487 * \param index which temporary register
488 * \param chan which channel of the temp register.
489 */
490 LLVMValueRef
491 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
492 unsigned index,
493 unsigned chan)
494 {
495 return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
496 }
497
498 /**
499 * Return pointer to an output register channel (src or dest).
500 * Note that indirect addressing cannot be handled here.
501 * \param index which output register
502 * \param chan which channel of the output register.
503 */
504 LLVMValueRef
505 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
506 unsigned index,
507 unsigned chan)
508 {
509 return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
510 }
511
512 /*
513 * If we have indirect addressing in outputs, copy our alloca array
514 * to the output slots specified by the caller to make sure
515 * our outputs are delivered consistently via the same interface.
516 */
517 static void
518 gather_outputs(struct lp_build_tgsi_soa_context * bld)
519 {
520 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
521 unsigned index, chan;
522 assert(bld->bld_base.info->num_outputs <=
523 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
524 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
525 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
526 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
527 }
528 }
529 }
530 }
531
532 /**
533 * Gather vector.
534 * XXX the lp_build_gather() function should be capable of doing this
535 * with a little work.
536 */
537 static LLVMValueRef
538 build_gather(struct lp_build_tgsi_context *bld_base,
539 LLVMValueRef base_ptr,
540 LLVMValueRef indexes,
541 LLVMValueRef overflow_mask,
542 LLVMValueRef indexes2)
543 {
544 struct gallivm_state *gallivm = bld_base->base.gallivm;
545 LLVMBuilderRef builder = gallivm->builder;
546 struct lp_build_context *uint_bld = &bld_base->uint_bld;
547 struct lp_build_context *bld = &bld_base->base;
548 LLVMValueRef res;
549 unsigned i;
550
551 if (indexes2)
552 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
553 else
554 res = bld->undef;
555 /*
556 * overflow_mask is a vector telling us which channels
557 * in the vector overflowed. We use the overflow behavior for
558 * constant buffers which is defined as:
559 * Out of bounds access to constant buffer returns 0 in all
560 * components. Out of bounds behavior is always with respect
561 * to the size of the buffer bound at that slot.
562 */
563
564 if (overflow_mask) {
565 /*
566 * We avoid per-element control flow here (also due to llvm going crazy,
567 * though I suspect it's better anyway since overflow is likely rare).
568 * Note that since we still fetch from buffers even if num_elements was
569 * zero (in this case we'll fetch from index zero) the jit func callers
570 * MUST provide valid fake constant buffers of size 4x32 (the values do
571 * not matter), otherwise we'd still need (not per element though)
572 * control flow.
573 */
574 indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
575 if (indexes2)
576 indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
577 }
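
/*
 * Illustrative example: with num_elements == 4 bound and one lane indexing
 * element 7, that lane's overflow_mask bit is set and its index was just
 * replaced with 0, so the loop below fetches element 0; the select at the
 * end of this function then zeroes the result.
 */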
578
579 /*
580 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
581 */
582 for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
583 LLVMValueRef si, di;
584 LLVMValueRef index;
585 LLVMValueRef scalar_ptr, scalar;
586
587 di = lp_build_const_int32(bld->gallivm, i);
588 if (indexes2)
589 si = lp_build_const_int32(bld->gallivm, i >> 1);
590 else
591 si = di;
592
593 if (indexes2 && (i & 1)) {
594 index = LLVMBuildExtractElement(builder,
595 indexes2, si, "");
596 } else {
597 index = LLVMBuildExtractElement(builder,
598 indexes, si, "");
599 }
600 scalar_ptr = LLVMBuildGEP(builder, base_ptr,
601 &index, 1, "gather_ptr");
602 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
603
604 res = LLVMBuildInsertElement(builder, res, scalar, di, "");
605 }
606
607 if (overflow_mask) {
608 if (indexes2) {
609 res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
610 overflow_mask = LLVMBuildSExt(builder, overflow_mask,
611 bld_base->dbl_bld.int_vec_type, "");
612 res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
613 bld_base->dbl_bld.zero, res);
614 } else
615 res = lp_build_select(bld, overflow_mask, bld->zero, res);
616 }
617
618 return res;
619 }
620
621
622 /**
623 * Scatter/store vector.
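* Stores are predicated on the exec mask: for masked-off lanes the current
* destination value is loaded back and re-stored unchanged (load + select +
* store), so only active lanes are modified.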
624 */
625 static void
626 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
627 LLVMValueRef base_ptr,
628 LLVMValueRef indexes,
629 LLVMValueRef values,
630 struct lp_exec_mask *mask)
631 {
632 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
633 LLVMBuilderRef builder = gallivm->builder;
634 unsigned i;
635 LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
636
637 /*
638 * Loop over elements of index_vec, store scalar value.
639 */
640 for (i = 0; i < bld->bld_base.base.type.length; i++) {
641 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
642 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
643 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
644 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
645 LLVMValueRef scalar_pred = pred ?
646 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
647
648 if (0)
649 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
650 ii, val, index, scalar_ptr);
651
652 if (scalar_pred) {
653 LLVMValueRef real_val, dst_val;
654 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
655 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
656 LLVMBuildStore(builder, real_val, scalar_ptr);
657 }
658 else {
659 LLVMBuildStore(builder, val, scalar_ptr);
660 }
661 }
662 }
663
664
665 /**
666 * Read the current value of the ADDR register, convert the floats to
667 * ints, add the base index and return the vector of offsets.
668 * The offsets will be used to index into the constant buffer or
669 * temporary register file.
670 */
671 static LLVMValueRef
672 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
673 unsigned reg_file, unsigned reg_index,
674 const struct tgsi_ind_register *indirect_reg,
675 int index_limit)
676 {
677 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
678 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
679 /* use the component selected by the indirect register's swizzle */
680 unsigned swizzle = indirect_reg->Swizzle;
681 LLVMValueRef base;
682 LLVMValueRef rel;
683 LLVMValueRef max_index;
684 LLVMValueRef index;
685
686 assert(bld->indirect_files & (1 << reg_file));
687
688 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
689
690 assert(swizzle < 4);
691 switch (indirect_reg->File) {
692 case TGSI_FILE_ADDRESS:
693 rel = LLVMBuildLoad(builder,
694 bld->addr[indirect_reg->Index][swizzle],
695 "load addr reg");
696 /* ADDR LLVM values already have LLVM integer type. */
697 break;
698 case TGSI_FILE_TEMPORARY:
699 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
700 rel = LLVMBuildLoad(builder, rel, "load temp reg");
701 /* TEMP LLVM values always have LLVM float type, but for indirection, the
702 * value actually stored is expected to be an integer */
703 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
704 break;
705 default:
706 assert(0);
707 rel = uint_bld->zero;
708 }
709
710 index = lp_build_add(uint_bld, base, rel);
711
712 /*
713 * emit_fetch_constant handles constant buffer overflow so this code
714 * is pointless for constant buffers.
715 * Furthermore the D3D10 spec in section 6.5 says:
716 * If the constant buffer bound to a slot is larger than the size
717 * declared in the shader for that slot, implementations are allowed
718 * to return incorrect data (not necessarily 0) for indices that are
719 * larger than the declared size but smaller than the buffer size.
720 */
721 if (reg_file != TGSI_FILE_CONSTANT) {
722 assert(index_limit >= 0);
723 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
724 uint_bld->type, index_limit);
725
726 assert(!uint_bld->type.sign);
727 index = lp_build_min(uint_bld, index, max_index);
728 }
729
730 return index;
731 }
732
733 static struct lp_build_context *
734 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
735 enum tgsi_opcode_type stype)
736 {
737 struct lp_build_context *bld_fetch;
738
739 switch (stype) {
740 case TGSI_TYPE_FLOAT:
741 case TGSI_TYPE_UNTYPED:
742 bld_fetch = &bld_base->base;
743 break;
744 case TGSI_TYPE_UNSIGNED:
745 bld_fetch = &bld_base->uint_bld;
746 break;
747 case TGSI_TYPE_SIGNED:
748 bld_fetch = &bld_base->int_bld;
749 break;
750 case TGSI_TYPE_DOUBLE:
751 bld_fetch = &bld_base->dbl_bld;
752 break;
753 case TGSI_TYPE_UNSIGNED64:
754 bld_fetch = &bld_base->uint64_bld;
755 break;
756 case TGSI_TYPE_SIGNED64:
757 bld_fetch = &bld_base->int64_bld;
758 break;
759 case TGSI_TYPE_VOID:
760 default:
761 assert(0);
762 bld_fetch = NULL;
763 break;
764 }
765 return bld_fetch;
766 }
767
768 static LLVMValueRef
769 get_soa_array_offsets(struct lp_build_context *uint_bld,
770 LLVMValueRef indirect_index,
771 unsigned chan_index,
772 boolean need_perelement_offset)
773 {
774 struct gallivm_state *gallivm = uint_bld->gallivm;
775 LLVMValueRef chan_vec =
776 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
777 LLVMValueRef length_vec =
778 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
779 LLVMValueRef index_vec;
780
781 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
782 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
783 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
784 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
785
786 if (need_perelement_offset) {
787 LLVMValueRef pixel_offsets;
788 unsigned i;
789 /* build pixel offset vector: {0, 1, 2, 3, ...} */
790 pixel_offsets = uint_bld->undef;
791 for (i = 0; i < uint_bld->type.length; i++) {
792 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
793 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
794 ii, ii, "");
795 }
796 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
797 }
798 return index_vec;
799 }
800
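/*
 * The offsets computed by get_soa_array_offsets() follow
 *    (indirect_index * 4 + chan_index) * vec_length (+ 0..vec_length-1).
 * E.g. (illustrative numbers) with vec_length == 8, register index 5 and
 * channel z (2): (5*4 + 2) * 8 = 176, so the lanes address elements
 * 176..183 of the flat SoA array.
 */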
801 static LLVMValueRef
802 emit_fetch_constant(
803 struct lp_build_tgsi_context * bld_base,
804 const struct tgsi_full_src_register * reg,
805 enum tgsi_opcode_type stype,
806 unsigned swizzle_in)
807 {
808 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
809 struct gallivm_state *gallivm = bld_base->base.gallivm;
810 LLVMBuilderRef builder = gallivm->builder;
811 struct lp_build_context *uint_bld = &bld_base->uint_bld;
812 unsigned dimension = 0;
813 LLVMValueRef consts_ptr;
814 LLVMValueRef num_consts;
815 LLVMValueRef res;
816 unsigned swizzle = swizzle_in & 0xffff;
817
818 /* XXX: Handle fetching xyzw components as a vector */
819 assert(swizzle != ~0u);
820
821 if (reg->Register.Dimension) {
822 assert(!reg->Dimension.Indirect);
823 dimension = reg->Dimension.Index;
824 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
825 }
826
827 consts_ptr = bld->consts[dimension];
828 num_consts = bld->consts_sizes[dimension];
829
830 if (reg->Register.Indirect) {
831 LLVMValueRef indirect_index;
832 LLVMValueRef swizzle_vec =
833 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
834 LLVMValueRef index_vec; /* index into the const buffer */
835 LLVMValueRef overflow_mask;
836 LLVMValueRef index_vec2 = NULL;
837
838 indirect_index = get_indirect_index(bld,
839 reg->Register.File,
840 reg->Register.Index,
841 &reg->Indirect,
842 bld->bld_base.info->file_max[reg->Register.File]);
843
844 /* All fetches are from the same constant buffer, so
845 * we need to propagate the size to a vector to do a
846 * vector comparison */
847 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
848 /* Construct a boolean vector telling us which channels
849 * overflow the bound constant buffer */
850 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
851 indirect_index, num_consts);
852
853 /* index_vec = indirect_index * 4 + swizzle */
854 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
855 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
856
857 if (tgsi_type_is_64bit(stype)) {
858 LLVMValueRef swizzle_vec2;
859 swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
860 index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
861 index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
862 }
863 /* Gather values from the constant buffer */
864 res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
865 }
866 else {
867 LLVMValueRef index; /* index into the const buffer */
868 LLVMValueRef scalar, scalar_ptr;
869 struct lp_build_context *bld_broad = &bld_base->base;
870 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
871
872 scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
873 &index, 1, "");
874
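/*
 * For 64-bit types, swizzle_in carries a second channel in its high 16
 * bits. If that channel isn't the one right after 'swizzle', the two
 * 32-bit halves are loaded separately below; otherwise the pointer
 * bitcast path can load the 64-bit value in one go.
 */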
875 if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
876
877 LLVMValueRef scalar2, scalar2_ptr;
878 LLVMValueRef shuffles[2];
879 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));
880
881 scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
882 &index, 1, "");
883
884 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
885 scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
886 shuffles[0] = lp_build_const_int32(gallivm, 0);
887 shuffles[1] = lp_build_const_int32(gallivm, 1);
888
889 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
890 res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
891 res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
892 } else {
893 if (stype == TGSI_TYPE_DOUBLE) {
894 LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
895 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
896 bld_broad = &bld_base->dbl_bld;
897 } else if (stype == TGSI_TYPE_UNSIGNED64) {
898 LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
899 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
900 bld_broad = &bld_base->uint64_bld;
901 } else if (stype == TGSI_TYPE_SIGNED64) {
902 LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
903 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
904 bld_broad = &bld_base->int64_bld;
905 }
906 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
907 res = lp_build_broadcast_scalar(bld_broad, scalar);
908 }
909
910 }
911
912 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
913 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
914 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
915 }
916
917 return res;
918 }
919
920 /**
921 * Fetch 64-bit values from two separate channels.
922 * 64-bit values are stored split across two channels, like xy and zw.
923 * This function creates a set of vec_length*2 floats,
924 * extracts the values from the two channels,
925 * puts them in the correct place, then bitcasts to a vec_length 64-bit vector.
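* E.g. with vec_length == 4 the shuffle mask is {0,4,1,5,2,6,3,7}, which
* interleaves input and input2 as in[0],in2[0],in[1],in2[1],..., so the
* final bitcast pairs adjacent floats into one 64-bit element per lane.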
926 */
927 static LLVMValueRef
928 emit_fetch_64bit(
929 struct lp_build_tgsi_context * bld_base,
930 enum tgsi_opcode_type stype,
931 LLVMValueRef input,
932 LLVMValueRef input2)
933 {
934 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
935 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
936 LLVMBuilderRef builder = gallivm->builder;
937 LLVMValueRef res;
938 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
939 int i;
940 LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
941 int len = bld_base->base.type.length * 2;
942 assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
943
944 for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
945 shuffles[i] = lp_build_const_int32(gallivm, i / 2);
946 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
947 }
948 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
949
950 return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
951 }
952
953 static LLVMValueRef
954 emit_fetch_immediate(
955 struct lp_build_tgsi_context * bld_base,
956 const struct tgsi_full_src_register * reg,
957 enum tgsi_opcode_type stype,
958 unsigned swizzle_in)
959 {
960 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
961 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
962 LLVMBuilderRef builder = gallivm->builder;
963 LLVMValueRef res = NULL;
964 unsigned swizzle = swizzle_in & 0xffff;
965
966 if (bld->use_immediates_array || reg->Register.Indirect) {
967 LLVMValueRef imms_array;
968 LLVMTypeRef fptr_type;
969
970 /* cast imms_array pointer to float* */
971 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
972 imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
973
974 if (reg->Register.Indirect) {
975 LLVMValueRef indirect_index;
976 LLVMValueRef index_vec; /* index into the immediate register array */
977 LLVMValueRef index_vec2 = NULL;
978 indirect_index = get_indirect_index(bld,
979 reg->Register.File,
980 reg->Register.Index,
981 &reg->Indirect,
982 bld->bld_base.info->file_max[reg->Register.File]);
983 /*
984 * Unlike for other reg classes, adding pixel offsets is unnecessary -
985 * immediates are stored as full vectors (FIXME??? - might be better
986 * to store them the same as constants) but all elements are the same
987 * in any case.
988 */
989 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
990 indirect_index,
991 swizzle,
992 FALSE);
993 if (tgsi_type_is_64bit(stype))
994 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
995 indirect_index,
996 swizzle_in >> 16,
997 FALSE);
998 /* Gather values from the immediate register array */
999 res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
1000 } else {
1001 LLVMValueRef gep[2];
1002 gep[0] = lp_build_const_int32(gallivm, 0);
1003 gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1004 LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
1005 bld->imms_array, gep, 2, "");
1006 res = LLVMBuildLoad(builder, imms_ptr, "");
1007
1008 if (tgsi_type_is_64bit(stype)) {
1009 LLVMValueRef imms_ptr2;
1010 LLVMValueRef res2;
1011 gep[1] = lp_build_const_int32(gallivm,
1012 reg->Register.Index * 4 + (swizzle_in >> 16));
1013 imms_ptr2 = LLVMBuildGEP(builder,
1014 bld->imms_array, gep, 2, "");
1015 res2 = LLVMBuildLoad(builder, imms_ptr2, "");
1016 res = emit_fetch_64bit(bld_base, stype, res, res2);
1017 }
1018 }
1019 }
1020 else {
1021 res = bld->immediates[reg->Register.Index][swizzle];
1022 if (tgsi_type_is_64bit(stype))
1023 res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
1024 }
1025
1026 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1027 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1028 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1029 }
1030 return res;
1031 }
1032
1033 static LLVMValueRef
1034 emit_fetch_input(
1035 struct lp_build_tgsi_context * bld_base,
1036 const struct tgsi_full_src_register * reg,
1037 enum tgsi_opcode_type stype,
1038 unsigned swizzle_in)
1039 {
1040 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1041 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1042 LLVMBuilderRef builder = gallivm->builder;
1043 LLVMValueRef res;
1044 unsigned swizzle = swizzle_in & 0xffff;
1045
1046 if (reg->Register.Indirect) {
1047 LLVMValueRef indirect_index;
1048 LLVMValueRef index_vec; /* index into the input reg array */
1049 LLVMValueRef index_vec2 = NULL;
1050 LLVMValueRef inputs_array;
1051 LLVMTypeRef fptr_type;
1052
1053 indirect_index = get_indirect_index(bld,
1054 reg->Register.File,
1055 reg->Register.Index,
1056 &reg->Indirect,
1057 bld->bld_base.info->file_max[reg->Register.File]);
1058
1059 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1060 indirect_index,
1061 swizzle,
1062 TRUE);
1063 if (tgsi_type_is_64bit(stype)) {
1064 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1065 indirect_index,
1066 swizzle_in >> 16,
1067 TRUE);
1068 }
1069 /* cast inputs_array pointer to float* */
1070 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1071 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1072
1073 /* Gather values from the input register array */
1074 res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
1075 } else {
1076 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1077 LLVMValueRef lindex = lp_build_const_int32(gallivm,
1078 reg->Register.Index * 4 + swizzle);
1079 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1080 bld->inputs_array, &lindex, 1, "");
1081
1082 res = LLVMBuildLoad(builder, input_ptr, "");
1083 if (tgsi_type_is_64bit(stype)) {
1084 LLVMValueRef lindex1;
1085 LLVMValueRef input_ptr2;
1086 LLVMValueRef res2;
1087
1088 lindex1 = lp_build_const_int32(gallivm,
1089 reg->Register.Index * 4 + (swizzle_in >> 16));
1090 input_ptr2 = LLVMBuildGEP(builder,
1091 bld->inputs_array, &lindex1, 1, "");
1092 res2 = LLVMBuildLoad(builder, input_ptr2, "");
1093 res = emit_fetch_64bit(bld_base, stype, res, res2);
1094 }
1095 }
1096 else {
1097 res = bld->inputs[reg->Register.Index][swizzle];
1098 if (tgsi_type_is_64bit(stype))
1099 res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
1100 }
1101 }
1102
1103 assert(res);
1104
1105 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1106 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1107 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1108 }
1109
1110 return res;
1111 }
1112
1113
1114 static LLVMValueRef
1115 emit_fetch_gs_input(
1116 struct lp_build_tgsi_context * bld_base,
1117 const struct tgsi_full_src_register * reg,
1118 enum tgsi_opcode_type stype,
1119 unsigned swizzle_in)
1120 {
1121 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1122 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1123 const struct tgsi_shader_info *info = bld->bld_base.info;
1124 LLVMBuilderRef builder = gallivm->builder;
1125 LLVMValueRef attrib_index = NULL;
1126 LLVMValueRef vertex_index = NULL;
1127 unsigned swizzle = swizzle_in & 0xffff;
1128 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1129 LLVMValueRef res;
1130
1131 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1132 /* This is really a system value not a regular input */
1133 assert(!reg->Register.Indirect);
1134 assert(!reg->Dimension.Indirect);
1135 res = bld->system_values.prim_id;
1136 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1137 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1138 }
1139 return res;
1140 }
1141
1142 if (reg->Register.Indirect) {
1143 /*
1144 * XXX: this is possibly not quite the right value, since file_max may be
1145 * larger than the max attrib index, due to it being the max of declared
1146 * inputs AND the max vertices per prim (which is 6 for tri adj).
1147 * It should however be safe to use (since we always allocate
1148 * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1149 */
1150 int index_limit = info->file_max[reg->Register.File];
1151 attrib_index = get_indirect_index(bld,
1152 reg->Register.File,
1153 reg->Register.Index,
1154 &reg->Indirect,
1155 index_limit);
1156 } else {
1157 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1158 }
1159
1160 if (reg->Dimension.Indirect) {
1161 /*
1162 * A fixed 6 should do as well (which is what we allocate).
1163 */
1164 int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
1165 vertex_index = get_indirect_index(bld,
1166 reg->Register.File,
1167 reg->Dimension.Index,
1168 &reg->DimIndirect,
1169 index_limit);
1170 } else {
1171 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1172 }
1173
1174 res = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1175 reg->Dimension.Indirect,
1176 vertex_index,
1177 reg->Register.Indirect,
1178 attrib_index,
1179 swizzle_index);
1180
1181 assert(res);
1182 if (tgsi_type_is_64bit(stype)) {
1183 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1184 LLVMValueRef res2;
1185 res2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1186 reg->Dimension.Indirect,
1187 vertex_index,
1188 reg->Register.Indirect,
1189 attrib_index,
1190 swizzle_index);
1191 assert(res2);
1192 res = emit_fetch_64bit(bld_base, stype, res, res2);
1193 } else if (stype == TGSI_TYPE_UNSIGNED) {
1194 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1195 } else if (stype == TGSI_TYPE_SIGNED) {
1196 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1197 }
1198
1199 return res;
1200 }
1201
1202 static LLVMValueRef
1203 emit_fetch_tcs_input(
1204 struct lp_build_tgsi_context * bld_base,
1205 const struct tgsi_full_src_register * reg,
1206 enum tgsi_opcode_type stype,
1207 unsigned swizzle_in)
1208 {
1209 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1210 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1211 const struct tgsi_shader_info *info = bld->bld_base.info;
1212 LLVMBuilderRef builder = gallivm->builder;
1213 LLVMValueRef attrib_index = NULL;
1214 LLVMValueRef vertex_index = NULL;
1215 unsigned swizzle = swizzle_in & 0xffff;
1216 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1217 LLVMValueRef res;
1218
1219 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1220 /* This is really a system value not a regular input */
1221 assert(!reg->Register.Indirect);
1222 assert(!reg->Dimension.Indirect);
1223 res = bld->system_values.prim_id;
1224 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1225 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1226 }
1227 return res;
1228 }
1229
1230 if (reg->Register.Indirect) {
1231 int index_limit = info->file_max[reg->Register.File];
1232 attrib_index = get_indirect_index(bld,
1233 reg->Register.File,
1234 reg->Register.Index,
1235 &reg->Indirect,
1236 index_limit);
1237 } else {
1238 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1239 }
1240
1241 if (reg->Dimension.Indirect) {
1242 vertex_index = get_indirect_index(bld,
1243 reg->Register.File,
1244 reg->Dimension.Index,
1245 &reg->DimIndirect,
1246 PIPE_MAX_SHADER_INPUTS);
1247 } else {
1248 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1249 }
1250
1251 // TCS can read from its own outputs
1252 if (reg->Register.File == TGSI_FILE_OUTPUT) {
1253 res = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1254 reg->Dimension.Indirect,
1255 vertex_index,
1256 reg->Register.Indirect,
1257 attrib_index,
1258 swizzle_index,
1259 bld_base->info->output_semantic_name[reg->Register.Index]);
1260 } else {
1261 res = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
1262 reg->Dimension.Indirect,
1263 vertex_index,
1264 reg->Register.Indirect,
1265 attrib_index,
1266 swizzle_index);
1267 }
1268
1269
1270 assert(res);
1271 if (tgsi_type_is_64bit(stype)) {
1272 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1273 LLVMValueRef res2;
1274 if (reg->Register.File == TGSI_FILE_OUTPUT) {
1275 res2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1276 reg->Dimension.Indirect,
1277 vertex_index,
1278 reg->Register.Indirect,
1279 attrib_index,
1280 swizzle_index,
1281 bld_base->info->output_semantic_name[reg->Register.Index]);
1282 } else {
1283 res2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
1284 reg->Dimension.Indirect,
1285 vertex_index,
1286 reg->Register.Indirect,
1287 attrib_index,
1288 swizzle_index);
1289 }
1290 assert(res2);
1291 res = emit_fetch_64bit(bld_base, stype, res, res2);
1292 } else if (stype == TGSI_TYPE_UNSIGNED) {
1293 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1294 } else if (stype == TGSI_TYPE_SIGNED) {
1295 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1296 }
1297
1298 return res;
1299 }
1300
1301 static LLVMValueRef
1302 emit_fetch_tes_input(
1303 struct lp_build_tgsi_context * bld_base,
1304 const struct tgsi_full_src_register * reg,
1305 enum tgsi_opcode_type stype,
1306 unsigned swizzle_in)
1307 {
1308 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1309 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1310 const struct tgsi_shader_info *info = bld->bld_base.info;
1311 LLVMBuilderRef builder = gallivm->builder;
1312 LLVMValueRef attrib_index = NULL;
1313 LLVMValueRef vertex_index = NULL;
1314 unsigned swizzle = swizzle_in & 0xffff;
1315 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1316 LLVMValueRef res;
1317
1318 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1319 /* This is really a system value not a regular input */
1320 assert(!reg->Register.Indirect);
1321 assert(!reg->Dimension.Indirect);
1322 res = bld->system_values.prim_id;
1323 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1324 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1325 }
1326 return res;
1327 }
1328
1329 if (reg->Register.Indirect) {
1330 int index_limit = info->file_max[reg->Register.File];
1331 attrib_index = get_indirect_index(bld,
1332 reg->Register.File,
1333 reg->Register.Index,
1334 &reg->Indirect,
1335 index_limit);
1336 } else {
1337 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1338 }
1339
1340 if (reg->Dimension.Indirect) {
1341 vertex_index = get_indirect_index(bld,
1342 reg->Register.File,
1343 reg->Dimension.Index,
1344 &reg->DimIndirect,
1345 PIPE_MAX_SHADER_INPUTS);
1346 } else {
1347 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1348 }
1349
1350 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
1351 res = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1352 reg->Register.Indirect,
1353 attrib_index,
1354 swizzle_index);
1355 } else {
1356 res = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1357 reg->Dimension.Indirect,
1358 vertex_index,
1359 reg->Register.Indirect,
1360 attrib_index,
1361 swizzle_index);
1362 }
1363
1364 assert(res);
1365 if (tgsi_type_is_64bit(stype)) {
1366 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1367 LLVMValueRef res2;
1368 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
1369 res2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1370 reg->Register.Indirect,
1371 attrib_index,
1372 swizzle_index);
1373 }
1374 else {
1375 res2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1376 reg->Dimension.Indirect,
1377 vertex_index,
1378 reg->Register.Indirect,
1379 attrib_index,
1380 swizzle_index);
1381 }
1382 assert(res2);
1383 res = emit_fetch_64bit(bld_base, stype, res, res2);
1384 } else if (stype == TGSI_TYPE_UNSIGNED) {
1385 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1386 } else if (stype == TGSI_TYPE_SIGNED) {
1387 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1388 }
1389
1390 return res;
1391 }
1392
1393
1394
1395 static LLVMValueRef
1396 emit_fetch_temporary(
1397 struct lp_build_tgsi_context * bld_base,
1398 const struct tgsi_full_src_register * reg,
1399 enum tgsi_opcode_type stype,
1400 unsigned swizzle_in)
1401 {
1402 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1403 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1404 LLVMBuilderRef builder = gallivm->builder;
1405 LLVMValueRef res;
1406 unsigned swizzle = swizzle_in & 0xffff;
1407
1408 if (reg->Register.Indirect) {
1409 LLVMValueRef indirect_index;
1410 LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */
1411 LLVMValueRef temps_array;
1412 LLVMTypeRef fptr_type;
1413
1414 indirect_index = get_indirect_index(bld,
1415 reg->Register.File,
1416 reg->Register.Index,
1417 &reg->Indirect,
1418 bld->bld_base.info->file_max[reg->Register.File]);
1419
1420 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1421 indirect_index,
1422 swizzle,
1423 TRUE);
1424 if (tgsi_type_is_64bit(stype)) {
1425 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1426 indirect_index,
1427 swizzle_in >> 16,
1428 TRUE);
1429 }
1430
1431 /* cast temps_array pointer to float* */
1432 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1433 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1434
1435 /* Gather values from the temporary register array */
1436 res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
1437 }
1438 else {
1439 LLVMValueRef temp_ptr;
1440 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1441 res = LLVMBuildLoad(builder, temp_ptr, "");
1442
1443 if (tgsi_type_is_64bit(stype)) {
1444 LLVMValueRef temp_ptr2, res2;
1445
1446 temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
1447 res2 = LLVMBuildLoad(builder, temp_ptr2, "");
1448 res = emit_fetch_64bit(bld_base, stype, res, res2);
1449 }
1450 }
1451
1452 if (stype == TGSI_TYPE_SIGNED ||
1453 stype == TGSI_TYPE_UNSIGNED ||
1454 stype == TGSI_TYPE_DOUBLE ||
1455 stype == TGSI_TYPE_SIGNED64 ||
1456 stype == TGSI_TYPE_UNSIGNED64) {
1457 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1458 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1459 }
1460
1461 return res;
1462 }
1463
1464 static LLVMValueRef
1465 emit_fetch_system_value(
1466 struct lp_build_tgsi_context * bld_base,
1467 const struct tgsi_full_src_register * reg,
1468 enum tgsi_opcode_type stype,
1469 unsigned swizzle_in)
1470 {
1471 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1472 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1473 const struct tgsi_shader_info *info = bld->bld_base.info;
1474 LLVMBuilderRef builder = gallivm->builder;
1475 LLVMValueRef res;
1476 enum tgsi_opcode_type atype; // Actual type of the value
1477 unsigned swizzle = swizzle_in & 0xffff;
1478
1479 assert(!reg->Register.Indirect);
1480
1481 switch (info->system_value_semantic_name[reg->Register.Index]) {
1482 case TGSI_SEMANTIC_INSTANCEID:
1483 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1484 atype = TGSI_TYPE_UNSIGNED;
1485 break;
1486
1487 case TGSI_SEMANTIC_VERTEXID:
1488 res = bld->system_values.vertex_id;
1489 atype = TGSI_TYPE_UNSIGNED;
1490 break;
1491
1492 case TGSI_SEMANTIC_VERTEXID_NOBASE:
1493 res = bld->system_values.vertex_id_nobase;
1494 atype = TGSI_TYPE_UNSIGNED;
1495 break;
1496
1497 case TGSI_SEMANTIC_BASEVERTEX:
1498 res = bld->system_values.basevertex;
1499 atype = TGSI_TYPE_UNSIGNED;
1500 break;
1501
1502 case TGSI_SEMANTIC_BASEINSTANCE:
1503 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1504 atype = TGSI_TYPE_UNSIGNED;
1505 break;
1506
1507 case TGSI_SEMANTIC_PRIMID:
1508 res = bld->system_values.prim_id;
1509 atype = TGSI_TYPE_UNSIGNED;
1510 break;
1511
1512 case TGSI_SEMANTIC_INVOCATIONID:
1513 if (info->processor == PIPE_SHADER_TESS_CTRL)
1514 res = bld->system_values.invocation_id;
1515 else
1516 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1517 atype = TGSI_TYPE_UNSIGNED;
1518 break;
1519
1520 case TGSI_SEMANTIC_HELPER_INVOCATION:
1521 res = LLVMBuildNot(gallivm->builder, lp_build_mask_value(bld->mask), "");
1522 atype = TGSI_TYPE_UNSIGNED;
1523 break;
1524
1525 case TGSI_SEMANTIC_THREAD_ID:
1526 res = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, swizzle, "");
1527 atype = TGSI_TYPE_UNSIGNED;
1528 break;
1529
1530 case TGSI_SEMANTIC_BLOCK_ID:
1531 res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.block_id, lp_build_const_int32(gallivm, swizzle));
1532 atype = TGSI_TYPE_UNSIGNED;
1533 break;
1534
1535 case TGSI_SEMANTIC_GRID_SIZE:
1536 res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.grid_size, lp_build_const_int32(gallivm, swizzle));
1537 atype = TGSI_TYPE_UNSIGNED;
1538 break;
1539
1540 case TGSI_SEMANTIC_TESSCOORD:
1541 {
1542 LLVMValueRef index[] = { lp_build_const_int32(gallivm, 0), lp_build_const_int32(gallivm, swizzle_in) };
1543 LLVMValueRef array_indexed = LLVMBuildGEP(gallivm->builder, bld->system_values.tess_coord, index, 2, "tess_coord_array_indexed");
1544 res = LLVMBuildLoad(builder, array_indexed, "tess_coord");
1545 }
1546 atype = TGSI_TYPE_FLOAT;
1547 break;
1548
1549 case TGSI_SEMANTIC_FACE:
1550 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1551 atype = TGSI_TYPE_UNSIGNED;
1552 break;
1553
1554 case TGSI_SEMANTIC_DRAWID:
1555 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1556 atype = TGSI_TYPE_UNSIGNED;
1557 break;
1558
1559 case TGSI_SEMANTIC_TESSOUTER:
1560 res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
1561 bld->system_values.tess_outer,
1562 lp_build_const_int32(gallivm, swizzle_in));
1563 atype = TGSI_TYPE_FLOAT;
1564 break;
1565
1566 case TGSI_SEMANTIC_TESSINNER:
1567 res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
1568 bld->system_values.tess_inner,
1569 lp_build_const_int32(gallivm, swizzle_in));
1570 atype = TGSI_TYPE_FLOAT;
1571 break;
1572
1573 case TGSI_SEMANTIC_VERTICESIN:
1574 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.vertices_in);
1575 atype = TGSI_TYPE_UNSIGNED;
1576 break;
1577
1578 default:
1579 assert(!"unexpected semantic in emit_fetch_system_value");
1580 res = bld_base->base.zero;
1581 atype = TGSI_TYPE_FLOAT;
1582 break;
1583 }
1584
1585 if (atype != stype) {
1586 if (stype == TGSI_TYPE_FLOAT) {
1587 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1588 } else if (stype == TGSI_TYPE_UNSIGNED) {
1589 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1590 } else if (stype == TGSI_TYPE_SIGNED) {
1591 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1592 }
1593 }
1594
1595 return res;
1596 }
1597
1598 /**
1599 * Register fetch with derivatives.
1600 */
1601 static void
1602 emit_fetch_deriv(
1603 struct lp_build_tgsi_soa_context *bld,
1604 LLVMValueRef src,
1605 LLVMValueRef *res,
1606 LLVMValueRef *ddx,
1607 LLVMValueRef *ddy)
1608 {
1609 if (res)
1610 *res = src;
1611
1612 /* TODO: use interpolation coeffs for inputs */
1613
1614 if (ddx)
1615 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1616
1617 if (ddy)
1618 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1619 }
1620
1621 /**
1622 * Store an array of vec_length 64-bit values into two arrays of vec_length
1623 * floats, i.e.
1624 * value is d0, d1, d2, d3 etc.
1625 * Each 64-bit value has two 32-bit pieces x and y,
1626 * so it gets stored into the separate channels as:
1627 * chan_ptr = d0.x, d1.x, d2.x, d3.x
1628 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
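* E.g. with vec_length == 4 the even-index shuffle {0,2,4,6} extracts the
* x pieces and the odd-index shuffle {1,3,5,7} extracts the y pieces.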
1629 */
1630 static void
1631 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1632 LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1633 LLVMValueRef value)
1634 {
1635 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1636 struct gallivm_state *gallivm = bld_base->base.gallivm;
1637 LLVMBuilderRef builder = gallivm->builder;
1638 struct lp_build_context *float_bld = &bld_base->base;
1639 unsigned i;
1640 LLVMValueRef temp, temp2;
1641 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1642 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1643
1644 for (i = 0; i < bld_base->base.type.length; i++) {
1645 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1646 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1647 }
1648
1649 temp = LLVMBuildShuffleVector(builder, value,
1650 LLVMGetUndef(LLVMTypeOf(value)),
1651 LLVMConstVector(shuffles,
1652 bld_base->base.type.length),
1653 "");
1654 temp2 = LLVMBuildShuffleVector(builder, value,
1655 LLVMGetUndef(LLVMTypeOf(value)),
1656 LLVMConstVector(shuffles2,
1657 bld_base->base.type.length),
1658 "");
1659
1660 lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1661 lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1662 }
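
/*
 * Worked example of the shuffles above (assuming a 4-wide SoA type):
 * `value` arrives as 8 floats aliasing the 4 doubles d0..d3.  With
 * shuffles = {0,2,4,6} and shuffles2 = {1,3,5,7}, the two masked stores
 * receive
 *
 *    temp  = { d0.x, d1.x, d2.x, d3.x }  ->  chan_ptr
 *    temp2 = { d0.y, d1.y, d2.y, d3.y }  ->  chan_ptr2
 */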
1663
1664 static void
1665 emit_store_output(struct lp_build_tgsi_context *bld_base,
1666 enum tgsi_opcode_type dtype,
1667 const struct tgsi_full_dst_register *reg,
1668 unsigned index,
1669 unsigned chan_index,
1670 LLVMValueRef indirect_index,
1671 LLVMValueRef value)
1672 {
1673 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1674 struct gallivm_state *gallivm = bld_base->base.gallivm;
1675 LLVMBuilderRef builder = gallivm->builder;
1676 struct lp_build_context *float_bld = &bld_base->base;
1677
1678 /* Outputs are always stored as floats */
1679 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1680
1681 if (reg->Register.Indirect) {
1682 LLVMValueRef index_vec; /* indexes into the output registers */
1683 LLVMValueRef outputs_array;
1684 LLVMTypeRef fptr_type;
1685
1686 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1687 indirect_index,
1688 chan_index,
1689 TRUE);
1690
1691 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1692 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1693
1694 /* Scatter store values into output registers */
1695 emit_mask_scatter(bld, outputs_array, index_vec, value,
1696 &bld->exec_mask);
1697 }
1698 else {
1699 assert(LLVMTypeOf(value) == float_bld->vec_type);
1700 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1701 chan_index);
1702
1703 if (tgsi_type_is_64bit(dtype)) {
1704 LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1705 chan_index + 1);
1706 emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1707 value);
1708 } else
1709 lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1710 }
1711 }
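
/*
 * A note on the indirect path above (orientation only, not authoritative):
 * index_vec holds one scalar float-slot index per SIMD lane, derived by
 * get_soa_array_offsets() from each lane's indirect register index plus the
 * channel, and emit_mask_scatter() then performs, per active lane i, the
 * equivalent of
 *
 *    outputs_array[index_vec[i]] = value[i];
 *
 * skipping lanes that are disabled in the current exec mask.
 */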
1712
1713 static void
1714 emit_store_tcs_output(struct lp_build_tgsi_context *bld_base,
1715 enum tgsi_opcode_type dtype,
1716 const struct tgsi_full_dst_register *reg,
1717 unsigned index,
1718 unsigned chan_index,
1719 LLVMValueRef indirect_index,
1720 LLVMValueRef value)
1721 {
1722 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1723 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1724 const struct tgsi_shader_info *info = bld->bld_base.info;
1725 LLVMValueRef attrib_index = NULL;
1726 LLVMValueRef vertex_index = NULL;
1727 LLVMValueRef channel_index = NULL;
1728
1729 if (reg->Register.Indirect) {
1730 /*
1731 * XXX: this is possibly not quite the right value, since file_max may be
1732 * larger than the max attrib index, due to it being the max of declared
1733 * inputs AND the max vertices per prim (which is 6 for tri adj).
1734 * It should however be safe to use (since we always allocate
1735 * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1736 */
1737 int index_limit = info->file_max[reg->Register.File];
1738 attrib_index = get_indirect_index(bld,
1739 reg->Register.File,
1740 reg->Register.Index,
1741 &reg->Indirect,
1742 index_limit);
1743 } else {
1744 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1745 }
1746
1747 if (reg->Dimension.Indirect) {
1748 vertex_index = get_indirect_index(bld,
1749 reg->Register.File,
1750 reg->Dimension.Index,
1751 &reg->DimIndirect,
1752 PIPE_MAX_SHADER_OUTPUTS);
1753 } else {
1754 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1755 }
1756
1757 channel_index = lp_build_const_int32(gallivm, chan_index);
1758
1759 assert(bld->tcs_iface->emit_store_output);
1760 bld->tcs_iface->emit_store_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1761 bld_base->info->output_semantic_name[reg->Register.Index],
1762 reg->Dimension.Indirect,
1763 vertex_index,
1764 reg->Register.Indirect,
1765 attrib_index,
1766 channel_index,
1767 value,
1768 mask_vec(bld_base));
1769 }
1770
1771 static void
1772 emit_store_temp(struct lp_build_tgsi_context *bld_base,
1773 enum tgsi_opcode_type dtype,
1774 const struct tgsi_full_dst_register *reg,
1775 unsigned index,
1776 unsigned chan_index,
1777 LLVMValueRef indirect_index,
1778 LLVMValueRef value)
1779 {
1780 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1781 struct gallivm_state *gallivm = bld_base->base.gallivm;
1782 LLVMBuilderRef builder = gallivm->builder;
1783 struct lp_build_context *float_bld = &bld_base->base;
1784
1785 /* Temporaries are always stored as floats */
1786 if (!tgsi_type_is_64bit(dtype))
1787 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1788 else
1789 value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1790
1791 if (reg->Register.Indirect) {
1792 LLVMValueRef index_vec; /* indexes into the temp registers */
1793 LLVMValueRef temps_array;
1794 LLVMTypeRef fptr_type;
1795
1796 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1797 indirect_index,
1798 chan_index,
1799 TRUE);
1800
1801 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1802 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1803
1804 /* Scatter store values into temp registers */
1805 emit_mask_scatter(bld, temps_array, index_vec, value,
1806 &bld->exec_mask);
1807 }
1808 else {
1809 LLVMValueRef temp_ptr;
1810 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1811
1812 if (tgsi_type_is_64bit(dtype)) {
1813 LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1814 reg->Register.Index,
1815 chan_index + 1);
1816 emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1817 value);
1818 }
1819 else
1820 lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1821 }
1822 }
1823
1824 static void
1825 emit_store_address(struct lp_build_tgsi_context *bld_base,
1826 enum tgsi_opcode_type dtype,
1827 const struct tgsi_full_dst_register *reg,
1828 unsigned index,
1829 unsigned chan_index,
1830 LLVMValueRef indirect_index,
1831 LLVMValueRef value)
1832 {
1833 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1834 struct gallivm_state *gallivm = bld_base->base.gallivm;
1835 LLVMBuilderRef builder = gallivm->builder;
1836 struct lp_build_context *int_bld = &bld_base->int_bld;
1837
1838 assert(dtype == TGSI_TYPE_SIGNED);
1839 assert(LLVMTypeOf(value) == int_bld->vec_type);
1840 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1841 lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1842 bld->addr[reg->Register.Index][chan_index]);
1843 }
1844
1845 /**
1846 * Register store.
1847 */
1848 static void
1849 emit_store_chan(
1850 struct lp_build_tgsi_context *bld_base,
1851 const struct tgsi_full_instruction *inst,
1852 unsigned index,
1853 unsigned chan_index,
1854 LLVMValueRef value)
1855 {
1856 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1857 struct gallivm_state *gallivm = bld_base->base.gallivm;
1858 LLVMBuilderRef builder = gallivm->builder;
1859 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1860 struct lp_build_context *float_bld = &bld_base->base;
1861 LLVMValueRef indirect_index = NULL;
1862 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1863
1864 /*
1865 * Apply saturation.
1866 *
1867 * The value is always assumed to be float.
1868 */
1869 if (inst->Instruction.Saturate) {
1870 assert(dtype == TGSI_TYPE_FLOAT ||
1871 dtype == TGSI_TYPE_UNTYPED);
1872 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1873 value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1874 }
1875
1876 if (reg->Register.Indirect) {
1877 /*
1878 * Currently mesa/st doesn't generate indirect stores
1879 * of 64-bit values; it normally uses MOV to do indirect stores.
1880 */
1881 assert(!tgsi_type_is_64bit(dtype));
1882 indirect_index = get_indirect_index(bld,
1883 reg->Register.File,
1884 reg->Register.Index,
1885 &reg->Indirect,
1886 bld->bld_base.info->file_max[reg->Register.File]);
1887 } else {
1888 assert(reg->Register.Index <=
1889 bld_base->info->file_max[reg->Register.File]);
1890 }
1891
1892 if (DEBUG_EXECUTION) {
1893 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1894 }
1895
1896 assert(bld_base->emit_store_reg_funcs[reg->Register.File]);
1897 bld_base->emit_store_reg_funcs[reg->Register.File](bld_base,
1898 dtype,
1899 reg,
1900 index,
1901 chan_index,
1902 indirect_index,
1903 value);
1904
1905 (void)dtype;
1906 }
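
/*
 * Sketch, for orientation: the per-file store callbacks dispatched above
 * are wired up when the SoA context is built, along the lines of
 *
 *    bld_base->emit_store_reg_funcs[TGSI_FILE_OUTPUT]    = emit_store_output;
 *    bld_base->emit_store_reg_funcs[TGSI_FILE_TEMPORARY] = emit_store_temp;
 *    bld_base->emit_store_reg_funcs[TGSI_FILE_ADDRESS]   = emit_store_address;
 *
 * which keeps emit_store_chan() itself file-agnostic.
 */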
1907
1908 /*
1909 * Called at the beginning of the translation of each TGSI instruction, to
1910 * emit some debug code.
1911 */
1912 static void
1913 emit_debug(
1914 struct lp_build_tgsi_context * bld_base,
1915 const struct tgsi_full_instruction * inst,
1916 const struct tgsi_opcode_info * info)
1917
1918 {
1919 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1920
1921 if (DEBUG_EXECUTION) {
1922 /*
1923 * Dump the TGSI instruction.
1924 */
1925
1926 struct gallivm_state *gallivm = bld_base->base.gallivm;
1927 char buf[512];
1928 buf[0] = '$';
1929 buf[1] = ' ';
1930 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1931 lp_build_printf(gallivm, buf);
1932
1933 /* Dump the execution mask.
1934 */
1935 if (bld->exec_mask.has_mask) {
1936 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
1937 }
1938 }
1939 }
1940
1941 static void
1942 emit_store(
1943 struct lp_build_tgsi_context * bld_base,
1944 const struct tgsi_full_instruction * inst,
1945 const struct tgsi_opcode_info * info,
1946 unsigned index,
1947 LLVMValueRef dst[4])
1948
1949 {
1950 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1951
1952 unsigned writemask = inst->Dst[index].Register.WriteMask;
1953 while (writemask) {
1954 unsigned chan_index = u_bit_scan(&writemask);
1955 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1956 continue;
1957 emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1958 }
1959 }
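
/*
 * Example (hypothetical instruction): for an instruction with a 64-bit
 * dst type and WriteMask == xyzw, u_bit_scan() would yield channels
 * 0,1,2,3, but only 0 and 2 actually emit stores here; each 64-bit value
 * spans a channel pair, and emit_store_chan() writes both halves of the
 * pair at once.
 */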
1960
1961 static unsigned
1962 tgsi_to_pipe_tex_target(unsigned tgsi_target)
1963 {
1964 switch (tgsi_target) {
1965 case TGSI_TEXTURE_BUFFER:
1966 return PIPE_BUFFER;
1967 case TGSI_TEXTURE_1D:
1968 case TGSI_TEXTURE_SHADOW1D:
1969 return PIPE_TEXTURE_1D;
1970 case TGSI_TEXTURE_2D:
1971 case TGSI_TEXTURE_SHADOW2D:
1972 case TGSI_TEXTURE_2D_MSAA:
1973 return PIPE_TEXTURE_2D;
1974 case TGSI_TEXTURE_3D:
1975 return PIPE_TEXTURE_3D;
1976 case TGSI_TEXTURE_CUBE:
1977 case TGSI_TEXTURE_SHADOWCUBE:
1978 return PIPE_TEXTURE_CUBE;
1979 case TGSI_TEXTURE_RECT:
1980 case TGSI_TEXTURE_SHADOWRECT:
1981 return PIPE_TEXTURE_RECT;
1982 case TGSI_TEXTURE_1D_ARRAY:
1983 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1984 return PIPE_TEXTURE_1D_ARRAY;
1985 case TGSI_TEXTURE_2D_ARRAY:
1986 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1987 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1988 return PIPE_TEXTURE_2D_ARRAY;
1989 case TGSI_TEXTURE_CUBE_ARRAY:
1990 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1991 return PIPE_TEXTURE_CUBE_ARRAY;
1992 default:
1993 assert(0);
1994 return PIPE_BUFFER;
1995 }
1996 }
1997
1998
1999 static enum lp_sampler_lod_property
2000 lp_build_lod_property(
2001 struct lp_build_tgsi_context *bld_base,
2002 const struct tgsi_full_instruction *inst,
2003 unsigned src_op)
2004 {
2005 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2006 enum lp_sampler_lod_property lod_property;
2007
2008 /*
2009 * Not much we can do here. We could try catching inputs declared
2010 * with constant interpolation, but it's probably not worth it: for
2011 * TEX opcodes as well as FETCH/LD the lod comes from the same reg as
2012 * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO, just
2013 * like the constant/immediate recognition below.
2014 * What would be of more value is recognizing temps holding
2015 * broadcasted scalars, but there's no way we can do that.
2016 * Tried asking llvm, without any success (using LLVMIsConstant,
2017 * even though this isn't exactly what we'd need); even something as simple as
2018 * IMM[0] UINT32 (0,-1,0,0)
2019 * MOV TEMP[0] IMM[0].yyyy
2020 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2021 * doesn't work.
2022 * This means there's ZERO chance this will ever catch a scalar lod
2023 * with traditional tex opcodes as well as texel fetches, since the lod
2024 * comes from the same reg as the coords (except maybe some test shaders
2025 * using constant coords).
2026 * There's at least hope for sample opcodes as well as size queries.
2027 */
2028 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ ||
2029 reg->Register.File == TGSI_FILE_CONSTANT ||
2030 reg->Register.File == TGSI_FILE_IMMEDIATE) {
2031 lod_property = LP_SAMPLER_LOD_SCALAR;
2032 }
2033 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2034 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2035 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2036 }
2037 else {
2038 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2039 }
2040 }
2041 else {
2042 /* never use scalar (per-quad) lod; the results are just too wrong. */
2043 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2044 }
2045 return lod_property;
2046 }
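
/*
 * Example of what this classification buys us (hypothetical shaders):
 *
 *    TXL TEMP[0], TEMP[1], SAMP[0], 2D   -> lod read from a temp: per-quad
 *                                           (fragment) or per-element lod
 *    TXL TEMP[0], CONST[0], SAMP[0], 2D  -> LP_SAMPLER_LOD_SCALAR, letting
 *                                           the sampler do a single mip
 *                                           selection for the whole vector
 */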
2047
2048
2049 /**
2050 * High-level instruction translators.
2051 */
2052
2053 static void
2054 emit_tex( struct lp_build_tgsi_soa_context *bld,
2055 const struct tgsi_full_instruction *inst,
2056 enum lp_build_tex_modifier modifier,
2057 LLVMValueRef *texel,
2058 unsigned sampler_reg,
2059 enum lp_sampler_op_type sampler_op)
2060 {
2061 unsigned unit = inst->Src[sampler_reg].Register.Index;
2062 LLVMValueRef oow = NULL;
2063 LLVMValueRef lod = NULL;
2064 LLVMValueRef coords[5];
2065 LLVMValueRef offsets[3] = { NULL };
2066 struct lp_derivatives derivs;
2067 struct lp_sampler_params params;
2068 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2069 unsigned num_derivs, num_offsets, i;
2070 unsigned shadow_coord = 0;
2071 unsigned layer_coord = 0;
2072 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2073
2074 memset(&params, 0, sizeof(params));
2075
2076 if (!bld->sampler) {
2077 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2078 for (i = 0; i < 4; i++) {
2079 texel[i] = bld->bld_base.base.undef;
2080 }
2081 return;
2082 }
2083
2084 switch (inst->Texture.Texture) {
2085 case TGSI_TEXTURE_1D_ARRAY:
2086 layer_coord = 1;
2087 /* fallthrough */
2088 case TGSI_TEXTURE_1D:
2089 num_offsets = 1;
2090 num_derivs = 1;
2091 break;
2092 case TGSI_TEXTURE_2D_ARRAY:
2093 layer_coord = 2;
2094 /* fallthrough */
2095 case TGSI_TEXTURE_2D:
2096 case TGSI_TEXTURE_RECT:
2097 num_offsets = 2;
2098 num_derivs = 2;
2099 break;
2100 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2101 layer_coord = 1;
2102 /* fallthrough */
2103 case TGSI_TEXTURE_SHADOW1D:
2104 shadow_coord = 2;
2105 num_offsets = 1;
2106 num_derivs = 1;
2107 break;
2108 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2109 layer_coord = 2;
2110 shadow_coord = 3;
2111 num_offsets = 2;
2112 num_derivs = 2;
2113 break;
2114 case TGSI_TEXTURE_SHADOW2D:
2115 case TGSI_TEXTURE_SHADOWRECT:
2116 shadow_coord = 2;
2117 num_offsets = 2;
2118 num_derivs = 2;
2119 break;
2120 case TGSI_TEXTURE_CUBE:
2121 num_offsets = 2;
2122 num_derivs = 3;
2123 break;
2124 case TGSI_TEXTURE_3D:
2125 num_offsets = 3;
2126 num_derivs = 3;
2127 break;
2128 case TGSI_TEXTURE_SHADOWCUBE:
2129 shadow_coord = 3;
2130 num_offsets = 2;
2131 num_derivs = 3;
2132 break;
2133 case TGSI_TEXTURE_CUBE_ARRAY:
2134 num_offsets = 2;
2135 num_derivs = 3;
2136 layer_coord = 3;
2137 break;
2138 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2139 num_offsets = 2;
2140 num_derivs = 3;
2141 layer_coord = 3;
2142 shadow_coord = 4; /* shadow coord comes from a separate reg */
2143 break;
2144 case TGSI_TEXTURE_2D_MSAA:
2145 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2146 default:
2147 assert(0);
2148 return;
2149 }
2150
2151 /* Note: lod and especially projected are illegal in a LOT of cases */
2152 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2153 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2154 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
2155 lod = bld->bld_base.base.zero;
2156 } else if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2157 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2158 /* note that shadow cube array with bias/explicit lod does not exist */
2159 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2160 }
2161 else {
2162 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2163 }
2164 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2165 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2166 }
2167 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2168 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2169 }
2170 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2171 }
2172
2173 if (sampler_op == LP_SAMPLER_OP_GATHER) {
2174 uint32_t comp_val = inst->Src[sampler_reg].Register.SwizzleX;
2175 sample_key |= (comp_val << LP_SAMPLER_GATHER_COMP_SHIFT);
2176 }
2177 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2178 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2179 oow = lp_build_rcp(&bld->bld_base.base, oow);
2180 }
2181
2182 for (i = 0; i < num_derivs; i++) {
2183 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2184 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2185 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2186 }
2187 for (i = num_derivs; i < 5; i++) {
2188 coords[i] = bld->bld_base.base.undef;
2189 }
2190
2191 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2192 if (layer_coord) {
2193 if (layer_coord == 3) {
2194 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2195 }
2196 else {
2197 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2198 }
2199 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2200 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2201 }
2202 /* Shadow coord always occupies the 5th slot. */
2203 if (shadow_coord) {
2204 sample_key |= LP_SAMPLER_SHADOW;
2205 if (shadow_coord == 4) {
2206 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2207 }
2208 else {
2209 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2210 }
2211 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2212 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2213 }
2214
2215 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2216 unsigned dim;
2217 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2218 for (dim = 0; dim < num_derivs; ++dim) {
2219 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2220 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2221 }
2222 params.derivs = &derivs;
2223 /*
2224 * could also check whether all src regs are constant, but I doubt
2225 * such cases exist in practice.
2226 */
2227 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2228 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2229 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2230 }
2231 else {
2232 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2233 }
2234 }
2235 else {
2236 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2237 }
2238 }
2239 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2240
2241 /* we don't handle the 4 offset version of tg4 */
2242 if (inst->Texture.NumOffsets == 1) {
2243 unsigned dim;
2244 sample_key |= LP_SAMPLER_OFFSETS;
2245 for (dim = 0; dim < num_offsets; dim++) {
2246 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2247 }
2248 }
2249
2250 params.type = bld->bld_base.base.type;
2251 params.sample_key = sample_key;
2252 params.texture_index = unit;
2253 params.sampler_index = unit;
2254 params.context_ptr = bld->context_ptr;
2255 params.thread_data_ptr = bld->thread_data_ptr;
2256 params.coords = coords;
2257 params.offsets = offsets;
2258 params.lod = lod;
2259 params.texel = texel;
2260
2261 bld->sampler->emit_tex_sample(bld->sampler,
2262 bld->bld_base.base.gallivm,
2263 &params);
2264 }
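
/*
 * Illustration of the sample_key assembled above (schematic, using the
 * LP_SAMPLER_* shifts): a TXB on a SHADOW2D target with one texel offset
 * ends up with
 *
 *    sample_key = (LP_SAMPLER_OP_TEXTURE << LP_SAMPLER_OP_TYPE_SHIFT) |
 *                 LP_SAMPLER_SHADOW |
 *                 LP_SAMPLER_OFFSETS |
 *                 (LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT) |
 *                 (lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT);
 *
 * so the sampler generator can decode everything from one integer.
 */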
2265
2266 static void
2267 emit_sample(struct lp_build_tgsi_soa_context *bld,
2268 const struct tgsi_full_instruction *inst,
2269 enum lp_build_tex_modifier modifier,
2270 boolean compare,
2271 enum lp_sampler_op_type sample_type,
2272 LLVMValueRef *texel)
2273 {
2274 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2275 unsigned texture_unit, sampler_unit;
2276 LLVMValueRef lod = NULL;
2277 LLVMValueRef coords[5];
2278 LLVMValueRef offsets[3] = { NULL };
2279 struct lp_derivatives derivs;
2280 struct lp_sampler_params params;
2281 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2282
2283 unsigned num_offsets, num_derivs, i;
2284 unsigned layer_coord = 0;
2285 unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2286
2287 memset(&params, 0, sizeof(params));
2288
2289 if (!bld->sampler) {
2290 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2291 for (i = 0; i < 4; i++) {
2292 texel[i] = bld->bld_base.base.undef;
2293 }
2294 return;
2295 }
2296
2297 /*
2298 * Unlike old-style tex opcodes, the texture/sampler indices
2299 * always come from src1 and src2 respectively.
2300 */
2301 texture_unit = inst->Src[1].Register.Index;
2302 sampler_unit = inst->Src[2].Register.Index;
2303
2304 /*
2305 * Note that inst->Texture.Texture will contain the number of offsets;
2306 * however, the target information is NOT there and comes from the
2307 * declared sampler views instead.
2308 */
2309 switch (bld->sv[texture_unit].Resource) {
2310 case TGSI_TEXTURE_1D:
2311 num_offsets = 1;
2312 num_derivs = 1;
2313 break;
2314 case TGSI_TEXTURE_1D_ARRAY:
2315 layer_coord = 1;
2316 num_offsets = 1;
2317 num_derivs = 1;
2318 break;
2319 case TGSI_TEXTURE_2D:
2320 case TGSI_TEXTURE_RECT:
2321 num_offsets = 2;
2322 num_derivs = 2;
2323 break;
2324 case TGSI_TEXTURE_2D_ARRAY:
2325 layer_coord = 2;
2326 num_offsets = 2;
2327 num_derivs = 2;
2328 break;
2329 case TGSI_TEXTURE_CUBE:
2330 num_offsets = 2;
2331 num_derivs = 3;
2332 break;
2333 case TGSI_TEXTURE_3D:
2334 num_offsets = 3;
2335 num_derivs = 3;
2336 break;
2337 case TGSI_TEXTURE_CUBE_ARRAY:
2338 layer_coord = 3;
2339 num_offsets = 2;
2340 num_derivs = 3;
2341 break;
2342 default:
2343 assert(0);
2344 return;
2345 }
2346
2347 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2348 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2349 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2350 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2351 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2352 }
2353 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2354 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2355 }
2356 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2357 }
2358 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2359 /* XXX might be better to explicitly pass the level zero information */
2360 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2361 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2362 }
2363
2364 for (i = 0; i < num_derivs; i++) {
2365 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2366 }
2367 for (i = num_derivs; i < 5; i++) {
2368 coords[i] = bld->bld_base.base.undef;
2369 }
2370
2371 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2372 if (layer_coord) {
2373 if (layer_coord == 3)
2374 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2375 else
2376 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2377 }
2378 /* Shadow coord always occupies the 5th slot. */
2379 if (compare) {
2380 sample_key |= LP_SAMPLER_SHADOW;
2381 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2382 }
2383
2384 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2385 unsigned dim;
2386 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2387 for (dim = 0; dim < num_derivs; ++dim) {
2388 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2389 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2390 }
2391 params.derivs = &derivs;
2392 /*
2393 * could also check whether all src regs are constant, but I doubt
2394 * such cases exist in practice.
2395 */
2396 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2397 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2398 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2399 }
2400 else {
2401 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2402 }
2403 }
2404 else {
2405 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2406 }
2407 }
2408
2409 /* some advanced gather instructions (txgo) would require 4 offsets */
2410 if (inst->Texture.NumOffsets == 1) {
2411 unsigned dim;
2412 sample_key |= LP_SAMPLER_OFFSETS;
2413 for (dim = 0; dim < num_offsets; dim++) {
2414 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2415 }
2416 }
2417 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2418
2419 params.type = bld->bld_base.base.type;
2420 params.sample_key = sample_key;
2421 params.texture_index = texture_unit;
2422 params.sampler_index = sampler_unit;
2423 params.context_ptr = bld->context_ptr;
2424 params.thread_data_ptr = bld->thread_data_ptr;
2425 params.coords = coords;
2426 params.offsets = offsets;
2427 params.lod = lod;
2428 params.texel = texel;
2429
2430 bld->sampler->emit_tex_sample(bld->sampler,
2431 bld->bld_base.base.gallivm,
2432 &params);
2433
2434 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2435 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2436 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2437 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2438 unsigned char swizzles[4];
2439 swizzles[0] = inst->Src[1].Register.SwizzleX;
2440 swizzles[1] = inst->Src[1].Register.SwizzleY;
2441 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2442 swizzles[3] = inst->Src[1].Register.SwizzleW;
2443
2444 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2445 }
2446 }
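
/*
 * Example of the trailing swizzle fixup (hypothetical shader): for
 * SAMPLE TEMP[0], TEMP[1], RES[1].yxwz, SAMP[2] the four texel vectors
 * are permuted in place with swizzles = {Y,X,W,Z}, so texel[0] ends up
 * holding the resource's y component, texel[1] its x component, etc.
 */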
2447
2448 static void
2449 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2450 const struct tgsi_full_instruction *inst,
2451 LLVMValueRef *texel,
2452 boolean is_samplei)
2453 {
2454 unsigned unit, target;
2455 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2456 LLVMValueRef explicit_lod = NULL;
2457 LLVMValueRef coords[5];
2458 LLVMValueRef offsets[3] = { NULL };
2459 LLVMValueRef ms_index = NULL;
2460 struct lp_sampler_params params;
2461 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2462 unsigned dims, i;
2463 unsigned layer_coord = 0;
2464 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2465
2466 memset(&params, 0, sizeof(params));
2467
2468 if (!bld->sampler) {
2469 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2470 for (i = 0; i < 4; i++) {
2471 texel[i] = coord_undef;
2472 }
2473 return;
2474 }
2475
2476 unit = inst->Src[1].Register.Index;
2477
2478 if (is_samplei) {
2479 target = bld->sv[unit].Resource;
2480 }
2481 else {
2482 target = inst->Texture.Texture;
2483 }
2484
2485 switch (target) {
2486 case TGSI_TEXTURE_1D:
2487 case TGSI_TEXTURE_BUFFER:
2488 dims = 1;
2489 break;
2490 case TGSI_TEXTURE_1D_ARRAY:
2491 layer_coord = 1;
2492 dims = 1;
2493 break;
2494 case TGSI_TEXTURE_2D:
2495 case TGSI_TEXTURE_RECT:
2496 case TGSI_TEXTURE_2D_MSAA:
2497 dims = 2;
2498 break;
2499 case TGSI_TEXTURE_2D_ARRAY:
2500 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2501 layer_coord = 2;
2502 dims = 2;
2503 break;
2504 case TGSI_TEXTURE_3D:
2505 dims = 3;
2506 break;
2507 default:
2508 assert(0);
2509 return;
2510 }
2511
2512 /* we always have an explicit lod, except for buffers and msaa targets? */
2513 if (target != TGSI_TEXTURE_BUFFER &&
2514 target != TGSI_TEXTURE_2D_MSAA &&
2515 target != TGSI_TEXTURE_2D_ARRAY_MSAA &&
2516 inst->Instruction.Opcode != TGSI_OPCODE_TXF_LZ) {
2517 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2518 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2519 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2520 }
2521
2522 if (target == TGSI_TEXTURE_2D_MSAA ||
2523 target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
2524 sample_key |= LP_SAMPLER_FETCH_MS;
2525 ms_index = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2526 }
2527
2528 /*
2529 * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2530 * would be the sample index.
2531 */
2532
2533 for (i = 0; i < dims; i++) {
2534 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2535 }
2536 /* never use more than 3 coords here, but emit_fetch_texel copies all 5 anyway */
2537 for (i = dims; i < 5; i++) {
2538 coords[i] = coord_undef;
2539 }
2540 if (layer_coord)
2541 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2542
2543 if (inst->Texture.NumOffsets == 1) {
2544 unsigned dim;
2545 sample_key |= LP_SAMPLER_OFFSETS;
2546 for (dim = 0; dim < dims; dim++) {
2547 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2548 }
2549 }
2550 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2551
2552 params.type = bld->bld_base.base.type;
2553 params.sample_key = sample_key;
2554 params.texture_index = unit;
2555 /*
2556 * The sampler is not actually used; set it to 0 so it won't exceed
2557 * PIPE_MAX_SAMPLERS and trigger assertions with d3d10, where the
2558 * sampler view number can exceed this.
2559 */
2560 params.sampler_index = 0;
2561 params.context_ptr = bld->context_ptr;
2562 params.thread_data_ptr = bld->thread_data_ptr;
2563 params.coords = coords;
2564 params.offsets = offsets;
2565 params.derivs = NULL;
2566 params.lod = explicit_lod;
2567 params.texel = texel;
2568 params.ms_index = ms_index;
2569
2570 bld->sampler->emit_tex_sample(bld->sampler,
2571 bld->bld_base.base.gallivm,
2572 &params);
2573
2574 if (is_samplei &&
2575 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2576 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2577 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2578 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2579 unsigned char swizzles[4];
2580 swizzles[0] = inst->Src[1].Register.SwizzleX;
2581 swizzles[1] = inst->Src[1].Register.SwizzleY;
2582 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2583 swizzles[3] = inst->Src[1].Register.SwizzleW;
2584
2585 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2586 }
2587 }
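
/*
 * Note on the fetch path (illustrative): for a plain TXF on a 2D texture
 * the integer lod is taken from src0.w; for the MSAA targets that same
 * slot is instead consumed as the sample index (ms_index), and TXF_LZ
 * implies lod 0 simply by never setting LP_SAMPLER_LOD_EXPLICIT.
 */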
2588
2589 static void
2590 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2591 const struct tgsi_full_instruction *inst,
2592 LLVMValueRef *sizes_out,
2593 boolean is_sviewinfo)
2594 {
2595 LLVMValueRef explicit_lod;
2596 enum lp_sampler_lod_property lod_property;
2597 unsigned has_lod;
2598 unsigned i;
2599 unsigned unit = inst->Src[1].Register.Index;
2600 unsigned target, pipe_target;
2601 struct lp_sampler_size_query_params params;
2602
2603 if (is_sviewinfo) {
2604 target = bld->sv[unit].Resource;
2605 }
2606 else {
2607 target = inst->Texture.Texture;
2608 }
2609 switch (target) {
2610 case TGSI_TEXTURE_BUFFER:
2611 case TGSI_TEXTURE_RECT:
2612 case TGSI_TEXTURE_SHADOWRECT:
2613 has_lod = 0;
2614 break;
2615 default:
2616 has_lod = 1;
2617 break;
2618 }
2619
2620 if (!bld->sampler) {
2621 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2622 for (i = 0; i < 4; i++)
2623 sizes_out[i] = bld->bld_base.int_bld.undef;
2624 return;
2625 }
2626
2627 if (has_lod) {
2628 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2629 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2630 }
2631 else {
2632 explicit_lod = NULL;
2633 lod_property = LP_SAMPLER_LOD_SCALAR;
2634 }
2635
2636
2637 pipe_target = tgsi_to_pipe_tex_target(target);
2638
2639 params.int_type = bld->bld_base.int_bld.type;
2640 params.texture_unit = unit;
2641 params.target = pipe_target;
2642 params.context_ptr = bld->context_ptr;
2643 params.is_sviewinfo = TRUE;
2644 params.lod_property = lod_property;
2645 params.explicit_lod = explicit_lod;
2646 params.sizes_out = sizes_out;
2647
2648 bld->sampler->emit_size_query(bld->sampler,
2649 bld->bld_base.base.gallivm,
2650 &params);
2651 }
2652
2653 static boolean
2654 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2655 int pc)
2656 {
2657 unsigned i;
2658
2659 for (i = 0; i < 5; i++) {
2660 enum tgsi_opcode opcode;
2661
2662 if (pc + i >= bld->bld_base.info->num_instructions)
2663 return TRUE;
2664
2665 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2666
2667 if (opcode == TGSI_OPCODE_END)
2668 return TRUE;
2669
2670 if (opcode == TGSI_OPCODE_TEX ||
2671 opcode == TGSI_OPCODE_TXP ||
2672 opcode == TGSI_OPCODE_TXD ||
2673 opcode == TGSI_OPCODE_TXB ||
2674 opcode == TGSI_OPCODE_TXL ||
2675 opcode == TGSI_OPCODE_TXF ||
2676 opcode == TGSI_OPCODE_TXQ ||
2677 opcode == TGSI_OPCODE_TEX2 ||
2678 opcode == TGSI_OPCODE_TXB2 ||
2679 opcode == TGSI_OPCODE_TXL2 ||
2680 opcode == TGSI_OPCODE_SAMPLE ||
2681 opcode == TGSI_OPCODE_SAMPLE_B ||
2682 opcode == TGSI_OPCODE_SAMPLE_C ||
2683 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2684 opcode == TGSI_OPCODE_SAMPLE_D ||
2685 opcode == TGSI_OPCODE_SAMPLE_I ||
2686 opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2687 opcode == TGSI_OPCODE_SAMPLE_L ||
2688 opcode == TGSI_OPCODE_SVIEWINFO ||
2689 opcode == TGSI_OPCODE_CAL ||
2690 opcode == TGSI_OPCODE_IF ||
2691 opcode == TGSI_OPCODE_UIF ||
2692 opcode == TGSI_OPCODE_BGNLOOP ||
2693 opcode == TGSI_OPCODE_SWITCH)
2694 return FALSE;
2695 }
2696
2697 return TRUE;
2698 }
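
/*
 * Rationale sketch: after a kill, lp_build_mask_check() adds an early-out
 * branch that skips the remaining code when all lanes are dead.  For a
 * (hypothetical) tail like
 *
 *    KILL_IF TEMP[0].xxxx
 *    MOV OUT[0], TEMP[1]
 *    END
 *
 * the two remaining instructions are cheaper than the branch itself, so
 * near_end_of_shader() returns TRUE and the check is omitted.
 */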
2699
2700
2701
2702 /**
2703 * Kill fragment if any of the src register values are negative.
2704 */
2705 static void
2706 emit_kill_if(
2707 struct lp_build_tgsi_soa_context *bld,
2708 const struct tgsi_full_instruction *inst,
2709 int pc)
2710 {
2711 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2712 const struct tgsi_full_src_register *reg = &inst->Src[0];
2713 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2714 LLVMValueRef mask;
2715 unsigned chan_index;
2716
2717 memset(&terms, 0, sizeof terms);
2718
2719 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2720 unsigned swizzle;
2721
2722 /* Unswizzle channel */
2723 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2724
2725 /* Check if the component has not been already tested. */
2726 assert(swizzle < TGSI_NUM_CHANNELS);
2727 if( !terms[swizzle] )
2728 /* TODO: change the comparison operator instead of setting the sign */
2729 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2730 }
2731
2732 mask = NULL;
2733 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2734 if(terms[chan_index]) {
2735 LLVMValueRef chan_mask;
2736
2737 /*
2738 * If term < 0 then mask = 0 else mask = ~0.
2739 */
2740 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2741
2742 if(mask)
2743 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2744 else
2745 mask = chan_mask;
2746 }
2747 }
2748
2749 if (bld->exec_mask.has_mask) {
2750 LLVMValueRef invmask;
2751 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2752 mask = LLVMBuildOr(builder, mask, invmask, "");
2753 }
2754
2755 lp_build_mask_update(bld->mask, mask);
2756 if (!near_end_of_shader(bld, pc))
2757 lp_build_mask_check(bld->mask);
2758 }
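
/*
 * Worked example (illustrative lane values): with a single term vector
 * { -1.0, 2.0, 0.0, -0.5 }, the per-channel comparison
 *
 *    chan_mask = (term >= 0.0) ? ~0 : 0
 *
 * yields { 0, ~0, ~0, 0 }, so lanes 0 and 3 are killed -- unless a lane
 * was already inactive, in which case the OR with the inverted exec mask
 * keeps it from being killed here.
 */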
2759
2760
2761 /**
2762 * Unconditional fragment kill.
2763 * The only predication is the execution mask which will apply if
2764 * we're inside a loop or conditional.
2765 */
2766 static void
2767 emit_kill(struct lp_build_tgsi_soa_context *bld,
2768 int pc)
2769 {
2770 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2771 LLVMValueRef mask;
2772
2773 /* For those channels which are "alive", disable fragment shader
2774 * execution.
2775 */
2776 if (bld->exec_mask.has_mask) {
2777 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2778 }
2779 else {
2780 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2781 mask = zero;
2782 }
2783
2784 lp_build_mask_update(bld->mask, mask);
2785
2786 if (!near_end_of_shader(bld, pc))
2787 lp_build_mask_check(bld->mask);
2788 }
2789
2790
2791 /**
2792 * Emit code which will dump the value of all the temporary registers
2793 * to stdout.
2794 */
2795 static void
2796 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2797 unsigned file)
2798 {
2799 const struct tgsi_shader_info *info = bld->bld_base.info;
2800 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2801 LLVMBuilderRef builder = gallivm->builder;
2802 LLVMValueRef reg_ptr;
2803 int index;
2804 int max_index = info->file_max[file];
2805
2806 /*
2807 * Some register files, particularly constants, can be very large,
2808 * and dumping everything could make this unusably slow.
2809 */
2810 max_index = MIN2(max_index, 32);
2811
2812 for (index = 0; index <= max_index; index++) {
2813 LLVMValueRef res;
2814 unsigned mask;
2815 int chan;
2816
2817 if (index < 8 * sizeof(unsigned) &&
2818 (info->file_mask[file] & (1u << index)) == 0) {
2819 /* This was not declared. */
2820 continue;
2821 }
2822
2823 if (file == TGSI_FILE_INPUT) {
2824 mask = info->input_usage_mask[index];
2825 } else {
2826 mask = TGSI_WRITEMASK_XYZW;
2827 }
2828
2829 for (chan = 0; chan < 4; chan++) {
2830 if ((mask & (1 << chan)) == 0) {
2831 /* This channel is not used. */
2832 continue;
2833 }
2834
2835 if (file == TGSI_FILE_CONSTANT) {
2836 struct tgsi_full_src_register reg;
2837 memset(&reg, 0, sizeof reg);
2838 reg.Register.File = file;
2839 reg.Register.Index = index;
2840 reg.Register.SwizzleX = 0;
2841 reg.Register.SwizzleY = 1;
2842 reg.Register.SwizzleZ = 2;
2843 reg.Register.SwizzleW = 3;
2844
2845 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2846 if (!res) {
2847 continue;
2848 }
2849 } else if (file == TGSI_FILE_INPUT) {
2850 res = bld->inputs[index][chan];
2851 if (!res) {
2852 continue;
2853 }
2854 } else if (file == TGSI_FILE_TEMPORARY) {
2855 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2856 assert(reg_ptr);
2857 res = LLVMBuildLoad(builder, reg_ptr, "");
2858 } else if (file == TGSI_FILE_OUTPUT) {
2859 reg_ptr = lp_get_output_ptr(bld, index, chan);
2860 assert(reg_ptr);
2861 res = LLVMBuildLoad(builder, reg_ptr, "");
2862 } else {
2863 assert(0);
2864 continue;
2865 }
2866
2867 emit_dump_reg(gallivm, file, index, chan, res);
2868 }
2869 }
2870 }
2871
2872
2873
2874 void
2875 lp_emit_declaration_soa(
2876 struct lp_build_tgsi_context *bld_base,
2877 const struct tgsi_full_declaration *decl)
2878 {
2879 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2880 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2881 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2882 const unsigned first = decl->Range.First;
2883 const unsigned last = decl->Range.Last;
2884 unsigned idx, i;
2885
2886 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2887
2888 switch (decl->Declaration.File) {
2889 case TGSI_FILE_TEMPORARY:
2890 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2891 assert(last < LP_MAX_INLINED_TEMPS);
2892 for (idx = first; idx <= last; ++idx) {
2893 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2894 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2895 }
2896 }
2897 break;
2898
2899 case TGSI_FILE_OUTPUT:
2900 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2901 for (idx = first; idx <= last; ++idx) {
2902 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2903 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2904 vec_type, "output");
2905 }
2906 }
2907 break;
2908
2909 case TGSI_FILE_ADDRESS:
2910 /* ADDR registers are only allocated with an integer LLVM IR type,
2911 * as they are guaranteed to always hold integer values.
2912 * XXX: Not sure if this exception is worthwhile (or the whole idea of
2913 * an ADDR register for that matter).
2914 */
2915 assert(last < LP_MAX_TGSI_ADDRS);
2916 for (idx = first; idx <= last; ++idx) {
2917 assert(idx < LP_MAX_TGSI_ADDRS);
2918 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2919 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2920 }
2921 break;
2922
2923 case TGSI_FILE_SAMPLER_VIEW:
2924 /*
2925 * The target stored here MUST match whatever is actually in the
2926 * bound sampler views (what about the return type?).
2927 */
2928 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2929 for (idx = first; idx <= last; ++idx) {
2930 bld->sv[idx] = decl->SamplerView;
2931 }
2932 break;
2933
2934 case TGSI_FILE_CONSTANT:
2935 {
2936 /*
2937 * We could trivially fetch the per-buffer pointer when fetching the
2938 * constant, relying on llvm to figure out it's always the same pointer
2939 * anyway. However, doing so results in a huge (more than factor of 10)
2940 * slowdown in llvm compilation times for some (but not all) shaders
2941 * (more specifically, the IR optimization spends way more time in
2942 * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
2943 */
2944 unsigned idx2D = decl->Dim.Index2D;
2945 LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2946 assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2947 bld->consts[idx2D] =
2948 lp_build_array_get(gallivm, bld->consts_ptr, index2D);
2949 bld->consts_sizes[idx2D] =
2950 lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
2951 }
2952 break;
2953 case TGSI_FILE_BUFFER:
2954 {
2955 unsigned idx = decl->Range.First;
2956 LLVMValueRef index = lp_build_const_int32(gallivm, idx);
2957 assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
2958 bld->ssbos[idx] =
2959 lp_build_array_get(gallivm, bld->ssbo_ptr, index);
2960 bld->ssbo_sizes[idx] =
2961 lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index);
2962
2963 }
2964 break;
2965 case TGSI_FILE_MEMORY:
2966 break;
2967 default:
2968 /* don't need to declare other vars */
2969 break;
2970 }
2971 }
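
/*
 * Example (hypothetical TGSI): a declaration like "DCL TEMP[0..1]" with no
 * indirect addressing hits the TGSI_FILE_TEMPORARY case above and creates
 * 2 * TGSI_NUM_CHANNELS allocas, one <N x float> stack slot per register
 * channel; indirectly addressed files use a flat array instead.
 */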
2972
2973
2974 void lp_emit_immediate_soa(
2975 struct lp_build_tgsi_context *bld_base,
2976 const struct tgsi_full_immediate *imm)
2977 {
2978 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2979 struct gallivm_state * gallivm = bld_base->base.gallivm;
2980 LLVMValueRef imms[4];
2981 unsigned i;
2982 const uint size = imm->Immediate.NrTokens - 1;
2983 assert(size <= 4);
2984 switch (imm->Immediate.DataType) {
2985 case TGSI_IMM_FLOAT32:
2986 for( i = 0; i < size; ++i )
2987 imms[i] =
2988 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2989
2990 break;
2991 case TGSI_IMM_FLOAT64:
2992 case TGSI_IMM_UINT64:
2993 case TGSI_IMM_INT64:
2994 case TGSI_IMM_UINT32:
2995 for( i = 0; i < size; ++i ) {
2996 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
2997 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2998 }
2999
3000 break;
3001 case TGSI_IMM_INT32:
3002 for( i = 0; i < size; ++i ) {
3003 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3004 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3005 }
3006
3007 break;
3008 }
3009 for( i = size; i < 4; ++i )
3010 imms[i] = bld_base->base.undef;
3011
3012 if (bld->use_immediates_array) {
3013 unsigned index = bld->num_immediates;
3014 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3015 LLVMBuilderRef builder = gallivm->builder;
3016 LLVMValueRef gep[2];
3017 gep[0] = lp_build_const_int32(gallivm, 0);
3018
3019 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3020 for (i = 0; i < 4; ++i ) {
3021 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3022 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3023 bld->imms_array, gep, 2, "");
3024 LLVMBuildStore(builder, imms[i], imm_ptr);
3025 }
3026 } else {
3027 /* simply copy the immediate values into the next immediates[] slot */
3028 unsigned i;
3029 assert(imm->Immediate.NrTokens - 1 <= 4);
3030 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3031
3032 for(i = 0; i < 4; ++i )
3033 bld->immediates[bld->num_immediates][i] = imms[i];
3034
3035 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3036 unsigned index = bld->num_immediates;
3037 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3038 LLVMBuilderRef builder = gallivm->builder;
3039 LLVMValueRef gep[2];
3040 gep[0] = lp_build_const_int32(gallivm, 0);
3041 for (i = 0; i < 4; ++i ) {
3042 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3043 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3044 bld->imms_array, gep, 2, "");
3045 LLVMBuildStore(builder,
3046 bld->immediates[index][i],
3047 imm_ptr);
3048 }
3049 }
3050 }
3051
3052 bld->num_immediates++;
3053 }
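
/*
 * Example (hypothetical TGSI): "IMM[0] FLT32 {1.0, 0.5, 0.0, 0.0}" has
 * NrTokens - 1 == 4, so each scalar is splatted to a full SoA vector,
 * e.g. imms[0] = <1.0, 1.0, ...>, which then lands either in the inlined
 * bld->immediates[] slots or in the imms_array alloca when immediates can
 * be addressed indirectly.
 */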
3054
3055 static void
3056 ddx_emit(
3057 const struct lp_build_tgsi_action * action,
3058 struct lp_build_tgsi_context * bld_base,
3059 struct lp_build_emit_data * emit_data)
3060 {
3061 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3062
3063 emit_fetch_deriv(bld, emit_data->args[0], NULL,
3064 &emit_data->output[emit_data->chan], NULL);
3065 }
3066
3067 static void
3068 ddy_emit(
3069 const struct lp_build_tgsi_action * action,
3070 struct lp_build_tgsi_context * bld_base,
3071 struct lp_build_emit_data * emit_data)
3072 {
3073 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3074
3075 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3076 &emit_data->output[emit_data->chan]);
3077 }
3078
3079 static void
3080 kill_emit(
3081 const struct lp_build_tgsi_action * action,
3082 struct lp_build_tgsi_context * bld_base,
3083 struct lp_build_emit_data * emit_data)
3084 {
3085 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3086
3087 emit_kill(bld, bld_base->pc - 1);
3088 }
3089
3090 static void
3091 kill_if_emit(
3092 const struct lp_build_tgsi_action * action,
3093 struct lp_build_tgsi_context * bld_base,
3094 struct lp_build_emit_data * emit_data)
3095 {
3096 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3097
3098 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3099 }
3100
3101 static void
3102 tex_emit(
3103 const struct lp_build_tgsi_action * action,
3104 struct lp_build_tgsi_context * bld_base,
3105 struct lp_build_emit_data * emit_data)
3106 {
3107 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3108
3109 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3110 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3111 }
3112
3113 static void
3114 tex2_emit(
3115 const struct lp_build_tgsi_action * action,
3116 struct lp_build_tgsi_context * bld_base,
3117 struct lp_build_emit_data * emit_data)
3118 {
3119 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3120
3121 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3122 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3123 }
3124
3125 static void
3126 txb_emit(
3127 const struct lp_build_tgsi_action * action,
3128 struct lp_build_tgsi_context * bld_base,
3129 struct lp_build_emit_data * emit_data)
3130 {
3131 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3132
3133 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3134 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3135 }
3136
3137 static void
3138 txb2_emit(
3139 const struct lp_build_tgsi_action * action,
3140 struct lp_build_tgsi_context * bld_base,
3141 struct lp_build_emit_data * emit_data)
3142 {
3143 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3144
3145 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3146 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3147 }
3148
3149 static void
3150 txd_emit(
3151 const struct lp_build_tgsi_action * action,
3152 struct lp_build_tgsi_context * bld_base,
3153 struct lp_build_emit_data * emit_data)
3154 {
3155 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3156
3157 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3158 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3159 }
3160
3161 static void
3162 txl_emit(
3163 const struct lp_build_tgsi_action * action,
3164 struct lp_build_tgsi_context * bld_base,
3165 struct lp_build_emit_data * emit_data)
3166 {
3167 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3168
3169 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3170 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3171 }
3172
3173 static void
3174 txl2_emit(
3175 const struct lp_build_tgsi_action * action,
3176 struct lp_build_tgsi_context * bld_base,
3177 struct lp_build_emit_data * emit_data)
3178 {
3179 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3180
3181 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3182 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3183 }
3184
3185 static void
3186 txp_emit(
3187 const struct lp_build_tgsi_action * action,
3188 struct lp_build_tgsi_context * bld_base,
3189 struct lp_build_emit_data * emit_data)
3190 {
3191 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3192
3193 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3194 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3195 }
3196
3197 static void
3198 tg4_emit(
3199 const struct lp_build_tgsi_action * action,
3200 struct lp_build_tgsi_context * bld_base,
3201 struct lp_build_emit_data * emit_data)
3202 {
3203 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3204
3205 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3206 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3207 }
3208
3209 static void
3210 lodq_emit(
3211 const struct lp_build_tgsi_action * action,
3212 struct lp_build_tgsi_context * bld_base,
3213 struct lp_build_emit_data * emit_data)
3214 {
3215 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3216
3217 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3218 emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3219 }
3220
3221 static void
3222 txq_emit(
3223 const struct lp_build_tgsi_action * action,
3224 struct lp_build_tgsi_context * bld_base,
3225 struct lp_build_emit_data * emit_data)
3226 {
3227 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3228
3229 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3230 }
3231
3232 static void
3233 txf_emit(
3234 const struct lp_build_tgsi_action * action,
3235 struct lp_build_tgsi_context * bld_base,
3236 struct lp_build_emit_data * emit_data)
3237 {
3238 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3239
3240 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3241 }
3242
3243 static void
3244 sample_i_emit(
3245 const struct lp_build_tgsi_action * action,
3246 struct lp_build_tgsi_context * bld_base,
3247 struct lp_build_emit_data * emit_data)
3248 {
3249 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3250
3251 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3252 }
3253
3254 static void
3255 sample_emit(
3256 const struct lp_build_tgsi_action * action,
3257 struct lp_build_tgsi_context * bld_base,
3258 struct lp_build_emit_data * emit_data)
3259 {
3260 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3261
3262 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3263 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3264 }
3265
3266 static void
3267 sample_b_emit(
3268 const struct lp_build_tgsi_action * action,
3269 struct lp_build_tgsi_context * bld_base,
3270 struct lp_build_emit_data * emit_data)
3271 {
3272 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3273
3274 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3275 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3276 }
3277
3278 static void
3279 sample_c_emit(
3280 const struct lp_build_tgsi_action * action,
3281 struct lp_build_tgsi_context * bld_base,
3282 struct lp_build_emit_data * emit_data)
3283 {
3284 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3285
3286 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3287 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3288 }
3289
3290 static void
3291 sample_c_lz_emit(
3292 const struct lp_build_tgsi_action * action,
3293 struct lp_build_tgsi_context * bld_base,
3294 struct lp_build_emit_data * emit_data)
3295 {
3296 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3297
3298 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3299 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3300 }
3301
3302 static void
3303 sample_d_emit(
3304 const struct lp_build_tgsi_action * action,
3305 struct lp_build_tgsi_context * bld_base,
3306 struct lp_build_emit_data * emit_data)
3307 {
3308 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3309
3310 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3311 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3312 }
3313
3314 static void
3315 sample_l_emit(
3316 const struct lp_build_tgsi_action * action,
3317 struct lp_build_tgsi_context * bld_base,
3318 struct lp_build_emit_data * emit_data)
3319 {
3320 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3321
3322 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3323 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3324 }
3325
3326 static void
3327 gather4_emit(
3328 const struct lp_build_tgsi_action * action,
3329 struct lp_build_tgsi_context * bld_base,
3330 struct lp_build_emit_data * emit_data)
3331 {
3332 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3333
3334 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3335 FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
3336 }
3337
3338 static void
3339 sviewinfo_emit(
3340 const struct lp_build_tgsi_action * action,
3341 struct lp_build_tgsi_context * bld_base,
3342 struct lp_build_emit_data * emit_data)
3343 {
3344 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3345
3346 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3347 }
3348
3349 static void
3350 lod_emit(
3351 const struct lp_build_tgsi_action * action,
3352 struct lp_build_tgsi_context * bld_base,
3353 struct lp_build_emit_data * emit_data)
3354 {
3355 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3356
3357 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3358 FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3359 }
3360
3361 static void target_to_dims_layer(unsigned target,
3362 unsigned *dims,
3363 unsigned *layer_coord)
3364 {
3365 *layer_coord = 0;
3366 switch (target) {
3367 case TGSI_TEXTURE_1D:
3368 case TGSI_TEXTURE_BUFFER:
3369 *dims = 1;
3370 break;
3371 case TGSI_TEXTURE_1D_ARRAY:
3372 *layer_coord = 1;
3373 *dims = 1;
3374 break;
3375 case TGSI_TEXTURE_2D:
3376 case TGSI_TEXTURE_RECT:
3377 *dims = 2;
3378 break;
3379 case TGSI_TEXTURE_2D_ARRAY:
3380 *layer_coord = 2;
3381 *dims = 2;
3382 break;
3383 case TGSI_TEXTURE_3D:
3384 case TGSI_TEXTURE_CUBE:
3385 case TGSI_TEXTURE_CUBE_ARRAY:
3386 *dims = 3;
3387 break;
3388 default:
3389 assert(0);
3390 *dims = 0;
3391 return;
3392 }
3393 }
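
/*
 * E.g. TGSI_TEXTURE_2D_ARRAY yields dims = 2 and layer_coord = 2: the
 * callers below fetch two regular coordinates and then read the array
 * layer from the third source channel.
 */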
3394
3395 static void
3396 img_load_emit(
3397 const struct lp_build_tgsi_action * action,
3398 struct lp_build_tgsi_context * bld_base,
3399 struct lp_build_emit_data * emit_data)
3400 {
3401 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3402 struct lp_img_params params;
3403 LLVMValueRef coords[5];
3404 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3405 unsigned dims;
3406 unsigned target = emit_data->inst->Memory.Texture;
3407 unsigned layer_coord;
3408
3409 target_to_dims_layer(target, &dims, &layer_coord);
3410
3411 for (unsigned i = 0; i < dims; i++) {
3412 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3413 }
3414 for (unsigned i = dims; i < 5; i++) {
3415 coords[i] = coord_undef;
3416 }
3417 if (layer_coord)
3418 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3419
3420 memset(&params, 0, sizeof(params));
3421
3422 params.type = bld->bld_base.base.type;
3423 params.context_ptr = bld->context_ptr;
3424 params.thread_data_ptr = bld->thread_data_ptr;
3425 params.coords = coords;
3426 params.outdata = emit_data->output;
3427 params.target = tgsi_to_pipe_tex_target(target);
3428 params.image_index = emit_data->inst->Src[0].Register.Index;
3429 params.img_op = LP_IMG_LOAD;
3430 bld->image->emit_op(bld->image,
3431 bld->bld_base.base.gallivm,
3432 &params);
3433 }
3434
3435 static void
3436 load_emit(
3437 const struct lp_build_tgsi_action * action,
3438 struct lp_build_tgsi_context * bld_base,
3439 struct lp_build_emit_data * emit_data)
3440 {
3441 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3442 struct gallivm_state * gallivm = bld_base->base.gallivm;
3443 LLVMBuilderRef builder = gallivm->builder;
3444 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3445 unsigned buf = bufreg->Register.Index;
3446 assert(bufreg->Register.File == TGSI_FILE_BUFFER ||
3447 bufreg->Register.File == TGSI_FILE_IMAGE ||
3448 bufreg->Register.File == TGSI_FILE_MEMORY ||
3449 bufreg->Register.File == TGSI_FILE_CONSTBUF);
3450 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3451 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3452
3453 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3454 img_load_emit(action, bld_base, emit_data);
3455 } else if (bufreg->Register.File == TGSI_FILE_CONSTBUF) {
3456 LLVMValueRef consts_ptr = bld->consts[buf];
3457 LLVMValueRef num_consts = bld->consts_sizes[buf];
3458
3459 LLVMValueRef indirect_index;
3460 LLVMValueRef overflow_mask;
3461
3462 indirect_index = lp_build_emit_fetch(bld_base, emit_data->inst, 1, 0);
3463 indirect_index = lp_build_shr_imm(uint_bld, indirect_index, 4);
3464
3465 /* All fetches are from the same constant buffer, so
3466 * broadcast its size to a vector so that we can do a
3467 * per-lane vector comparison. */
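/*
 * A sketch of the indexing math (per active lane), assuming the address
 * operand is a byte offset as elsewhere in this file: e.g. offset 48
 * gives indirect_index = 48 >> 4 = 3 (vec4 units), and fetching channel
 * 1 (.y) below reads float element 3 * 4 + 1 = 13 of the buffer.
 */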
3468 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
3469
3470 /* Gather values from the constant buffer */
3471 unsigned chan_index;
3472 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3473 /* Construct a boolean vector telling us which lanes
3474 * index past the end of the bound constant buffer. */
3475 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
3476 indirect_index, num_consts);
3477
3478 /* index_vec = indirect_index * 4 */
3479 LLVMValueRef index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
3480 index_vec = lp_build_add(uint_bld, index_vec,
3481 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3482
3483 emit_data->output[chan_index] = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, NULL);
3484 }
3485 } else if (0) {
3486 /* placeholder for indirect buffer indexing with ARB_gpu_shader5 */
3487 } else {
3488 LLVMValueRef index;
3489 LLVMValueRef scalar, scalar_ptr;
3490 unsigned chan_index;
3491
3492 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3493 index = lp_build_shr_imm(uint_bld, index, 2);
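/*
 * The address is a byte offset; >> 2 turns it into a 32-bit element
 * index, so e.g. byte offset 16 becomes dword 4 and channel chan_index
 * below reads dword 4 + chan_index. Lanes whose index reaches
 * ssbo_limit are masked off and read 0.
 */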
3494
3495 scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3496
3497 LLVMValueRef ssbo_limit = NULL;
3498
3499 if (!is_shared) {
3500 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3501 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3502 }
3503
3504 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3505 LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3506
3507 LLVMValueRef exec_mask = mask_vec(bld_base);
3508 if (!is_shared) {
3509 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3510 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3511 }
3512
3513 LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
3514 struct lp_build_loop_state loop_state;
3515 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3516
3517 struct lp_build_if_state ifthen;
3518 LLVMValueRef cond, temp_res;
3519
3520 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3521 loop_state.counter, "");
3522
3523 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3524 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3525
3526 lp_build_if(&ifthen, gallivm, cond);
3527 scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index);
3528
3529 temp_res = LLVMBuildLoad(builder, result, "");
3530 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3531 LLVMBuildStore(builder, temp_res, result);
3532 lp_build_else(&ifthen);
3533 temp_res = LLVMBuildLoad(builder, result, "");
3534 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3535 LLVMBuildStore(builder, temp_res, result);
3536 lp_build_endif(&ifthen);
3537 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3538 NULL, LLVMIntUGE);
3539 emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, "");
3540 }
3541 }
3542 }
3543
3544 static void
3545 img_store_emit(
3546 const struct lp_build_tgsi_action * action,
3547 struct lp_build_tgsi_context * bld_base,
3548 struct lp_build_emit_data * emit_data)
3549 {
3550 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3551 struct lp_img_params params;
3552 LLVMValueRef coords[5];
3553 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3554 unsigned dims;
3555 unsigned target = emit_data->inst->Memory.Texture;
3556 unsigned layer_coord;
3557
3558 target_to_dims_layer(target, &dims, &layer_coord);
3559 for (unsigned i = 0; i < dims; i++) {
3560 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i);
3561 }
3562 for (unsigned i = dims; i < 5; i++) {
3563 coords[i] = coord_undef;
3564 }
3565 if (layer_coord)
3566 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
3567 memset(&params, 0, sizeof(params));
3568
3569 params.type = bld->bld_base.base.type;
3570 params.context_ptr = bld->context_ptr;
3571 params.thread_data_ptr = bld->thread_data_ptr;
3572 params.coords = coords;
3573 params.outdata = NULL;
3574 params.exec_mask = mask_vec(bld_base);
3575 params.target = tgsi_to_pipe_tex_target(target);
3576 params.image_index = emit_data->inst->Dst[0].Register.Index;
3577 params.img_op = LP_IMG_STORE;
3578 for (unsigned i = 0; i < 4; i++)
3579 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3580
3581 bld->image->emit_op(bld->image,
3582 bld->bld_base.base.gallivm,
3583 &params);
3584 }
3585
3586 static void
3587 store_emit(
3588 const struct lp_build_tgsi_action * action,
3589 struct lp_build_tgsi_context * bld_base,
3590 struct lp_build_emit_data * emit_data)
3591 {
3592 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3593 struct gallivm_state * gallivm = bld_base->base.gallivm;
3594 LLVMBuilderRef builder = gallivm->builder;
3595 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3596 const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
3597 unsigned buf = bufreg->Register.Index;
3598 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3599 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3600
3601 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3602 img_store_emit(action, bld_base, emit_data);
3603 } else if (0) {
3604
3605 } else {
3606 LLVMValueRef index; /* dword index into the ssbo / shared memory */
3607 LLVMValueRef scalar_ptr;
3608 LLVMValueRef value;
3609 unsigned chan_index;
3610
3611 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
3612 index = lp_build_shr_imm(uint_bld, index, 2);
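/*
 * Mirror of the load path: the byte offset becomes a 32-bit element
 * index, and channel chan_index of the source value is written to
 * dword index + chan_index for every lane that passes the combined
 * exec / bounds mask.
 */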
3613
3614 scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3615
3616 LLVMValueRef ssbo_limit = NULL;
3617
3618 if (!is_shared) {
3619 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3620 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3621 }
3622
3623 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3624 LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3625
3626 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);
3627
3628 LLVMValueRef exec_mask = mask_vec(bld_base);
3629 if (!is_shared) {
3630 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3631 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3632 }
3633
3634 struct lp_build_loop_state loop_state;
3635 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3636
3637 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3638 loop_state.counter, "");
3639 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3640
3641 struct lp_build_if_state ifthen;
3642 LLVMValueRef cond;
3643
3644 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3645 loop_state.counter, "");
3646
3647 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3648 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3649 lp_build_if(&ifthen, gallivm, cond);
3650
3651 lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);
3652
3653 lp_build_endif(&ifthen);
3654 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3655 NULL, LLVMIntUGE);
3656 }
3657 }
3658 }
3659
3660 static void
3661 resq_emit(
3662 const struct lp_build_tgsi_action * action,
3663 struct lp_build_tgsi_context * bld_base,
3664 struct lp_build_emit_data * emit_data)
3665 {
3666 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3667 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3668 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3669
3670 unsigned buf = bufreg->Register.Index;
3671 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
3672
3673 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3674 unsigned target = emit_data->inst->Memory.Texture;
3675 struct lp_sampler_size_query_params params = { 0 };
3676 params.int_type = bld->bld_base.int_bld.type;
3677 params.texture_unit = buf;
3678 params.target = tgsi_to_pipe_tex_target(target);
3679 params.context_ptr = bld->context_ptr;
3680 params.sizes_out = emit_data->output;
3681
3682 bld->image->emit_size_query(bld->image,
3683 bld->bld_base.base.gallivm,
3684 &params);
3685 } else {
3686 LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
3687
3688 emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
3689 }
3690 }
3691
3692 static void
3693 img_atomic_emit(
3694 const struct lp_build_tgsi_action * action,
3695 struct lp_build_tgsi_context * bld_base,
3696 struct lp_build_emit_data * emit_data,
3697 LLVMAtomicRMWBinOp op)
3698 {
3699 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3700 struct lp_img_params params;
3701 LLVMValueRef coords[5];
3702 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3703 unsigned dims;
3704 unsigned layer_coord;
3705 unsigned target = emit_data->inst->Memory.Texture;
3706
3707 target_to_dims_layer(target, &dims, &layer_coord);
3708
3709 for (unsigned i = 0; i < dims; i++) {
3710 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3711 }
3712 for (unsigned i = dims; i < 5; i++) {
3713 coords[i] = coord_undef;
3714 }
3715 if (layer_coord)
3716 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3717 memset(&params, 0, sizeof(params));
3718
3719 params.type = bld->bld_base.base.type;
3720 params.context_ptr = bld->context_ptr;
3721 params.thread_data_ptr = bld->thread_data_ptr;
3722 params.exec_mask = mask_vec(bld_base);
3723 params.image_index = emit_data->inst->Src[0].Register.Index;
3724 params.coords = coords;
3725 params.target = tgsi_to_pipe_tex_target(target);
3726 params.op = op;
3727 params.outdata = emit_data->output;
3728 params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
3729
3730 for (unsigned i = 0; i < 4; i++)
3731 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
3732 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3733 for (unsigned i = 0; i < 4; i++)
3734 params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
3735 }
3736 bld->image->emit_op(bld->image,
3737 bld->bld_base.base.gallivm,
3738 &params);
3739 }
3740
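/*
 * Handle the TGSI_OPCODE_ATOM* instructions: map the opcode to the matching
 * LLVMAtomicRMWBinOp (ATOMCAS is handled separately via cmpxchg and leaves
 * op unset), then either dispatch to the image path or scalarize the
 * operation per lane for buffer / shared memory, with inactive lanes
 * returning 0.
 */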
3741 static void
3742 atomic_emit(
3743 const struct lp_build_tgsi_action * action,
3744 struct lp_build_tgsi_context * bld_base,
3745 struct lp_build_emit_data * emit_data)
3746 {
3747 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3748 struct gallivm_state * gallivm = bld_base->base.gallivm;
3749 LLVMBuilderRef builder = gallivm->builder;
3750 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3751 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3752
3753 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3754 unsigned buf = bufreg->Register.Index;
3755 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3756
3757 LLVMAtomicRMWBinOp op = -1; /* stays unset for ATOMCAS */
3758 switch (emit_data->inst->Instruction.Opcode) {
3759 case TGSI_OPCODE_ATOMUADD:
3760 op = LLVMAtomicRMWBinOpAdd;
3761 break;
3762 case TGSI_OPCODE_ATOMXCHG:
3763 op = LLVMAtomicRMWBinOpXchg;
3764 break;
3765 case TGSI_OPCODE_ATOMAND:
3766 op = LLVMAtomicRMWBinOpAnd;
3767 break;
3768 case TGSI_OPCODE_ATOMOR:
3769 op = LLVMAtomicRMWBinOpOr;
3770 break;
3771 case TGSI_OPCODE_ATOMXOR:
3772 op = LLVMAtomicRMWBinOpXor;
3773 break;
3774 case TGSI_OPCODE_ATOMUMIN:
3775 op = LLVMAtomicRMWBinOpUMin;
3776 break;
3777 case TGSI_OPCODE_ATOMUMAX:
3778 op = LLVMAtomicRMWBinOpUMax;
3779 break;
3780 case TGSI_OPCODE_ATOMIMIN:
3781 op = LLVMAtomicRMWBinOpMin;
3782 break;
3783 case TGSI_OPCODE_ATOMIMAX:
3784 op = LLVMAtomicRMWBinOpMax;
3785 break;
3786 case TGSI_OPCODE_ATOMCAS:
3787 break;
3788 default:
3789 assert(0);
3790 return;
3791 }
3792
3793 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3794 img_atomic_emit(action, bld_base, emit_data, op);
3795 } else if (0) {
3796 } else {
3797 LLVMValueRef index; /* dword index into the ssbo / shared memory */
3798 LLVMValueRef scalar, scalar_ptr;
3799 LLVMValueRef value;
3800
3801 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3802 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);
3803
3804 index = lp_build_shr_imm(uint_bld, index, 2);
3805
3806 if (!is_shared) {
3807 index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
3808 scalar_ptr = bld->ssbos[buf];
3809 } else
3810 scalar_ptr = bld->shared_ptr;
3811
3812 LLVMValueRef atom_res = lp_build_alloca(gallivm,
3813 uint_bld->vec_type, "");
3814
3815 LLVMValueRef ssbo_limit = NULL;
3816 if (!is_shared) {
3817 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3818 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3819 }
3820
3821 LLVMValueRef exec_mask = mask_vec(bld_base);
3822
3823 if (!is_shared) {
3824 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
3825 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3826 }
3827
3828 struct lp_build_loop_state loop_state;
3829 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3830
3831 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3832 loop_state.counter, "");
3833 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3834
3835 index = LLVMBuildExtractElement(gallivm->builder, index,
3836 loop_state.counter, "");
3837
3838 scalar_ptr = LLVMBuildGEP(builder, scalar_ptr,
3839 &index, 1, "");
3840
3841 struct lp_build_if_state ifthen;
3842 LLVMValueRef cond, temp_res;
3843
3844 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3845 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3846 lp_build_if(&ifthen, gallivm, cond);
3847
3848 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3849 LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
3850 LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
3851 loop_state.counter, "");
3852 cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
3853 scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
3854 cas_src_ptr,
3855 LLVMAtomicOrderingSequentiallyConsistent,
3856 LLVMAtomicOrderingSequentiallyConsistent,
3857 false);
3858 scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
3859 } else {
3860 scalar = LLVMBuildAtomicRMW(builder, op,
3861 scalar_ptr, value_ptr,
3862 LLVMAtomicOrderingSequentiallyConsistent,
3863 false);
3864 }
3865 temp_res = LLVMBuildLoad(builder, atom_res, "");
3866 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3867 LLVMBuildStore(builder, temp_res, atom_res);
3868 lp_build_else(&ifthen);
3869 temp_res = LLVMBuildLoad(builder, atom_res, "");
3870 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3871 LLVMBuildStore(builder, temp_res, atom_res);
3872 lp_build_endif(&ifthen);
3873
3874 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3875 NULL, LLVMIntUGE);
3876 emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, "");
3877 }
3878 }
3879
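/*
 * TGSI_OPCODE_BARRIER for compute: suspend the shader's coroutine and
 * resume in a fresh block. The coroutine scheduler driving the work-group
 * is expected to run every invocation up to this suspend point before
 * resuming any of them, which yields barrier semantics.
 */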
3880 static void
3881 barrier_emit(
3882 const struct lp_build_tgsi_action * action,
3883 struct lp_build_tgsi_context * bld_base,
3884 struct lp_build_emit_data * emit_data)
3885 {
3886 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3887 struct gallivm_state * gallivm = bld_base->base.gallivm;
3888
3889 LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
3890
3891 lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
3892 LLVMPositionBuilderAtEnd(gallivm->builder, resume);
3893 }
3894
3895 static void
3896 membar_emit(
3897 const struct lp_build_tgsi_action * action,
3898 struct lp_build_tgsi_context * bld_base,
3899 struct lp_build_emit_data * emit_data)
3900 {
3901 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3902 LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, false, "");
3903 }
3904
3905 static void
3906 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3907 LLVMValueRef ptr,
3908 LLVMValueRef mask)
3909 {
3910 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3911 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3912
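/*
 * Active lanes of an exec mask are ~0 (i.e. -1 as an integer), so
 * subtracting the mask increments each active lane by one and leaves
 * inactive lanes unchanged.
 */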
3913 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3914
3915 LLVMBuildStore(builder, current_vec, ptr);
3916 }
3917
3918 static void
3919 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3920 LLVMValueRef ptr,
3921 LLVMValueRef mask)
3922 {
3923 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3924 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3925
3926 current_vec = lp_build_select(&bld_base->uint_bld,
3927 mask,
3928 bld_base->uint_bld.zero,
3929 current_vec);
3930
3931 LLVMBuildStore(builder, current_vec, ptr);
3932 }
3933
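/*
 * AND the execution mask with a comparison against max_output_vertices,
 * so lanes that have already emitted the maximum number of vertices are
 * masked off and further EMITs become no-ops for them.
 */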
3934 static LLVMValueRef
3935 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3936 LLVMValueRef current_mask_vec,
3937 LLVMValueRef total_emitted_vertices_vec)
3938 {
3939 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3940 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3941 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3942 total_emitted_vertices_vec,
3943 bld->max_output_vertices_vec);
3944
3945 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3946 }
3947
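/*
 * TGSI_OPCODE_EMIT: clamp the mask against max_output_vertices, gather
 * the current output registers, hand them to the gs interface, and bump
 * the per-lane vertex counters using the mask-subtraction trick above.
 */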
3948 static void
3949 emit_vertex(
3950 const struct lp_build_tgsi_action * action,
3951 struct lp_build_tgsi_context * bld_base,
3952 struct lp_build_emit_data * emit_data)
3953 {
3954 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3955 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3956
3957 if (bld->gs_iface->emit_vertex) {
3958 LLVMValueRef stream_id = emit_fetch_immediate(bld_base, &emit_data->inst->Src[0],
3959 TGSI_TYPE_UNSIGNED,
3960 emit_data->inst->Src[0].Register.SwizzleX);
3961 LLVMValueRef mask = mask_vec(bld_base);
3962 LLVMValueRef total_emitted_vertices_vec =
3963 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3964
3965 mask = clamp_mask_to_max_output_vertices(bld, mask,
3966 total_emitted_vertices_vec);
3967 gather_outputs(bld);
3968 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
3969 bld->outputs,
3970 total_emitted_vertices_vec,
3971 stream_id);
3972 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3973 mask);
3974 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3975 mask);
3976 #if DUMP_GS_EMITS
3977 lp_build_print_value(bld->bld_base.base.gallivm,
3978 " +++ emit vertex masked ones = ",
3979 mask);
3980 lp_build_print_value(bld->bld_base.base.gallivm,
3981 " +++ emit vertex emitted = ",
3982 total_emitted_vertices_vec);
3983 #endif
3984 }
3985 }
3986
3987
3988 static void
3989 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
3990 LLVMValueRef mask)
3991 {
3992 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3993 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3994
3995 if (bld->gs_iface->end_primitive) {
3996 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3997 LLVMValueRef emitted_vertices_vec =
3998 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
3999 LLVMValueRef emitted_prims_vec =
4000 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
4001 LLVMValueRef total_emitted_vertices_vec =
4002 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
4003 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
4004 emitted_vertices_vec,
4005 uint_bld->zero);
4006 /* We need to combine the current execution mask with the mask
4007 telling us which, if any, execution slots actually have
4008 unemitted primitives; this way we make sure that end_primitive
4009 executes only on the paths that have unflushed vertices. */
4010 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
4011
4012 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
4013 total_emitted_vertices_vec,
4014 emitted_vertices_vec,
4015 emitted_prims_vec,
4016 mask_vec(bld_base));
4017
4018 #if DUMP_GS_EMITS
4019 lp_build_print_value(bld->bld_base.base.gallivm,
4020 " +++ end prim masked ones = ",
4021 mask);
4022 lp_build_print_value(bld->bld_base.base.gallivm,
4023 " +++ end prim emitted verts1 = ",
4024 emitted_vertices_vec);
4025 lp_build_print_value(bld->bld_base.base.gallivm,
4026 " +++ end prim emitted prims1 = ",
4027 LLVMBuildLoad(builder,
4028 bld->emitted_prims_vec_ptr, ""));
4029 #endif
4030 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
4031 mask);
4032 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
4033 mask);
4034 #if DUMP_GS_EMITS
4035 lp_build_print_value(bld->bld_base.base.gallivm,
4036 " +++ end prim emitted verts2 = ",
4037 LLVMBuildLoad(builder,
4038 bld->emitted_vertices_vec_ptr, ""));
4039 #endif
4040 }
4041
4042 }
4043
4044 static void
4045 end_primitive(
4046 const struct lp_build_tgsi_action * action,
4047 struct lp_build_tgsi_context * bld_base,
4048 struct lp_build_emit_data * emit_data)
4049 {
4050 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4051
4052 if (bld->gs_iface->end_primitive) {
4053 LLVMValueRef mask = mask_vec(bld_base);
4054 end_primitive_masked(bld_base, mask);
4055 }
4056 }
4057
4058 static void
4059 barrier_emit_tcs(
4060 const struct lp_build_tgsi_action * action,
4061 struct lp_build_tgsi_context * bld_base,
4062 struct lp_build_emit_data * emit_data)
4063 {
4064 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4065
4066 if (bld->tcs_iface->emit_barrier) {
4067 bld->tcs_iface->emit_barrier((struct lp_build_context*)bld_base);
4068 }
4069 }
4070
4071
4072 static void
4073 cal_emit(
4074 const struct lp_build_tgsi_action * action,
4075 struct lp_build_tgsi_context * bld_base,
4076 struct lp_build_emit_data * emit_data)
4077 {
4078 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4079
4080 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
4081 &bld_base->pc);
4082 }
4083
4084 static void
4085 ret_emit(
4086 const struct lp_build_tgsi_action * action,
4087 struct lp_build_tgsi_context * bld_base,
4088 struct lp_build_emit_data * emit_data)
4089 {
4090 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4091
4092 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
4093 }
4094
4095 static void
4096 brk_emit(
4097 const struct lp_build_tgsi_action * action,
4098 struct lp_build_tgsi_context * bld_base,
4099 struct lp_build_emit_data * emit_data)
4100 {
4101 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4102
4103 lp_exec_tgsi_break(&bld->exec_mask, bld_base);
4104 }
4105
4106 static void
4107 if_emit(
4108 const struct lp_build_tgsi_action * action,
4109 struct lp_build_tgsi_context * bld_base,
4110 struct lp_build_emit_data * emit_data)
4111 {
4112 LLVMValueRef tmp;
4113 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4114
4115 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
4116 emit_data->args[0], bld->bld_base.base.zero);
4117 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
4118 }
4119
4120 static void
4121 uif_emit(
4122 const struct lp_build_tgsi_action * action,
4123 struct lp_build_tgsi_context * bld_base,
4124 struct lp_build_emit_data * emit_data)
4125 {
4126 LLVMValueRef tmp;
4127 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4128 struct lp_build_context *uint_bld = &bld_base->uint_bld;
4129
4130 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
4131 emit_data->args[0], uint_bld->zero);
4132 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
4133 }
4134
4135 static void
4136 case_emit(
4137 const struct lp_build_tgsi_action * action,
4138 struct lp_build_tgsi_context * bld_base,
4139 struct lp_build_emit_data * emit_data)
4140 {
4141 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4142
4143 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
4144 }
4145
4146 static void
4147 default_emit(
4148 const struct lp_build_tgsi_action * action,
4149 struct lp_build_tgsi_context * bld_base,
4150 struct lp_build_emit_data * emit_data)
4151 {
4152 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4153
4154 lp_exec_default(&bld->exec_mask, bld_base);
4155 }
4156
4157 static void
4158 switch_emit(
4159 const struct lp_build_tgsi_action * action,
4160 struct lp_build_tgsi_context * bld_base,
4161 struct lp_build_emit_data * emit_data)
4162 {
4163 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4164
4165 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
4166 }
4167
4168 static void
4169 endswitch_emit(
4170 const struct lp_build_tgsi_action * action,
4171 struct lp_build_tgsi_context * bld_base,
4172 struct lp_build_emit_data * emit_data)
4173 {
4174 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4175
4176 lp_exec_endswitch(&bld->exec_mask, bld_base);
4177 }
4178
4179 static void
4180 bgnloop_emit(
4181 const struct lp_build_tgsi_action * action,
4182 struct lp_build_tgsi_context * bld_base,
4183 struct lp_build_emit_data * emit_data)
4184 {
4185 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4186
4187 lp_exec_bgnloop(&bld->exec_mask, true);
4188 }
4189
4190 static void
4191 bgnsub_emit(
4192 const struct lp_build_tgsi_action * action,
4193 struct lp_build_tgsi_context * bld_base,
4194 struct lp_build_emit_data * emit_data)
4195 {
4196 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4197
4198 lp_exec_mask_bgnsub(&bld->exec_mask);
4199 }
4200
4201 static void
4202 else_emit(
4203 const struct lp_build_tgsi_action * action,
4204 struct lp_build_tgsi_context * bld_base,
4205 struct lp_build_emit_data * emit_data)
4206 {
4207 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4208
4209 lp_exec_mask_cond_invert(&bld->exec_mask);
4210 }
4211
4212 static void
4213 endif_emit(
4214 const struct lp_build_tgsi_action * action,
4215 struct lp_build_tgsi_context * bld_base,
4216 struct lp_build_emit_data * emit_data)
4217 {
4218 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4219
4220 lp_exec_mask_cond_pop(&bld->exec_mask);
4221 }
4222
4223 static void
4224 endloop_emit(
4225 const struct lp_build_tgsi_action * action,
4226 struct lp_build_tgsi_context * bld_base,
4227 struct lp_build_emit_data * emit_data)
4228 {
4229 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4230
4231 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
4232 }
4233
4234 static void
4235 endsub_emit(
4236 const struct lp_build_tgsi_action * action,
4237 struct lp_build_tgsi_context * bld_base,
4238 struct lp_build_emit_data * emit_data)
4239 {
4240 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4241
4242 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
4243 }
4244
4245 static void
4246 cont_emit(
4247 const struct lp_build_tgsi_action * action,
4248 struct lp_build_tgsi_context * bld_base,
4249 struct lp_build_emit_data * emit_data)
4250 {
4251 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4252
4253 lp_exec_continue(&bld->exec_mask);
4254 }
4255
4256 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
4257 {
4258 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4259 struct gallivm_state * gallivm = bld_base->base.gallivm;
4260
4261 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
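/*
 * file_max is the highest register index used, so the array needs
 * (file_max + 1) * 4 = file_max * 4 + 4 channel vectors.
 */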
4262 unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
4263 bld->temps_array = lp_build_alloca_undef(gallivm,
4264 LLVMArrayType(bld_base->base.vec_type, array_size),
4265 "temp_array");
4266 }
4267
4268 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
4269 LLVMValueRef array_size =
4270 lp_build_const_int32(gallivm,
4271 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
4272 bld->outputs_array = lp_build_array_alloca(gallivm,
4273 bld_base->base.vec_type, array_size,
4274 "output_array");
4275 }
4276
4277 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
4278 unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
4279 bld->imms_array = lp_build_alloca_undef(gallivm,
4280 LLVMArrayType(bld_base->base.vec_type, array_size),
4281 "imms_array");
4282 }
4283
4284 /* If we have indirect addressing in inputs, we need to copy them into
4285 * our alloca array to be able to index them dynamically. */
4286 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) &&
4287 !bld->gs_iface && !bld->tes_iface && !bld->tcs_iface) {
4288 unsigned index, chan;
4289 LLVMTypeRef vec_type = bld_base->base.vec_type;
4290 LLVMValueRef array_size = lp_build_const_int32(gallivm,
4291 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
4292 bld->inputs_array = lp_build_array_alloca(gallivm,
4293 vec_type, array_size,
4294 "input_array");
4295
4296 assert(bld_base->info->num_inputs
4297 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
4298
4299 for (index = 0; index < bld_base->info->num_inputs; ++index) {
4300 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
4301 LLVMValueRef lindex =
4302 lp_build_const_int32(gallivm, index * 4 + chan);
4303 LLVMValueRef input_ptr =
4304 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
4305 &lindex, 1, "");
4306 LLVMValueRef value = bld->inputs[index][chan];
4307 if (value)
4308 LLVMBuildStore(gallivm->builder, value, input_ptr);
4309 }
4310 }
4311 }
4312
4313 if (bld->gs_iface) {
4314 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
4315 bld->emitted_prims_vec_ptr =
4316 lp_build_alloca(gallivm,
4317 uint_bld->vec_type,
4318 "emitted_prims_ptr");
4319 bld->emitted_vertices_vec_ptr =
4320 lp_build_alloca(gallivm,
4321 uint_bld->vec_type,
4322 "emitted_vertices_ptr");
4323 bld->total_emitted_vertices_vec_ptr =
4324 lp_build_alloca(gallivm,
4325 uint_bld->vec_type,
4326 "total_emitted_vertices_ptr");
4327
4328 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4329 bld->emitted_prims_vec_ptr);
4330 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4331 bld->emitted_vertices_vec_ptr);
4332 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4333 bld->total_emitted_vertices_vec_ptr);
4334 }
4335
4336 if (DEBUG_EXECUTION) {
4337 lp_build_printf(gallivm, "\n");
4338 emit_dump_file(bld, TGSI_FILE_CONSTANT);
4339 if (!bld->gs_iface)
4340 emit_dump_file(bld, TGSI_FILE_INPUT);
4341 }
4342 }
4343
4344 static void emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base)
4345 {
4346 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4347
4348 if (bld->tcs_iface && bld->tcs_iface->emit_prologue) {
4349 bld->tcs_iface->emit_prologue((struct lp_build_context*)bld_base);
4350 }
4351 }
4352
4353 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
4354 {
4355 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4356 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
4357
4358 if (DEBUG_EXECUTION) {
4359 /* for debugging */
4360 if (0) {
4361 emit_dump_file(bld, TGSI_FILE_TEMPORARY);
4362 }
4363 emit_dump_file(bld, TGSI_FILE_OUTPUT);
4364 lp_build_printf(bld_base->base.gallivm, "\n");
4365 }
4366
4367 if (bld->tcs_iface && bld->tcs_iface->emit_epilogue) {
4368 bld->tcs_iface->emit_epilogue((struct lp_build_context*)bld_base);
4369 }
4370
4371 /* If we have indirect addressing in outputs, we need to copy our alloca
4372 * array to the output slots specified by the caller. */
4373 if (bld->gs_iface) {
4374 LLVMValueRef total_emitted_vertices_vec;
4375 LLVMValueRef emitted_prims_vec;
4376 /* Implicit end_primitive, needed in case there are any unflushed
4377 vertices in the cache. Note we must not call end_primitive here
4378 since the exec_mask is not valid at this point. */
4379 end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
4380
4381 total_emitted_vertices_vec =
4382 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
4383 emitted_prims_vec =
4384 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
4385
4386 bld->gs_iface->gs_epilogue(bld->gs_iface,
4387 total_emitted_vertices_vec,
4388 emitted_prims_vec, 0);
4389 } else {
4390 gather_outputs(bld);
4391 }
4392 }
4393
4394 void
4395 lp_build_tgsi_soa(struct gallivm_state *gallivm,
4396 const struct tgsi_token *tokens,
4397 const struct lp_build_tgsi_params *params,
4398 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
4399 {
4400 struct lp_build_tgsi_soa_context bld;
4401 struct lp_type type = params->type;
4402 struct lp_type res_type;
4403
4404 assert(type.length <= LP_MAX_VECTOR_LENGTH);
4405 memset(&res_type, 0, sizeof res_type);
4406 res_type.width = type.width;
4407 res_type.length = type.length;
4408 res_type.sign = 1;
4409
4410 /* Setup build context */
4411 memset(&bld, 0, sizeof bld);
4412 lp_build_context_init(&bld.bld_base.base, gallivm, type);
4413 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
4414 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
4415 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
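/*
 * The 64-bit build contexts below keep the vector length of the base
 * type but double the element width; they back the double and 64-bit
 * integer opcodes.
 */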
4416 {
4417 struct lp_type dbl_type;
4418 dbl_type = type;
4419 dbl_type.width *= 2;
4420 lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
4421 }
4422 {
4423 struct lp_type uint64_type;
4424 uint64_type = lp_uint_type(type);
4425 uint64_type.width *= 2;
4426 lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
4427 }
4428 {
4429 struct lp_type int64_type;
4430 int64_type = lp_int_type(type);
4431 int64_type.width *= 2;
4432 lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
4433 }
4434 bld.mask = params->mask;
4435 bld.inputs = params->inputs;
4436 bld.outputs = outputs;
4437 bld.consts_ptr = params->consts_ptr;
4438 bld.const_sizes_ptr = params->const_sizes_ptr;
4439 bld.ssbo_ptr = params->ssbo_ptr;
4440 bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
4441 bld.sampler = params->sampler;
4442 bld.bld_base.info = params->info;
4443 bld.indirect_files = params->info->indirect_files;
4444 bld.context_ptr = params->context_ptr;
4445 bld.thread_data_ptr = params->thread_data_ptr;
4446 bld.image = params->image;
4447 bld.shared_ptr = params->shared_ptr;
4448 bld.coro = params->coro;
4449
4450 /*
4451 * If the number of temporaries is rather large then we just
4452 * allocate them as an array right from the start and treat
4453 * them like indirect temporaries.
4454 */
4455 if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
4456 bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
4457 }
4458 /*
4459 * For performance reasons immediates are normally backed by a static
4460 * array, but if there are too many of them we have to fall back to
4461 * a dynamically allocated array.
4462 */
4463 bld.use_immediates_array =
4464 (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
4465 if (bld.use_immediates_array) {
4466 bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
4467 }
4468
4469
4470 bld.bld_base.soa = TRUE;
4471 bld.bld_base.emit_debug = emit_debug;
4472 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
4473 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
4474 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
4475 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
4476 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
4477
4478 bld.bld_base.emit_store = emit_store;
4479 bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_output;
4480 bld.bld_base.emit_store_reg_funcs[TGSI_FILE_TEMPORARY] = emit_store_temp;
4481 bld.bld_base.emit_store_reg_funcs[TGSI_FILE_ADDRESS] = emit_store_address;
4482
4483 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
4484 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
4485
4486 bld.bld_base.emit_prologue = emit_prologue;
4487 bld.bld_base.emit_prologue_post_decl = emit_prologue_post_decl;
4488 bld.bld_base.emit_epilogue = emit_epilogue;
4489
4490 /* Set opcode actions */
4491 lp_set_default_actions_cpu(&bld.bld_base);
4492
4493 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
4494 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
4495 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
4496 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
4497 bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
4498 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
4499 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
4500 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
4501 bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
4502 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
4503 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
4504 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
4505 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
4506 bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
4507 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
4508 bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
4509 bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
4510 bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
4511 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
4512 bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
4513 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
4514 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
4515 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
4516 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
4517 bld.bld_base.op_actions[TGSI_OPCODE_TEX_LZ].emit = txl_emit;
4518 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
4519 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
4520 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
4521 bld.bld_base.op_actions[TGSI_OPCODE_TXF_LZ].emit = txf_emit;
4522 bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
4523 bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
4524 bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
4525 bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
4526 bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
4527 /* DX10 sampling ops */
4528 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
4529 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
4530 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
4531 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
4532 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
4533 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
4534 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
4535 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
4536 bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
4537 bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
4538 bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
4539
4540 bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
4541 bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
4542 bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
4543
4544 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
4545 bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
4546 bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
4547 bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
4548 bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
4549 bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
4550 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
4551 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
4552 bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
4553 bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;
4554
4555 bld.bld_base.op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
4556 bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit;
4557
4558 if (params->gs_iface) {
4559 /* There's no specific value for this because it should always
4560 * be set, but apps using ext_geometry_shader4 quite often
4561 * forgot to set it, so we fall back to MAX_VERTEX_VARYING from
4562 * that spec. We could debug_assert if it's not set instead,
4563 * but that's a lot uglier. */
4564 uint max_output_vertices;
4565
4566 /* inputs are always indirect with gs */
4567 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4568 bld.gs_iface = params->gs_iface;
4569 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
4570 bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
4571 bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
4572
4573 max_output_vertices =
4574 params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
4575 if (!max_output_vertices)
4576 max_output_vertices = 32;
4577
4578 bld.max_output_vertices_vec =
4579 lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
4580 max_output_vertices);
4581 }
4582
4583 if (params->tes_iface) {
4584 /* inputs are always indirect with tes */
4585 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4586 bld.tes_iface = params->tes_iface;
4587 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tes_input;
4588 }
4589
4590 if (params->tcs_iface) {
4591 bld.tcs_iface = params->tcs_iface;
4592 /* outputs and inputs are always indirect with tcs */
4593 bld.indirect_files |= (1 << TGSI_FILE_OUTPUT);
4594 bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_tcs_output;
4595 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4596 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tcs_input;
4597 bld.bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_tcs_input;
4598 bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit_tcs;
4599 }
4600
4601 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
4602
4603 bld.system_values = *params->system_values;
4604
4605 lp_build_tgsi_llvm(&bld.bld_base, tokens);
4606
4607 if (0) {
4608 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
4609 LLVMValueRef function = LLVMGetBasicBlockParent(block);
4610 debug_printf("11111111111111111111111111111 \n");
4611 tgsi_dump(tokens, 0);
4612 lp_debug_dump_value(function);
4613 debug_printf("2222222222222222222222222222 \n");
4614 }
4615
4616 if (0) {
4617 LLVMModuleRef module = LLVMGetGlobalParent(
4618 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
4619 LLVMDumpModule(module);
4620
4621 }
4622 lp_exec_mask_fini(&bld.exec_mask);
4623 }