gallivm: split out the flow control ir to a common file.
[mesa.git] src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * Copyright 2007-2008 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * TGSI to LLVM IR translation -- SoA.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 *
 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
 * Brian Paul, and others.
 */

#include "pipe/p_config.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_strings.h"
#include "lp_bld_tgsi_action.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_gather.h"
#include "lp_bld_init.h"
#include "lp_bld_logic.h"
#include "lp_bld_misc.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_coro.h"
#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "lp_bld_sample.h"
#include "lp_bld_struct.h"

#define DUMP_GS_EMITS 0

/*
 * If non-zero, the generated LLVM IR will print intermediate results on
 * every TGSI instruction.
 *
 * TODO:
 * - take execution masks into consideration
 * - debug control-flow instructions
 */
#define DEBUG_EXECUTION 0


/*
 * Emit code to print a register value.
 */
static void
emit_dump_reg(struct gallivm_state *gallivm,
              unsigned file,
              unsigned index,
              unsigned chan,
              LLVMValueRef value)
{
   char buf[32];

   snprintf(buf, sizeof buf, " %s[%u].%c = ",
            tgsi_file_name(file),
            index, "xyzw"[chan]);

   lp_build_print_value(gallivm, buf, value);
}

static inline struct function_ctx *
func_ctx(struct lp_exec_mask *mask)
{
   assert(mask->function_stack_size > 0);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
   return &mask->function_stack[mask->function_stack_size - 1];
}

/*
 * Combine the execution mask, if there is one, with the current mask.
 */
static LLVMValueRef
mask_vec(struct lp_build_tgsi_context *bld_base)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_exec_mask *exec_mask = &bld->exec_mask;
   LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
   if (!exec_mask->has_mask) {
      return bld_mask;
   }
   if (!bld_mask)
      return exec_mask->exec_mask;
   return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
                       exec_mask->exec_mask, "");
}
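
/*
 * Illustrative sketch, assuming a 4-wide vector of 32-bit masks:
 * combining the outer (fragment kill) mask with the control-flow
 * execution mask is a per-channel bitwise AND:
 *
 *    bld_mask  = { ~0,  0, ~0, ~0 }   channels still alive
 *    exec_mask = { ~0, ~0,  0, ~0 }   channels inside the taken branch
 *    combined  = { ~0,  0,  0, ~0 }   channels that actually execute
 */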

static void lp_exec_tgsi_break(struct lp_exec_mask *mask,
                               struct lp_build_tgsi_context * bld_base)
{
   enum tgsi_opcode opcode =
      bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
   bool break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                        opcode == TGSI_OPCODE_CASE);
   lp_exec_break(mask, &bld_base->pc, break_always);
}
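
/*
 * For example, in a hypothetical TGSI snippet such as
 *
 *    SWITCH TEMP[0].xxxx
 *    CASE IMM[0].xxxx
 *      MOV TEMP[1], IMM[1]
 *      BRK
 *    ENDSWITCH
 *
 * the BRK is immediately followed by ENDSWITCH, so every channel that
 * reaches it breaks and break_always is true.
 */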

static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size++;
      return;
   }

   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size++;

   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}

static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size--;
      return;
   }

   /* Check if there is a deferred default; if so, do it now. */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * Re-purpose switch_pc to point to here again, since we stop execution
       * of the deferred default after the next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   }
   else if (ctx->switch_pc && ctx->switch_in_default) {
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}

static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   LLVMValueRef casemask, prevmask;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /* Skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (!ctx->switch_in_default) {
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
                                             ctx->switch_mask_default, "sw_default_mask");
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}

/*
 * Analyse the default statement in a switch.
 * \return true if default is the last statement, false otherwise
 * \param default_pc_start contains the pc of the instruction to jump to
 *                         if default wasn't last but there's no
 *                         fallthrough into default.
 */
static boolean default_analyse_is_last(struct lp_exec_mask *mask,
                                       struct lp_build_tgsi_context * bld_base,
                                       int *default_pc_start)
{
   unsigned pc = bld_base->pc;
   struct function_ctx *ctx = func_ctx(mask);
   int curr_switch_stack = ctx->switch_stack_size;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return false;
   }

   /* Skip over case statements which appear together with default. */
   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
      pc++;
   }

   while (pc != ~0u && pc < bld_base->num_instructions) {
      enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
      switch (opcode) {
      case TGSI_OPCODE_CASE:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return false;
         }
         break;
      case TGSI_OPCODE_SWITCH:
         curr_switch_stack++;
         break;
      case TGSI_OPCODE_ENDSWITCH:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return true;
         }
         curr_switch_stack--;
         break;
      default:
         ; /* nothing */
      }
      pc++;
   }
   /* Should never arrive here. */
   assert(0);
   return true;
}
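
/*
 * Worked example on a hypothetical instruction layout: if DEFAULT sits at
 * pc 3 and the next CASE at the same nesting level sits at pc 5,
 *
 *    3: DEFAULT
 *    4: ...default body...
 *    5: CASE
 *
 * the scan above stops at pc 5, stores 4 (pc - 1) in *default_pc_start
 * and returns false; only a DEFAULT followed (at the same level) by
 * ENDSWITCH returns true.
 */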

static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc;
   boolean default_is_last;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not always be at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is the last statement in the switch (note that case statements
    * appearing "at the same time" as default don't change that), everything
    * is just fine: update the switch mask and go on. This means we can
    * handle default with fallthrough INTO it without overhead, if it is last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, a "case" immediately before default isn't really a
       * fallthrough, however we still have to count it as such, as we
       * have already updated the masks.
       * If that happens in practice, we could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do switch analysis at switch start time instead).
       */
      enum tgsi_opcode opcode =
         bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not the last statement and there was no fallthrough into it,
       * we record the PC and continue execution at the next case (again, cases
       * encountered at the same time don't count). At endswitch time, we
       * update the switch mask and go back to execute the code we skipped
       * until the next break (possibly re-executing some code with a changed
       * mask if there was a fallthrough out of default).
       * Finally, if it is not the last statement and there was a fallthrough
       * into it, do the same as in the former case, except instead of skipping
       * the code just execute it without updating the mask, then go back and
       * re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         bld_base->pc = default_exec_pc;
      }
   }
}
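
/*
 * Sketch of the deferred-default flow described above, on a hypothetical
 * switch "CASE A ... BRK, DEFAULT, X, ENDSWITCH": since the BRK precedes
 * DEFAULT there is no fallthrough into it, so X is skipped on the first
 * pass (switch_pc records where DEFAULT was), and lp_exec_endswitch()
 * later jumps back to execute X with the default mask until the next
 * break.
 */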


static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
      return;
   }

   lp_exec_mask_function_init(mask, mask->function_stack_size);
   mask->function_stack[mask->function_stack_size].pc = *pc;
   mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
   mask->function_stack_size++;
   *pc = func;
}

static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef exec_mask;

   if (ctx->cond_stack_size == 0 &&
       ctx->loop_stack_size == 0 &&
       ctx->switch_stack_size == 0 &&
       mask->function_stack_size == 1) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->function_stack_size == 1) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in an if clause after the endif).
       */
      mask->ret_in_main = TRUE;
   }

   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}

static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}

static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   struct function_ctx *ctx;

   assert(mask->function_stack_size > 1);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);

   ctx = func_ctx(mask);
   mask->function_stack_size--;

   *pc = ctx->pc;
   mask->ret_mask = ctx->ret_mask;

   lp_exec_mask_update(mask);
}


static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned file,
             int index,
             unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
   LLVMValueRef var_of_array;

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      array_of_vars = bld->temps;
      var_of_array = bld->temps_array;
      break;
   case TGSI_FILE_OUTPUT:
      array_of_vars = bld->outputs;
      var_of_array = bld->outputs_array;
      break;
   default:
      assert(0);
      return NULL;
   }

   assert(chan < 4);

   if (bld->indirect_files & (1 << file)) {
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
      if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
         LLVMValueRef gep[2];
         gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
         gep[1] = lindex;
         return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
      } else {
         return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
      }
   }
   else {
      assert(index <= bld->bld_base.info->file_max[file]);
      return array_of_vars[index][chan];
   }
}


/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which temporary register
 * \param chan  which channel of the temp register.
 */
LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
                    unsigned index,
                    unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
}

/**
 * Return pointer to an output register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which output register
 * \param chan  which channel of the output register.
 */
LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
                  unsigned index,
                  unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
}

/*
 * If we have indirect addressing in outputs, copy our alloca array
 * to the output slots specified by the caller to make sure
 * our outputs are delivered consistently via the same interface.
 */
static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)
{
   if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
      unsigned index, chan;
      assert(bld->bld_base.info->num_outputs <=
             bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
      for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
         }
      }
   }
}

/**
 * Gather vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_context *bld_base,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld = &bld_base->base;
   LLVMValueRef res;
   unsigned i;

   if (indexes2)
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
   else
      res = bld->undef;
   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (indexes2)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
      LLVMValueRef si, di;
      LLVMValueRef index;
      LLVMValueRef scalar_ptr, scalar;

      di = lp_build_const_int32(bld->gallivm, i);
      if (indexes2)
         si = lp_build_const_int32(bld->gallivm, i >> 1);
      else
         si = di;

      if (indexes2 && (i & 1)) {
         index = LLVMBuildExtractElement(builder,
                                         indexes2, si, "");
      } else {
         index = LLVMBuildExtractElement(builder,
                                         indexes, si, "");
      }
      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   }

   if (overflow_mask) {
      if (indexes2) {
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                       bld_base->dbl_bld.int_vec_type, "");
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
                               bld_base->dbl_bld.zero, res);
      } else
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}
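
/*
 * Worked example, assuming a 4-wide gather with indexes = {3, 7, 1, 9}
 * and a buffer of num_consts = 8 elements: overflow_mask comes out as
 * {0, 0, 0, ~0}, the overflowing index is first replaced by 0 so the
 * load stays in bounds, and the loaded lane is zeroed afterwards:
 *
 *    loaded = { buf[3], buf[7], buf[1], buf[0] }
 *    res    = { buf[3], buf[7], buf[1], 0.0    }
 */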


/**
 * Scatter/store vector.
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;
   LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}


/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_ind_register *indirect_reg,
                   int index_limit)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->Swizzle;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   switch (indirect_reg->File) {
   case TGSI_FILE_ADDRESS:
      rel = LLVMBuildLoad(builder,
                          bld->addr[indirect_reg->Index][swizzle],
                          "load addr reg");
      /* ADDR LLVM values already have LLVM integer type. */
      break;
   case TGSI_FILE_TEMPORARY:
      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
      rel = LLVMBuildLoad(builder, rel, "load temp reg");
      /* TEMP LLVM values always have LLVM float type, but for indirection, the
       * value actually stored is expected to be an integer. */
      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
      break;
   default:
      assert(0);
      rel = uint_bld->zero;
   }

   index = lp_build_add(uint_bld, base, rel);

   /*
    * emit_fetch_constant handles constant buffer overflow so this code
    * is pointless for them.
    * Furthermore the D3D10 spec in section 6.5 says:
    * If the constant buffer bound to a slot is larger than the size
    * declared in the shader for that slot, implementations are allowed
    * to return incorrect data (not necessarily 0) for indices that are
    * larger than the declared size but smaller than the buffer size.
    */
   if (reg_file != TGSI_FILE_CONSTANT) {
      assert(index_limit >= 0);
      max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                         uint_bld->type, index_limit);

      assert(!uint_bld->type.sign);
      index = lp_build_min(uint_bld, index, max_index);
   }

   return index;
}
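
/*
 * For example, assuming reg_index = 2, ADDR.x = {1, 5, 0, 3} and
 * index_limit = 4 (a non-constant file): the sum is {3, 7, 2, 5} and the
 * clamp yields {3, 4, 2, 4}, so out-of-range channels read the last
 * valid register instead of running past the file.
 */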

static struct lp_build_context *
stype_to_fetch(struct lp_build_tgsi_context * bld_base,
               enum tgsi_opcode_type stype)
{
   struct lp_build_context *bld_fetch;

   switch (stype) {
   case TGSI_TYPE_FLOAT:
   case TGSI_TYPE_UNTYPED:
      bld_fetch = &bld_base->base;
      break;
   case TGSI_TYPE_UNSIGNED:
      bld_fetch = &bld_base->uint_bld;
      break;
   case TGSI_TYPE_SIGNED:
      bld_fetch = &bld_base->int_bld;
      break;
   case TGSI_TYPE_DOUBLE:
      bld_fetch = &bld_base->dbl_bld;
      break;
   case TGSI_TYPE_UNSIGNED64:
      bld_fetch = &bld_base->uint64_bld;
      break;
   case TGSI_TYPE_SIGNED64:
      bld_fetch = &bld_base->int64_bld;
      break;
   case TGSI_TYPE_VOID:
   default:
      assert(0);
      bld_fetch = NULL;
      break;
   }
   return bld_fetch;
}

static LLVMValueRef
get_soa_array_offsets(struct lp_build_context *uint_bld,
                      LLVMValueRef indirect_index,
                      unsigned chan_index,
                      boolean need_perelement_offset)
{
   struct gallivm_state *gallivm = uint_bld->gallivm;
   LLVMValueRef chan_vec =
      lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
   LLVMValueRef length_vec =
      lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
   LLVMValueRef index_vec;

   /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
   index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
   index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
   index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

   if (need_perelement_offset) {
      LLVMValueRef pixel_offsets;
      unsigned i;
      /* build pixel offset vector: {0, 1, 2, 3, ...} */
      pixel_offsets = uint_bld->undef;
      for (i = 0; i < uint_bld->type.length; i++) {
         LLVMValueRef ii = lp_build_const_int32(gallivm, i);
         pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
                                                ii, ii, "");
      }
      index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
   }
   return index_vec;
}
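
/*
 * Worked example, assuming a 4-wide build, indirect_index = {1, 1, 2, 2}
 * and chan_index = 2 (the z channel):
 *
 *    (index * 4 + 2) * 4   = {24, 24, 40, 40}
 *    + {0, 1, 2, 3}        = {24, 25, 42, 43}
 *
 * i.e. each channel of each register occupies 'length' consecutive
 * floats and pixel i reads slot i within that group.
 */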

static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0u);

   if (reg->Register.Dimension) {
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */
      LLVMValueRef overflow_mask;
      LLVMValueRef index_vec2 = NULL;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                       indirect_index, num_consts);

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef swizzle_vec2;
         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
      }
      /* Gather values from the constant buffer */
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
   }
   else {
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;
      struct lp_build_context *bld_broad = &bld_base->base;
      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");

      if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {

         LLVMValueRef scalar2, scalar2_ptr;
         LLVMValueRef shuffles[2];
         index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));

         scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
                                    &index, 1, "");

         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
         scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
         shuffles[0] = lp_build_const_int32(gallivm, 0);
         shuffles[1] = lp_build_const_int32(gallivm, 1);

         res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
         res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
         res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
      } else {
         if (stype == TGSI_TYPE_DOUBLE) {
            LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
            bld_broad = &bld_base->dbl_bld;
         } else if (stype == TGSI_TYPE_UNSIGNED64) {
            LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
            bld_broad = &bld_base->uint64_bld;
         } else if (stype == TGSI_TYPE_SIGNED64) {
            LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
            bld_broad = &bld_base->int64_bld;
         }
         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
         res = lp_build_broadcast_scalar(bld_broad, scalar);
      }
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
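
/*
 * E.g. a direct (non-indirect) fetch of CONST[3].y simply loads the
 * scalar at consts[3 * 4 + 1] and broadcasts it to all channels, so a
 * 4-wide fetch yields { c, c, c, c }; only the indirect path needs the
 * gather above.
 */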

/**
 * Fetch 64-bit values from two separate channels.
 * 64-bit values are stored split across two channels, like xy and zw.
 * This function creates a set of vec_length*2 floats,
 * extracts the values from the two channels,
 * puts them in the correct place, then casts to a vec_length vector
 * of 64-bit values.
 */
static LLVMValueRef
emit_fetch_64bit(
   struct lp_build_tgsi_context * bld_base,
   enum tgsi_opcode_type stype,
   LLVMValueRef input,
   LLVMValueRef input2)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->base.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
   }
   res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");

   return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
}
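
/*
 * E.g. for a 4-wide build the shuffle mask is {0, 4, 1, 5, 2, 6, 3, 7},
 * which interleaves the low and high halves:
 *
 *    input  = { x0, x1, x2, x3 }   low 32 bits
 *    input2 = { y0, y1, y2, y3 }   high 32 bits
 *    res    = { x0, y0, x1, y1, x2, y2, x3, y3 }
 *
 * and the final bitcast reads that as four 64-bit values {d0, d1, d2, d3}.
 */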

static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;
   unsigned swizzle = swizzle_in & 0xffff;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec;  /* index into the immediate register array */
         LLVMValueRef index_vec2 = NULL;
         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect,
                                             bld->bld_base.info->file_max[reg->Register.File]);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);
         if (tgsi_type_is_64bit(stype))
            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                               indirect_index,
                                               swizzle_in >> 16,
                                               FALSE);
         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
      } else {
         LLVMValueRef gep[2];
         gep[0] = lp_build_const_int32(gallivm, 0);
         gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
                                              bld->imms_array, gep, 2, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");

         if (tgsi_type_is_64bit(stype)) {
            LLVMValueRef imms_ptr2;
            LLVMValueRef res2;
            gep[1] = lp_build_const_int32(gallivm,
                                          reg->Register.Index * 4 + (swizzle_in >> 16));
            imms_ptr2 = LLVMBuildGEP(builder,
                                     bld->imms_array, gep, 2, "");
            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
   }
   else {
      res = bld->immediates[reg->Register.Index][swizzle];
      if (tgsi_type_is_64bit(stype))
         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}

static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec;  /* index into the input reg array */
      LLVMValueRef index_vec2 = NULL;
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle_in >> 16,
                                            TRUE);
      }
      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                    reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");

         res = LLVMBuildLoad(builder, input_ptr, "");
         if (tgsi_type_is_64bit(stype)) {
            LLVMValueRef lindex1;
            LLVMValueRef input_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + (swizzle_in >> 16));
            input_ptr2 = LLVMBuildGEP(builder,
                                      bld->inputs_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, input_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
      else {
         res = bld->inputs[reg->Register.Index][swizzle];
         if (tgsi_type_is_64bit(stype))
            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
      }
   }

   assert(res);

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}


static LLVMValueRef
emit_fetch_gs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   unsigned swizzle = swizzle_in & 0xffff;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value, not a regular input. */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      /*
       * XXX: this is possibly not quite the right value, since file_max may be
       * larger than the max attrib index, due to it being the max of declared
       * inputs AND the max vertices per prim (which is 6 for tri adj).
       * It should however be safe to use (since we always allocate
       * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
       */
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      /*
       * A fixed 6 should do as well (which is what we allocate).
       */
      int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        index_limit);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   res = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                    reg->Dimension.Indirect,
                                    vertex_index,
                                    reg->Register.Indirect,
                                    attrib_index,
                                    swizzle_index);

   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
      LLVMValueRef res2;
      res2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                        reg->Dimension.Indirect,
                                        vertex_index,
                                        reg->Register.Indirect,
                                        attrib_index,
                                        swizzle_index);
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}

static LLVMValueRef
emit_fetch_temporary(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
      LLVMValueRef temps_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle_in >> 16,
                                            TRUE);
      }

      /* cast temps_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

      /* Gather values from the temporary register array */
      res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
   }
   else {
      LLVMValueRef temp_ptr;
      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
      res = LLVMBuildLoad(builder, temp_ptr, "");

      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef temp_ptr2, res2;

         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
         res2 = LLVMBuildLoad(builder, temp_ptr2, "");
         res = emit_fetch_64bit(bld_base, stype, res, res2);
      }
   }

   if (stype == TGSI_TYPE_SIGNED ||
       stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE ||
       stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}

static LLVMValueRef
emit_fetch_system_value(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   enum tgsi_opcode_type atype;  /* actual type of the value */
   unsigned swizzle = swizzle_in & 0xffff;

   assert(!reg->Register.Indirect);

   switch (info->system_value_semantic_name[reg->Register.Index]) {
   case TGSI_SEMANTIC_INSTANCEID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID:
      res = bld->system_values.vertex_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID_NOBASE:
      res = bld->system_values.vertex_id_nobase;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BASEVERTEX:
      res = bld->system_values.basevertex;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_PRIMID:
      res = bld->system_values.prim_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_INVOCATIONID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_HELPER_INVOCATION:
      res = LLVMBuildNot(gallivm->builder, lp_build_mask_value(bld->mask), "");
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_THREAD_ID:
      res = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, swizzle, "");
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BLOCK_ID:
      res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type,
                                       bld->system_values.block_id, lp_build_const_int32(gallivm, swizzle));
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_GRID_SIZE:
      res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type,
                                       bld->system_values.grid_size, lp_build_const_int32(gallivm, swizzle));
      atype = TGSI_TYPE_UNSIGNED;
      break;

   default:
      assert(!"unexpected semantic in emit_fetch_system_value");
      res = bld_base->base.zero;
      atype = TGSI_TYPE_FLOAT;
      break;
   }

   if (atype != stype) {
      if (stype == TGSI_TYPE_FLOAT) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      } else if (stype == TGSI_TYPE_UNSIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
      } else if (stype == TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
      }
   }

   return res;
}

/**
 * Register fetch with derivatives.
 */
static void
emit_fetch_deriv(
   struct lp_build_tgsi_soa_context *bld,
   LLVMValueRef src,
   LLVMValueRef *res,
   LLVMValueRef *ddx,
   LLVMValueRef *ddy)
{
   if (res)
      *res = src;

   /* TODO: use interpolation coeffs for inputs */

   if (ddx)
      *ddx = lp_build_ddx(&bld->bld_base.base, src);

   if (ddy)
      *ddy = lp_build_ddy(&bld->bld_base.base, src);
}

/**
 * Store an array of vec_length 64-bit values into two arrays of vec_length
 * floats, i.e.
 * value is d0, d1, d2, d3 etc.
 * Each 64-bit value has high and low pieces x, y,
 * so they get stored into the separate channels as:
 * chan_ptr  = d0.x, d1.x, d2.x, d3.x
 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
 */
static void
emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
                      LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
                      LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *float_bld = &bld_base->base;
   unsigned i;
   LLVMValueRef temp, temp2;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];

   for (i = 0; i < bld_base->base.type.length; i++) {
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
   }

   temp = LLVMBuildShuffleVector(builder, value,
                                 LLVMGetUndef(LLVMTypeOf(value)),
                                 LLVMConstVector(shuffles,
                                                 bld_base->base.type.length),
                                 "");
   temp2 = LLVMBuildShuffleVector(builder, value,
                                  LLVMGetUndef(LLVMTypeOf(value)),
                                  LLVMConstVector(shuffles2,
                                                  bld_base->base.type.length),
                                  "");

   lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
   lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
}
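
/*
 * This is the inverse of the emit_fetch_64bit() interleave; e.g. for a
 * 4-wide build:
 *
 *    value     = { x0, y0, x1, y1, x2, y2, x3, y3 }
 *    shuffles  = {0, 2, 4, 6}  ->  temp  = { x0, x1, x2, x3 }
 *    shuffles2 = {1, 3, 5, 7}  ->  temp2 = { y0, y1, y2, y3 }
 */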

/**
 * Register store.
 */
static void
emit_store_chan(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   unsigned chan_index,
   LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   struct lp_build_context *float_bld = &bld_base->base;
   struct lp_build_context *int_bld = &bld_base->int_bld;
   LLVMValueRef indirect_index = NULL;
   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);

   /*
    * Apply saturation.
    *
    * It is always assumed to be float.
    */
   if (inst->Instruction.Saturate) {
      assert(dtype == TGSI_TYPE_FLOAT ||
             dtype == TGSI_TYPE_UNTYPED);
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      value = lp_build_clamp_zero_one_nanzero(float_bld, value);
   }

   if (reg->Register.Indirect) {
      /*
       * Currently mesa/st doesn't generate indirect stores
       * to 64-bit values; it normally uses MOV to do indirect stores.
       */
      assert(!tgsi_type_is_64bit(dtype));
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);
   } else {
      assert(reg->Register.Index <=
             bld_base->info->file_max[reg->Register.File]);
   }

   if (DEBUG_EXECUTION) {
      emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
   }

   switch (reg->Register.File) {
   case TGSI_FILE_OUTPUT:
      /* Outputs are always stored as floats */
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef index_vec;  /* indexes into the output registers */
         LLVMValueRef outputs_array;
         LLVMTypeRef fptr_type;

         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           chan_index,
                                           TRUE);

         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");

         /* Scatter store values into output registers */
         emit_mask_scatter(bld, outputs_array, index_vec, value,
                           &bld->exec_mask);
      }
      else {
         LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
                                                  chan_index);

         if (tgsi_type_is_64bit(dtype)) {
            LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
                                                      chan_index + 1);
            emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
                                  value);
         } else
            lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
      }
      break;

   case TGSI_FILE_TEMPORARY:
      /* Temporaries are always stored as floats */
      if (!tgsi_type_is_64bit(dtype))
         value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      else
         value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");

      if (reg->Register.Indirect) {
         LLVMValueRef index_vec;  /* indexes into the temp registers */
         LLVMValueRef temps_array;
         LLVMTypeRef fptr_type;

         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           chan_index,
                                           TRUE);

         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

         /* Scatter store values into temp registers */
         emit_mask_scatter(bld, temps_array, index_vec, value,
                           &bld->exec_mask);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);

         if (tgsi_type_is_64bit(dtype)) {
            LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
                                                         reg->Register.Index,
                                                         chan_index + 1);
            emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
                                  value);
         }
         else
            lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
      }
      break;

   case TGSI_FILE_ADDRESS:
      assert(dtype == TGSI_TYPE_SIGNED);
      assert(LLVMTypeOf(value) == int_bld->vec_type);
      value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, int_bld, value,
                         bld->addr[reg->Register.Index][chan_index]);
      break;

   default:
      assert(0);
   }

   (void)dtype;
}

/*
 * Called at the beginning of the translation of each TGSI instruction, to
 * emit some debug code.
 */
static void
emit_debug(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_instruction * inst,
   const struct tgsi_opcode_info * info)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (DEBUG_EXECUTION) {
      /*
       * Dump the TGSI instruction.
       */

      struct gallivm_state *gallivm = bld_base->base.gallivm;
      char buf[512];
      buf[0] = '$';
      buf[1] = ' ';
      tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
      lp_build_printf(gallivm, buf);

      /* Dump the execution mask. */
      if (bld->exec_mask.has_mask) {
         lp_build_print_value(gallivm, "  mask = ", bld->exec_mask.exec_mask);
      }
   }
}

static void
emit_store(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_instruction * inst,
   const struct tgsi_opcode_info * info,
   unsigned index,
   LLVMValueRef dst[4])
{
   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);

   unsigned writemask = inst->Dst[index].Register.WriteMask;
   while (writemask) {
      unsigned chan_index = u_bit_scan(&writemask);
      if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
         continue;
      emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
   }
}
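
/*
 * For 64-bit destination types each value spans two channels, so e.g. a
 * writemask of xyzw results in stores only for chan 0 (covering xy) and
 * chan 2 (covering zw); channels 1 and 3 are skipped above.
 */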

static unsigned
tgsi_to_pipe_tex_target(unsigned tgsi_target)
{
   switch (tgsi_target) {
   case TGSI_TEXTURE_BUFFER:
      return PIPE_BUFFER;
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_SHADOW1D:
      return PIPE_TEXTURE_1D;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_2D_MSAA:
      return PIPE_TEXTURE_2D;
   case TGSI_TEXTURE_3D:
      return PIPE_TEXTURE_3D;
   case TGSI_TEXTURE_CUBE:
   case TGSI_TEXTURE_SHADOWCUBE:
      return PIPE_TEXTURE_CUBE;
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_SHADOWRECT:
      return PIPE_TEXTURE_RECT;
   case TGSI_TEXTURE_1D_ARRAY:
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      return PIPE_TEXTURE_1D_ARRAY;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      return PIPE_TEXTURE_2D_ARRAY;
   case TGSI_TEXTURE_CUBE_ARRAY:
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      return PIPE_TEXTURE_CUBE_ARRAY;
   default:
      assert(0);
      return PIPE_BUFFER;
   }
}
1657
1658
1659 static enum lp_sampler_lod_property
1660 lp_build_lod_property(
1661 struct lp_build_tgsi_context *bld_base,
1662 const struct tgsi_full_instruction *inst,
1663 unsigned src_op)
1664 {
1665 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
1666 enum lp_sampler_lod_property lod_property;
1667
   /*
    * Not much we can do here. We could try catching inputs declared
    * with constant interpolation, but it's probably not worth it - for
    * TEX opcodes as well as FETCH/LD the lod comes from the same reg
    * as the coords, so it could only help SAMPLE/TXQ/SVIEWINFO, just
    * like the constant/immediate recognition below.
    * What would be of more value is recognizing temps holding
    * broadcast scalars, but there's no way we can do that.
    * Tried asking llvm, without any success (using LLVMIsConstant,
    * even though that isn't exactly what we'd need): even something as
    * simple as
    * IMM[0] UINT32 (0,-1,0,0)
    * MOV TEMP[0] IMM[0].yyyy
    * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
    * doesn't work.
    * This means there's zero chance this will ever catch a scalar lod
    * with traditional tex opcodes or texel fetches, since their lod
    * comes from the same reg as the coords (except maybe for some test
    * shaders using constant coords).
    * There's at least hope for sample opcodes and size queries.
    */
1688 if (reg->Register.File == TGSI_FILE_CONSTANT ||
1689 reg->Register.File == TGSI_FILE_IMMEDIATE) {
1690 lod_property = LP_SAMPLER_LOD_SCALAR;
1691 }
1692 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
1693 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
1694 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1695 }
1696 else {
1697 lod_property = LP_SAMPLER_LOD_PER_QUAD;
1698 }
1699 }
1700 else {
      /* Never use scalar (per-quad) lod; the results are just too wrong. */
1702 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1703 }
1704 return lod_property;
1705 }
1706
1707
1708 /**
1709 * High-level instruction translators.
1710 */
1711
1712 static void
1713 emit_tex( struct lp_build_tgsi_soa_context *bld,
1714 const struct tgsi_full_instruction *inst,
1715 enum lp_build_tex_modifier modifier,
1716 LLVMValueRef *texel,
1717 unsigned sampler_reg,
1718 enum lp_sampler_op_type sampler_op)
1719 {
1720 unsigned unit = inst->Src[sampler_reg].Register.Index;
1721 LLVMValueRef oow = NULL;
1722 LLVMValueRef lod = NULL;
1723 LLVMValueRef coords[5];
1724 LLVMValueRef offsets[3] = { NULL };
1725 struct lp_derivatives derivs;
1726 struct lp_sampler_params params;
1727 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
1728 unsigned num_derivs, num_offsets, i;
1729 unsigned shadow_coord = 0;
1730 unsigned layer_coord = 0;
1731 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
1732
1733 memset(&params, 0, sizeof(params));
1734
1735 if (!bld->sampler) {
1736 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1737 for (i = 0; i < 4; i++) {
1738 texel[i] = bld->bld_base.base.undef;
1739 }
1740 return;
1741 }
1742
1743 switch (inst->Texture.Texture) {
1744 case TGSI_TEXTURE_1D_ARRAY:
1745 layer_coord = 1;
1746 /* fallthrough */
1747 case TGSI_TEXTURE_1D:
1748 num_offsets = 1;
1749 num_derivs = 1;
1750 break;
1751 case TGSI_TEXTURE_2D_ARRAY:
1752 layer_coord = 2;
1753 /* fallthrough */
1754 case TGSI_TEXTURE_2D:
1755 case TGSI_TEXTURE_RECT:
1756 num_offsets = 2;
1757 num_derivs = 2;
1758 break;
1759 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1760 layer_coord = 1;
1761 /* fallthrough */
1762 case TGSI_TEXTURE_SHADOW1D:
1763 shadow_coord = 2;
1764 num_offsets = 1;
1765 num_derivs = 1;
1766 break;
1767 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1768 layer_coord = 2;
1769 shadow_coord = 3;
1770 num_offsets = 2;
1771 num_derivs = 2;
1772 break;
1773 case TGSI_TEXTURE_SHADOW2D:
1774 case TGSI_TEXTURE_SHADOWRECT:
1775 shadow_coord = 2;
1776 num_offsets = 2;
1777 num_derivs = 2;
1778 break;
1779 case TGSI_TEXTURE_CUBE:
1780 num_offsets = 2;
1781 num_derivs = 3;
1782 break;
1783 case TGSI_TEXTURE_3D:
1784 num_offsets = 3;
1785 num_derivs = 3;
1786 break;
1787 case TGSI_TEXTURE_SHADOWCUBE:
1788 shadow_coord = 3;
1789 num_offsets = 2;
1790 num_derivs = 3;
1791 break;
1792 case TGSI_TEXTURE_CUBE_ARRAY:
1793 num_offsets = 2;
1794 num_derivs = 3;
1795 layer_coord = 3;
1796 break;
1797 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1798 num_offsets = 2;
1799 num_derivs = 3;
1800 layer_coord = 3;
      shadow_coord = 4; /* shadow coord comes from a separate reg */
1802 break;
1803 case TGSI_TEXTURE_2D_MSAA:
1804 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1805 default:
1806 assert(0);
1807 return;
1808 }
1809
1810 /* Note lod and especially projected are illegal in a LOT of cases */
1811 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
1812 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1813 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
1814 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
1815 /* note that shadow cube array with bias/explicit lod does not exist */
1816 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
1817 }
1818 else {
1819 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
1820 }
1821 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1822 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
1823 }
1824 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1825 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
1826 }
1827 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
1828 }
1829
1830 if (sampler_op == LP_SAMPLER_OP_GATHER) {
1831 uint32_t comp_val = inst->Src[sampler_reg].Register.SwizzleX;
1832 sample_key |= (comp_val << LP_SAMPLER_GATHER_COMP_SHIFT);
1833 }
1834 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1835 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
1836 oow = lp_build_rcp(&bld->bld_base.base, oow);
1837 }
1838
1839 for (i = 0; i < num_derivs; i++) {
1840 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
1841 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1842 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
1843 }
1844 for (i = num_derivs; i < 5; i++) {
1845 coords[i] = bld->bld_base.base.undef;
1846 }
1847
1848 /* Layer coord always goes into 3rd slot, except for cube map arrays */
1849 if (layer_coord) {
1850 if (layer_coord == 3) {
1851 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
1852 }
1853 else {
1854 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
1855 }
1856 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1857 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
1858 }
   /* Shadow coord always occupies the 5th slot. */
1860 if (shadow_coord) {
1861 sample_key |= LP_SAMPLER_SHADOW;
1862 if (shadow_coord == 4) {
1863 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
1864 }
1865 else {
1866 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
1867 }
1868 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1869 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
1870 }
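
   /*
    * E.g. for TGSI_TEXTURE_SHADOW2D_ARRAY the coords now look like:
    *   coords[0] = s, coords[1] = t, coords[2] = layer,
    *   coords[3] = undef, coords[4] = shadow reference value
    */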
1871
1872 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1873 unsigned dim;
1874 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
1875 for (dim = 0; dim < num_derivs; ++dim) {
1876 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
1877 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
1878 }
1879 params.derivs = &derivs;
      /*
       * Could also check whether all src regs are constant, but I doubt
       * such cases exist in practice.
       */
1884 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
1885 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
1886 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1887 }
1888 else {
1889 lod_property = LP_SAMPLER_LOD_PER_QUAD;
1890 }
1891 }
1892 else {
1893 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1894 }
1895 }
1896 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
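
   /*
    * For illustration, a TXB on a shadow 2d target ends up with a key of
    * roughly (LP_SAMPLER_OP_TEXTURE << LP_SAMPLER_OP_TYPE_SHIFT) |
    * LP_SAMPLER_SHADOW |
    * (LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT) |
    * (lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT), plus
    * LP_SAMPLER_OFFSETS below if the instruction carries a texel offset.
    */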
1897
1898 /* we don't handle the 4 offset version of tg4 */
1899 if (inst->Texture.NumOffsets == 1) {
1900 unsigned dim;
1901 sample_key |= LP_SAMPLER_OFFSETS;
1902 for (dim = 0; dim < num_offsets; dim++) {
1903 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
1904 }
1905 }
1906
1907 params.type = bld->bld_base.base.type;
1908 params.sample_key = sample_key;
1909 params.texture_index = unit;
1910 params.sampler_index = unit;
1911 params.context_ptr = bld->context_ptr;
1912 params.thread_data_ptr = bld->thread_data_ptr;
1913 params.coords = coords;
1914 params.offsets = offsets;
1915 params.lod = lod;
1916 params.texel = texel;
1917
1918 bld->sampler->emit_tex_sample(bld->sampler,
1919 bld->bld_base.base.gallivm,
1920 &params);
1921 }
1922
1923 static void
1924 emit_sample(struct lp_build_tgsi_soa_context *bld,
1925 const struct tgsi_full_instruction *inst,
1926 enum lp_build_tex_modifier modifier,
1927 boolean compare,
1928 enum lp_sampler_op_type sample_type,
1929 LLVMValueRef *texel)
1930 {
1931 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1932 unsigned texture_unit, sampler_unit;
1933 LLVMValueRef lod = NULL;
1934 LLVMValueRef coords[5];
1935 LLVMValueRef offsets[3] = { NULL };
1936 struct lp_derivatives derivs;
1937 struct lp_sampler_params params;
1938 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
1939
1940 unsigned num_offsets, num_derivs, i;
1941 unsigned layer_coord = 0;
1942 unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
1943
1944 memset(&params, 0, sizeof(params));
1945
1946 if (!bld->sampler) {
1947 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1948 for (i = 0; i < 4; i++) {
1949 texel[i] = bld->bld_base.base.undef;
1950 }
1951 return;
1952 }
1953
   /*
    * Unlike old-style tex opcodes, the texture/sampler indices
    * always come from src1 and src2 respectively.
    */
1958 texture_unit = inst->Src[1].Register.Index;
1959 sampler_unit = inst->Src[2].Register.Index;
1960
   /*
    * Note inst->Texture.Texture will contain the number of offsets;
    * however, the target information is NOT there and comes from the
    * declared sampler views instead.
    */
1966 switch (bld->sv[texture_unit].Resource) {
1967 case TGSI_TEXTURE_1D:
1968 num_offsets = 1;
1969 num_derivs = 1;
1970 break;
1971 case TGSI_TEXTURE_1D_ARRAY:
1972 layer_coord = 1;
1973 num_offsets = 1;
1974 num_derivs = 1;
1975 break;
1976 case TGSI_TEXTURE_2D:
1977 case TGSI_TEXTURE_RECT:
1978 num_offsets = 2;
1979 num_derivs = 2;
1980 break;
1981 case TGSI_TEXTURE_2D_ARRAY:
1982 layer_coord = 2;
1983 num_offsets = 2;
1984 num_derivs = 2;
1985 break;
1986 case TGSI_TEXTURE_CUBE:
1987 num_offsets = 2;
1988 num_derivs = 3;
1989 break;
1990 case TGSI_TEXTURE_3D:
1991 num_offsets = 3;
1992 num_derivs = 3;
1993 break;
1994 case TGSI_TEXTURE_CUBE_ARRAY:
1995 layer_coord = 3;
1996 num_offsets = 2;
1997 num_derivs = 3;
1998 break;
1999 default:
2000 assert(0);
2001 return;
2002 }
2003
2004 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2005 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2006 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2007 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2008 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2009 }
2010 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2011 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2012 }
2013 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2014 }
2015 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2016 /* XXX might be better to explicitly pass the level zero information */
2017 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2018 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2019 }
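
   /*
    * E.g. a (hypothetical) SAMPLE_L TEMP[0], TEMP[1], SVIEW[0], SAMP[0],
    * TEMP[2].xxxx takes its coords from src0 and the explicit lod from
    * src3.x, matching the fetches above.
    */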
2020
2021 for (i = 0; i < num_derivs; i++) {
2022 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2023 }
2024 for (i = num_derivs; i < 5; i++) {
2025 coords[i] = bld->bld_base.base.undef;
2026 }
2027
2028 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2029 if (layer_coord) {
2030 if (layer_coord == 3)
2031 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2032 else
2033 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2034 }
   /* Shadow coord always occupies the 5th slot. */
2036 if (compare) {
2037 sample_key |= LP_SAMPLER_SHADOW;
2038 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2039 }
2040
2041 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2042 unsigned dim;
2043 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2044 for (dim = 0; dim < num_derivs; ++dim) {
2045 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2046 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2047 }
2048 params.derivs = &derivs;
      /*
       * Could also check whether all src regs are constant, but I doubt
       * such cases exist in practice.
       */
2053 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2054 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2055 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2056 }
2057 else {
2058 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2059 }
2060 }
2061 else {
2062 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2063 }
2064 }
2065
2066 /* some advanced gather instructions (txgo) would require 4 offsets */
2067 if (inst->Texture.NumOffsets == 1) {
2068 unsigned dim;
2069 sample_key |= LP_SAMPLER_OFFSETS;
2070 for (dim = 0; dim < num_offsets; dim++) {
2071 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2072 }
2073 }
2074 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2075
2076 params.type = bld->bld_base.base.type;
2077 params.sample_key = sample_key;
2078 params.texture_index = texture_unit;
2079 params.sampler_index = sampler_unit;
2080 params.context_ptr = bld->context_ptr;
2081 params.thread_data_ptr = bld->thread_data_ptr;
2082 params.coords = coords;
2083 params.offsets = offsets;
2084 params.lod = lod;
2085 params.texel = texel;
2086
2087 bld->sampler->emit_tex_sample(bld->sampler,
2088 bld->bld_base.base.gallivm,
2089 &params);
2090
2091 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2092 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2093 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2094 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2095 unsigned char swizzles[4];
2096 swizzles[0] = inst->Src[1].Register.SwizzleX;
2097 swizzles[1] = inst->Src[1].Register.SwizzleY;
2098 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2099 swizzles[3] = inst->Src[1].Register.SwizzleW;
2100
2101 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2102 }
2103 }
2104
2105 static void
2106 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2107 const struct tgsi_full_instruction *inst,
2108 LLVMValueRef *texel,
2109 boolean is_samplei)
2110 {
2111 unsigned unit, target;
2112 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2113 LLVMValueRef explicit_lod = NULL;
2114 LLVMValueRef coords[5];
2115 LLVMValueRef offsets[3] = { NULL };
2116 struct lp_sampler_params params;
2117 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2118 unsigned dims, i;
2119 unsigned layer_coord = 0;
2120 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2121
2122 memset(&params, 0, sizeof(params));
2123
2124 if (!bld->sampler) {
2125 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2126 for (i = 0; i < 4; i++) {
2127 texel[i] = coord_undef;
2128 }
2129 return;
2130 }
2131
2132 unit = inst->Src[1].Register.Index;
2133
2134 if (is_samplei) {
2135 target = bld->sv[unit].Resource;
2136 }
2137 else {
2138 target = inst->Texture.Texture;
2139 }
2140
2141 switch (target) {
2142 case TGSI_TEXTURE_1D:
2143 case TGSI_TEXTURE_BUFFER:
2144 dims = 1;
2145 break;
2146 case TGSI_TEXTURE_1D_ARRAY:
2147 layer_coord = 1;
2148 dims = 1;
2149 break;
2150 case TGSI_TEXTURE_2D:
2151 case TGSI_TEXTURE_RECT:
2152 case TGSI_TEXTURE_2D_MSAA:
2153 dims = 2;
2154 break;
2155 case TGSI_TEXTURE_2D_ARRAY:
2156 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2157 layer_coord = 2;
2158 dims = 2;
2159 break;
2160 case TGSI_TEXTURE_3D:
2161 dims = 3;
2162 break;
2163 default:
2164 assert(0);
2165 return;
2166 }
2167
   /* We always have an explicit lod, except for buffers and msaa targets (?). */
2169 if (target != TGSI_TEXTURE_BUFFER &&
2170 target != TGSI_TEXTURE_2D_MSAA &&
2171 target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
2172 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2173 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2174 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2175 }
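
   /*
    * E.g. a (hypothetical) TXF TEMP[0], TEMP[1], SAMP[0], 2D takes
    * integer coords from src0.xy and the explicit lod from src0.w,
    * matching the fetch above.
    */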
2176 /*
2177 * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2178 * would be the sample index.
2179 */
2180
2181 for (i = 0; i < dims; i++) {
2182 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2183 }
   /* We never use more than 3 coords here, but emit_fetch_texel copies all 5 anyway. */
2185 for (i = dims; i < 5; i++) {
2186 coords[i] = coord_undef;
2187 }
2188 if (layer_coord)
2189 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2190
2191 if (inst->Texture.NumOffsets == 1) {
2192 unsigned dim;
2193 sample_key |= LP_SAMPLER_OFFSETS;
2194 for (dim = 0; dim < dims; dim++) {
2195 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2196 }
2197 }
2198 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2199
2200 params.type = bld->bld_base.base.type;
2201 params.sample_key = sample_key;
2202 params.texture_index = unit;
   /*
    * The sampler is not actually used; set it to 0 so it won't exceed
    * PIPE_MAX_SAMPLERS and trigger assertions with d3d10, where the
    * sampler view number can exceed this limit.
    */
2208 params.sampler_index = 0;
2209 params.context_ptr = bld->context_ptr;
2210 params.thread_data_ptr = bld->thread_data_ptr;
2211 params.coords = coords;
2212 params.offsets = offsets;
2213 params.derivs = NULL;
2214 params.lod = explicit_lod;
2215 params.texel = texel;
2216
2217 bld->sampler->emit_tex_sample(bld->sampler,
2218 bld->bld_base.base.gallivm,
2219 &params);
2220
2221 if (is_samplei &&
2222 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2223 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2224 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2225 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2226 unsigned char swizzles[4];
2227 swizzles[0] = inst->Src[1].Register.SwizzleX;
2228 swizzles[1] = inst->Src[1].Register.SwizzleY;
2229 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2230 swizzles[3] = inst->Src[1].Register.SwizzleW;
2231
2232 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2233 }
2234 }
2235
2236 static void
2237 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2238 const struct tgsi_full_instruction *inst,
2239 LLVMValueRef *sizes_out,
2240 boolean is_sviewinfo)
2241 {
2242 LLVMValueRef explicit_lod;
2243 enum lp_sampler_lod_property lod_property;
2244 unsigned has_lod;
2245 unsigned i;
2246 unsigned unit = inst->Src[1].Register.Index;
2247 unsigned target, pipe_target;
2248 struct lp_sampler_size_query_params params;
2249
2250 if (is_sviewinfo) {
2251 target = bld->sv[unit].Resource;
2252 }
2253 else {
2254 target = inst->Texture.Texture;
2255 }
2256 switch (target) {
2257 case TGSI_TEXTURE_BUFFER:
2258 case TGSI_TEXTURE_RECT:
2259 case TGSI_TEXTURE_SHADOWRECT:
2260 has_lod = 0;
2261 break;
2262 default:
2263 has_lod = 1;
2264 break;
2265 }
2266
2267 if (!bld->sampler) {
2268 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2269 for (i = 0; i < 4; i++)
2270 sizes_out[i] = bld->bld_base.int_bld.undef;
2271 return;
2272 }
2273
2274 if (has_lod) {
2275 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2276 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2277 }
2278 else {
2279 explicit_lod = NULL;
2280 lod_property = LP_SAMPLER_LOD_SCALAR;
2281 }
2282
2283
2284 pipe_target = tgsi_to_pipe_tex_target(target);
2285
2286 params.int_type = bld->bld_base.int_bld.type;
2287 params.texture_unit = unit;
2288 params.target = pipe_target;
2289 params.context_ptr = bld->context_ptr;
2290 params.is_sviewinfo = TRUE;
2291 params.lod_property = lod_property;
2292 params.explicit_lod = explicit_lod;
2293 params.sizes_out = sizes_out;
2294
2295 bld->sampler->emit_size_query(bld->sampler,
2296 bld->bld_base.base.gallivm,
2297 &params);
2298 }
2299
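/**
 * Look ahead for an unconditional end of the shader within the next few
 * instructions. The kill opcodes use this to skip the early-exit
 * lp_build_mask_check when the shader is about to finish anyway; any
 * texturing, call or control flow in the window disqualifies it.
 */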
2300 static boolean
2301 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2302 int pc)
2303 {
2304 unsigned i;
2305
2306 for (i = 0; i < 5; i++) {
2307 enum tgsi_opcode opcode;
2308
2309 if (pc + i >= bld->bld_base.info->num_instructions)
2310 return TRUE;
2311
2312 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2313
2314 if (opcode == TGSI_OPCODE_END)
2315 return TRUE;
2316
2317 if (opcode == TGSI_OPCODE_TEX ||
2318 opcode == TGSI_OPCODE_TXP ||
2319 opcode == TGSI_OPCODE_TXD ||
2320 opcode == TGSI_OPCODE_TXB ||
2321 opcode == TGSI_OPCODE_TXL ||
2322 opcode == TGSI_OPCODE_TXF ||
2323 opcode == TGSI_OPCODE_TXQ ||
2324 opcode == TGSI_OPCODE_TEX2 ||
2325 opcode == TGSI_OPCODE_TXB2 ||
2326 opcode == TGSI_OPCODE_TXL2 ||
2327 opcode == TGSI_OPCODE_SAMPLE ||
2328 opcode == TGSI_OPCODE_SAMPLE_B ||
2329 opcode == TGSI_OPCODE_SAMPLE_C ||
2330 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2331 opcode == TGSI_OPCODE_SAMPLE_D ||
2332 opcode == TGSI_OPCODE_SAMPLE_I ||
2333 opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2334 opcode == TGSI_OPCODE_SAMPLE_L ||
2335 opcode == TGSI_OPCODE_SVIEWINFO ||
2336 opcode == TGSI_OPCODE_CAL ||
2337 opcode == TGSI_OPCODE_IF ||
2338 opcode == TGSI_OPCODE_UIF ||
2339 opcode == TGSI_OPCODE_BGNLOOP ||
2340 opcode == TGSI_OPCODE_SWITCH)
2341 return FALSE;
2342 }
2343
2344 return TRUE;
2345 }
2346
2347
2348
2349 /**
2350 * Kill fragment if any of the src register values are negative.
2351 */
2352 static void
2353 emit_kill_if(
2354 struct lp_build_tgsi_soa_context *bld,
2355 const struct tgsi_full_instruction *inst,
2356 int pc)
2357 {
2358 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2359 const struct tgsi_full_src_register *reg = &inst->Src[0];
2360 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2361 LLVMValueRef mask;
2362 unsigned chan_index;
2363
2364 memset(&terms, 0, sizeof terms);
2365
2366 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2367 unsigned swizzle;
2368
2369 /* Unswizzle channel */
2370 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2371
      /* Check that the component has not already been tested. */
2373 assert(swizzle < TGSI_NUM_CHANNELS);
2374 if( !terms[swizzle] )
2375 /* TODO: change the comparison operator instead of setting the sign */
2376 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2377 }
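
   /*
    * E.g. a (hypothetical) KILL_IF TEMP[0].xxyy only fetches channels
    * x and y once each; terms[] is indexed by the unswizzled channel, so
    * duplicated swizzle components aren't tested twice.
    */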
2378
2379 mask = NULL;
2380 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2381 if(terms[chan_index]) {
2382 LLVMValueRef chan_mask;
2383
2384 /*
2385 * If term < 0 then mask = 0 else mask = ~0.
2386 */
2387 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2388
2389 if(mask)
2390 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2391 else
2392 mask = chan_mask;
2393 }
2394 }
2395
2396 if (bld->exec_mask.has_mask) {
2397 LLVMValueRef invmask;
2398 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2399 mask = LLVMBuildOr(builder, mask, invmask, "");
2400 }
2401
2402 lp_build_mask_update(bld->mask, mask);
2403 if (!near_end_of_shader(bld, pc))
2404 lp_build_mask_check(bld->mask);
2405 }
2406
2407
2408 /**
2409 * Unconditional fragment kill.
2410 * The only predication is the execution mask which will apply if
2411 * we're inside a loop or conditional.
2412 */
2413 static void
2414 emit_kill(struct lp_build_tgsi_soa_context *bld,
2415 int pc)
2416 {
2417 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2418 LLVMValueRef mask;
2419
2420 /* For those channels which are "alive", disable fragment shader
2421 * execution.
2422 */
2423 if (bld->exec_mask.has_mask) {
2424 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2425 }
2426 else {
2427 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2428 mask = zero;
2429 }
2430
2431 lp_build_mask_update(bld->mask, mask);
2432
2433 if (!near_end_of_shader(bld, pc))
2434 lp_build_mask_check(bld->mask);
2435 }
2436
2437
2438 /**
2439 * Emit code which will dump the value of all the temporary registers
2440 * to stdout.
2441 */
2442 static void
2443 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2444 unsigned file)
2445 {
2446 const struct tgsi_shader_info *info = bld->bld_base.info;
2447 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2448 LLVMBuilderRef builder = gallivm->builder;
2449 LLVMValueRef reg_ptr;
2450 int index;
2451 int max_index = info->file_max[file];
2452
2453 /*
2454 * Some register files, particularly constants, can be very large,
2455 * and dumping everything could make this unusably slow.
2456 */
2457 max_index = MIN2(max_index, 32);
2458
2459 for (index = 0; index <= max_index; index++) {
2460 LLVMValueRef res;
2461 unsigned mask;
2462 int chan;
2463
2464 if (index < 8 * sizeof(unsigned) &&
2465 (info->file_mask[file] & (1u << index)) == 0) {
         /* This was not declared. */
2467 continue;
2468 }
2469
2470 if (file == TGSI_FILE_INPUT) {
2471 mask = info->input_usage_mask[index];
2472 } else {
2473 mask = TGSI_WRITEMASK_XYZW;
2474 }
2475
2476 for (chan = 0; chan < 4; chan++) {
2477 if ((mask & (1 << chan)) == 0) {
            /* This channel is not used. */
2479 continue;
2480 }
2481
2482 if (file == TGSI_FILE_CONSTANT) {
2483 struct tgsi_full_src_register reg;
2484 memset(&reg, 0, sizeof reg);
2485 reg.Register.File = file;
2486 reg.Register.Index = index;
2487 reg.Register.SwizzleX = 0;
2488 reg.Register.SwizzleY = 1;
2489 reg.Register.SwizzleZ = 2;
2490 reg.Register.SwizzleW = 3;
2491
2492 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2493 if (!res) {
2494 continue;
2495 }
2496 } else if (file == TGSI_FILE_INPUT) {
2497 res = bld->inputs[index][chan];
2498 if (!res) {
2499 continue;
2500 }
2501 } else if (file == TGSI_FILE_TEMPORARY) {
2502 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2503 assert(reg_ptr);
2504 res = LLVMBuildLoad(builder, reg_ptr, "");
2505 } else if (file == TGSI_FILE_OUTPUT) {
2506 reg_ptr = lp_get_output_ptr(bld, index, chan);
2507 assert(reg_ptr);
2508 res = LLVMBuildLoad(builder, reg_ptr, "");
2509 } else {
2510 assert(0);
2511 continue;
2512 }
2513
2514 emit_dump_reg(gallivm, file, index, chan, res);
2515 }
2516 }
2517 }
2518
2519
2520
2521 void
2522 lp_emit_declaration_soa(
2523 struct lp_build_tgsi_context *bld_base,
2524 const struct tgsi_full_declaration *decl)
2525 {
2526 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2527 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2528 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2529 const unsigned first = decl->Range.First;
2530 const unsigned last = decl->Range.Last;
2531 unsigned idx, i;
2532
2533 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2534
2535 switch (decl->Declaration.File) {
2536 case TGSI_FILE_TEMPORARY:
2537 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2538 assert(last < LP_MAX_INLINED_TEMPS);
2539 for (idx = first; idx <= last; ++idx) {
2540 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2541 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2542 }
2543 }
2544 break;
2545
2546 case TGSI_FILE_OUTPUT:
2547 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2548 for (idx = first; idx <= last; ++idx) {
2549 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2550 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2551 vec_type, "output");
2552 }
2553 }
2554 break;
2555
2556 case TGSI_FILE_ADDRESS:
   /* ADDR registers are allocated with an integer LLVM IR type only,
    * as they are guaranteed to always hold integer values.
    * XXX: Not sure if this exception is worthwhile (or the whole idea of
    * an ADDR register for that matter).
    */
2562 assert(last < LP_MAX_TGSI_ADDRS);
2563 for (idx = first; idx <= last; ++idx) {
2564 assert(idx < LP_MAX_TGSI_ADDRS);
2565 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2566 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2567 }
2568 break;
2569
2570 case TGSI_FILE_SAMPLER_VIEW:
   /*
    * The target stored here MUST match whatever is actually bound
    * in the set sampler views (what about the return type?).
    */
2575 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2576 for (idx = first; idx <= last; ++idx) {
2577 bld->sv[idx] = decl->SamplerView;
2578 }
2579 break;
2580
2581 case TGSI_FILE_CONSTANT:
2582 {
2583 /*
2584 * We could trivially fetch the per-buffer pointer when fetching the
2585 * constant, relying on llvm to figure out it's always the same pointer
2586 * anyway. However, doing so results in a huge (more than factor of 10)
2587 * slowdown in llvm compilation times for some (but not all) shaders
2588 * (more specifically, the IR optimization spends way more time in
2589 * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
2590 */
2591 unsigned idx2D = decl->Dim.Index2D;
2592 LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2593 assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2594 bld->consts[idx2D] =
2595 lp_build_array_get(gallivm, bld->consts_ptr, index2D);
2596 bld->consts_sizes[idx2D] =
2597 lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
2598 }
2599 break;
2600 case TGSI_FILE_BUFFER:
2601 {
2602 unsigned idx = decl->Range.First;
2603 LLVMValueRef index = lp_build_const_int32(gallivm, idx);
2604 assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
2605 bld->ssbos[idx] =
2606 lp_build_array_get(gallivm, bld->ssbo_ptr, index);
2607 bld->ssbo_sizes[idx] =
2608 lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index);
2609
2610 }
2611 break;
2612 case TGSI_FILE_MEMORY:
2613 break;
2614 default:
2615 /* don't need to declare other vars */
2616 break;
2617 }
2618 }
2619
2620
2621 void lp_emit_immediate_soa(
2622 struct lp_build_tgsi_context *bld_base,
2623 const struct tgsi_full_immediate *imm)
2624 {
2625 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2626 struct gallivm_state * gallivm = bld_base->base.gallivm;
2627 LLVMValueRef imms[4];
2628 unsigned i;
2629 const uint size = imm->Immediate.NrTokens - 1;
2630 assert(size <= 4);
2631 switch (imm->Immediate.DataType) {
2632 case TGSI_IMM_FLOAT32:
2633 for( i = 0; i < size; ++i )
2634 imms[i] =
2635 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2636
2637 break;
2638 case TGSI_IMM_FLOAT64:
2639 case TGSI_IMM_UINT64:
2640 case TGSI_IMM_INT64:
2641 case TGSI_IMM_UINT32:
2642 for( i = 0; i < size; ++i ) {
2643 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
2644 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2645 }
2646
2647 break;
2648 case TGSI_IMM_INT32:
2649 for( i = 0; i < size; ++i ) {
2650 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
2651 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2652 }
2653
2654 break;
2655 }
2656 for( i = size; i < 4; ++i )
2657 imms[i] = bld_base->base.undef;
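
   /*
    * Each immediate channel is now an N-wide splat for soa vector length
    * N, e.g. a (hypothetical) IMM[0] FLT32 { 0.5, 1.0, 0.0, 0.0 } yields
    * the vector constants <0.5 x N>, <1.0 x N>, <0.0 x N>, <0.0 x N>.
    */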
2658
2659 if (bld->use_immediates_array) {
2660 unsigned index = bld->num_immediates;
2661 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2662 LLVMBuilderRef builder = gallivm->builder;
2663 LLVMValueRef gep[2];
2664 gep[0] = lp_build_const_int32(gallivm, 0);
2665
2666 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
2667 for (i = 0; i < 4; ++i ) {
2668 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
2669 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
2670 bld->imms_array, gep, 2, "");
2671 LLVMBuildStore(builder, imms[i], imm_ptr);
2672 }
2673 } else {
2674 /* simply copy the immediate values into the next immediates[] slot */
2675 unsigned i;
2676 assert(imm->Immediate.NrTokens - 1 <= 4);
2677 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
2678
2679 for(i = 0; i < 4; ++i )
2680 bld->immediates[bld->num_immediates][i] = imms[i];
2681
2682 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
2683 unsigned index = bld->num_immediates;
2684 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2685 LLVMBuilderRef builder = gallivm->builder;
2686 LLVMValueRef gep[2];
2687 gep[0] = lp_build_const_int32(gallivm, 0);
2688 for (i = 0; i < 4; ++i ) {
2689 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
2690 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
2691 bld->imms_array, gep, 2, "");
2692 LLVMBuildStore(builder,
2693 bld->immediates[index][i],
2694 imm_ptr);
2695 }
2696 }
2697 }
2698
2699 bld->num_immediates++;
2700 }
2701
2702 static void
2703 ddx_emit(
2704 const struct lp_build_tgsi_action * action,
2705 struct lp_build_tgsi_context * bld_base,
2706 struct lp_build_emit_data * emit_data)
2707 {
2708 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2709
2710 emit_fetch_deriv(bld, emit_data->args[0], NULL,
2711 &emit_data->output[emit_data->chan], NULL);
2712 }
2713
2714 static void
2715 ddy_emit(
2716 const struct lp_build_tgsi_action * action,
2717 struct lp_build_tgsi_context * bld_base,
2718 struct lp_build_emit_data * emit_data)
2719 {
2720 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2721
2722 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
2723 &emit_data->output[emit_data->chan]);
2724 }
2725
2726 static void
2727 kill_emit(
2728 const struct lp_build_tgsi_action * action,
2729 struct lp_build_tgsi_context * bld_base,
2730 struct lp_build_emit_data * emit_data)
2731 {
2732 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2733
2734 emit_kill(bld, bld_base->pc - 1);
2735 }
2736
2737 static void
2738 kill_if_emit(
2739 const struct lp_build_tgsi_action * action,
2740 struct lp_build_tgsi_context * bld_base,
2741 struct lp_build_emit_data * emit_data)
2742 {
2743 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2744
2745 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
2746 }
2747
2748 static void
2749 tex_emit(
2750 const struct lp_build_tgsi_action * action,
2751 struct lp_build_tgsi_context * bld_base,
2752 struct lp_build_emit_data * emit_data)
2753 {
2754 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2755
2756 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2757 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
2758 }
2759
2760 static void
2761 tex2_emit(
2762 const struct lp_build_tgsi_action * action,
2763 struct lp_build_tgsi_context * bld_base,
2764 struct lp_build_emit_data * emit_data)
2765 {
2766 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2767
2768 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2769 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
2770 }
2771
2772 static void
2773 txb_emit(
2774 const struct lp_build_tgsi_action * action,
2775 struct lp_build_tgsi_context * bld_base,
2776 struct lp_build_emit_data * emit_data)
2777 {
2778 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2779
2780 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2781 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
2782 }
2783
2784 static void
2785 txb2_emit(
2786 const struct lp_build_tgsi_action * action,
2787 struct lp_build_tgsi_context * bld_base,
2788 struct lp_build_emit_data * emit_data)
2789 {
2790 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2791
2792 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2793 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
2794 }
2795
2796 static void
2797 txd_emit(
2798 const struct lp_build_tgsi_action * action,
2799 struct lp_build_tgsi_context * bld_base,
2800 struct lp_build_emit_data * emit_data)
2801 {
2802 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2803
2804 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2805 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
2806 }
2807
2808 static void
2809 txl_emit(
2810 const struct lp_build_tgsi_action * action,
2811 struct lp_build_tgsi_context * bld_base,
2812 struct lp_build_emit_data * emit_data)
2813 {
2814 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2815
2816 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2817 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
2818 }
2819
2820 static void
2821 txl2_emit(
2822 const struct lp_build_tgsi_action * action,
2823 struct lp_build_tgsi_context * bld_base,
2824 struct lp_build_emit_data * emit_data)
2825 {
2826 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2827
2828 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2829 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
2830 }
2831
2832 static void
2833 txp_emit(
2834 const struct lp_build_tgsi_action * action,
2835 struct lp_build_tgsi_context * bld_base,
2836 struct lp_build_emit_data * emit_data)
2837 {
2838 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2839
2840 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
2841 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
2842 }
2843
2844 static void
2845 tg4_emit(
2846 const struct lp_build_tgsi_action * action,
2847 struct lp_build_tgsi_context * bld_base,
2848 struct lp_build_emit_data * emit_data)
2849 {
2850 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2851
2852 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2853 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
2854 }
2855
2856 static void
2857 lodq_emit(
2858 const struct lp_build_tgsi_action * action,
2859 struct lp_build_tgsi_context * bld_base,
2860 struct lp_build_emit_data * emit_data)
2861 {
2862 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2863
2864 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2865 emit_data->output, 1, LP_SAMPLER_OP_LODQ);
2866 }
2867
2868 static void
2869 txq_emit(
2870 const struct lp_build_tgsi_action * action,
2871 struct lp_build_tgsi_context * bld_base,
2872 struct lp_build_emit_data * emit_data)
2873 {
2874 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2875
2876 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
2877 }
2878
2879 static void
2880 txf_emit(
2881 const struct lp_build_tgsi_action * action,
2882 struct lp_build_tgsi_context * bld_base,
2883 struct lp_build_emit_data * emit_data)
2884 {
2885 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2886
2887 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
2888 }
2889
2890 static void
2891 sample_i_emit(
2892 const struct lp_build_tgsi_action * action,
2893 struct lp_build_tgsi_context * bld_base,
2894 struct lp_build_emit_data * emit_data)
2895 {
2896 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2897
2898 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
2899 }
2900
2901 static void
2902 sample_emit(
2903 const struct lp_build_tgsi_action * action,
2904 struct lp_build_tgsi_context * bld_base,
2905 struct lp_build_emit_data * emit_data)
2906 {
2907 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2908
2909 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2910 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
2911 }
2912
2913 static void
2914 sample_b_emit(
2915 const struct lp_build_tgsi_action * action,
2916 struct lp_build_tgsi_context * bld_base,
2917 struct lp_build_emit_data * emit_data)
2918 {
2919 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2920
2921 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2922 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
2923 }
2924
2925 static void
2926 sample_c_emit(
2927 const struct lp_build_tgsi_action * action,
2928 struct lp_build_tgsi_context * bld_base,
2929 struct lp_build_emit_data * emit_data)
2930 {
2931 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2932
2933 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2934 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
2935 }
2936
2937 static void
2938 sample_c_lz_emit(
2939 const struct lp_build_tgsi_action * action,
2940 struct lp_build_tgsi_context * bld_base,
2941 struct lp_build_emit_data * emit_data)
2942 {
2943 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2944
2945 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
2946 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
2947 }
2948
2949 static void
2950 sample_d_emit(
2951 const struct lp_build_tgsi_action * action,
2952 struct lp_build_tgsi_context * bld_base,
2953 struct lp_build_emit_data * emit_data)
2954 {
2955 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2956
2957 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2958 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
2959 }
2960
2961 static void
2962 sample_l_emit(
2963 const struct lp_build_tgsi_action * action,
2964 struct lp_build_tgsi_context * bld_base,
2965 struct lp_build_emit_data * emit_data)
2966 {
2967 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2968
2969 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2970 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
2971 }
2972
2973 static void
2974 gather4_emit(
2975 const struct lp_build_tgsi_action * action,
2976 struct lp_build_tgsi_context * bld_base,
2977 struct lp_build_emit_data * emit_data)
2978 {
2979 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2980
2981 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2982 FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
2983 }
2984
2985 static void
2986 sviewinfo_emit(
2987 const struct lp_build_tgsi_action * action,
2988 struct lp_build_tgsi_context * bld_base,
2989 struct lp_build_emit_data * emit_data)
2990 {
2991 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2992
2993 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
2994 }
2995
2996 static void
2997 lod_emit(
2998 const struct lp_build_tgsi_action * action,
2999 struct lp_build_tgsi_context * bld_base,
3000 struct lp_build_emit_data * emit_data)
3001 {
3002 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3003
3004 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3005 FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3006 }
3007
3008 static void target_to_dims_layer(unsigned target,
3009 unsigned *dims,
3010 unsigned *layer_coord)
3011 {
3012 *layer_coord = 0;
3013 switch (target) {
3014 case TGSI_TEXTURE_1D:
3015 case TGSI_TEXTURE_BUFFER:
3016 *dims = 1;
3017 break;
3018 case TGSI_TEXTURE_1D_ARRAY:
3019 *layer_coord = 1;
3020 *dims = 1;
3021 break;
3022 case TGSI_TEXTURE_2D:
3023 case TGSI_TEXTURE_RECT:
3024 *dims = 2;
3025 break;
3026 case TGSI_TEXTURE_2D_ARRAY:
3027 *layer_coord = 2;
3028 *dims = 2;
3029 break;
3030 case TGSI_TEXTURE_3D:
3031 case TGSI_TEXTURE_CUBE:
3032 case TGSI_TEXTURE_CUBE_ARRAY:
3033 *dims = 3;
3034 break;
3035 default:
3036 assert(0);
3037 return;
3038 }
3039 }
3040
3041 static void
3042 img_load_emit(
3043 const struct lp_build_tgsi_action * action,
3044 struct lp_build_tgsi_context * bld_base,
3045 struct lp_build_emit_data * emit_data)
3046 {
3047 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3048 struct lp_img_params params;
3049 LLVMValueRef coords[5];
3050 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3051 unsigned dims;
3052 unsigned target = emit_data->inst->Memory.Texture;
3053 unsigned layer_coord;
3054
3055 target_to_dims_layer(target, &dims, &layer_coord);
3056
3057 for (unsigned i = 0; i < dims; i++) {
3058 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3059 }
3060 for (unsigned i = dims; i < 5; i++) {
3061 coords[i] = coord_undef;
3062 }
3063 if (layer_coord)
3064 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3065
3066 memset(&params, 0, sizeof(params));
3067
3068 params.type = bld->bld_base.base.type;
3069 params.context_ptr = bld->context_ptr;
3070 params.thread_data_ptr = bld->thread_data_ptr;
3071 params.coords = coords;
3072 params.outdata = emit_data->output;
3073 params.target = tgsi_to_pipe_tex_target(target);
3074 params.image_index = emit_data->inst->Src[0].Register.Index;
3075 params.img_op = LP_IMG_LOAD;
3076 bld->image->emit_op(bld->image,
3077 bld->bld_base.base.gallivm,
3078 &params);
3079 }
3080
3081 static void
3082 load_emit(
3083 const struct lp_build_tgsi_action * action,
3084 struct lp_build_tgsi_context * bld_base,
3085 struct lp_build_emit_data * emit_data)
3086 {
3087 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3088 struct gallivm_state * gallivm = bld_base->base.gallivm;
3089 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3090 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3091 unsigned buf = bufreg->Register.Index;
3092 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3093 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3094 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3095
3096 if (bufreg->Register.File == TGSI_FILE_IMAGE)
3097 img_load_emit(action, bld_base, emit_data);
3098 else if (0) {
3099 /* for indirect support with ARB_gpu_shader5 */
3100 } else {
3101 LLVMValueRef index;
3102 LLVMValueRef scalar, scalar_ptr;
3103 unsigned chan_index;
3104
3105 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3106 index = lp_build_shr_imm(uint_bld, index, 2);
3107
3108 scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3109
3110 LLVMValueRef ssbo_limit;
3111
3112 if (!is_shared) {
3113 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3114 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3115 }
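
      /*
       * Scalarization loop: for each enabled dst channel, walk the
       * vector lanes one at a time; lanes whose exec-mask bit is set
       * (and which pass the ssbo bounds test) load their scalar, all
       * other lanes get 0.
       */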
3116
3117 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3118 LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3119
3120 LLVMValueRef exec_mask = mask_vec(bld_base);
3121 if (!is_shared) {
3122 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3123 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3124 }
3125
3126 LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
3127 struct lp_build_loop_state loop_state;
3128 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3129
3130 struct lp_build_if_state ifthen;
3131 LLVMValueRef cond, temp_res;
3132
3133 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3134 loop_state.counter, "");
3135
3136 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3137 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3138
3139 lp_build_if(&ifthen, gallivm, cond);
3140 scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index);
3141
3142 temp_res = LLVMBuildLoad(builder, result, "");
3143 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3144 LLVMBuildStore(builder, temp_res, result);
3145 lp_build_else(&ifthen);
3146 temp_res = LLVMBuildLoad(builder, result, "");
3147 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3148 LLVMBuildStore(builder, temp_res, result);
3149 lp_build_endif(&ifthen);
3150 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3151 NULL, LLVMIntUGE);
3152 emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, "");
3153 }
3154 }
3155 }
3156
3157 static void
3158 img_store_emit(
3159 const struct lp_build_tgsi_action * action,
3160 struct lp_build_tgsi_context * bld_base,
3161 struct lp_build_emit_data * emit_data)
3162 {
3163 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3164 struct lp_img_params params;
3165 LLVMValueRef coords[5];
3166 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3167 unsigned dims;
3168 unsigned target = emit_data->inst->Memory.Texture;
3169 unsigned layer_coord;
3170
3171 target_to_dims_layer(target, &dims, &layer_coord);
3172 for (unsigned i = 0; i < dims; i++) {
3173 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i);
3174 }
3175 for (unsigned i = dims; i < 5; i++) {
3176 coords[i] = coord_undef;
3177 }
3178 if (layer_coord)
3179 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
3180 memset(&params, 0, sizeof(params));
3181
3182 params.type = bld->bld_base.base.type;
3183 params.context_ptr = bld->context_ptr;
3184 params.thread_data_ptr = bld->thread_data_ptr;
3185 params.coords = coords;
3186 params.outdata = NULL;
3187 params.exec_mask = mask_vec(bld_base);
3188 params.target = tgsi_to_pipe_tex_target(target);
3189 params.image_index = emit_data->inst->Dst[0].Register.Index;
3190 params.img_op = LP_IMG_STORE;
3191 for (unsigned i = 0; i < 4; i++)
3192 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3193
3194 bld->image->emit_op(bld->image,
3195 bld->bld_base.base.gallivm,
3196 &params);
3197 }
3198
3199 static void
3200 store_emit(
3201 const struct lp_build_tgsi_action * action,
3202 struct lp_build_tgsi_context * bld_base,
3203 struct lp_build_emit_data * emit_data)
3204 {
3205 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3206 struct gallivm_state * gallivm = bld_base->base.gallivm;
3207 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3208 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3209 const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
3210 unsigned buf = bufreg->Register.Index;
3211 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3212 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3213
3214 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3215 img_store_emit(action, bld_base, emit_data);
3216 } else if (0) {
3217
3218 } else {
      LLVMValueRef index; /* index into the ssbo/shared memory */
3220 LLVMValueRef scalar_ptr;
3221 LLVMValueRef value;
3222 unsigned chan_index;
3223
3224 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
3225 index = lp_build_shr_imm(uint_bld, index, 2);
3226
3227 scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3228
3229 LLVMValueRef ssbo_limit;
3230
3231 if (!is_shared) {
3232 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3233 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3234 }
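
      /*
       * Same lane-by-lane pattern as load_emit, but scattering: each
       * enabled, in-bounds lane stores its scalar through scalar_ptr;
       * masked-off lanes do nothing.
       */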
3235
3236 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3237 LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3238
3239 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);
3240
3241 LLVMValueRef exec_mask = mask_vec(bld_base);
3242 if (!is_shared) {
3243 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3244 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3245 }
3246
3247 struct lp_build_loop_state loop_state;
3248 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3249
3250 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3251 loop_state.counter, "");
3252 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3253
3254 struct lp_build_if_state ifthen;
3255 LLVMValueRef cond;
3256
3257 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3258 loop_state.counter, "");
3259
3260 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3261 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3262 lp_build_if(&ifthen, gallivm, cond);
3263
3264 lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);
3265
3266 lp_build_endif(&ifthen);
3267 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3268 NULL, LLVMIntUGE);
3269 }
3270 }
3271 }
3272
3273 static void
3274 resq_emit(
3275 const struct lp_build_tgsi_action * action,
3276 struct lp_build_tgsi_context * bld_base,
3277 struct lp_build_emit_data * emit_data)
3278 {
3279 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3280 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3281 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3282
3283 unsigned buf = bufreg->Register.Index;
3284 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
3285
3286 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3287 unsigned target = emit_data->inst->Memory.Texture;
3288 struct lp_sampler_size_query_params params = { 0 };
3289 params.int_type = bld->bld_base.int_bld.type;
3290 params.texture_unit = buf;
3291 params.target = tgsi_to_pipe_tex_target(target);
3292 params.context_ptr = bld->context_ptr;
3293 params.sizes_out = emit_data->output;
3294
3295 bld->image->emit_size_query(bld->image,
3296 bld->bld_base.base.gallivm,
3297 &params);
3298 } else {
3299 LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
3300
3301 emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
3302 }
3303 }
3304
3305 static void
3306 img_atomic_emit(
3307 const struct lp_build_tgsi_action * action,
3308 struct lp_build_tgsi_context * bld_base,
3309 struct lp_build_emit_data * emit_data,
3310 LLVMAtomicRMWBinOp op)
3311 {
3312 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3313 struct lp_img_params params;
3314 LLVMValueRef coords[5];
3315 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3316 unsigned dims;
3317 unsigned layer_coord;
3318 unsigned target = emit_data->inst->Memory.Texture;
3319
3320 target_to_dims_layer(target, &dims, &layer_coord);
3321
3322 for (unsigned i = 0; i < dims; i++) {
3323 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3324 }
3325 for (unsigned i = dims; i < 5; i++) {
3326 coords[i] = coord_undef;
3327 }
3328 if (layer_coord)
3329 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3330 memset(&params, 0, sizeof(params));
3331
3332 params.type = bld->bld_base.base.type;
3333 params.context_ptr = bld->context_ptr;
3334 params.thread_data_ptr = bld->thread_data_ptr;
3335 params.exec_mask = mask_vec(bld_base);
3336 params.image_index = emit_data->inst->Src[0].Register.Index;
3337 params.coords = coords;
3338 params.target = tgsi_to_pipe_tex_target(target);
3339 params.op = op;
3340 params.outdata = emit_data->output;
3341 params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
3342
3343 for (unsigned i = 0; i < 4; i++)
3344 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
3345 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3346 for (unsigned i = 0; i < 4; i++)
3347 params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
3348 }
3349 bld->image->emit_op(bld->image,
3350 bld->bld_base.base.gallivm,
3351 &params);
3352 }
3353
3354 static void
3355 atomic_emit(
3356 const struct lp_build_tgsi_action * action,
3357 struct lp_build_tgsi_context * bld_base,
3358 struct lp_build_emit_data * emit_data)
3359 {
3360 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3361 struct gallivm_state * gallivm = bld_base->base.gallivm;
3362 LLVMBuilderRef builder = gallivm->builder;
3363 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3364 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3365
3366 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3367 unsigned buf = bufreg->Register.Index;
3368 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3369
3370 LLVMAtomicRMWBinOp op;
3371 switch (emit_data->inst->Instruction.Opcode) {
3372 case TGSI_OPCODE_ATOMUADD:
3373 op = LLVMAtomicRMWBinOpAdd;
3374 break;
3375 case TGSI_OPCODE_ATOMXCHG:
3376 op = LLVMAtomicRMWBinOpXchg;
3377 break;
3378 case TGSI_OPCODE_ATOMAND:
3379 op = LLVMAtomicRMWBinOpAnd;
3380 break;
3381 case TGSI_OPCODE_ATOMOR:
3382 op = LLVMAtomicRMWBinOpOr;
3383 break;
3384 case TGSI_OPCODE_ATOMXOR:
3385 op = LLVMAtomicRMWBinOpXor;
3386 break;
3387 case TGSI_OPCODE_ATOMUMIN:
3388 op = LLVMAtomicRMWBinOpUMin;
3389 break;
3390 case TGSI_OPCODE_ATOMUMAX:
3391 op = LLVMAtomicRMWBinOpUMax;
3392 break;
3393 case TGSI_OPCODE_ATOMIMIN:
3394 op = LLVMAtomicRMWBinOpMin;
3395 break;
3396 case TGSI_OPCODE_ATOMIMAX:
3397 op = LLVMAtomicRMWBinOpMax;
3398 break;
3399 case TGSI_OPCODE_ATOMCAS:
3400 break;
3401 default:
3402 assert(0);
3403 return;
3404 }
3405
3406 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3407 img_atomic_emit(action, bld_base, emit_data, op);
3408 } else if (0) {
3409 } else {
      LLVMValueRef index; /* index into the ssbo/shared memory */
3411 LLVMValueRef scalar, scalar_ptr;
3412 LLVMValueRef value;
3413
3414 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3415 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);
3416
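      /* The buffer offset is in bytes; shift right by 2 to convert it
       * to a dword (32-bit element) index. */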
3417 index = lp_build_shr_imm(uint_bld, index, 2);
3418
3419 if (!is_shared) {
3420 index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
3421 scalar_ptr = bld->ssbos[buf];
3422 } else
3423 scalar_ptr = bld->shared_ptr;
3424
3425 LLVMValueRef atom_res = lp_build_alloca(gallivm,
3426 uint_bld->vec_type, "");
3427
3428 LLVMValueRef ssbo_limit;
3429 if (!is_shared) {
3430 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3431 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3432 }
3433
3434 LLVMValueRef exec_mask = mask_vec(bld_base);
3435
3436 if (!is_shared) {
3437 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
3438 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3439 }
3440
3441 struct lp_build_loop_state loop_state;
3442 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3443
3444 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3445 loop_state.counter, "");
3446 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3447
3448 index = LLVMBuildExtractElement(gallivm->builder, index,
3449 loop_state.counter, "");
3450
3451 scalar_ptr = LLVMBuildGEP(builder, scalar_ptr,
3452 &index, 1, "");
3453
3454 struct lp_build_if_state ifthen;
3455 LLVMValueRef cond, temp_res;
3456
3457 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3458 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3459 lp_build_if(&ifthen, gallivm, cond);
3460
3461 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3462 LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
3463 LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
3464 loop_state.counter, "");
3465 cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
3466 scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
3467 cas_src_ptr,
3468 LLVMAtomicOrderingSequentiallyConsistent,
3469 LLVMAtomicOrderingSequentiallyConsistent,
3470 false);
3471 scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
3472 } else {
3473 scalar = LLVMBuildAtomicRMW(builder, op,
3474 scalar_ptr, value_ptr,
3475 LLVMAtomicOrderingSequentiallyConsistent,
3476 false);
3477 }
3478 temp_res = LLVMBuildLoad(builder, atom_res, "");
3479 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3480 LLVMBuildStore(builder, temp_res, atom_res);
3481 lp_build_else(&ifthen);
3482 temp_res = LLVMBuildLoad(builder, atom_res, "");
3483 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3484 LLVMBuildStore(builder, temp_res, atom_res);
3485 lp_build_endif(&ifthen);
3486
3487 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3488 NULL, LLVMIntUGE);
3489 emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, "");
3490 }
3491 }
3492
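/*
 * TGSI_OPCODE_BARRIER: suspend the current coroutine and continue at
 * the "resume" block when we are resumed; presumably the caller only
 * resumes us once every invocation of the work group has reached the
 * barrier.
 */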
3493 static void
3494 barrier_emit(
3495 const struct lp_build_tgsi_action * action,
3496 struct lp_build_tgsi_context * bld_base,
3497 struct lp_build_emit_data * emit_data)
3498 {
3499 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3500 struct gallivm_state * gallivm = bld_base->base.gallivm;
3501
3502 LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
3503
3504 lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
3505 LLVMPositionBuilderAtEnd(gallivm->builder, resume);
3506 }
3507
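/* TGSI_OPCODE_MEMBAR maps onto a single sequentially consistent LLVM fence. */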
3508 static void
3509 membar_emit(
3510 const struct lp_build_tgsi_action * action,
3511 struct lp_build_tgsi_context * bld_base,
3512 struct lp_build_emit_data * emit_data)
3513 {
3514 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3515 LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, false, "");
3516 }
3517
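/*
 * Add 1 to each active lane of the vector stored at *ptr. Active
 * lanes of the mask are all ones (i.e. -1), so subtracting the mask
 * increments exactly those lanes.
 */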
3518 static void
3519 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3520 LLVMValueRef ptr,
3521 LLVMValueRef mask)
3522 {
3523 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3524 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3525
3526 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3527
3528 LLVMBuildStore(builder, current_vec, ptr);
3529 }
3530
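/* Reset the lanes selected by the mask in the vector at *ptr to zero. */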
3531 static void
3532 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3533 LLVMValueRef ptr,
3534 LLVMValueRef mask)
3535 {
3536 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3537 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3538
3539 current_vec = lp_build_select(&bld_base->uint_bld,
3540 mask,
3541 bld_base->uint_bld.zero,
3542 current_vec);
3543
3544 LLVMBuildStore(builder, current_vec, ptr);
3545 }
3546
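/*
 * Clear mask bits for lanes that have already emitted the maximum
 * number of vertices allowed for this geometry shader, so further
 * emits on those lanes become no-ops.
 */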
3547 static LLVMValueRef
3548 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3549 LLVMValueRef current_mask_vec,
3550 LLVMValueRef total_emitted_vertices_vec)
3551 {
3552 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3553 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3554 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3555 total_emitted_vertices_vec,
3556 bld->max_output_vertices_vec);
3557
3558 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3559 }
3560
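/*
 * TGSI_OPCODE_EMIT: hand the current output registers to the GS
 * interface as one vertex per active lane, then bump the emitted and
 * total-emitted vertex counters for the (clamped) active lanes.
 */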
3561 static void
3562 emit_vertex(
3563 const struct lp_build_tgsi_action * action,
3564 struct lp_build_tgsi_context * bld_base,
3565 struct lp_build_emit_data * emit_data)
3566 {
3567 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3568 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3569
3570 if (bld->gs_iface->emit_vertex) {
3571 uint32_t imms_idx = emit_data->inst->Src[0].Register.SwizzleX;
3572 LLVMValueRef stream_id = bld->immediates[0][imms_idx];
3573 LLVMValueRef mask = mask_vec(bld_base);
3574 LLVMValueRef total_emitted_vertices_vec =
3575 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3576 mask = clamp_mask_to_max_output_vertices(bld, mask,
3577 total_emitted_vertices_vec);
3578 gather_outputs(bld);
3579 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
3580 bld->outputs,
3581 total_emitted_vertices_vec,
3582 stream_id);
3583 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3584 mask);
3585 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3586 mask);
3587 #if DUMP_GS_EMITS
3588 lp_build_print_value(bld->bld_base.base.gallivm,
3589 " +++ emit vertex masked ones = ",
3590 mask);
3591 lp_build_print_value(bld->bld_base.base.gallivm,
3592 " +++ emit vertex emitted = ",
3593 total_emitted_vertices_vec);
3594 #endif
3595 }
3596 }
3597
3598
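/*
 * Finish the primitive under construction: restrict the mask to lanes
 * that actually have unemitted vertices, notify the GS interface,
 * bump the emitted-primitive counters and reset the per-primitive
 * vertex counts for those lanes.
 */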
3599 static void
3600 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
3601 LLVMValueRef mask)
3602 {
3603 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3604 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3605
3606 if (bld->gs_iface->end_primitive) {
3607 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3608 LLVMValueRef emitted_vertices_vec =
3609 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
3610 LLVMValueRef emitted_prims_vec =
3611 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3612 LLVMValueRef total_emitted_vertices_vec =
3613 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3614 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3615 emitted_vertices_vec,
3616 uint_bld->zero);
      /* Combine the current execution mask with the mask telling us
       * which, if any, execution slots actually have unemitted
       * primitives; this ensures end_primitive executes only on the
       * paths that still have unflushed vertices. */
3621 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
3622
3623 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
3624 total_emitted_vertices_vec,
3625 emitted_vertices_vec,
3626 emitted_prims_vec,
3627 mask_vec(bld_base));
3628
3629 #if DUMP_GS_EMITS
3630 lp_build_print_value(bld->bld_base.base.gallivm,
3631 " +++ end prim masked ones = ",
3632 mask);
3633 lp_build_print_value(bld->bld_base.base.gallivm,
3634 " +++ end prim emitted verts1 = ",
3635 emitted_vertices_vec);
3636 lp_build_print_value(bld->bld_base.base.gallivm,
3637 " +++ end prim emitted prims1 = ",
3638 LLVMBuildLoad(builder,
3639 bld->emitted_prims_vec_ptr, ""));
3640 #endif
3641 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
3642 mask);
3643 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
3644 mask);
3645 #if DUMP_GS_EMITS
3646 lp_build_print_value(bld->bld_base.base.gallivm,
3647 " +++ end prim emitted verts2 = ",
3648 LLVMBuildLoad(builder,
3649 bld->emitted_vertices_vec_ptr, ""));
3650 #endif
3651 }
3652
3653 }
3654
3655 static void
3656 end_primitive(
3657 const struct lp_build_tgsi_action * action,
3658 struct lp_build_tgsi_context * bld_base,
3659 struct lp_build_emit_data * emit_data)
3660 {
3661 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3662
3663 if (bld->gs_iface->end_primitive) {
3664 LLVMValueRef mask = mask_vec(bld_base);
3665 end_primitive_masked(bld_base, mask);
3666 }
3667 }
3668
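/*
 * The flow-control opcodes below (CAL/RET/BRK/IF/UIF/SWITCH/loops and
 * friends) are thin wrappers that simply forward to the generic
 * lp_exec_* execution-mask helpers.
 */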
3669 static void
3670 cal_emit(
3671 const struct lp_build_tgsi_action * action,
3672 struct lp_build_tgsi_context * bld_base,
3673 struct lp_build_emit_data * emit_data)
3674 {
3675 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3676
3677 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
3678 &bld_base->pc);
3679 }
3680
3681 static void
3682 ret_emit(
3683 const struct lp_build_tgsi_action * action,
3684 struct lp_build_tgsi_context * bld_base,
3685 struct lp_build_emit_data * emit_data)
3686 {
3687 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3688
3689 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
3690 }
3691
3692 static void
3693 brk_emit(
3694 const struct lp_build_tgsi_action * action,
3695 struct lp_build_tgsi_context * bld_base,
3696 struct lp_build_emit_data * emit_data)
3697 {
3698 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3699
3700 lp_exec_tgsi_break(&bld->exec_mask, bld_base);
3701 }
3702
3703 static void
3704 if_emit(
3705 const struct lp_build_tgsi_action * action,
3706 struct lp_build_tgsi_context * bld_base,
3707 struct lp_build_emit_data * emit_data)
3708 {
3709 LLVMValueRef tmp;
3710 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3711
3712 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
3713 emit_data->args[0], bld->bld_base.base.zero);
3714 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3715 }
3716
3717 static void
3718 uif_emit(
3719 const struct lp_build_tgsi_action * action,
3720 struct lp_build_tgsi_context * bld_base,
3721 struct lp_build_emit_data * emit_data)
3722 {
3723 LLVMValueRef tmp;
3724 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3725 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3726
3727 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3728 emit_data->args[0], uint_bld->zero);
3729 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3730 }
3731
3732 static void
3733 case_emit(
3734 const struct lp_build_tgsi_action * action,
3735 struct lp_build_tgsi_context * bld_base,
3736 struct lp_build_emit_data * emit_data)
3737 {
3738 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3739
3740 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
3741 }
3742
3743 static void
3744 default_emit(
3745 const struct lp_build_tgsi_action * action,
3746 struct lp_build_tgsi_context * bld_base,
3747 struct lp_build_emit_data * emit_data)
3748 {
3749 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3750
3751 lp_exec_default(&bld->exec_mask, bld_base);
3752 }
3753
3754 static void
3755 switch_emit(
3756 const struct lp_build_tgsi_action * action,
3757 struct lp_build_tgsi_context * bld_base,
3758 struct lp_build_emit_data * emit_data)
3759 {
3760 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3761
3762 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
3763 }
3764
3765 static void
3766 endswitch_emit(
3767 const struct lp_build_tgsi_action * action,
3768 struct lp_build_tgsi_context * bld_base,
3769 struct lp_build_emit_data * emit_data)
3770 {
3771 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3772
3773 lp_exec_endswitch(&bld->exec_mask, bld_base);
3774 }
3775
3776 static void
3777 bgnloop_emit(
3778 const struct lp_build_tgsi_action * action,
3779 struct lp_build_tgsi_context * bld_base,
3780 struct lp_build_emit_data * emit_data)
3781 {
3782 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3783
3784 lp_exec_bgnloop(&bld->exec_mask, true);
3785 }
3786
3787 static void
3788 bgnsub_emit(
3789 const struct lp_build_tgsi_action * action,
3790 struct lp_build_tgsi_context * bld_base,
3791 struct lp_build_emit_data * emit_data)
3792 {
3793 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3794
3795 lp_exec_mask_bgnsub(&bld->exec_mask);
3796 }
3797
3798 static void
3799 else_emit(
3800 const struct lp_build_tgsi_action * action,
3801 struct lp_build_tgsi_context * bld_base,
3802 struct lp_build_emit_data * emit_data)
3803 {
3804 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3805
3806 lp_exec_mask_cond_invert(&bld->exec_mask);
3807 }
3808
3809 static void
3810 endif_emit(
3811 const struct lp_build_tgsi_action * action,
3812 struct lp_build_tgsi_context * bld_base,
3813 struct lp_build_emit_data * emit_data)
3814 {
3815 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3816
3817 lp_exec_mask_cond_pop(&bld->exec_mask);
3818 }
3819
3820 static void
3821 endloop_emit(
3822 const struct lp_build_tgsi_action * action,
3823 struct lp_build_tgsi_context * bld_base,
3824 struct lp_build_emit_data * emit_data)
3825 {
3826 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3827
3828 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
3829 }
3830
3831 static void
3832 endsub_emit(
3833 const struct lp_build_tgsi_action * action,
3834 struct lp_build_tgsi_context * bld_base,
3835 struct lp_build_emit_data * emit_data)
3836 {
3837 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3838
3839 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
3840 }
3841
3842 static void
3843 cont_emit(
3844 const struct lp_build_tgsi_action * action,
3845 struct lp_build_tgsi_context * bld_base,
3846 struct lp_build_emit_data * emit_data)
3847 {
3848 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3849
3850 lp_exec_continue(&bld->exec_mask);
3851 }
3852
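/*
 * Shader prologue: allocate backing arrays for register files that are
 * addressed indirectly, copy the inputs into their array where needed,
 * and zero-initialize the GS vertex/primitive counters.
 */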
3853 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
3854 {
3855 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3856 struct gallivm_state * gallivm = bld_base->base.gallivm;
3857
3858 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
3859 unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
3860 bld->temps_array = lp_build_alloca_undef(gallivm,
3861 LLVMArrayType(bld_base->base.vec_type, array_size),
3862 "temp_array");
3863 }
3864
3865 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
3866 LLVMValueRef array_size =
3867 lp_build_const_int32(gallivm,
3868 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
3869 bld->outputs_array = lp_build_array_alloca(gallivm,
3870 bld_base->base.vec_type, array_size,
3871 "output_array");
3872 }
3873
3874 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3875 unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
3876 bld->imms_array = lp_build_alloca_undef(gallivm,
3877 LLVMArrayType(bld_base->base.vec_type, array_size),
3878 "imms_array");
3879 }
3880
   /* If we have indirect addressing in inputs, we need to copy them into
    * our alloca array to be able to iterate over them. */
3883 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
3884 unsigned index, chan;
3885 LLVMTypeRef vec_type = bld_base->base.vec_type;
3886 LLVMValueRef array_size = lp_build_const_int32(gallivm,
3887 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
3888 bld->inputs_array = lp_build_array_alloca(gallivm,
3889 vec_type, array_size,
3890 "input_array");
3891
3892 assert(bld_base->info->num_inputs
3893 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
3894
3895 for (index = 0; index < bld_base->info->num_inputs; ++index) {
3896 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
3897 LLVMValueRef lindex =
3898 lp_build_const_int32(gallivm, index * 4 + chan);
3899 LLVMValueRef input_ptr =
3900 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
3901 &lindex, 1, "");
3902 LLVMValueRef value = bld->inputs[index][chan];
3903 if (value)
3904 LLVMBuildStore(gallivm->builder, value, input_ptr);
3905 }
3906 }
3907 }
3908
3909 if (bld->gs_iface) {
3910 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
3911 bld->emitted_prims_vec_ptr =
3912 lp_build_alloca(gallivm,
3913 uint_bld->vec_type,
3914 "emitted_prims_ptr");
3915 bld->emitted_vertices_vec_ptr =
3916 lp_build_alloca(gallivm,
3917 uint_bld->vec_type,
3918 "emitted_vertices_ptr");
3919 bld->total_emitted_vertices_vec_ptr =
3920 lp_build_alloca(gallivm,
3921 uint_bld->vec_type,
3922 "total_emitted_vertices_ptr");
3923
3924 LLVMBuildStore(gallivm->builder, uint_bld->zero,
3925 bld->emitted_prims_vec_ptr);
3926 LLVMBuildStore(gallivm->builder, uint_bld->zero,
3927 bld->emitted_vertices_vec_ptr);
3928 LLVMBuildStore(gallivm->builder, uint_bld->zero,
3929 bld->total_emitted_vertices_vec_ptr);
3930 }
3931
3932 if (DEBUG_EXECUTION) {
3933 lp_build_printf(gallivm, "\n");
3934 emit_dump_file(bld, TGSI_FILE_CONSTANT);
3935 if (!bld->gs_iface)
3936 emit_dump_file(bld, TGSI_FILE_INPUT);
3937 }
3938 }
3939
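/*
 * Shader epilogue: optionally dump the outputs, then either flush any
 * pending GS primitive and report the emitted totals, or gather the
 * outputs back into the caller-visible output registers.
 */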
3940 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
3941 {
3942 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3943 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3944
3945 if (DEBUG_EXECUTION) {
      /* optionally dump temporaries too; disabled by default */
3947 if (0) {
3948 emit_dump_file(bld, TGSI_FILE_TEMPORARY);
3949 }
3950 emit_dump_file(bld, TGSI_FILE_OUTPUT);
3951 lp_build_printf(bld_base->base.gallivm, "\n");
3952 }
3953
   /* Flush pending GS state, or copy the outputs back from the indirect
    * array into the caller's output slots via gather_outputs(). */
3956 if (bld->gs_iface) {
3957 LLVMValueRef total_emitted_vertices_vec;
3958 LLVMValueRef emitted_prims_vec;
      /* Implicit end_primitive, needed in case there are any unflushed
       * vertices in the cache. Note we must not call end_primitive()
       * here since the exec_mask is not valid at this point. */
3962 end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
3963
3964 total_emitted_vertices_vec =
3965 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3966 emitted_prims_vec =
3967 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3968
3969 bld->gs_iface->gs_epilogue(bld->gs_iface,
3970 total_emitted_vertices_vec,
3971 emitted_prims_vec);
3972 } else {
3973 gather_outputs(bld);
3974 }
3975 }
3976
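/*
 * Main entry point: build the SoA type contexts (float, int, uint,
 * 64-bit and scalar-element variants), register the per-opcode emit
 * callbacks and fetch/store hooks, then translate the TGSI token
 * stream into LLVM IR.
 */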
3977 void
3978 lp_build_tgsi_soa(struct gallivm_state *gallivm,
3979 const struct tgsi_token *tokens,
3980 const struct lp_build_tgsi_params *params,
3981 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
3982 {
3983 struct lp_build_tgsi_soa_context bld;
3984 struct lp_type type = params->type;
3985 struct lp_type res_type;
3986
3987 assert(type.length <= LP_MAX_VECTOR_LENGTH);
3988 memset(&res_type, 0, sizeof res_type);
3989 res_type.width = type.width;
3990 res_type.length = type.length;
3991 res_type.sign = 1;
3992
3993 /* Setup build context */
3994 memset(&bld, 0, sizeof bld);
3995 lp_build_context_init(&bld.bld_base.base, gallivm, type);
3996 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
3997 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
3998 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
3999 {
4000 struct lp_type dbl_type;
4001 dbl_type = type;
4002 dbl_type.width *= 2;
4003 lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
4004 }
4005 {
4006 struct lp_type uint64_type;
4007 uint64_type = lp_uint_type(type);
4008 uint64_type.width *= 2;
4009 lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
4010 }
4011 {
4012 struct lp_type int64_type;
4013 int64_type = lp_int_type(type);
4014 int64_type.width *= 2;
4015 lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
4016 }
4017 bld.mask = params->mask;
4018 bld.inputs = params->inputs;
4019 bld.outputs = outputs;
4020 bld.consts_ptr = params->consts_ptr;
4021 bld.const_sizes_ptr = params->const_sizes_ptr;
4022 bld.ssbo_ptr = params->ssbo_ptr;
4023 bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
4024 bld.sampler = params->sampler;
4025 bld.bld_base.info = params->info;
4026 bld.indirect_files = params->info->indirect_files;
4027 bld.context_ptr = params->context_ptr;
4028 bld.thread_data_ptr = params->thread_data_ptr;
4029 bld.image = params->image;
4030 bld.shared_ptr = params->shared_ptr;
4031 bld.coro = params->coro;
4032
4033 /*
 * If the number of temporaries is rather large, we allocate them as
 * an array right from the start and treat them like indirect
 * temporaries.
4037 */
4038 if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
4039 bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
4040 }
4041 /*
 * For performance reasons immediates are always backed by a static
 * array, but if there are too many of them we have to fall back to
 * a dynamically allocated array.
4045 */
4046 bld.use_immediates_array =
4047 (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
4048 if (bld.use_immediates_array) {
4049 bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
4050 }
4051
4052
4053 bld.bld_base.soa = TRUE;
4054 bld.bld_base.emit_debug = emit_debug;
4055 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
4056 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
4057 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
4058 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
4059 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
4060 bld.bld_base.emit_store = emit_store;
4061
4062 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
4063 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
4064
4065 bld.bld_base.emit_prologue = emit_prologue;
4066 bld.bld_base.emit_epilogue = emit_epilogue;
4067
4068 /* Set opcode actions */
4069 lp_set_default_actions_cpu(&bld.bld_base);
4070
4071 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
4072 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
4073 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
4074 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
4075 bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
4076 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
4077 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
4078 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
4079 bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
4080 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
4081 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
4082 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
4083 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
4084 bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
4085 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
4086 bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
4087 bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
4088 bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
4089 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
4090 bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
4091 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
4092 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
4093 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
4094 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
4095 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
4096 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
4097 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
4098 bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
4099 bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
4100 bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
4101 bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
4102 bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
4103 /* DX10 sampling ops */
4104 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
4105 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
4106 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
4107 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
4108 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
4109 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
4110 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
4111 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
4112 bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
4113 bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
4114 bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
4115
4116 bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
4117 bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
4118 bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
4119
4120 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
4121 bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
4122 bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
4123 bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
4124 bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
4125 bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
4126 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
4127 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
4128 bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
4129 bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;
4130
4131 bld.bld_base.op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
4132 bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit;
4133
4134 if (params->gs_iface) {
      /* There's no default value for this because it should always be
       * set, but apps using ext_geometry_shader4 quite often forgot to
       * set it, so we fall back to MAX_VERTEX_VARYING from that spec.
       * We could debug_assert if it's not set instead, but that would
       * be a lot uglier. */
4140 uint max_output_vertices;
4141
4142 /* inputs are always indirect with gs */
4143 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4144 bld.gs_iface = params->gs_iface;
4145 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
4146 bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
4147 bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
4148
4149 max_output_vertices =
4150 params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
4151 if (!max_output_vertices)
4152 max_output_vertices = 32;
4153
4154 bld.max_output_vertices_vec =
4155 lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
4156 max_output_vertices);
4157 }
4158
4159 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
4160
4161 bld.system_values = *params->system_values;
4162
4163 lp_build_tgsi_llvm(&bld.bld_base, tokens);
4164
4165 if (0) {
4166 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
4167 LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("--- TGSI / LLVM IR dump begin ---\n");
4169 tgsi_dump(tokens, 0);
4170 lp_debug_dump_value(function);
      debug_printf("--- TGSI / LLVM IR dump end ---\n");
4172 }
4173
4174 if (0) {
4175 LLVMModuleRef module = LLVMGetGlobalParent(
4176 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
4177 LLVMDumpModule(module);
   }
4180 lp_exec_mask_fini(&bld.exec_mask);
4181 }