gallivm: add buffer operations to the tgsi->llvm conversion.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "util/u_prim.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_info.h"
48 #include "tgsi/tgsi_parse.h"
49 #include "tgsi/tgsi_util.h"
50 #include "tgsi/tgsi_scan.h"
51 #include "tgsi/tgsi_strings.h"
52 #include "lp_bld_tgsi_action.h"
53 #include "lp_bld_type.h"
54 #include "lp_bld_const.h"
55 #include "lp_bld_arit.h"
56 #include "lp_bld_bitarit.h"
57 #include "lp_bld_gather.h"
58 #include "lp_bld_init.h"
59 #include "lp_bld_logic.h"
60 #include "lp_bld_misc.h"
61 #include "lp_bld_swizzle.h"
62 #include "lp_bld_flow.h"
63 #include "lp_bld_quad.h"
64 #include "lp_bld_tgsi.h"
65 #include "lp_bld_limits.h"
66 #include "lp_bld_debug.h"
67 #include "lp_bld_printf.h"
68 #include "lp_bld_sample.h"
69 #include "lp_bld_struct.h"
70
71 /* SM 4.0 says that subroutines can nest 32 deep and
72 * we need one more for our main function */
73 #define LP_MAX_NUM_FUNCS 33
74
75 #define DUMP_GS_EMITS 0
76
77 /*
78 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
79 * instruction.
80 *
81 * TODO:
82 * - take execution masks into consideration
83 * - debug control-flow instructions
84 */
85 #define DEBUG_EXECUTION 0
86
87
88 /*
89 * Emit code to print a register value.
90 */
91 static void
92 emit_dump_reg(struct gallivm_state *gallivm,
93 unsigned file,
94 unsigned index,
95 unsigned chan,
96 LLVMValueRef value)
97 {
98 char buf[32];
99
100 util_snprintf(buf, sizeof buf, " %s[%u].%c = ",
101 tgsi_file_name(file),
102 index, "xyzw"[chan]);
103
104 lp_build_print_value(gallivm, buf, value);
105 }
106
107 /*
108 * Return the context for the current function.
109 * (always 'main', if the shader doesn't make any function calls)
110 */
111 static inline struct function_ctx *
112 func_ctx(struct lp_exec_mask *mask)
113 {
114 assert(mask->function_stack_size > 0);
115 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
116 return &mask->function_stack[mask->function_stack_size - 1];
117 }
118
119 /*
120 * Returns true if we're in a loop.
121 * It's global: it returns true even if there's no loop inside the
122 * current function, as long as we were inside a loop in one of the
123 * functions from which this one was called.
124 */
125 static inline boolean
126 mask_has_loop(struct lp_exec_mask *mask)
127 {
128 int i;
129 for (i = mask->function_stack_size - 1; i >= 0; --i) {
130 const struct function_ctx *ctx = &mask->function_stack[i];
131 if (ctx->loop_stack_size > 0)
132 return TRUE;
133 }
134 return FALSE;
135 }
136
137 /*
138 * Combine the execution mask, if there is one, with the current mask.
139 */
140 static LLVMValueRef
141 mask_vec(struct lp_build_tgsi_context *bld_base)
142 {
143 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
144 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
145 struct lp_exec_mask *exec_mask = &bld->exec_mask;
146 LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
147 if (!exec_mask->has_mask) {
148 return bld_mask;
149 }
150 if (!bld_mask)
151 return exec_mask->exec_mask;
152 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
153 exec_mask->exec_mask, "");
154 }
155
156 /*
157 * Returns true if we're inside a switch statement.
158 * It's global: it returns true even if there's no switch in the
159 * current function, as long as we were inside a switch in one of the
160 * functions from which this one was called.
161 */
162 static inline boolean
163 mask_has_switch(struct lp_exec_mask *mask)
164 {
165 int i;
166 for (i = mask->function_stack_size - 1; i >= 0; --i) {
167 const struct function_ctx *ctx = &mask->function_stack[i];
168 if (ctx->switch_stack_size > 0)
169 return TRUE;
170 }
171 return FALSE;
172 }
173
174 /*
175 * Returns true if we're inside a conditional.
176 * It's global: it returns true even if there's no conditional in the
177 * current function, as long as we were inside a conditional in one of
178 * the functions from which this one was called.
179 */
180 static inline boolean
181 mask_has_cond(struct lp_exec_mask *mask)
182 {
183 int i;
184 for (i = mask->function_stack_size - 1; i >= 0; --i) {
185 const struct function_ctx *ctx = &mask->function_stack[i];
186 if (ctx->cond_stack_size > 0)
187 return TRUE;
188 }
189 return FALSE;
190 }
191
192
193 /*
194 * Initialize a function context at the specified index.
195 */
196 static void
197 lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
198 {
199 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
200 LLVMBuilderRef builder = mask->bld->gallivm->builder;
201 struct function_ctx *ctx = &mask->function_stack[function_idx];
202
203 ctx->cond_stack_size = 0;
204 ctx->loop_stack_size = 0;
205 ctx->switch_stack_size = 0;
206
207 if (function_idx == 0) {
208 ctx->ret_mask = mask->ret_mask;
209 }
210
211 ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
212 int_type, "looplimiter");
213 LLVMBuildStore(
214 builder,
215 LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
216 ctx->loop_limiter);
217 }
218
219 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
220 {
221 mask->bld = bld;
222 mask->has_mask = FALSE;
223 mask->ret_in_main = FALSE;
224 /* For the main function */
225 mask->function_stack_size = 1;
226
227 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
228 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
229 mask->cond_mask = mask->switch_mask =
230 LLVMConstAllOnes(mask->int_vec_type);
231
232 mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
233 sizeof(mask->function_stack[0]));
234 lp_exec_mask_function_init(mask, 0);
235 }
236
237 static void
238 lp_exec_mask_fini(struct lp_exec_mask *mask)
239 {
240 FREE(mask->function_stack);
241 }
242
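/*
 * Recompute the combined execution mask. Roughly:
 *
 *    exec_mask = cond_mask [& cont_mask & break_mask] [& switch_mask] [& ret_mask]
 *
 * where the bracketed terms only apply while inside a loop, inside a
 * switch, or after a return was executed somewhere, respectively.
 * has_mask records whether exec_mask can be anything other than all-ones.
 */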
243 static void lp_exec_mask_update(struct lp_exec_mask *mask)
244 {
245 LLVMBuilderRef builder = mask->bld->gallivm->builder;
246 boolean has_loop_mask = mask_has_loop(mask);
247 boolean has_cond_mask = mask_has_cond(mask);
248 boolean has_switch_mask = mask_has_switch(mask);
249 boolean has_ret_mask = mask->function_stack_size > 1 ||
250 mask->ret_in_main;
251
252 if (has_loop_mask) {
253 /* For loops we need to update the entire mask at runtime. */
254 LLVMValueRef tmp;
255 assert(mask->break_mask);
256 tmp = LLVMBuildAnd(builder,
257 mask->cont_mask,
258 mask->break_mask,
259 "maskcb");
260 mask->exec_mask = LLVMBuildAnd(builder,
261 mask->cond_mask,
262 tmp,
263 "maskfull");
264 } else
265 mask->exec_mask = mask->cond_mask;
266
267 if (has_switch_mask) {
268 mask->exec_mask = LLVMBuildAnd(builder,
269 mask->exec_mask,
270 mask->switch_mask,
271 "switchmask");
272 }
273
274 if (has_ret_mask) {
275 mask->exec_mask = LLVMBuildAnd(builder,
276 mask->exec_mask,
277 mask->ret_mask,
278 "callmask");
279 }
280
281 mask->has_mask = (has_cond_mask ||
282 has_loop_mask ||
283 has_switch_mask ||
284 has_ret_mask);
285 }
286
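/*
 * Conditionals (IF/ELSE/ENDIF) are handled with a per-function stack of
 * condition masks. For a sequence like IF cond ... ELSE ... ENDIF the
 * mask roughly evolves as:
 *
 *    push (IF):     save cond_mask; cond_mask &= cond;
 *    invert (ELSE): cond_mask = ~cond_mask & saved;
 *    pop (ENDIF):   cond_mask = saved;
 *
 * Nesting beyond LP_MAX_TGSI_NESTING is ignored, but the stack size
 * counter still tracks the depth so that pushes and pops stay balanced.
 */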
287 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
288 LLVMValueRef val)
289 {
290 LLVMBuilderRef builder = mask->bld->gallivm->builder;
291 struct function_ctx *ctx = func_ctx(mask);
292
293 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
294 ctx->cond_stack_size++;
295 return;
296 }
297 if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
298 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
299 }
300 ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
301 assert(LLVMTypeOf(val) == mask->int_vec_type);
302 mask->cond_mask = LLVMBuildAnd(builder,
303 mask->cond_mask,
304 val,
305 "");
306 lp_exec_mask_update(mask);
307 }
308
309 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
310 {
311 LLVMBuilderRef builder = mask->bld->gallivm->builder;
312 struct function_ctx *ctx = func_ctx(mask);
313 LLVMValueRef prev_mask;
314 LLVMValueRef inv_mask;
315
316 assert(ctx->cond_stack_size);
317 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
318 return;
319 prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
320 if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
321 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
322 }
323
324 inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
325
326 mask->cond_mask = LLVMBuildAnd(builder,
327 inv_mask,
328 prev_mask, "");
329 lp_exec_mask_update(mask);
330 }
331
332 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
333 {
334 struct function_ctx *ctx = func_ctx(mask);
335 assert(ctx->cond_stack_size);
336 --ctx->cond_stack_size;
337 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
338 return;
339 mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
340 lp_exec_mask_update(mask);
341 }
342
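/*
 * Loops keep two masks: cont_mask, which CONT clears for channels that
 * continue early, and break_mask, which BRK clears for channels that
 * leave the loop. Since break_mask must survive across iterations it
 * lives in an alloca (break_var) that is re-loaded at the top of the
 * loop body; ENDLOOP branches back while any channel is still active
 * and the LP_MAX_TGSI_LOOP_ITERATIONS limiter hasn't expired.
 */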
343 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
344 {
345 LLVMBuilderRef builder = mask->bld->gallivm->builder;
346 struct function_ctx *ctx = func_ctx(mask);
347
348 if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
349 ++ctx->loop_stack_size;
350 return;
351 }
352
353 ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
354 ctx->break_type;
355 ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;
356
357 ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
358 ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
359 ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
360 ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
361 ++ctx->loop_stack_size;
362
363 ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
364 LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
365
366 ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
367
368 LLVMBuildBr(builder, ctx->loop_block);
369 LLVMPositionBuilderAtEnd(builder, ctx->loop_block);
370
371 mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");
372
373 lp_exec_mask_update(mask);
374 }
375
376 static void lp_exec_break(struct lp_exec_mask *mask,
377 struct lp_build_tgsi_context * bld_base)
378 {
379 LLVMBuilderRef builder = mask->bld->gallivm->builder;
380 struct function_ctx *ctx = func_ctx(mask);
381
382 if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
383 LLVMValueRef exec_mask = LLVMBuildNot(builder,
384 mask->exec_mask,
385 "break");
386
387 mask->break_mask = LLVMBuildAnd(builder,
388 mask->break_mask,
389 exec_mask, "break_full");
390 }
391 else {
392 enum tgsi_opcode opcode =
393 bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
394 boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
395 opcode == TGSI_OPCODE_CASE);
396
397
398 if (ctx->switch_in_default) {
399 /*
400 * stop default execution, but only if this is an unconditional break.
401 * (The condition here is not perfect since dead code after break is
402 * allowed but should be sufficient since false negatives are just
403 * unoptimized - so we don't have to pre-evaluate that).
404 */
405 if (break_always && ctx->switch_pc) {
406 bld_base->pc = ctx->switch_pc;
407 return;
408 }
409 }
410
411 if (break_always) {
412 mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
413 }
414 else {
415 LLVMValueRef exec_mask = LLVMBuildNot(builder,
416 mask->exec_mask,
417 "break");
418 mask->switch_mask = LLVMBuildAnd(builder,
419 mask->switch_mask,
420 exec_mask, "break_switch");
421 }
422 }
423
424 lp_exec_mask_update(mask);
425 }
426
427 static void lp_exec_continue(struct lp_exec_mask *mask)
428 {
429 LLVMBuilderRef builder = mask->bld->gallivm->builder;
430 LLVMValueRef exec_mask = LLVMBuildNot(builder,
431 mask->exec_mask,
432 "");
433
434 mask->cont_mask = LLVMBuildAnd(builder,
435 mask->cont_mask,
436 exec_mask, "");
437
438 lp_exec_mask_update(mask);
439 }
440
441
442 static void lp_exec_endloop(struct gallivm_state *gallivm,
443 struct lp_exec_mask *mask)
444 {
445 LLVMBuilderRef builder = mask->bld->gallivm->builder;
446 struct function_ctx *ctx = func_ctx(mask);
447 LLVMBasicBlockRef endloop;
448 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
449 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
450 mask->bld->type.width *
451 mask->bld->type.length);
452 LLVMValueRef i1cond, i2cond, icond, limiter;
453
454 assert(mask->break_mask);
455
456
457 assert(ctx->loop_stack_size);
458 if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
459 --ctx->loop_stack_size;
460 return;
461 }
462
463 /*
464 * Restore the cont_mask, but don't pop
465 */
466 mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
467 lp_exec_mask_update(mask);
468
469 /*
470 * Unlike the continue mask, the break_mask must be preserved across loop
471 * iterations
472 */
473 LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
474
475 /* Decrement the loop limiter */
476 limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");
477
478 limiter = LLVMBuildSub(
479 builder,
480 limiter,
481 LLVMConstInt(int_type, 1, false),
482 "");
483
484 LLVMBuildStore(builder, limiter, ctx->loop_limiter);
485
486 /* i1cond = (mask != 0) */
487 i1cond = LLVMBuildICmp(
488 builder,
489 LLVMIntNE,
490 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
491 LLVMConstNull(reg_type), "i1cond");
492
493 /* i2cond = (looplimiter > 0) */
494 i2cond = LLVMBuildICmp(
495 builder,
496 LLVMIntSGT,
497 limiter,
498 LLVMConstNull(int_type), "i2cond");
499
500 /* if( i1cond && i2cond ) */
501 icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
502
503 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
504
505 LLVMBuildCondBr(builder,
506 icond, ctx->loop_block, endloop);
507
508 LLVMPositionBuilderAtEnd(builder, endloop);
509
510 assert(ctx->loop_stack_size);
511 --ctx->loop_stack_size;
512 mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
513 mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
514 ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
515 ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
516 ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
517 ctx->switch_stack_size];
518
519 lp_exec_mask_update(mask);
520 }
521
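/*
 * Switches are handled with masks as well: switch_mask holds the
 * channels executing the current case, while switch_mask_default
 * accumulates every channel matched by some CASE so that DEFAULT can
 * run on the complement. Because DEFAULT may appear anywhere, with
 * fallthrough both into and out of it, a deferred-default scheme is
 * used: see lp_exec_default/lp_exec_endswitch below, which record
 * switch_pc and re-execute the skipped code with the corrected mask.
 */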
522 static void lp_exec_switch(struct lp_exec_mask *mask,
523 LLVMValueRef switchval)
524 {
525 struct function_ctx *ctx = func_ctx(mask);
526
527 if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
528 ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
529 ctx->switch_stack_size++;
530 return;
531 }
532
533 ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
534 ctx->break_type;
535 ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
536
537 ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
538 ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
539 ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
540 ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
541 ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
542 ctx->switch_stack_size++;
543
544 mask->switch_mask = LLVMConstNull(mask->int_vec_type);
545 ctx->switch_val = switchval;
546 ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
547 ctx->switch_in_default = false;
548 ctx->switch_pc = 0;
549
550 lp_exec_mask_update(mask);
551 }
552
553 static void lp_exec_endswitch(struct lp_exec_mask *mask,
554 struct lp_build_tgsi_context * bld_base)
555 {
556 LLVMBuilderRef builder = mask->bld->gallivm->builder;
557 struct function_ctx *ctx = func_ctx(mask);
558
559 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
560 ctx->switch_stack_size--;
561 return;
562 }
563
564 /* Check if there's a deferred default; if so, do it now. */
565 if (ctx->switch_pc && !ctx->switch_in_default) {
566 LLVMValueRef prevmask, defaultmask;
567 unsigned tmp_pc;
568 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
569 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
570 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
571 ctx->switch_in_default = true;
572
573 lp_exec_mask_update(mask);
574
575 assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
576 TGSI_OPCODE_DEFAULT);
577
578 tmp_pc = bld_base->pc;
579 bld_base->pc = ctx->switch_pc;
580 /*
581 * re-purpose switch_pc to point to here again, since we stop execution of
582 * the deferred default after the next break.
583 */
584 ctx->switch_pc = tmp_pc - 1;
585
586 return;
587 }
588
589 else if (ctx->switch_pc && ctx->switch_in_default) {
590 assert(bld_base->pc == ctx->switch_pc + 1);
591 }
592
593 ctx->switch_stack_size--;
594 mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
595 ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
596 ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
597 ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
598 ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
599
600 ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
601
602 lp_exec_mask_update(mask);
603 }
604
605 static void lp_exec_case(struct lp_exec_mask *mask,
606 LLVMValueRef caseval)
607 {
608 LLVMBuilderRef builder = mask->bld->gallivm->builder;
609 struct function_ctx *ctx = func_ctx(mask);
610
611 LLVMValueRef casemask, prevmask;
612
613 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
614 return;
615 }
616
617 /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
618 if (!ctx->switch_in_default) {
619 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
620 casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
621 ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
622 ctx->switch_mask_default, "sw_default_mask");
623 casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
624 mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
625
626 lp_exec_mask_update(mask);
627 }
628 }
629
630 /*
631 * Analyse the default statement in a switch.
632 * \return true if default is the last statement, false otherwise
633 * \param default_pc_start contains the pc of the instruction to jump to
634 * if default wasn't last but there's no
635 * fallthrough into default.
636 */
637 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
638 struct lp_build_tgsi_context * bld_base,
639 int *default_pc_start)
640 {
641 unsigned pc = bld_base->pc;
642 struct function_ctx *ctx = func_ctx(mask);
643 int curr_switch_stack = ctx->switch_stack_size;
644
645 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
646 return false;
647 }
648
649 /* skip over case statements which are together with default */
650 while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
651 pc++;
652 }
653
654 while (pc != ~0u && pc < bld_base->num_instructions) {
655 enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
656 switch (opcode) {
657 case TGSI_OPCODE_CASE:
658 if (curr_switch_stack == ctx->switch_stack_size) {
659 *default_pc_start = pc - 1;
660 return false;
661 }
662 break;
663 case TGSI_OPCODE_SWITCH:
664 curr_switch_stack++;
665 break;
666 case TGSI_OPCODE_ENDSWITCH:
667 if (curr_switch_stack == ctx->switch_stack_size) {
668 *default_pc_start = pc - 1;
669 return true;
670 }
671 curr_switch_stack--;
672 break;
673 default:
674 ; /* nothing */
675 }
676 pc++;
677 }
678 /* should never arrive here */
679 assert(0);
680 return true;
681 }
682
683 static void lp_exec_default(struct lp_exec_mask *mask,
684 struct lp_build_tgsi_context * bld_base)
685 {
686 LLVMBuilderRef builder = mask->bld->gallivm->builder;
687 struct function_ctx *ctx = func_ctx(mask);
688
689 int default_exec_pc;
690 boolean default_is_last;
691
692 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
693 return;
694 }
695
696 /*
697 * This is a messy opcode, because it may not always be at the end and
698 * there can be fallthrough in and out of it.
699 */
700
701 default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
702 /*
703 * If it is the last statement in the switch (note that case statements
704 * appearing "at the same time" as default don't change that), everything is
705 * fine: update the switch mask and go on. This means we can handle default with
706 * fallthrough INTO it without overhead, if it is last.
707 */
708 if (default_is_last) {
709 LLVMValueRef prevmask, defaultmask;
710 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
711 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
712 defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
713 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
714 ctx->switch_in_default = true;
715
716 lp_exec_mask_update(mask);
717 }
718 else {
719 /*
720 * Technically, "case" immediately before default isn't really a
721 * fallthrough, however we still have to count it as such since we
722 * have already updated the masks.
723 * If that happens in practice, we could add a switch optimizer pass
724 * which just gets rid of all case statements appearing together with
725 * default (or could do switch analysis at switch start time instead).
726 */
727 enum tgsi_opcode opcode =
728 bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
729 boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
730 opcode != TGSI_OPCODE_SWITCH);
731 /*
732 * If it is not last statement and there was no fallthrough into it,
733 * we record the PC and continue execution at next case (again, those
734 * case encountered at the same time don't count). At endswitch
735 * time, we update switchmask, and go back executing the code we skipped
736 * until the next break (possibly re-executing some code with changed mask
737 * if there was a fallthrough out of default).
738 * Finally, if it is not last statement and there was a fallthrough into it,
739 * do the same as with the former case, except instead of skipping the code
740 * just execute it without updating the mask, then go back and re-execute.
741 */
742 ctx->switch_pc = bld_base->pc;
743 if (!ft_into) {
744 bld_base->pc = default_exec_pc;
745 }
746 }
747 }
748
749
750 /* Stores val into the address pointed to by dst_ptr.
751 * mask->exec_mask is used to figure out which elements of val
752 * should be stored into the address
753 * (0 means don't store this element, ~0 means do store).
754 */
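/* Conceptually, per element:
 *
 *    *dst_ptr = select(exec_mask, val, *dst_ptr);
 *
 * i.e. a read-modify-write where masked-off elements keep whatever
 * was in memory.
 */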
755 static void lp_exec_mask_store(struct lp_exec_mask *mask,
756 struct lp_build_context *bld_store,
757 LLVMValueRef val,
758 LLVMValueRef dst_ptr)
759 {
760 LLVMBuilderRef builder = mask->bld->gallivm->builder;
761 LLVMValueRef exec_mask = mask->has_mask ? mask->exec_mask : NULL;
762
763 assert(lp_check_value(bld_store->type, val));
764 assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
765 assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val) ||
766 LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(dst_ptr))) == LLVMArrayTypeKind);
767
768 if (exec_mask) {
769 LLVMValueRef res, dst;
770
771 dst = LLVMBuildLoad(builder, dst_ptr, "");
772 res = lp_build_select(bld_store, exec_mask, val, dst);
773 LLVMBuildStore(builder, res, dst_ptr);
774 } else
775 LLVMBuildStore(builder, val, dst_ptr);
776 }
777
778 static void lp_exec_mask_call(struct lp_exec_mask *mask,
779 int func,
780 int *pc)
781 {
782 if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
783 return;
784 }
785
786 lp_exec_mask_function_init(mask, mask->function_stack_size);
787 mask->function_stack[mask->function_stack_size].pc = *pc;
788 mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
789 mask->function_stack_size++;
790 *pc = func;
791 }
792
793 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
794 {
795 LLVMBuilderRef builder = mask->bld->gallivm->builder;
796 struct function_ctx *ctx = func_ctx(mask);
797 LLVMValueRef exec_mask;
798
799 if (ctx->cond_stack_size == 0 &&
800 ctx->loop_stack_size == 0 &&
801 ctx->switch_stack_size == 0 &&
802 mask->function_stack_size == 1) {
803 /* returning from main() */
804 *pc = -1;
805 return;
806 }
807
808 if (mask->function_stack_size == 1) {
809 /*
810 * This requires special handling since we need to ensure
811 * we don't drop the mask even if we have no call stack
812 * (e.g. after a ret in a if clause after the endif)
813 */
814 mask->ret_in_main = TRUE;
815 }
816
817 exec_mask = LLVMBuildNot(builder,
818 mask->exec_mask,
819 "ret");
820
821 mask->ret_mask = LLVMBuildAnd(builder,
822 mask->ret_mask,
823 exec_mask, "ret_full");
824
825 lp_exec_mask_update(mask);
826 }
827
828 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
829 {
830 }
831
832 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
833 {
834 struct function_ctx *ctx;
835
836 assert(mask->function_stack_size > 1);
837 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
838
839 ctx = func_ctx(mask);
840 mask->function_stack_size--;
841
842 *pc = ctx->pc;
843 mask->ret_mask = ctx->ret_mask;
844
845 lp_exec_mask_update(mask);
846 }
847
848
849 static LLVMValueRef
850 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
851 unsigned file,
852 int index,
853 unsigned chan)
854 {
855 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
856 LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
857 LLVMValueRef var_of_array;
858
859 switch (file) {
860 case TGSI_FILE_TEMPORARY:
861 array_of_vars = bld->temps;
862 var_of_array = bld->temps_array;
863 break;
864 case TGSI_FILE_OUTPUT:
865 array_of_vars = bld->outputs;
866 var_of_array = bld->outputs_array;
867 break;
868 default:
869 assert(0);
870 return NULL;
871 }
872
873 assert(chan < 4);
874
875 if (bld->indirect_files & (1 << file)) {
876 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
877 if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
878 LLVMValueRef gep[2];
879 gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
880 gep[1] = lindex;
881 return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
882 } else {
883 return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
884 }
885 }
886 else {
887 assert(index <= bld->bld_base.info->file_max[file]);
888 return array_of_vars[index][chan];
889 }
890 }
891
892
893 /**
894 * Return pointer to a temporary register channel (src or dest).
895 * Note that indirect addressing cannot be handled here.
896 * \param index which temporary register
897 * \param chan which channel of the temp register.
898 */
899 LLVMValueRef
900 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
901 unsigned index,
902 unsigned chan)
903 {
904 return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
905 }
906
907 /**
908 * Return pointer to an output register channel (src or dest).
909 * Note that indirect addressing cannot be handled here.
910 * \param index which output register
911 * \param chan which channel of the output register.
912 */
913 LLVMValueRef
914 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
915 unsigned index,
916 unsigned chan)
917 {
918 return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
919 }
920
921 /*
922 * If we have indirect addressing in outputs, copy our alloca array
923 * to the output slots specified by the caller, to make sure
924 * our outputs are delivered consistently via the same interface.
925 */
926 static void
927 gather_outputs(struct lp_build_tgsi_soa_context * bld)
928 {
929 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
930 unsigned index, chan;
931 assert(bld->bld_base.info->num_outputs <=
932 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
933 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
934 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
935 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
936 }
937 }
938 }
939 }
940
941 /**
942 * Gather vector.
943 * XXX the lp_build_gather() function should be capable of doing this
944 * with a little work.
945 */
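/*
 * Conceptually, for each element i:
 *
 *    res[i] = base_ptr[indexes[i]];
 *
 * built as a per-element extract/GEP/load/insert sequence. Where
 * overflow_mask is set, index 0 is fetched instead and the result is
 * zeroed afterwards. With indexes2 non-NULL, the two index vectors are
 * interleaved into a double-length result for 64-bit fetches.
 */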
946 static LLVMValueRef
947 build_gather(struct lp_build_tgsi_context *bld_base,
948 LLVMValueRef base_ptr,
949 LLVMValueRef indexes,
950 LLVMValueRef overflow_mask,
951 LLVMValueRef indexes2)
952 {
953 struct gallivm_state *gallivm = bld_base->base.gallivm;
954 LLVMBuilderRef builder = gallivm->builder;
955 struct lp_build_context *uint_bld = &bld_base->uint_bld;
956 struct lp_build_context *bld = &bld_base->base;
957 LLVMValueRef res;
958 unsigned i;
959
960 if (indexes2)
961 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
962 else
963 res = bld->undef;
964 /*
965 * overflow_mask is a vector telling us which channels
966 * in the vector overflowed. We use the overflow behavior for
967 * constant buffers which is defined as:
968 * Out of bounds access to constant buffer returns 0 in all
969 * components. Out of bounds behavior is always with respect
970 * to the size of the buffer bound at that slot.
971 */
972
973 if (overflow_mask) {
974 /*
975 * We avoid per-element control flow here (also due to llvm going crazy,
976 * though I suspect it's better anyway since overflow is likely rare).
977 * Note that since we still fetch from buffers even if num_elements was
978 * zero (in this case we'll fetch from index zero) the jit func callers
979 * MUST provide valid fake constant buffers of size 4x32 (the values do
980 * not matter), otherwise we'd still need (not per element though)
981 * control flow.
982 */
983 indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
984 if (indexes2)
985 indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
986 }
987
988 /*
989 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
990 */
991 for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
992 LLVMValueRef si, di;
993 LLVMValueRef index;
994 LLVMValueRef scalar_ptr, scalar;
995
996 di = lp_build_const_int32(bld->gallivm, i);
997 if (indexes2)
998 si = lp_build_const_int32(bld->gallivm, i >> 1);
999 else
1000 si = di;
1001
1002 if (indexes2 && (i & 1)) {
1003 index = LLVMBuildExtractElement(builder,
1004 indexes2, si, "");
1005 } else {
1006 index = LLVMBuildExtractElement(builder,
1007 indexes, si, "");
1008 }
1009 scalar_ptr = LLVMBuildGEP(builder, base_ptr,
1010 &index, 1, "gather_ptr");
1011 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
1012
1013 res = LLVMBuildInsertElement(builder, res, scalar, di, "");
1014 }
1015
1016 if (overflow_mask) {
1017 if (indexes2) {
1018 res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
1019 overflow_mask = LLVMBuildSExt(builder, overflow_mask,
1020 bld_base->dbl_bld.int_vec_type, "");
1021 res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
1022 bld_base->dbl_bld.zero, res);
1023 } else
1024 res = lp_build_select(bld, overflow_mask, bld->zero, res);
1025 }
1026
1027 return res;
1028 }
1029
1030
1031 /**
1032 * Scatter/store vector.
1033 */
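/*
 * Conceptually, for each element i:
 *
 *    if (!pred || pred[i])
 *       base_ptr[indexes[i]] = values[i];
 *
 * The predicated case is a scalar load + select + store per element,
 * as there is no masked scatter primitive to lean on here.
 */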
1034 static void
1035 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
1036 LLVMValueRef base_ptr,
1037 LLVMValueRef indexes,
1038 LLVMValueRef values,
1039 struct lp_exec_mask *mask)
1040 {
1041 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1042 LLVMBuilderRef builder = gallivm->builder;
1043 unsigned i;
1044 LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
1045
1046 /*
1047 * Loop over elements of index_vec, store scalar value.
1048 */
1049 for (i = 0; i < bld->bld_base.base.type.length; i++) {
1050 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1051 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
1052 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
1053 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
1054 LLVMValueRef scalar_pred = pred ?
1055 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
1056
1057 if (0)
1058 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
1059 ii, val, index, scalar_ptr);
1060
1061 if (scalar_pred) {
1062 LLVMValueRef real_val, dst_val;
1063 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
1064 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
1065 LLVMBuildStore(builder, real_val, scalar_ptr);
1066 }
1067 else {
1068 LLVMBuildStore(builder, val, scalar_ptr);
1069 }
1070 }
1071 }
1072
1073
1074 /**
1075 * Read the current value of the ADDR register, convert the floats to
1076 * ints, add the base index and return the vector of offsets.
1077 * The offsets will be used to index into the constant buffer or
1078 * temporary register file.
1079 */
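/* Conceptually: index[i] = min(reg_index + rel[i], index_limit), except
 * that the clamp is skipped for constant buffers, whose out-of-bounds
 * behavior is handled in the fetch path itself (see the D3D10 note in
 * the function body).
 */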
1080 static LLVMValueRef
1081 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
1082 unsigned reg_file, unsigned reg_index,
1083 const struct tgsi_ind_register *indirect_reg,
1084 int index_limit)
1085 {
1086 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1087 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
1088 /* use the component selected by the indirect register's swizzle */
1089 unsigned swizzle = indirect_reg->Swizzle;
1090 LLVMValueRef base;
1091 LLVMValueRef rel;
1092 LLVMValueRef max_index;
1093 LLVMValueRef index;
1094
1095 assert(bld->indirect_files & (1 << reg_file));
1096
1097 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
1098
1099 assert(swizzle < 4);
1100 switch (indirect_reg->File) {
1101 case TGSI_FILE_ADDRESS:
1102 rel = LLVMBuildLoad(builder,
1103 bld->addr[indirect_reg->Index][swizzle],
1104 "load addr reg");
1105 /* ADDR LLVM values already have LLVM integer type. */
1106 break;
1107 case TGSI_FILE_TEMPORARY:
1108 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
1109 rel = LLVMBuildLoad(builder, rel, "load temp reg");
1110 /* TEMP LLVM values always have LLVM float type, but for indirection, the
1111 * value actually stored is expected to be an integer */
1112 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
1113 break;
1114 default:
1115 assert(0);
1116 rel = uint_bld->zero;
1117 }
1118
1119 index = lp_build_add(uint_bld, base, rel);
1120
1121 /*
1122 * emit_fetch_constant handles constant buffer overflow so this code
1123 * is pointless for constant buffers.
1124 * Furthermore the D3D10 spec in section 6.5 says:
1125 * If the constant buffer bound to a slot is larger than the size
1126 * declared in the shader for that slot, implementations are allowed
1127 * to return incorrect data (not necessarily 0) for indices that are
1128 * larger than the declared size but smaller than the buffer size.
1129 */
1130 if (reg_file != TGSI_FILE_CONSTANT) {
1131 assert(index_limit >= 0);
1132 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
1133 uint_bld->type, index_limit);
1134
1135 assert(!uint_bld->type.sign);
1136 index = lp_build_min(uint_bld, index, max_index);
1137 }
1138
1139 return index;
1140 }
1141
1142 static struct lp_build_context *
1143 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
1144 enum tgsi_opcode_type stype)
1145 {
1146 struct lp_build_context *bld_fetch;
1147
1148 switch (stype) {
1149 case TGSI_TYPE_FLOAT:
1150 case TGSI_TYPE_UNTYPED:
1151 bld_fetch = &bld_base->base;
1152 break;
1153 case TGSI_TYPE_UNSIGNED:
1154 bld_fetch = &bld_base->uint_bld;
1155 break;
1156 case TGSI_TYPE_SIGNED:
1157 bld_fetch = &bld_base->int_bld;
1158 break;
1159 case TGSI_TYPE_DOUBLE:
1160 bld_fetch = &bld_base->dbl_bld;
1161 break;
1162 case TGSI_TYPE_UNSIGNED64:
1163 bld_fetch = &bld_base->uint64_bld;
1164 break;
1165 case TGSI_TYPE_SIGNED64:
1166 bld_fetch = &bld_base->int64_bld;
1167 break;
1168 case TGSI_TYPE_VOID:
1169 default:
1170 assert(0);
1171 bld_fetch = NULL;
1172 break;
1173 }
1174 return bld_fetch;
1175 }
1176
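/*
 * Registers here are laid out in SoA order, with each channel being a
 * full vector: the float-element offset of channel 'chan' of register
 * 'reg' for lane 'i' is
 *
 *    (reg * 4 + chan) * type.length + i
 *
 * e.g. with 8-wide vectors, TEMP[2].y for lane 3 sits at
 * (2*4 + 1) * 8 + 3 = 75. The helper below builds that index vector,
 * optionally including the per-lane offsets {0, 1, 2, ...}.
 */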
1177 static LLVMValueRef
1178 get_soa_array_offsets(struct lp_build_context *uint_bld,
1179 LLVMValueRef indirect_index,
1180 unsigned chan_index,
1181 boolean need_perelement_offset)
1182 {
1183 struct gallivm_state *gallivm = uint_bld->gallivm;
1184 LLVMValueRef chan_vec =
1185 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
1186 LLVMValueRef length_vec =
1187 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
1188 LLVMValueRef index_vec;
1189
1190 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1191 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1192 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1193 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1194
1195 if (need_perelement_offset) {
1196 LLVMValueRef pixel_offsets;
1197 unsigned i;
1198 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1199 pixel_offsets = uint_bld->undef;
1200 for (i = 0; i < uint_bld->type.length; i++) {
1201 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1202 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
1203 ii, ii, "");
1204 }
1205 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1206 }
1207 return index_vec;
1208 }
1209
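/*
 * Constant fetches come in two flavors: the direct case loads a single
 * scalar from the constant buffer and broadcasts it to all lanes; the
 * indirect case computes a per-lane index vector (indirect_index * 4 +
 * swizzle), compares it against the buffer size to build an overflow
 * mask, and gathers per-lane values, returning 0 for out-of-bounds
 * lanes as D3D10 requires.
 */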
1210 static LLVMValueRef
1211 emit_fetch_constant(
1212 struct lp_build_tgsi_context * bld_base,
1213 const struct tgsi_full_src_register * reg,
1214 enum tgsi_opcode_type stype,
1215 unsigned swizzle_in)
1216 {
1217 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1218 struct gallivm_state *gallivm = bld_base->base.gallivm;
1219 LLVMBuilderRef builder = gallivm->builder;
1220 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1221 unsigned dimension = 0;
1222 LLVMValueRef consts_ptr;
1223 LLVMValueRef num_consts;
1224 LLVMValueRef res;
1225 unsigned swizzle = swizzle_in & 0xffff;
1226
1227 /* XXX: Handle fetching xyzw components as a vector */
1228 assert(swizzle != ~0u);
1229
1230 if (reg->Register.Dimension) {
1231 assert(!reg->Dimension.Indirect);
1232 dimension = reg->Dimension.Index;
1233 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
1234 }
1235
1236 consts_ptr = bld->consts[dimension];
1237 num_consts = bld->consts_sizes[dimension];
1238
1239 if (reg->Register.Indirect) {
1240 LLVMValueRef indirect_index;
1241 LLVMValueRef swizzle_vec =
1242 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
1243 LLVMValueRef index_vec; /* index into the const buffer */
1244 LLVMValueRef overflow_mask;
1245 LLVMValueRef index_vec2 = NULL;
1246
1247 indirect_index = get_indirect_index(bld,
1248 reg->Register.File,
1249 reg->Register.Index,
1250 &reg->Indirect,
1251 bld->bld_base.info->file_max[reg->Register.File]);
1252
1253 /* All fetches are from the same constant buffer, so
1254 * we need to propagate the size to a vector to do a
1255 * vector comparison */
1256 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
1257 /* Construct a boolean vector telling us which channels
1258 * overflow the bound constant buffer */
1259 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
1260 indirect_index, num_consts);
1261
1262 /* index_vec = indirect_index * 4 + swizzle */
1263 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1264 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1265
1266 if (tgsi_type_is_64bit(stype)) {
1267 LLVMValueRef swizzle_vec2;
1268 swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
1269 index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
1270 index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
1271 }
1272 /* Gather values from the constant buffer */
1273 res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
1274 }
1275 else {
1276 LLVMValueRef index; /* index into the const buffer */
1277 LLVMValueRef scalar, scalar_ptr;
1278 struct lp_build_context *bld_broad = &bld_base->base;
1279 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1280
1281 scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
1282 &index, 1, "");
1283
1284 if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
1285
1286 LLVMValueRef scalar2, scalar2_ptr;
1287 LLVMValueRef shuffles[2];
1288 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));
1289
1290 scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
1291 &index, 1, "");
1292
1293 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
1294 scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
1295 shuffles[0] = lp_build_const_int32(gallivm, 0);
1296 shuffles[1] = lp_build_const_int32(gallivm, 1);
1297
1298 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
1299 res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
1300 res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
1301 } else {
1302 if (stype == TGSI_TYPE_DOUBLE) {
1303 LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
1304 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
1305 bld_broad = &bld_base->dbl_bld;
1306 } else if (stype == TGSI_TYPE_UNSIGNED64) {
1307 LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
1308 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
1309 bld_broad = &bld_base->uint64_bld;
1310 } else if (stype == TGSI_TYPE_SIGNED64) {
1311 LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
1312 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
1313 bld_broad = &bld_base->int64_bld;
1314 }
1315 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
1316 res = lp_build_broadcast_scalar(bld_broad, scalar);
1317 }
1318
1319 }
1320
1321 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
1322 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1323 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1324 }
1325
1326 return res;
1327 }
1328
1329 /**
1330 * Fetch 64-bit values from two separate channels.
1331 * 64-bit values are stored split across two channels, like xy and zw.
1332 * This function creates a set of vec_length*2 floats,
1333 * extracts the values from the two channels,
1334 * puts them in the correct place, then casts to vec_length 64-bits.
1335 */
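/* E.g. with vec_length 4 the shuffle mask below is <0, 4, 1, 5, 2, 6, 3, 7>,
 * interleaving the elements of input and input2 so that each consecutive
 * pair of floats forms one 64-bit value after the bitcast.
 */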
1336 static LLVMValueRef
1337 emit_fetch_64bit(
1338 struct lp_build_tgsi_context * bld_base,
1339 enum tgsi_opcode_type stype,
1340 LLVMValueRef input,
1341 LLVMValueRef input2)
1342 {
1343 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1344 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1345 LLVMBuilderRef builder = gallivm->builder;
1346 LLVMValueRef res;
1347 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1348 int i;
1349 LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
1350 int len = bld_base->base.type.length * 2;
1351 assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
1352
1353 for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
1354 shuffles[i] = lp_build_const_int32(gallivm, i / 2);
1355 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
1356 }
1357 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
1358
1359 return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1360 }
1361
1362 static LLVMValueRef
1363 emit_fetch_immediate(
1364 struct lp_build_tgsi_context * bld_base,
1365 const struct tgsi_full_src_register * reg,
1366 enum tgsi_opcode_type stype,
1367 unsigned swizzle_in)
1368 {
1369 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1370 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1371 LLVMBuilderRef builder = gallivm->builder;
1372 LLVMValueRef res = NULL;
1373 unsigned swizzle = swizzle_in & 0xffff;
1374
1375 if (bld->use_immediates_array || reg->Register.Indirect) {
1376 LLVMValueRef imms_array;
1377 LLVMTypeRef fptr_type;
1378
1379 /* cast imms_array pointer to float* */
1380 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1381 imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
1382
1383 if (reg->Register.Indirect) {
1384 LLVMValueRef indirect_index;
1385 LLVMValueRef index_vec; /* index into the immediate register array */
1386 LLVMValueRef index_vec2 = NULL;
1387 indirect_index = get_indirect_index(bld,
1388 reg->Register.File,
1389 reg->Register.Index,
1390 &reg->Indirect,
1391 bld->bld_base.info->file_max[reg->Register.File]);
1392 /*
1393 * Unlike for other reg classes, adding pixel offsets is unnecessary -
1394 * immediates are stored as full vectors (FIXME??? - might be better
1395 * to store them the same as constants) but all elements are the same
1396 * in any case.
1397 */
1398 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1399 indirect_index,
1400 swizzle,
1401 FALSE);
1402 if (tgsi_type_is_64bit(stype))
1403 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1404 indirect_index,
1405 swizzle_in >> 16,
1406 FALSE);
1407 /* Gather values from the immediate register array */
1408 res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
1409 } else {
1410 LLVMValueRef gep[2];
1411 gep[0] = lp_build_const_int32(gallivm, 0);
1412 gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1413 LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
1414 bld->imms_array, gep, 2, "");
1415 res = LLVMBuildLoad(builder, imms_ptr, "");
1416
1417 if (tgsi_type_is_64bit(stype)) {
1418 LLVMValueRef imms_ptr2;
1419 LLVMValueRef res2;
1420 gep[1] = lp_build_const_int32(gallivm,
1421 reg->Register.Index * 4 + (swizzle_in >> 16));
1422 imms_ptr2 = LLVMBuildGEP(builder,
1423 bld->imms_array, gep, 2, "");
1424 res2 = LLVMBuildLoad(builder, imms_ptr2, "");
1425 res = emit_fetch_64bit(bld_base, stype, res, res2);
1426 }
1427 }
1428 }
1429 else {
1430 res = bld->immediates[reg->Register.Index][swizzle];
1431 if (tgsi_type_is_64bit(stype))
1432 res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
1433 }
1434
1435 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1436 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1437 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1438 }
1439 return res;
1440 }
1441
1442 static LLVMValueRef
1443 emit_fetch_input(
1444 struct lp_build_tgsi_context * bld_base,
1445 const struct tgsi_full_src_register * reg,
1446 enum tgsi_opcode_type stype,
1447 unsigned swizzle_in)
1448 {
1449 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1450 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1451 LLVMBuilderRef builder = gallivm->builder;
1452 LLVMValueRef res;
1453 unsigned swizzle = swizzle_in & 0xffff;
1454
1455 if (reg->Register.Indirect) {
1456 LLVMValueRef indirect_index;
1457 LLVMValueRef index_vec; /* index into the input reg array */
1458 LLVMValueRef index_vec2 = NULL;
1459 LLVMValueRef inputs_array;
1460 LLVMTypeRef fptr_type;
1461
1462 indirect_index = get_indirect_index(bld,
1463 reg->Register.File,
1464 reg->Register.Index,
1465 &reg->Indirect,
1466 bld->bld_base.info->file_max[reg->Register.File]);
1467
1468 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1469 indirect_index,
1470 swizzle,
1471 TRUE);
1472 if (tgsi_type_is_64bit(stype)) {
1473 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1474 indirect_index,
1475 swizzle_in >> 16,
1476 TRUE);
1477 }
1478 /* cast inputs_array pointer to float* */
1479 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1480 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1481
1482 /* Gather values from the input register array */
1483 res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
1484 } else {
1485 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1486 LLVMValueRef lindex = lp_build_const_int32(gallivm,
1487 reg->Register.Index * 4 + swizzle);
1488 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1489 bld->inputs_array, &lindex, 1, "");
1490
1491 res = LLVMBuildLoad(builder, input_ptr, "");
1492 if (tgsi_type_is_64bit(stype)) {
1493 LLVMValueRef lindex1;
1494 LLVMValueRef input_ptr2;
1495 LLVMValueRef res2;
1496
1497 lindex1 = lp_build_const_int32(gallivm,
1498 reg->Register.Index * 4 + (swizzle_in >> 16));
1499 input_ptr2 = LLVMBuildGEP(builder,
1500 bld->inputs_array, &lindex1, 1, "");
1501 res2 = LLVMBuildLoad(builder, input_ptr2, "");
1502 res = emit_fetch_64bit(bld_base, stype, res, res2);
1503 }
1504 }
1505 else {
1506 res = bld->inputs[reg->Register.Index][swizzle];
1507 if (tgsi_type_is_64bit(stype))
1508 res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
1509 }
1510 }
1511
1512 assert(res);
1513
1514 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1515 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1516 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1517 }
1518
1519 return res;
1520 }
1521
1522
1523 static LLVMValueRef
1524 emit_fetch_gs_input(
1525 struct lp_build_tgsi_context * bld_base,
1526 const struct tgsi_full_src_register * reg,
1527 enum tgsi_opcode_type stype,
1528 unsigned swizzle_in)
1529 {
1530 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1531 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1532 const struct tgsi_shader_info *info = bld->bld_base.info;
1533 LLVMBuilderRef builder = gallivm->builder;
1534 LLVMValueRef attrib_index = NULL;
1535 LLVMValueRef vertex_index = NULL;
1536 unsigned swizzle = swizzle_in & 0xffff;
1537 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1538 LLVMValueRef res;
1539
1540 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1541 /* This is really a system value, not a regular input */
1542 assert(!reg->Register.Indirect);
1543 assert(!reg->Dimension.Indirect);
1544 res = bld->system_values.prim_id;
1545 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1546 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1547 }
1548 return res;
1549 }
1550
1551 if (reg->Register.Indirect) {
1552 /*
1553 * XXX: this is possibly not quite the right value, since file_max may be
1554 * larger than the max attrib index, due to it being the max of declared
1555 * inputs AND the max vertices per prim (which is 6 for tri adj).
1556 * It should however be safe to use (since we always allocate
1557 * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1558 */
1559 int index_limit = info->file_max[reg->Register.File];
1560 attrib_index = get_indirect_index(bld,
1561 reg->Register.File,
1562 reg->Register.Index,
1563 &reg->Indirect,
1564 index_limit);
1565 } else {
1566 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1567 }
1568
1569 if (reg->Dimension.Indirect) {
1570 /*
1571 * A fixed 6 should do as well (which is what we allocate).
1572 */
1573 int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
1574 vertex_index = get_indirect_index(bld,
1575 reg->Register.File,
1576 reg->Dimension.Index,
1577 &reg->DimIndirect,
1578 index_limit);
1579 } else {
1580 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1581 }
1582
1583 res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1584 reg->Dimension.Indirect,
1585 vertex_index,
1586 reg->Register.Indirect,
1587 attrib_index,
1588 swizzle_index);
1589
1590 assert(res);
1591 if (tgsi_type_is_64bit(stype)) {
1592 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1593 LLVMValueRef res2;
1594 res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1595 reg->Dimension.Indirect,
1596 vertex_index,
1597 reg->Register.Indirect,
1598 attrib_index,
1599 swizzle_index);
1600 assert(res2);
1601 res = emit_fetch_64bit(bld_base, stype, res, res2);
1602 } else if (stype == TGSI_TYPE_UNSIGNED) {
1603 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1604 } else if (stype == TGSI_TYPE_SIGNED) {
1605 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1606 }
1607
1608 return res;
1609 }
1610
1611 static LLVMValueRef
1612 emit_fetch_temporary(
1613 struct lp_build_tgsi_context * bld_base,
1614 const struct tgsi_full_src_register * reg,
1615 enum tgsi_opcode_type stype,
1616 unsigned swizzle_in)
1617 {
1618 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1619 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1620 LLVMBuilderRef builder = gallivm->builder;
1621 LLVMValueRef res;
1622 unsigned swizzle = swizzle_in & 0xffff;
1623
1624 if (reg->Register.Indirect) {
1625 LLVMValueRef indirect_index;
1626 LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */
1627 LLVMValueRef temps_array;
1628 LLVMTypeRef fptr_type;
1629
1630 indirect_index = get_indirect_index(bld,
1631 reg->Register.File,
1632 reg->Register.Index,
1633 &reg->Indirect,
1634 bld->bld_base.info->file_max[reg->Register.File]);
1635
1636 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1637 indirect_index,
1638 swizzle,
1639 TRUE);
1640 if (tgsi_type_is_64bit(stype)) {
1641 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1642 indirect_index,
1643 swizzle_in >> 16,
1644 TRUE);
1645 }
1646
1647 /* cast temps_array pointer to float* */
1648 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1649 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1650
1651 /* Gather values from the temporary register array */
1652 res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
1653 }
1654 else {
1655 LLVMValueRef temp_ptr;
1656 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1657 res = LLVMBuildLoad(builder, temp_ptr, "");
1658
1659 if (tgsi_type_is_64bit(stype)) {
1660 LLVMValueRef temp_ptr2, res2;
1661
1662 temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
1663 res2 = LLVMBuildLoad(builder, temp_ptr2, "");
1664 res = emit_fetch_64bit(bld_base, stype, res, res2);
1665 }
1666 }
1667
1668 if (stype == TGSI_TYPE_SIGNED ||
1669 stype == TGSI_TYPE_UNSIGNED ||
1670 stype == TGSI_TYPE_DOUBLE ||
1671 stype == TGSI_TYPE_SIGNED64 ||
1672 stype == TGSI_TYPE_UNSIGNED64) {
1673 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1674 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1675 }
1676
1677 return res;
1678 }
1679
1680 static LLVMValueRef
1681 emit_fetch_system_value(
1682 struct lp_build_tgsi_context * bld_base,
1683 const struct tgsi_full_src_register * reg,
1684 enum tgsi_opcode_type stype,
1685 unsigned swizzle_in)
1686 {
1687 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1688 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1689 const struct tgsi_shader_info *info = bld->bld_base.info;
1690 LLVMBuilderRef builder = gallivm->builder;
1691 LLVMValueRef res;
1692 enum tgsi_opcode_type atype; // Actual type of the value
1693
1694 assert(!reg->Register.Indirect);
1695
1696 switch (info->system_value_semantic_name[reg->Register.Index]) {
1697 case TGSI_SEMANTIC_INSTANCEID:
1698 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1699 atype = TGSI_TYPE_UNSIGNED;
1700 break;
1701
1702 case TGSI_SEMANTIC_VERTEXID:
1703 res = bld->system_values.vertex_id;
1704 atype = TGSI_TYPE_UNSIGNED;
1705 break;
1706
1707 case TGSI_SEMANTIC_VERTEXID_NOBASE:
1708 res = bld->system_values.vertex_id_nobase;
1709 atype = TGSI_TYPE_UNSIGNED;
1710 break;
1711
1712 case TGSI_SEMANTIC_BASEVERTEX:
1713 res = bld->system_values.basevertex;
1714 atype = TGSI_TYPE_UNSIGNED;
1715 break;
1716
1717 case TGSI_SEMANTIC_PRIMID:
1718 res = bld->system_values.prim_id;
1719 atype = TGSI_TYPE_UNSIGNED;
1720 break;
1721
1722 case TGSI_SEMANTIC_INVOCATIONID:
1723 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1724 atype = TGSI_TYPE_UNSIGNED;
1725 break;
1726
1727 default:
1728 assert(!"unexpected semantic in emit_fetch_system_value");
1729 res = bld_base->base.zero;
1730 atype = TGSI_TYPE_FLOAT;
1731 break;
1732 }
1733
1734 if (atype != stype) {
1735 if (stype == TGSI_TYPE_FLOAT) {
1736 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1737 } else if (stype == TGSI_TYPE_UNSIGNED) {
1738 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1739 } else if (stype == TGSI_TYPE_SIGNED) {
1740 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1741 }
1742 }
1743
1744 return res;
1745 }
1746
1747 /**
1748 * Register fetch with derivatives.
1749 */
1750 static void
1751 emit_fetch_deriv(
1752 struct lp_build_tgsi_soa_context *bld,
1753 LLVMValueRef src,
1754 LLVMValueRef *res,
1755 LLVMValueRef *ddx,
1756 LLVMValueRef *ddy)
1757 {
1758 if (res)
1759 *res = src;
1760
1761 /* TODO: use interpolation coeffs for inputs */
1762
1763 if (ddx)
1764 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1765
1766 if (ddy)
1767 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1768 }
1769
1770 /**
1771 * Store an array of vec_length 64-bit values into two arrays of
1772 * vec_length floats, i.e.
1773 * value is d0, d1, d2, d3 etc.
1774 * each 64-bit value has low and high 32-bit halves x, y
1775 * which get stored into the separate channels as:
1776 * chan_ptr = d0.x, d1.x, d2.x, d3.x
1777 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1778 */
1779 static void
1780 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1781 LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1782 LLVMValueRef value)
1783 {
1784 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1785 struct gallivm_state *gallivm = bld_base->base.gallivm;
1786 LLVMBuilderRef builder = gallivm->builder;
1787 struct lp_build_context *float_bld = &bld_base->base;
1788 unsigned i;
1789 LLVMValueRef temp, temp2;
1790 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1791 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1792
1793 for (i = 0; i < bld_base->base.type.length; i++) {
1794 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1795 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1796 }
1797
1798 temp = LLVMBuildShuffleVector(builder, value,
1799 LLVMGetUndef(LLVMTypeOf(value)),
1800 LLVMConstVector(shuffles,
1801 bld_base->base.type.length),
1802 "");
1803 temp2 = LLVMBuildShuffleVector(builder, value,
1804 LLVMGetUndef(LLVMTypeOf(value)),
1805 LLVMConstVector(shuffles2,
1806 bld_base->base.type.length),
1807 "");
1808
1809 lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1810 lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1811 }
1812
1813 /**
1814 * Register store.
1815 */
1816 static void
1817 emit_store_chan(
1818 struct lp_build_tgsi_context *bld_base,
1819 const struct tgsi_full_instruction *inst,
1820 unsigned index,
1821 unsigned chan_index,
1822 LLVMValueRef value)
1823 {
1824 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1825 struct gallivm_state *gallivm = bld_base->base.gallivm;
1826 LLVMBuilderRef builder = gallivm->builder;
1827 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1828 struct lp_build_context *float_bld = &bld_base->base;
1829 struct lp_build_context *int_bld = &bld_base->int_bld;
1830 LLVMValueRef indirect_index = NULL;
1831 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1832
1833 /*
1834 * Apply saturation.
1835 *
1836 * The value is always assumed to be a float.
1837 */
1838 if (inst->Instruction.Saturate) {
1839 assert(dtype == TGSI_TYPE_FLOAT ||
1840 dtype == TGSI_TYPE_UNTYPED);
1841 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1842 value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1843 }
1844
1845 if (reg->Register.Indirect) {
1846 /*
1847 * Currently mesa/st doesn't generate indirect stores
1848 * to 64-bit values; it uses MOV for indirect stores instead.
1849 */
1850 assert(!tgsi_type_is_64bit(dtype));
1851 indirect_index = get_indirect_index(bld,
1852 reg->Register.File,
1853 reg->Register.Index,
1854 &reg->Indirect,
1855 bld->bld_base.info->file_max[reg->Register.File]);
1856 } else {
1857 assert(reg->Register.Index <=
1858 bld_base->info->file_max[reg->Register.File]);
1859 }
1860
1861 if (DEBUG_EXECUTION) {
1862 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1863 }
1864
1865 switch( reg->Register.File ) {
1866 case TGSI_FILE_OUTPUT:
1867 /* Outputs are always stored as floats */
1868 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1869
1870 if (reg->Register.Indirect) {
1871 LLVMValueRef index_vec; /* indexes into the output registers */
1872 LLVMValueRef outputs_array;
1873 LLVMTypeRef fptr_type;
1874
1875 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1876 indirect_index,
1877 chan_index,
1878 TRUE);
1879
1880 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1881 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1882
1883 /* Scatter store values into output registers */
1884 emit_mask_scatter(bld, outputs_array, index_vec, value,
1885 &bld->exec_mask);
1886 }
1887 else {
1888 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1889 chan_index);
1890
1891 if (tgsi_type_is_64bit(dtype)) {
1892 LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1893 chan_index + 1);
1894 emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1895 value);
1896 } else
1897 lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1898 }
1899 break;
1900
1901 case TGSI_FILE_TEMPORARY:
1902 /* Temporaries are always stored as floats */
1903 if (!tgsi_type_is_64bit(dtype))
1904 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1905 else
1906 value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1907
1908 if (reg->Register.Indirect) {
1909 LLVMValueRef index_vec; /* indexes into the temp registers */
1910 LLVMValueRef temps_array;
1911 LLVMTypeRef fptr_type;
1912
1913 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1914 indirect_index,
1915 chan_index,
1916 TRUE);
1917
1918 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1919 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1920
1921 /* Scatter store values into temp registers */
1922 emit_mask_scatter(bld, temps_array, index_vec, value,
1923 &bld->exec_mask);
1924 }
1925 else {
1926 LLVMValueRef temp_ptr;
1927 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1928
1929 if (tgsi_type_is_64bit(dtype)) {
1930 LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1931 reg->Register.Index,
1932 chan_index + 1);
1933 emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1934 value);
1935 }
1936 else
1937 lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1938 }
1939 break;
1940
1941 case TGSI_FILE_ADDRESS:
1942 assert(dtype == TGSI_TYPE_SIGNED);
1943 assert(LLVMTypeOf(value) == int_bld->vec_type);
1944 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1945 lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1946 bld->addr[reg->Register.Index][chan_index]);
1947 break;
1948
1949 default:
1950 assert( 0 );
1951 }
1952
1953 (void)dtype;
1954 }
1955
1956 /*
1957 * Called at the beginning of the translation of each TGSI instruction, to
1958 * emit some debug code.
1959 */
1960 static void
1961 emit_debug(
1962 struct lp_build_tgsi_context * bld_base,
1963 const struct tgsi_full_instruction * inst,
1964 const struct tgsi_opcode_info * info)
1965
1966 {
1967 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1968
1969 if (DEBUG_EXECUTION) {
1970 /*
1971 * Dump the TGSI instruction.
1972 */
1973
1974 struct gallivm_state *gallivm = bld_base->base.gallivm;
1975 char buf[512];
1976 buf[0] = '$';
1977 buf[1] = ' ';
1978 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1979 lp_build_printf(gallivm, buf);
1980
1981 /* Dump the execution mask.
1982 */
1983 if (bld->exec_mask.has_mask) {
1984 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
1985 }
1986 }
1987 }
1988
1989 static void
1990 emit_store(
1991 struct lp_build_tgsi_context * bld_base,
1992 const struct tgsi_full_instruction * inst,
1993 const struct tgsi_opcode_info * info,
1994 unsigned index,
1995 LLVMValueRef dst[4])
1996
1997 {
1998 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1999
2000 unsigned writemask = inst->Dst[index].Register.WriteMask;
2001 while (writemask) {
2002 unsigned chan_index = u_bit_scan(&writemask);
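/* 64-bit values occupy a pair of channels; y and w are written together
 * with x and z respectively, so skip them here.
 */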
2003 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
2004 continue;
2005 emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
2006 }
2007 }
2008
2009 static unsigned
2010 tgsi_to_pipe_tex_target(unsigned tgsi_target)
2011 {
2012 switch (tgsi_target) {
2013 case TGSI_TEXTURE_BUFFER:
2014 return PIPE_BUFFER;
2015 case TGSI_TEXTURE_1D:
2016 case TGSI_TEXTURE_SHADOW1D:
2017 return PIPE_TEXTURE_1D;
2018 case TGSI_TEXTURE_2D:
2019 case TGSI_TEXTURE_SHADOW2D:
2020 case TGSI_TEXTURE_2D_MSAA:
2021 return PIPE_TEXTURE_2D;
2022 case TGSI_TEXTURE_3D:
2023 return PIPE_TEXTURE_3D;
2024 case TGSI_TEXTURE_CUBE:
2025 case TGSI_TEXTURE_SHADOWCUBE:
2026 return PIPE_TEXTURE_CUBE;
2027 case TGSI_TEXTURE_RECT:
2028 case TGSI_TEXTURE_SHADOWRECT:
2029 return PIPE_TEXTURE_RECT;
2030 case TGSI_TEXTURE_1D_ARRAY:
2031 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2032 return PIPE_TEXTURE_1D_ARRAY;
2033 case TGSI_TEXTURE_2D_ARRAY:
2034 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2035 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2036 return PIPE_TEXTURE_2D_ARRAY;
2037 case TGSI_TEXTURE_CUBE_ARRAY:
2038 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2039 return PIPE_TEXTURE_CUBE_ARRAY;
2040 default:
2041 assert(0);
2042 return PIPE_BUFFER;
2043 }
2044 }
2045
2046
2047 static enum lp_sampler_lod_property
2048 lp_build_lod_property(
2049 struct lp_build_tgsi_context *bld_base,
2050 const struct tgsi_full_instruction *inst,
2051 unsigned src_op)
2052 {
2053 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2054 enum lp_sampler_lod_property lod_property;
2055
2056 /*
2057 * Not much we can do here. We could try catching inputs declared
2058 * with constant interpolation, but it's probably not worth it since for
2059 * TEX opcodes as well as FETCH/LD the lod comes from the same reg as
2060 * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO, just
2061 * like the constant/immediate recognition below.
2062 * It would be more valuable to recognize temps holding broadcasted
2063 * scalars, but there's no way we can do that.
2064 * We tried asking llvm (using LLVMIsConstant, even though that isn't
2065 * exactly what we'd need) without any success; even something as simple as
2066 * IMM[0] UINT32 (0,-1,0,0)
2067 * MOV TEMP[0] IMM[0].yyyy
2068 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2069 * doesn't work.
2070 * This means there's ZERO chance this will ever catch a scalar lod
2071 * with traditional tex opcodes as well as texel fetches, since the lod
2072 * comes from the same reg as the coords (except for some test shaders
2073 * using constant coords, maybe).
2074 * There's at least hope for the sample opcodes and size queries.
2075 */
2076 if (reg->Register.File == TGSI_FILE_CONSTANT ||
2077 reg->Register.File == TGSI_FILE_IMMEDIATE) {
2078 lod_property = LP_SAMPLER_LOD_SCALAR;
2079 }
2080 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2081 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2082 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2083 }
2084 else {
2085 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2086 }
2087 }
2088 else {
2089 /* never use scalar (per-quad) lod; the results are just too wrong. */
2090 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2091 }
2092 return lod_property;
2093 }
2094
2095
2096 /**
2097 * High-level instruction translators.
2098 */
2099
2100 static void
2101 emit_tex( struct lp_build_tgsi_soa_context *bld,
2102 const struct tgsi_full_instruction *inst,
2103 enum lp_build_tex_modifier modifier,
2104 LLVMValueRef *texel,
2105 unsigned sampler_reg,
2106 enum lp_sampler_op_type sampler_op)
2107 {
2108 unsigned unit = inst->Src[sampler_reg].Register.Index;
2109 LLVMValueRef oow = NULL;
2110 LLVMValueRef lod = NULL;
2111 LLVMValueRef coords[5];
2112 LLVMValueRef offsets[3] = { NULL };
2113 struct lp_derivatives derivs;
2114 struct lp_sampler_params params;
2115 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2116 unsigned num_derivs, num_offsets, i;
2117 unsigned shadow_coord = 0;
2118 unsigned layer_coord = 0;
2119 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2120
2121 memset(&params, 0, sizeof(params));
2122
2123 if (!bld->sampler) {
2124 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2125 for (i = 0; i < 4; i++) {
2126 texel[i] = bld->bld_base.base.undef;
2127 }
2128 return;
2129 }
2130
2131 switch (inst->Texture.Texture) {
2132 case TGSI_TEXTURE_1D_ARRAY:
2133 layer_coord = 1;
2134 /* fallthrough */
2135 case TGSI_TEXTURE_1D:
2136 num_offsets = 1;
2137 num_derivs = 1;
2138 break;
2139 case TGSI_TEXTURE_2D_ARRAY:
2140 layer_coord = 2;
2141 /* fallthrough */
2142 case TGSI_TEXTURE_2D:
2143 case TGSI_TEXTURE_RECT:
2144 num_offsets = 2;
2145 num_derivs = 2;
2146 break;
2147 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2148 layer_coord = 1;
2149 /* fallthrough */
2150 case TGSI_TEXTURE_SHADOW1D:
2151 shadow_coord = 2;
2152 num_offsets = 1;
2153 num_derivs = 1;
2154 break;
2155 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2156 layer_coord = 2;
2157 shadow_coord = 3;
2158 num_offsets = 2;
2159 num_derivs = 2;
2160 break;
2161 case TGSI_TEXTURE_SHADOW2D:
2162 case TGSI_TEXTURE_SHADOWRECT:
2163 shadow_coord = 2;
2164 num_offsets = 2;
2165 num_derivs = 2;
2166 break;
2167 case TGSI_TEXTURE_CUBE:
2168 num_offsets = 2;
2169 num_derivs = 3;
2170 break;
2171 case TGSI_TEXTURE_3D:
2172 num_offsets = 3;
2173 num_derivs = 3;
2174 break;
2175 case TGSI_TEXTURE_SHADOWCUBE:
2176 shadow_coord = 3;
2177 num_offsets = 2;
2178 num_derivs = 3;
2179 break;
2180 case TGSI_TEXTURE_CUBE_ARRAY:
2181 num_offsets = 2;
2182 num_derivs = 3;
2183 layer_coord = 3;
2184 break;
2185 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2186 num_offsets = 2;
2187 num_derivs = 3;
2188 layer_coord = 3;
2189 shadow_coord = 4; /* shadow coord comes from a separate reg */
2190 break;
2191 case TGSI_TEXTURE_2D_MSAA:
2192 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2193 default:
2194 assert(0);
2195 return;
2196 }
2197
2198 /* Note lod and especially projected are illegal in a LOT of cases */
2199 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2200 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2201 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2202 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2203 /* note that shadow cube array with bias/explicit lod does not exist */
2204 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2205 }
2206 else {
2207 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2208 }
2209 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2210 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2211 }
2212 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2213 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2214 }
2215 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2216 }
2217
2218 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2219 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2220 oow = lp_build_rcp(&bld->bld_base.base, oow);
2221 }
2222
2223 for (i = 0; i < num_derivs; i++) {
2224 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2225 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2226 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2227 }
2228 for (i = num_derivs; i < 5; i++) {
2229 coords[i] = bld->bld_base.base.undef;
2230 }
2231
2232 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2233 if (layer_coord) {
2234 if (layer_coord == 3) {
2235 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2236 }
2237 else {
2238 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2239 }
2240 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2241 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2242 }
2243 /* Shadow coord always occupies the 5th slot. */
2244 if (shadow_coord) {
2245 sample_key |= LP_SAMPLER_SHADOW;
2246 if (shadow_coord == 4) {
2247 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2248 }
2249 else {
2250 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2251 }
2252 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2253 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2254 }
2255
2256 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2257 unsigned dim;
2258 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2259 for (dim = 0; dim < num_derivs; ++dim) {
2260 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2261 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2262 }
2263 params.derivs = &derivs;
2264 /*
2265 * We could also check whether all src regs are constant, but I
2266 * doubt such cases exist in practice.
2267 */
2268 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2269 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2270 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2271 }
2272 else {
2273 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2274 }
2275 }
2276 else {
2277 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2278 }
2279 }
2280 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2281
2282 /* we don't handle the 4 offset version of tg4 */
2283 if (inst->Texture.NumOffsets == 1) {
2284 unsigned dim;
2285 sample_key |= LP_SAMPLER_OFFSETS;
2286 for (dim = 0; dim < num_offsets; dim++) {
2287 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2288 }
2289 }
2290
2291 params.type = bld->bld_base.base.type;
2292 params.sample_key = sample_key;
2293 params.texture_index = unit;
2294 params.sampler_index = unit;
2295 params.context_ptr = bld->context_ptr;
2296 params.thread_data_ptr = bld->thread_data_ptr;
2297 params.coords = coords;
2298 params.offsets = offsets;
2299 params.lod = lod;
2300 params.texel = texel;
2301
2302 bld->sampler->emit_tex_sample(bld->sampler,
2303 bld->bld_base.base.gallivm,
2304 &params);
2305 }
2306
2307 static void
2308 emit_sample(struct lp_build_tgsi_soa_context *bld,
2309 const struct tgsi_full_instruction *inst,
2310 enum lp_build_tex_modifier modifier,
2311 boolean compare,
2312 enum lp_sampler_op_type sample_type,
2313 LLVMValueRef *texel)
2314 {
2315 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2316 unsigned texture_unit, sampler_unit;
2317 LLVMValueRef lod = NULL;
2318 LLVMValueRef coords[5];
2319 LLVMValueRef offsets[3] = { NULL };
2320 struct lp_derivatives derivs;
2321 struct lp_sampler_params params;
2322 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2323
2324 unsigned num_offsets, num_derivs, i;
2325 unsigned layer_coord = 0;
2326 unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2327
2328 memset(&params, 0, sizeof(params));
2329
2330 if (!bld->sampler) {
2331 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2332 for (i = 0; i < 4; i++) {
2333 texel[i] = bld->bld_base.base.undef;
2334 }
2335 return;
2336 }
2337
2338 /*
2339 * Unlike old-style tex opcodes, the texture/sampler indices
2340 * always come from src1 and src2 respectively.
2341 */
2342 texture_unit = inst->Src[1].Register.Index;
2343 sampler_unit = inst->Src[2].Register.Index;
2344
2345 /*
2346 * Note that inst->Texture.Texture will contain the number of offsets;
2347 * however, the target information is NOT there and comes from the
2348 * declared sampler views instead.
2349 */
2350 switch (bld->sv[texture_unit].Resource) {
2351 case TGSI_TEXTURE_1D:
2352 num_offsets = 1;
2353 num_derivs = 1;
2354 break;
2355 case TGSI_TEXTURE_1D_ARRAY:
2356 layer_coord = 1;
2357 num_offsets = 1;
2358 num_derivs = 1;
2359 break;
2360 case TGSI_TEXTURE_2D:
2361 case TGSI_TEXTURE_RECT:
2362 num_offsets = 2;
2363 num_derivs = 2;
2364 break;
2365 case TGSI_TEXTURE_2D_ARRAY:
2366 layer_coord = 2;
2367 num_offsets = 2;
2368 num_derivs = 2;
2369 break;
2370 case TGSI_TEXTURE_CUBE:
2371 num_offsets = 2;
2372 num_derivs = 3;
2373 break;
2374 case TGSI_TEXTURE_3D:
2375 num_offsets = 3;
2376 num_derivs = 3;
2377 break;
2378 case TGSI_TEXTURE_CUBE_ARRAY:
2379 layer_coord = 3;
2380 num_offsets = 2;
2381 num_derivs = 3;
2382 break;
2383 default:
2384 assert(0);
2385 return;
2386 }
2387
2388 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2389 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2390 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2391 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2392 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2393 }
2394 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2395 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2396 }
2397 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2398 }
2399 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2400 /* XXX might be better to explicitly pass the level zero information */
2401 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2402 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2403 }
2404
2405 for (i = 0; i < num_derivs; i++) {
2406 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2407 }
2408 for (i = num_derivs; i < 5; i++) {
2409 coords[i] = bld->bld_base.base.undef;
2410 }
2411
2412 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2413 if (layer_coord) {
2414 if (layer_coord == 3)
2415 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2416 else
2417 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2418 }
2419 /* Shadow coord always occupies the 5th slot. */
2420 if (compare) {
2421 sample_key |= LP_SAMPLER_SHADOW;
2422 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2423 }
2424
2425 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2426 unsigned dim;
2427 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2428 for (dim = 0; dim < num_derivs; ++dim) {
2429 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2430 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2431 }
2432 params.derivs = &derivs;
2433 /*
2434 * We could also check whether all src regs are constant, but I
2435 * doubt such cases exist in practice.
2436 */
2437 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2438 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2439 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2440 }
2441 else {
2442 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2443 }
2444 }
2445 else {
2446 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2447 }
2448 }
2449
2450 /* some advanced gather instructions (txgo) would require 4 offsets */
2451 if (inst->Texture.NumOffsets == 1) {
2452 unsigned dim;
2453 sample_key |= LP_SAMPLER_OFFSETS;
2454 for (dim = 0; dim < num_offsets; dim++) {
2455 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2456 }
2457 }
2458 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2459
2460 params.type = bld->bld_base.base.type;
2461 params.sample_key = sample_key;
2462 params.texture_index = texture_unit;
2463 params.sampler_index = sampler_unit;
2464 params.context_ptr = bld->context_ptr;
2465 params.thread_data_ptr = bld->thread_data_ptr;
2466 params.coords = coords;
2467 params.offsets = offsets;
2468 params.lod = lod;
2469 params.texel = texel;
2470
2471 bld->sampler->emit_tex_sample(bld->sampler,
2472 bld->bld_base.base.gallivm,
2473 &params);
2474
2475 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2476 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2477 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2478 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2479 unsigned char swizzles[4];
2480 swizzles[0] = inst->Src[1].Register.SwizzleX;
2481 swizzles[1] = inst->Src[1].Register.SwizzleY;
2482 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2483 swizzles[3] = inst->Src[1].Register.SwizzleW;
2484
2485 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2486 }
2487 }
2488
2489 static void
2490 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2491 const struct tgsi_full_instruction *inst,
2492 LLVMValueRef *texel,
2493 boolean is_samplei)
2494 {
2495 unsigned unit, target;
2496 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2497 LLVMValueRef explicit_lod = NULL;
2498 LLVMValueRef coords[5];
2499 LLVMValueRef offsets[3] = { NULL };
2500 struct lp_sampler_params params;
2501 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2502 unsigned dims, i;
2503 unsigned layer_coord = 0;
2504 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2505
2506 memset(&params, 0, sizeof(params));
2507
2508 if (!bld->sampler) {
2509 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2510 for (i = 0; i < 4; i++) {
2511 texel[i] = coord_undef;
2512 }
2513 return;
2514 }
2515
2516 unit = inst->Src[1].Register.Index;
2517
2518 if (is_samplei) {
2519 target = bld->sv[unit].Resource;
2520 }
2521 else {
2522 target = inst->Texture.Texture;
2523 }
2524
2525 switch (target) {
2526 case TGSI_TEXTURE_1D:
2527 case TGSI_TEXTURE_BUFFER:
2528 dims = 1;
2529 break;
2530 case TGSI_TEXTURE_1D_ARRAY:
2531 layer_coord = 1;
2532 dims = 1;
2533 break;
2534 case TGSI_TEXTURE_2D:
2535 case TGSI_TEXTURE_RECT:
2536 case TGSI_TEXTURE_2D_MSAA:
2537 dims = 2;
2538 break;
2539 case TGSI_TEXTURE_2D_ARRAY:
2540 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2541 layer_coord = 2;
2542 dims = 2;
2543 break;
2544 case TGSI_TEXTURE_3D:
2545 dims = 3;
2546 break;
2547 default:
2548 assert(0);
2549 return;
2550 }
2551
2552 /* we always have an explicit lod, except for buffers and msaa targets */
2553 if (target != TGSI_TEXTURE_BUFFER &&
2554 target != TGSI_TEXTURE_2D_MSAA &&
2555 target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
2556 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2557 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2558 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2559 }
2560 /*
2561 * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2562 * would be the sample index.
2563 */
2564
2565 for (i = 0; i < dims; i++) {
2566 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2567 }
2568 /* we never use more than 3 coords here, but emit_fetch_texel copies all 5 anyway */
2569 for (i = dims; i < 5; i++) {
2570 coords[i] = coord_undef;
2571 }
2572 if (layer_coord)
2573 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2574
2575 if (inst->Texture.NumOffsets == 1) {
2576 unsigned dim;
2577 sample_key |= LP_SAMPLER_OFFSETS;
2578 for (dim = 0; dim < dims; dim++) {
2579 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2580 }
2581 }
2582 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2583
2584 params.type = bld->bld_base.base.type;
2585 params.sample_key = sample_key;
2586 params.texture_index = unit;
2587 /*
2588 * The sampler is not actually used; set it to 0 so it won't exceed
2589 * PIPE_MAX_SAMPLERS and trigger assertions with d3d10, where the
2590 * sampler view number can exceed this limit.
2591 */
2592 params.sampler_index = 0;
2593 params.context_ptr = bld->context_ptr;
2594 params.thread_data_ptr = bld->thread_data_ptr;
2595 params.coords = coords;
2596 params.offsets = offsets;
2597 params.derivs = NULL;
2598 params.lod = explicit_lod;
2599 params.texel = texel;
2600
2601 bld->sampler->emit_tex_sample(bld->sampler,
2602 bld->bld_base.base.gallivm,
2603 &params);
2604
2605 if (is_samplei &&
2606 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2607 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2608 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2609 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2610 unsigned char swizzles[4];
2611 swizzles[0] = inst->Src[1].Register.SwizzleX;
2612 swizzles[1] = inst->Src[1].Register.SwizzleY;
2613 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2614 swizzles[3] = inst->Src[1].Register.SwizzleW;
2615
2616 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2617 }
2618 }
2619
2620 static void
2621 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2622 const struct tgsi_full_instruction *inst,
2623 LLVMValueRef *sizes_out,
2624 boolean is_sviewinfo)
2625 {
2626 LLVMValueRef explicit_lod;
2627 enum lp_sampler_lod_property lod_property;
2628 unsigned has_lod;
2629 unsigned i;
2630 unsigned unit = inst->Src[1].Register.Index;
2631 unsigned target, pipe_target;
2632 struct lp_sampler_size_query_params params;
2633
2634 if (is_sviewinfo) {
2635 target = bld->sv[unit].Resource;
2636 }
2637 else {
2638 target = inst->Texture.Texture;
2639 }
2640 switch (target) {
2641 case TGSI_TEXTURE_BUFFER:
2642 case TGSI_TEXTURE_RECT:
2643 case TGSI_TEXTURE_SHADOWRECT:
2644 has_lod = 0;
2645 break;
2646 default:
2647 has_lod = 1;
2648 break;
2649 }
2650
2651 if (!bld->sampler) {
2652 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2653 for (i = 0; i < 4; i++)
2654 sizes_out[i] = bld->bld_base.int_bld.undef;
2655 return;
2656 }
2657
2658 if (has_lod) {
2659 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2660 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2661 }
2662 else {
2663 explicit_lod = NULL;
2664 lod_property = LP_SAMPLER_LOD_SCALAR;
2665 }
2666
2667
2668 pipe_target = tgsi_to_pipe_tex_target(target);
2669
2670 params.int_type = bld->bld_base.int_bld.type;
2671 params.texture_unit = unit;
2672 params.target = pipe_target;
2673 params.context_ptr = bld->context_ptr;
2674 params.is_sviewinfo = TRUE;
2675 params.lod_property = lod_property;
2676 params.explicit_lod = explicit_lod;
2677 params.sizes_out = sizes_out;
2678
2679 bld->sampler->emit_size_query(bld->sampler,
2680 bld->bld_base.base.gallivm,
2681 &params);
2682 }
2683
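/*
 * Return TRUE if the shader is at (or near) its end, i.e. none of the
 * next few instructions can change control flow or sample a texture.
 * Callers use this to skip the mask check after a kill.
 */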
2684 static boolean
2685 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2686 int pc)
2687 {
2688 unsigned i;
2689
2690 for (i = 0; i < 5; i++) {
2691 enum tgsi_opcode opcode;
2692
2693 if (pc + i >= bld->bld_base.info->num_instructions)
2694 return TRUE;
2695
2696 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2697
2698 if (opcode == TGSI_OPCODE_END)
2699 return TRUE;
2700
2701 if (opcode == TGSI_OPCODE_TEX ||
2702 opcode == TGSI_OPCODE_TXP ||
2703 opcode == TGSI_OPCODE_TXD ||
2704 opcode == TGSI_OPCODE_TXB ||
2705 opcode == TGSI_OPCODE_TXL ||
2706 opcode == TGSI_OPCODE_TXF ||
2707 opcode == TGSI_OPCODE_TXQ ||
2708 opcode == TGSI_OPCODE_TEX2 ||
2709 opcode == TGSI_OPCODE_TXB2 ||
2710 opcode == TGSI_OPCODE_TXL2 ||
2711 opcode == TGSI_OPCODE_SAMPLE ||
2712 opcode == TGSI_OPCODE_SAMPLE_B ||
2713 opcode == TGSI_OPCODE_SAMPLE_C ||
2714 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2715 opcode == TGSI_OPCODE_SAMPLE_D ||
2716 opcode == TGSI_OPCODE_SAMPLE_I ||
2717 opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2718 opcode == TGSI_OPCODE_SAMPLE_L ||
2719 opcode == TGSI_OPCODE_SVIEWINFO ||
2720 opcode == TGSI_OPCODE_CAL ||
2721 opcode == TGSI_OPCODE_IF ||
2722 opcode == TGSI_OPCODE_UIF ||
2723 opcode == TGSI_OPCODE_BGNLOOP ||
2724 opcode == TGSI_OPCODE_SWITCH)
2725 return FALSE;
2726 }
2727
2728 return TRUE;
2729 }
2730
2731
2732
2733 /**
2734 * Kill fragment if any of the src register values are negative.
2735 */
2736 static void
2737 emit_kill_if(
2738 struct lp_build_tgsi_soa_context *bld,
2739 const struct tgsi_full_instruction *inst,
2740 int pc)
2741 {
2742 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2743 const struct tgsi_full_src_register *reg = &inst->Src[0];
2744 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2745 LLVMValueRef mask;
2746 unsigned chan_index;
2747
2748 memset(&terms, 0, sizeof terms);
2749
2750 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2751 unsigned swizzle;
2752
2753 /* Unswizzle channel */
2754 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2755
2756 /* Check if the component has not already been tested. */
2757 assert(swizzle < TGSI_NUM_CHANNELS);
2758 if( !terms[swizzle] )
2759 /* TODO: change the comparison operator instead of setting the sign */
2760 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2761 }
2762
2763 mask = NULL;
2764 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2765 if(terms[chan_index]) {
2766 LLVMValueRef chan_mask;
2767
2768 /*
2769 * If term < 0 then mask = 0 else mask = ~0.
2770 */
2771 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2772
2773 if(mask)
2774 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2775 else
2776 mask = chan_mask;
2777 }
2778 }
2779
2780 if (bld->exec_mask.has_mask) {
2781 LLVMValueRef invmask;
2782 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2783 mask = LLVMBuildOr(builder, mask, invmask, "");
2784 }
2785
2786 lp_build_mask_update(bld->mask, mask);
2787 if (!near_end_of_shader(bld, pc))
2788 lp_build_mask_check(bld->mask);
2789 }
2790
2791
2792 /**
2793 * Unconditional fragment kill.
2794 * The only predication is the execution mask which will apply if
2795 * we're inside a loop or conditional.
2796 */
2797 static void
2798 emit_kill(struct lp_build_tgsi_soa_context *bld,
2799 int pc)
2800 {
2801 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2802 LLVMValueRef mask;
2803
2804 /* For those channels which are "alive", disable fragment shader
2805 * execution.
2806 */
2807 if (bld->exec_mask.has_mask) {
2808 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2809 }
2810 else {
2811 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2812 mask = zero;
2813 }
2814
2815 lp_build_mask_update(bld->mask, mask);
2816
2817 if (!near_end_of_shader(bld, pc))
2818 lp_build_mask_check(bld->mask);
2819 }
2820
2821
2822 /**
2823 * Emit code which will dump the values of the given register file
2824 * to stdout.
2825 */
2826 static void
2827 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2828 unsigned file)
2829 {
2830 const struct tgsi_shader_info *info = bld->bld_base.info;
2831 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2832 LLVMBuilderRef builder = gallivm->builder;
2833 LLVMValueRef reg_ptr;
2834 int index;
2835 int max_index = info->file_max[file];
2836
2837 /*
2838 * Some register files, particularly constants, can be very large,
2839 * and dumping everything could make this unusably slow.
2840 */
2841 max_index = MIN2(max_index, 32);
2842
2843 for (index = 0; index <= max_index; index++) {
2844 LLVMValueRef res;
2845 unsigned mask;
2846 int chan;
2847
2848 if (index < 8 * sizeof(unsigned) &&
2849 (info->file_mask[file] & (1u << index)) == 0) {
2850 /* This register was not declared. */
2851 continue;
2852 }
2853
2854 if (file == TGSI_FILE_INPUT) {
2855 mask = info->input_usage_mask[index];
2856 } else {
2857 mask = TGSI_WRITEMASK_XYZW;
2858 }
2859
2860 for (chan = 0; chan < 4; chan++) {
2861 if ((mask & (1 << chan)) == 0) {
2862 /* This channel is not used. */
2863 continue;
2864 }
2865
2866 if (file == TGSI_FILE_CONSTANT) {
2867 struct tgsi_full_src_register reg;
2868 memset(&reg, 0, sizeof reg);
2869 reg.Register.File = file;
2870 reg.Register.Index = index;
2871 reg.Register.SwizzleX = 0;
2872 reg.Register.SwizzleY = 1;
2873 reg.Register.SwizzleZ = 2;
2874 reg.Register.SwizzleW = 3;
2875
2876 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2877 if (!res) {
2878 continue;
2879 }
2880 } else if (file == TGSI_FILE_INPUT) {
2881 res = bld->inputs[index][chan];
2882 if (!res) {
2883 continue;
2884 }
2885 } else if (file == TGSI_FILE_TEMPORARY) {
2886 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2887 assert(reg_ptr);
2888 res = LLVMBuildLoad(builder, reg_ptr, "");
2889 } else if (file == TGSI_FILE_OUTPUT) {
2890 reg_ptr = lp_get_output_ptr(bld, index, chan);
2891 assert(reg_ptr);
2892 res = LLVMBuildLoad(builder, reg_ptr, "");
2893 } else {
2894 assert(0);
2895 continue;
2896 }
2897
2898 emit_dump_reg(gallivm, file, index, chan, res);
2899 }
2900 }
2901 }
2902
2903
2904
2905 void
2906 lp_emit_declaration_soa(
2907 struct lp_build_tgsi_context *bld_base,
2908 const struct tgsi_full_declaration *decl)
2909 {
2910 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2911 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2912 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2913 const unsigned first = decl->Range.First;
2914 const unsigned last = decl->Range.Last;
2915 unsigned idx, i;
2916
2917 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2918
2919 switch (decl->Declaration.File) {
2920 case TGSI_FILE_TEMPORARY:
2921 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2922 assert(last < LP_MAX_INLINED_TEMPS);
2923 for (idx = first; idx <= last; ++idx) {
2924 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2925 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2926 }
2927 }
2928 break;
2929
2930 case TGSI_FILE_OUTPUT:
2931 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2932 for (idx = first; idx <= last; ++idx) {
2933 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2934 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2935 vec_type, "output");
2936 }
2937 }
2938 break;
2939
2940 case TGSI_FILE_ADDRESS:
2941 /* ADDR registers are only allocated with an integer LLVM IR type,
2942 * as they are guaranteed to always hold integers.
2943 * XXX: Not sure if this exception is worthwhile (or the whole idea of
2944 * an ADDR register for that matter).
2945 */
2946 assert(last < LP_MAX_TGSI_ADDRS);
2947 for (idx = first; idx <= last; ++idx) {
2948 assert(idx < LP_MAX_TGSI_ADDRS);
2949 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2950 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2951 }
2952 break;
2953
2954 case TGSI_FILE_SAMPLER_VIEW:
2955 /*
2956 * The target stored here MUST match whatever is actually
2957 * in the currently set sampler views (what about the return type?).
2958 */
2959 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2960 for (idx = first; idx <= last; ++idx) {
2961 bld->sv[idx] = decl->SamplerView;
2962 }
2963 break;
2964
2965 case TGSI_FILE_CONSTANT:
2966 {
2967 /*
2968 * We could trivially fetch the per-buffer pointer when fetching the
2969 * constant, relying on llvm to figure out it's always the same pointer
2970 * anyway. However, doing so results in a huge (more than factor of 10)
2971 * slowdown in llvm compilation times for some (but not all) shaders
2972 * (more specifically, the IR optimization spends way more time in
2973 * DominatorTree::dominates). At least with llvm versions 3.1 and 3.3.
2974 */
2975 unsigned idx2D = decl->Dim.Index2D;
2976 LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2977 assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2978 bld->consts[idx2D] =
2979 lp_build_array_get(gallivm, bld->consts_ptr, index2D);
2980 bld->consts_sizes[idx2D] =
2981 lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
2982 }
2983 break;
2984 case TGSI_FILE_BUFFER:
2985 {
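/* Prefetch the per-buffer base pointer and size once at declaration
 * time, the same way it is done for constant buffers above.
 */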
2986 unsigned idx = decl->Range.First;
2987 LLVMValueRef index = lp_build_const_int32(gallivm, idx);
2988 assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
2989 bld->ssbos[idx] =
2990 lp_build_array_get(gallivm, bld->ssbo_ptr, index);
2991 bld->ssbo_sizes[idx] =
2992 lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index);
2993
2994 }
2995 break;
2996 default:
2997 /* don't need to declare other vars */
2998 break;
2999 }
3000 }
3001
3002
3003 void lp_emit_immediate_soa(
3004 struct lp_build_tgsi_context *bld_base,
3005 const struct tgsi_full_immediate *imm)
3006 {
3007 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3008 struct gallivm_state * gallivm = bld_base->base.gallivm;
3009 LLVMValueRef imms[4];
3010 unsigned i;
3011 const uint size = imm->Immediate.NrTokens - 1;
3012 assert(size <= 4);
3013 switch (imm->Immediate.DataType) {
3014 case TGSI_IMM_FLOAT32:
3015 for( i = 0; i < size; ++i )
3016 imms[i] =
3017 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
3018
3019 break;
3020 case TGSI_IMM_FLOAT64:
3021 case TGSI_IMM_UINT64:
3022 case TGSI_IMM_INT64:
3023 case TGSI_IMM_UINT32:
3024 for( i = 0; i < size; ++i ) {
3025 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
3026 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3027 }
3028
3029 break;
3030 case TGSI_IMM_INT32:
3031 for( i = 0; i < size; ++i ) {
3032 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3033 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3034 }
3035
3036 break;
3037 }
3038 for( i = size; i < 4; ++i )
3039 imms[i] = bld_base->base.undef;
3040
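/* When immediates can be accessed indirectly they must live in a real
 * array in memory (imms_array) rather than as plain SSA values.
 */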
3041 if (bld->use_immediates_array) {
3042 unsigned index = bld->num_immediates;
3043 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3044 LLVMBuilderRef builder = gallivm->builder;
3045 LLVMValueRef gep[2];
3046 gep[0] = lp_build_const_int32(gallivm, 0);
3047
3048 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3049 for (i = 0; i < 4; ++i ) {
3050 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3051 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3052 bld->imms_array, gep, 2, "");
3053 LLVMBuildStore(builder, imms[i], imm_ptr);
3054 }
3055 } else {
3056 /* simply copy the immediate values into the next immediates[] slot */
3057 unsigned i;
3058 assert(imm->Immediate.NrTokens - 1 <= 4);
3059 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3060
3061 for(i = 0; i < 4; ++i )
3062 bld->immediates[bld->num_immediates][i] = imms[i];
3063
3064 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3065 unsigned index = bld->num_immediates;
3066 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3067 LLVMBuilderRef builder = gallivm->builder;
3068 LLVMValueRef gep[2];
3069 gep[0] = lp_build_const_int32(gallivm, 0);
3070 for (i = 0; i < 4; ++i ) {
3071 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3072 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3073 bld->imms_array, gep, 2, "");
3074 LLVMBuildStore(builder,
3075 bld->immediates[index][i],
3076 imm_ptr);
3077 }
3078 }
3079 }
3080
3081 bld->num_immediates++;
3082 }
3083
3084 static void
3085 ddx_emit(
3086 const struct lp_build_tgsi_action * action,
3087 struct lp_build_tgsi_context * bld_base,
3088 struct lp_build_emit_data * emit_data)
3089 {
3090 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3091
3092 emit_fetch_deriv(bld, emit_data->args[0], NULL,
3093 &emit_data->output[emit_data->chan], NULL);
3094 }
3095
3096 static void
3097 ddy_emit(
3098 const struct lp_build_tgsi_action * action,
3099 struct lp_build_tgsi_context * bld_base,
3100 struct lp_build_emit_data * emit_data)
3101 {
3102 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3103
3104 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3105 &emit_data->output[emit_data->chan]);
3106 }
3107
3108 static void
3109 kill_emit(
3110 const struct lp_build_tgsi_action * action,
3111 struct lp_build_tgsi_context * bld_base,
3112 struct lp_build_emit_data * emit_data)
3113 {
3114 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3115
3116 emit_kill(bld, bld_base->pc - 1);
3117 }
3118
3119 static void
3120 kill_if_emit(
3121 const struct lp_build_tgsi_action * action,
3122 struct lp_build_tgsi_context * bld_base,
3123 struct lp_build_emit_data * emit_data)
3124 {
3125 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3126
3127 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3128 }
3129
3130 static void
3131 tex_emit(
3132 const struct lp_build_tgsi_action * action,
3133 struct lp_build_tgsi_context * bld_base,
3134 struct lp_build_emit_data * emit_data)
3135 {
3136 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3137
3138 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3139 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3140 }
3141
3142 static void
3143 tex2_emit(
3144 const struct lp_build_tgsi_action * action,
3145 struct lp_build_tgsi_context * bld_base,
3146 struct lp_build_emit_data * emit_data)
3147 {
3148 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3149
3150 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3151 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3152 }
3153
3154 static void
3155 txb_emit(
3156 const struct lp_build_tgsi_action * action,
3157 struct lp_build_tgsi_context * bld_base,
3158 struct lp_build_emit_data * emit_data)
3159 {
3160 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3161
3162 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3163 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3164 }
3165
3166 static void
3167 txb2_emit(
3168 const struct lp_build_tgsi_action * action,
3169 struct lp_build_tgsi_context * bld_base,
3170 struct lp_build_emit_data * emit_data)
3171 {
3172 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3173
3174 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3175 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3176 }
3177
3178 static void
3179 txd_emit(
3180 const struct lp_build_tgsi_action * action,
3181 struct lp_build_tgsi_context * bld_base,
3182 struct lp_build_emit_data * emit_data)
3183 {
3184 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3185
3186 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3187 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3188 }
3189
3190 static void
3191 txl_emit(
3192 const struct lp_build_tgsi_action * action,
3193 struct lp_build_tgsi_context * bld_base,
3194 struct lp_build_emit_data * emit_data)
3195 {
3196 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3197
3198 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3199 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3200 }
3201
3202 static void
3203 txl2_emit(
3204 const struct lp_build_tgsi_action * action,
3205 struct lp_build_tgsi_context * bld_base,
3206 struct lp_build_emit_data * emit_data)
3207 {
3208 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3209
3210 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3211 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3212 }
3213
3214 static void
3215 txp_emit(
3216 const struct lp_build_tgsi_action * action,
3217 struct lp_build_tgsi_context * bld_base,
3218 struct lp_build_emit_data * emit_data)
3219 {
3220 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3221
3222 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3223 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3224 }
3225
3226 static void
3227 tg4_emit(
3228 const struct lp_build_tgsi_action * action,
3229 struct lp_build_tgsi_context * bld_base,
3230 struct lp_build_emit_data * emit_data)
3231 {
3232 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3233
3234 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3235 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3236 }
3237
3238 static void
3239 lodq_emit(
3240 const struct lp_build_tgsi_action * action,
3241 struct lp_build_tgsi_context * bld_base,
3242 struct lp_build_emit_data * emit_data)
3243 {
3244 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3245
3246 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3247 emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3248 }
3249
3250 static void
3251 txq_emit(
3252 const struct lp_build_tgsi_action * action,
3253 struct lp_build_tgsi_context * bld_base,
3254 struct lp_build_emit_data * emit_data)
3255 {
3256 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3257
3258 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3259 }
3260
3261 static void
3262 txf_emit(
3263 const struct lp_build_tgsi_action * action,
3264 struct lp_build_tgsi_context * bld_base,
3265 struct lp_build_emit_data * emit_data)
3266 {
3267 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3268
3269 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3270 }
3271
3272 static void
3273 sample_i_emit(
3274 const struct lp_build_tgsi_action * action,
3275 struct lp_build_tgsi_context * bld_base,
3276 struct lp_build_emit_data * emit_data)
3277 {
3278 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3279
3280 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3281 }
3282
3283 static void
3284 sample_emit(
3285 const struct lp_build_tgsi_action * action,
3286 struct lp_build_tgsi_context * bld_base,
3287 struct lp_build_emit_data * emit_data)
3288 {
3289 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3290
3291 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3292 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3293 }
3294
3295 static void
3296 sample_b_emit(
3297 const struct lp_build_tgsi_action * action,
3298 struct lp_build_tgsi_context * bld_base,
3299 struct lp_build_emit_data * emit_data)
3300 {
3301 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3302
3303 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3304 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3305 }
3306
3307 static void
3308 sample_c_emit(
3309 const struct lp_build_tgsi_action * action,
3310 struct lp_build_tgsi_context * bld_base,
3311 struct lp_build_emit_data * emit_data)
3312 {
3313 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3314
3315 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3316 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3317 }
3318
3319 static void
3320 sample_c_lz_emit(
3321 const struct lp_build_tgsi_action * action,
3322 struct lp_build_tgsi_context * bld_base,
3323 struct lp_build_emit_data * emit_data)
3324 {
3325 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3326
3327 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3328 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3329 }
3330
3331 static void
3332 sample_d_emit(
3333 const struct lp_build_tgsi_action * action,
3334 struct lp_build_tgsi_context * bld_base,
3335 struct lp_build_emit_data * emit_data)
3336 {
3337 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3338
3339 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3340 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3341 }
3342
3343 static void
3344 sample_l_emit(
3345 const struct lp_build_tgsi_action * action,
3346 struct lp_build_tgsi_context * bld_base,
3347 struct lp_build_emit_data * emit_data)
3348 {
3349 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3350
3351 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3352 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3353 }
3354
3355 static void
3356 gather4_emit(
3357 const struct lp_build_tgsi_action * action,
3358 struct lp_build_tgsi_context * bld_base,
3359 struct lp_build_emit_data * emit_data)
3360 {
3361 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3362
3363 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3364 FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
3365 }
3366
3367 static void
3368 sviewinfo_emit(
3369 const struct lp_build_tgsi_action * action,
3370 struct lp_build_tgsi_context * bld_base,
3371 struct lp_build_emit_data * emit_data)
3372 {
3373 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3374
3375 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3376 }
3377
3378 static void
3379 lod_emit(
3380 const struct lp_build_tgsi_action * action,
3381 struct lp_build_tgsi_context * bld_base,
3382 struct lp_build_emit_data * emit_data)
3383 {
3384 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3385
3386 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3387 FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3388 }
3389
3390 static void
3391 load_emit(
3392 const struct lp_build_tgsi_action * action,
3393 struct lp_build_tgsi_context * bld_base,
3394 struct lp_build_emit_data * emit_data)
3395 {
3396 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3397 struct gallivm_state * gallivm = bld_base->base.gallivm;
3398 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3399 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3400 unsigned buf = bufreg->Register.Index;
3401 assert(bufreg->Register.File == TGSI_FILE_BUFFER);
3402 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3403
3404 if (0) {
3405 /* for indirect support with ARB_gpu_shader5 */
3406 } else {
3407 LLVMValueRef index;
3408 LLVMValueRef scalar, scalar_ptr;
3409 unsigned chan_index;
3410
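/* src 1 holds the byte offset; shift it down to a dword index. */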
3411 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3412 index = lp_build_shr_imm(uint_bld, index, 2);
3413
3414 scalar_ptr = bld->ssbos[buf];
3415
3416 LLVMValueRef ssbo_limit;
3417
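/* Buffer size in dwords, broadcast so every lane can be bounds-checked. */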
3418 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3419 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3420
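/* There is no vector gather for buffer loads: for each enabled dest
 * channel, loop over the lanes and load the scalar value if the lane
 * is active and in bounds, otherwise produce 0.
 */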
3421 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3422 LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3423
3424 LLVMValueRef exec_mask = mask_vec(bld_base);
3425 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3426 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3427
3428 LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
3429 struct lp_build_loop_state loop_state;
3430 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3431
3432 struct lp_build_if_state ifthen;
3433 LLVMValueRef cond, temp_res;
3434
3435 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3436 loop_state.counter, "");
3437
3438 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3439 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3440
3441 lp_build_if(&ifthen, gallivm, cond);
3442 scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index);
3443
3444 temp_res = LLVMBuildLoad(builder, result, "");
3445 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3446 LLVMBuildStore(builder, temp_res, result);
3447 lp_build_else(&ifthen);
3448 temp_res = LLVMBuildLoad(builder, result, "");
3449 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3450 LLVMBuildStore(builder, temp_res, result);
3451 lp_build_endif(&ifthen);
3452 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3453 NULL, LLVMIntUGE);
3454 emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, "");
3455 }
3456 }
3457 }
3458
3459 static void
3460 store_emit(
3461 const struct lp_build_tgsi_action * action,
3462 struct lp_build_tgsi_context * bld_base,
3463 struct lp_build_emit_data * emit_data)
3464 {
3465 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3466 struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
3468 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3469 const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
3470 unsigned buf = bufreg->Register.Index;
3471 assert(bufreg->Register.File == TGSI_FILE_BUFFER);
3472
   if (0) {
      /* TODO: indirect buffer indexing (ARB_gpu_shader5) is not
       * implemented yet; only direct buffer indices are handled below. */
   } else {
      LLVMValueRef index; /* dword index into the buffer */
3477 LLVMValueRef scalar_ptr;
3478 LLVMValueRef value;
3479 unsigned chan_index;
3480
3481 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
3482 index = lp_build_shr_imm(uint_bld, index, 2);
3483
3484 scalar_ptr = bld->ssbos[buf];
3485
      /* ssbo_sizes[] is in bytes; convert to a dword count to bound-check
       * the per-lane indices below. */
      LLVMValueRef ssbo_limit =
         LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf],
                       lp_build_const_int32(gallivm, 2), "");
3489 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3490
3491 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3492 LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3493
3494 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);
3495
3496 LLVMValueRef exec_mask = mask_vec(bld_base);
3497 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3498 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3499
3500 struct lp_build_loop_state loop_state;
3501 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3502
3503 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3504 loop_state.counter, "");
3505 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3506
3507 struct lp_build_if_state ifthen;
3508 LLVMValueRef cond;
3509
3510 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3511 loop_state.counter, "");
3512
3513 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3514 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3515 lp_build_if(&ifthen, gallivm, cond);
3516
3517 lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);
3518
3519 lp_build_endif(&ifthen);
3520 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3521 NULL, LLVMIntUGE);
3522 }
3523 }
3524 }
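
/*
 * Illustrative sketch (assumed equivalent, not generated verbatim): per
 * enabled channel, store_emit builds a predicated scalar loop:
 *
 *    for (i = 0; i < lanes; i++) {
 *       unsigned idx = (addr[i] >> 2) + chan;
 *       if (exec[i] && idx < size_in_dwords)
 *          mem[idx] = value[chan][i];
 *    }
 *
 * Unlike load_emit there is no else leg: inactive or out-of-bounds lanes
 * simply leave memory untouched.
 */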
3525
3526 static void
3527 resq_emit(
3528 const struct lp_build_tgsi_action * action,
3529 struct lp_build_tgsi_context * bld_base,
3530 struct lp_build_emit_data * emit_data)
3531 {
3532 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3533 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3534 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3535
3536 unsigned buf = bufreg->Register.Index;
3537 assert(bufreg->Register.File == TGSI_FILE_BUFFER);
3538
   /* RESQ on a buffer resource returns its size in bytes, broadcast to
    * all lanes. */
   LLVMValueRef ssbo_size = bld->ssbo_sizes[buf];

   emit_data->output[emit_data->chan] =
      lp_build_broadcast_scalar(uint_bld, ssbo_size);
3542 }
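
/*
 * Example (illustrative): for a 256-byte buffer bound to slot 0, RESQ
 * yields <256, 256, ...>, which shader code can then divide by the array
 * stride to implement GLSL's .length() query.
 */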
3543
3544 static void
3545 atomic_emit(
3546 const struct lp_build_tgsi_action * action,
3547 struct lp_build_tgsi_context * bld_base,
3548 struct lp_build_emit_data * emit_data)
3549 {
3550 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3551 struct gallivm_state * gallivm = bld_base->base.gallivm;
3552 LLVMBuilderRef builder = gallivm->builder;
3553 struct lp_build_context *uint_bld = &bld_base->uint_bld;
   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
   unsigned buf = bufreg->Register.Index;

   assert(bufreg->Register.File == TGSI_FILE_BUFFER);
3558
3559 LLVMAtomicRMWBinOp op;
3560 switch (emit_data->inst->Instruction.Opcode) {
3561 case TGSI_OPCODE_ATOMUADD:
3562 op = LLVMAtomicRMWBinOpAdd;
3563 break;
3564 case TGSI_OPCODE_ATOMXCHG:
3565 op = LLVMAtomicRMWBinOpXchg;
3566 break;
3567 case TGSI_OPCODE_ATOMAND:
3568 op = LLVMAtomicRMWBinOpAnd;
3569 break;
3570 case TGSI_OPCODE_ATOMOR:
3571 op = LLVMAtomicRMWBinOpOr;
3572 break;
3573 case TGSI_OPCODE_ATOMXOR:
3574 op = LLVMAtomicRMWBinOpXor;
3575 break;
3576 case TGSI_OPCODE_ATOMUMIN:
3577 op = LLVMAtomicRMWBinOpUMin;
3578 break;
3579 case TGSI_OPCODE_ATOMUMAX:
3580 op = LLVMAtomicRMWBinOpUMax;
3581 break;
3582 case TGSI_OPCODE_ATOMIMIN:
3583 op = LLVMAtomicRMWBinOpMin;
3584 break;
   case TGSI_OPCODE_ATOMIMAX:
      op = LLVMAtomicRMWBinOpMax;
      break;
   default:
      /* ATOMCAS is handled separately below and never reads op;
       * anything else reaching here is a bug. */
      assert(emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS);
      op = LLVMAtomicRMWBinOpAdd; /* silence uninitialized-use warnings */
      break;
   }
3589
   if (0) {
      /* TODO: indirect buffer indexing (ARB_gpu_shader5) is not
       * implemented yet; only direct buffer indices are handled below. */
   } else {
      LLVMValueRef index; /* dword index into the buffer */
3593 LLVMValueRef scalar, scalar_ptr;
3594 LLVMValueRef value;
3595
3596 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3597 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);
3598
3599 index = lp_build_shr_imm(uint_bld, index, 2);
3600 index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
3601
3602 scalar_ptr = bld->ssbos[buf];
3603
3604 LLVMValueRef atom_res = lp_build_alloca(gallivm,
3605 uint_bld->vec_type, "");
3606
      /* ssbo_sizes[] is in bytes; convert to a dword count to bound-check
       * the per-lane indices below. */
      LLVMValueRef ssbo_limit =
         LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf],
                       lp_build_const_int32(gallivm, 2), "");
3609 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3610
3611 LLVMValueRef exec_mask = mask_vec(bld_base);
3612 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
3613 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3614
3615 struct lp_build_loop_state loop_state;
3616 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3617
3618 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3619 loop_state.counter, "");
3620 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3621
3622 index = LLVMBuildExtractElement(gallivm->builder, index,
3623 loop_state.counter, "");
3624
3625 scalar_ptr = LLVMBuildGEP(builder, scalar_ptr,
3626 &index, 1, "");
3627
3628 struct lp_build_if_state ifthen;
3629 LLVMValueRef cond, temp_res;
3630
3631 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3632 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3633 lp_build_if(&ifthen, gallivm, cond);
3634
3635 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3636 LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
3637 LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
3638 loop_state.counter, "");
3639 cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
3640 scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
3641 cas_src_ptr,
3642 LLVMAtomicOrderingSequentiallyConsistent,
3643 LLVMAtomicOrderingSequentiallyConsistent,
3644 false);
3645 scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
3646 } else {
3647 scalar = LLVMBuildAtomicRMW(builder, op,
3648 scalar_ptr, value_ptr,
3649 LLVMAtomicOrderingSequentiallyConsistent,
3650 false);
3651 }
3652 temp_res = LLVMBuildLoad(builder, atom_res, "");
3653 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3654 LLVMBuildStore(builder, temp_res, atom_res);
3655 lp_build_else(&ifthen);
3656 temp_res = LLVMBuildLoad(builder, atom_res, "");
3657 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3658 LLVMBuildStore(builder, temp_res, atom_res);
3659 lp_build_endif(&ifthen);
3660
3661 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3662 NULL, LLVMIntUGE);
3663 emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, "");
3664 }
3665 }
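
/*
 * Illustrative sketch (not generated verbatim): the loop above serializes
 * the SIMD vector into one atomic operation per lane, e.g. for ATOMUADD:
 *
 *    for (i = 0; i < lanes; i++) {
 *       if (exec[i] && idx[i] < size_in_dwords)
 *          res[i] = __atomic_fetch_add(&mem[idx[i]], val[i], SEQ_CST);
 *       else
 *          res[i] = 0;
 *    }
 *
 * Lanes run in ascending order, so two lanes hitting the same address
 * observe each other's updates in that order.
 */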
3666
3667 static void
3668 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3669 LLVMValueRef ptr,
3670 LLVMValueRef mask)
3671 {
3672 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3673 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3674
3675 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3676
3677 LLVMBuildStore(builder, current_vec, ptr);
3678 }
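
/*
 * Note: the subtraction above is intentional. An active lane's mask value
 * is ~0 (-1 as a signed integer), so "vec -= mask" increments exactly the
 * active lanes, e.g. <5,5> - <-1,0> = <6,5>.
 */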
3679
3680 static void
3681 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3682 LLVMValueRef ptr,
3683 LLVMValueRef mask)
3684 {
3685 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3686 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3687
3688 current_vec = lp_build_select(&bld_base->uint_bld,
3689 mask,
3690 bld_base->uint_bld.zero,
3691 current_vec);
3692
3693 LLVMBuildStore(builder, current_vec, ptr);
3694 }
3695
3696 static LLVMValueRef
3697 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3698 LLVMValueRef current_mask_vec,
3699 LLVMValueRef total_emitted_vertices_vec)
3700 {
3701 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3702 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3703 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3704 total_emitted_vertices_vec,
3705 bld->max_output_vertices_vec);
3706
3707 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3708 }
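
/*
 * Lanes that have already emitted max_output_vertices vertices are masked
 * off here, so any further EMIT on them is effectively dropped rather
 * than overflowing the output buffers.
 */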
3709
3710 static void
3711 emit_vertex(
3712 const struct lp_build_tgsi_action * action,
3713 struct lp_build_tgsi_context * bld_base,
3714 struct lp_build_emit_data * emit_data)
3715 {
3716 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3717 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3718
3719 if (bld->gs_iface->emit_vertex) {
3720 LLVMValueRef mask = mask_vec(bld_base);
3721 LLVMValueRef total_emitted_vertices_vec =
3722 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3723 mask = clamp_mask_to_max_output_vertices(bld, mask,
3724 total_emitted_vertices_vec);
3725 gather_outputs(bld);
3726 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
3727 bld->outputs,
3728 total_emitted_vertices_vec);
3729 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3730 mask);
3731 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3732 mask);
3733 #if DUMP_GS_EMITS
3734 lp_build_print_value(bld->bld_base.base.gallivm,
3735 " +++ emit vertex masked ones = ",
3736 mask);
3737 lp_build_print_value(bld->bld_base.base.gallivm,
3738 " +++ emit vertex emitted = ",
3739 total_emitted_vertices_vec);
3740 #endif
3741 }
3742 }
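
/*
 * Note on ordering in emit_vertex above: outputs are gathered first, the
 * vertex is handed to the gs interface together with the pre-increment
 * vertex count, and the per-lane counters are bumped last, restricted to
 * the lanes still under the max-output-vertices clamp.
 */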
3743
3744
3745 static void
3746 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
3747 LLVMValueRef mask)
3748 {
3749 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3750 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3751
3752 if (bld->gs_iface->end_primitive) {
3753 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3754 LLVMValueRef emitted_vertices_vec =
3755 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
3756 LLVMValueRef emitted_prims_vec =
3757 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3758
3759 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3760 emitted_vertices_vec,
3761 uint_bld->zero);
      /* We need to combine the current execution mask with the mask
       * telling us which, if any, execution slots actually have
       * unflushed vertices; this way we make sure end_primitive only
       * takes effect on the paths that have unemitted vertices. */
3766 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
3767
3768 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
3769 emitted_vertices_vec,
3770 emitted_prims_vec);
3771
3772 #if DUMP_GS_EMITS
3773 lp_build_print_value(bld->bld_base.base.gallivm,
3774 " +++ end prim masked ones = ",
3775 mask);
3776 lp_build_print_value(bld->bld_base.base.gallivm,
3777 " +++ end prim emitted verts1 = ",
3778 emitted_vertices_vec);
3779 lp_build_print_value(bld->bld_base.base.gallivm,
3780 " +++ end prim emitted prims1 = ",
3781 LLVMBuildLoad(builder,
3782 bld->emitted_prims_vec_ptr, ""));
3783 #endif
3784 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
3785 mask);
3786 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
3787 mask);
3788 #if DUMP_GS_EMITS
3789 lp_build_print_value(bld->bld_base.base.gallivm,
3790 " +++ end prim emitted verts2 = ",
3791 LLVMBuildLoad(builder,
3792 bld->emitted_vertices_vec_ptr, ""));
3793 #endif
3794 }
3795
3796 }
3797
3798 static void
3799 end_primitive(
3800 const struct lp_build_tgsi_action * action,
3801 struct lp_build_tgsi_context * bld_base,
3802 struct lp_build_emit_data * emit_data)
3803 {
3804 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3805
3806 if (bld->gs_iface->end_primitive) {
3807 LLVMValueRef mask = mask_vec(bld_base);
3808 end_primitive_masked(bld_base, mask);
3809 }
3810 }
3811
3812 static void
3813 cal_emit(
3814 const struct lp_build_tgsi_action * action,
3815 struct lp_build_tgsi_context * bld_base,
3816 struct lp_build_emit_data * emit_data)
3817 {
3818 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3819
3820 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
3821 &bld_base->pc);
3822 }
3823
3824 static void
3825 ret_emit(
3826 const struct lp_build_tgsi_action * action,
3827 struct lp_build_tgsi_context * bld_base,
3828 struct lp_build_emit_data * emit_data)
3829 {
3830 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3831
3832 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
3833 }
3834
3835 static void
3836 brk_emit(
3837 const struct lp_build_tgsi_action * action,
3838 struct lp_build_tgsi_context * bld_base,
3839 struct lp_build_emit_data * emit_data)
3840 {
3841 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3842
3843 lp_exec_break(&bld->exec_mask, bld_base);
3844 }
3845
3846 static void
3847 if_emit(
3848 const struct lp_build_tgsi_action * action,
3849 struct lp_build_tgsi_context * bld_base,
3850 struct lp_build_emit_data * emit_data)
3851 {
3852 LLVMValueRef tmp;
3853 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3854
3855 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
3856 emit_data->args[0], bld->bld_base.base.zero);
3857 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3858 }
3859
3860 static void
3861 uif_emit(
3862 const struct lp_build_tgsi_action * action,
3863 struct lp_build_tgsi_context * bld_base,
3864 struct lp_build_emit_data * emit_data)
3865 {
3866 LLVMValueRef tmp;
3867 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3868 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3869
3870 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3871 emit_data->args[0], uint_bld->zero);
3872 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3873 }
3874
3875 static void
3876 case_emit(
3877 const struct lp_build_tgsi_action * action,
3878 struct lp_build_tgsi_context * bld_base,
3879 struct lp_build_emit_data * emit_data)
3880 {
3881 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3882
3883 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
3884 }
3885
3886 static void
3887 default_emit(
3888 const struct lp_build_tgsi_action * action,
3889 struct lp_build_tgsi_context * bld_base,
3890 struct lp_build_emit_data * emit_data)
3891 {
3892 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3893
3894 lp_exec_default(&bld->exec_mask, bld_base);
3895 }
3896
3897 static void
3898 switch_emit(
3899 const struct lp_build_tgsi_action * action,
3900 struct lp_build_tgsi_context * bld_base,
3901 struct lp_build_emit_data * emit_data)
3902 {
3903 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3904
3905 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
3906 }
3907
3908 static void
3909 endswitch_emit(
3910 const struct lp_build_tgsi_action * action,
3911 struct lp_build_tgsi_context * bld_base,
3912 struct lp_build_emit_data * emit_data)
3913 {
3914 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3915
3916 lp_exec_endswitch(&bld->exec_mask, bld_base);
3917 }
3918
3919 static void
3920 bgnloop_emit(
3921 const struct lp_build_tgsi_action * action,
3922 struct lp_build_tgsi_context * bld_base,
3923 struct lp_build_emit_data * emit_data)
3924 {
3925 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3926
3927 lp_exec_bgnloop(&bld->exec_mask);
3928 }
3929
3930 static void
3931 bgnsub_emit(
3932 const struct lp_build_tgsi_action * action,
3933 struct lp_build_tgsi_context * bld_base,
3934 struct lp_build_emit_data * emit_data)
3935 {
3936 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3937
3938 lp_exec_mask_bgnsub(&bld->exec_mask);
3939 }
3940
3941 static void
3942 else_emit(
3943 const struct lp_build_tgsi_action * action,
3944 struct lp_build_tgsi_context * bld_base,
3945 struct lp_build_emit_data * emit_data)
3946 {
3947 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3948
3949 lp_exec_mask_cond_invert(&bld->exec_mask);
3950 }
3951
3952 static void
3953 endif_emit(
3954 const struct lp_build_tgsi_action * action,
3955 struct lp_build_tgsi_context * bld_base,
3956 struct lp_build_emit_data * emit_data)
3957 {
3958 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3959
3960 lp_exec_mask_cond_pop(&bld->exec_mask);
3961 }
3962
3963 static void
3964 endloop_emit(
3965 const struct lp_build_tgsi_action * action,
3966 struct lp_build_tgsi_context * bld_base,
3967 struct lp_build_emit_data * emit_data)
3968 {
3969 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3970
3971 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
3972 }
3973
3974 static void
3975 endsub_emit(
3976 const struct lp_build_tgsi_action * action,
3977 struct lp_build_tgsi_context * bld_base,
3978 struct lp_build_emit_data * emit_data)
3979 {
3980 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3981
3982 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
3983 }
3984
3985 static void
3986 cont_emit(
3987 const struct lp_build_tgsi_action * action,
3988 struct lp_build_tgsi_context * bld_base,
3989 struct lp_build_emit_data * emit_data)
3990 {
3991 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3992
3993 lp_exec_continue(&bld->exec_mask);
3994 }
3995
3996 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
3997 {
3998 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3999 struct gallivm_state * gallivm = bld_base->base.gallivm;
4000
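   /*
    * Sizing note for the allocations below: file_max[] holds the highest
    * register index used in a file, so "file_max * 4 + 4" is
    * (file_max + 1) registers times 4 channels, i.e. one vector slot per
    * channel of every register.
    */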
4001 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
4002 unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
4003 bld->temps_array = lp_build_alloca_undef(gallivm,
4004 LLVMArrayType(bld_base->base.vec_type, array_size),
4005 "temp_array");
4006 }
4007
4008 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
4009 LLVMValueRef array_size =
4010 lp_build_const_int32(gallivm,
4011 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
4012 bld->outputs_array = lp_build_array_alloca(gallivm,
4013 bld_base->base.vec_type, array_size,
4014 "output_array");
4015 }
4016
4017 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
4018 unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
4019 bld->imms_array = lp_build_alloca_undef(gallivm,
4020 LLVMArrayType(bld_base->base.vec_type, array_size),
4021 "imms_array");
4022 }
4023
   /* If we have indirect addressing of inputs, we need to copy them into
    * our alloca array so that they can be indexed dynamically. */
4026 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
4027 unsigned index, chan;
4028 LLVMTypeRef vec_type = bld_base->base.vec_type;
4029 LLVMValueRef array_size = lp_build_const_int32(gallivm,
4030 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
4031 bld->inputs_array = lp_build_array_alloca(gallivm,
4032 vec_type, array_size,
4033 "input_array");
4034
4035 assert(bld_base->info->num_inputs
4036 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
4037
4038 for (index = 0; index < bld_base->info->num_inputs; ++index) {
4039 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
4040 LLVMValueRef lindex =
4041 lp_build_const_int32(gallivm, index * 4 + chan);
4042 LLVMValueRef input_ptr =
4043 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
4044 &lindex, 1, "");
4045 LLVMValueRef value = bld->inputs[index][chan];
4046 if (value)
4047 LLVMBuildStore(gallivm->builder, value, input_ptr);
4048 }
4049 }
4050 }
4051
4052 if (bld->gs_iface) {
4053 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
4054 bld->emitted_prims_vec_ptr =
4055 lp_build_alloca(gallivm,
4056 uint_bld->vec_type,
4057 "emitted_prims_ptr");
4058 bld->emitted_vertices_vec_ptr =
4059 lp_build_alloca(gallivm,
4060 uint_bld->vec_type,
4061 "emitted_vertices_ptr");
4062 bld->total_emitted_vertices_vec_ptr =
4063 lp_build_alloca(gallivm,
4064 uint_bld->vec_type,
4065 "total_emitted_vertices_ptr");
4066
4067 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4068 bld->emitted_prims_vec_ptr);
4069 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4070 bld->emitted_vertices_vec_ptr);
4071 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4072 bld->total_emitted_vertices_vec_ptr);
4073 }
4074
4075 if (DEBUG_EXECUTION) {
4076 lp_build_printf(gallivm, "\n");
4077 emit_dump_file(bld, TGSI_FILE_CONSTANT);
4078 if (!bld->gs_iface)
4079 emit_dump_file(bld, TGSI_FILE_INPUT);
4080 }
4081 }
4082
4083 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
4084 {
4085 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4086 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
4087
4088 if (DEBUG_EXECUTION) {
      /* optionally enable to also dump temporaries */
4090 if (0) {
4091 emit_dump_file(bld, TGSI_FILE_TEMPORARY);
4092 }
4093 emit_dump_file(bld, TGSI_FILE_OUTPUT);
4094 lp_build_printf(bld_base->base.gallivm, "\n");
4095 }
4096
   /* If we have indirect addressing of outputs, we need to copy our alloca
    * array to the output slots specified by the caller. */
4099 if (bld->gs_iface) {
4100 LLVMValueRef total_emitted_vertices_vec;
4101 LLVMValueRef emitted_prims_vec;
      /* Implicit end_primitive, needed in case there are any unflushed
       * vertices in the cache.  Note: we must not call end_primitive()
       * here since the exec_mask is not valid at this point. */
4105 end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
4106
4107 total_emitted_vertices_vec =
4108 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
4109 emitted_prims_vec =
4110 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
4111
4112 bld->gs_iface->gs_epilogue(bld->gs_iface,
4113 &bld->bld_base,
4114 total_emitted_vertices_vec,
4115 emitted_prims_vec);
4116 } else {
4117 gather_outputs(bld);
4118 }
4119 }
4120
4121 void
4122 lp_build_tgsi_soa(struct gallivm_state *gallivm,
4123 const struct tgsi_token *tokens,
4124 struct lp_type type,
4125 struct lp_build_mask_context *mask,
4126 LLVMValueRef consts_ptr,
4127 LLVMValueRef const_sizes_ptr,
4128 const struct lp_bld_tgsi_system_values *system_values,
4129 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
4130 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
4131 LLVMValueRef context_ptr,
4132 LLVMValueRef thread_data_ptr,
4133 const struct lp_build_sampler_soa *sampler,
4134 const struct tgsi_shader_info *info,
4135 const struct lp_build_tgsi_gs_iface *gs_iface,
4136 LLVMValueRef ssbo_ptr,
4137 LLVMValueRef ssbo_sizes_ptr)
4138 {
4139 struct lp_build_tgsi_soa_context bld;
4140
4141 struct lp_type res_type;
4142
4143 assert(type.length <= LP_MAX_VECTOR_LENGTH);
4144 memset(&res_type, 0, sizeof res_type);
4145 res_type.width = type.width;
4146 res_type.length = type.length;
4147 res_type.sign = 1;
4148
4149 /* Setup build context */
4150 memset(&bld, 0, sizeof bld);
4151 lp_build_context_init(&bld.bld_base.base, gallivm, type);
4152 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
4153 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
4154 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
4155 {
4156 struct lp_type dbl_type;
4157 dbl_type = type;
4158 dbl_type.width *= 2;
4159 lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
4160 }
4161 {
4162 struct lp_type uint64_type;
4163 uint64_type = lp_uint_type(type);
4164 uint64_type.width *= 2;
4165 lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
4166 }
4167 {
4168 struct lp_type int64_type;
4169 int64_type = lp_int_type(type);
4170 int64_type.width *= 2;
4171 lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
4172 }
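   /* The dbl/uint64/int64 contexts above keep the base vector length but
    * double the element width; they back the double-precision and 64-bit
    * integer opcodes. */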
4173 bld.mask = mask;
4174 bld.inputs = inputs;
4175 bld.outputs = outputs;
4176 bld.consts_ptr = consts_ptr;
4177 bld.const_sizes_ptr = const_sizes_ptr;
4178 bld.ssbo_ptr = ssbo_ptr;
4179 bld.ssbo_sizes_ptr = ssbo_sizes_ptr;
4180 bld.sampler = sampler;
4181 bld.bld_base.info = info;
4182 bld.indirect_files = info->indirect_files;
4183 bld.context_ptr = context_ptr;
4184 bld.thread_data_ptr = thread_data_ptr;
4185
   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * them like indirect temporaries.
    */
4191 if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
4192 bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
4193 }
   /*
    * For performance reasons immediates are always backed by a static
    * array, but if their number is too great we have to fall back to a
    * dynamically allocated array.
    */
4199 bld.use_immediates_array =
4200 (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
4201 if (bld.use_immediates_array) {
4202 bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
4203 }
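   /* E.g. a shader declaring LP_MAX_INLINED_IMMEDIATES or more immediates
    * takes the array path above and is then addressed through the alloca
    * set up in emit_prologue, just like the large-temporary case. */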
4204
4205
4206 bld.bld_base.soa = TRUE;
4207 bld.bld_base.emit_debug = emit_debug;
4208 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
4209 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
4210 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
4211 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
4212 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
4213 bld.bld_base.emit_store = emit_store;
4214
4215 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
4216 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
4217
4218 bld.bld_base.emit_prologue = emit_prologue;
4219 bld.bld_base.emit_epilogue = emit_epilogue;
4220
4221 /* Set opcode actions */
4222 lp_set_default_actions_cpu(&bld.bld_base);
4223
4224 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
4225 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
4226 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
4227 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
4228 bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
4229 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
4230 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
4231 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
4232 bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
4233 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
4234 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
4235 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
4236 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
4237 bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
4238 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
4239 bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
4240 bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
4241 bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
4242 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
4243 bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
4244 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
4245 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
4246 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
4247 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
4248 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
4249 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
4250 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
4251 bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
4252 bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
4253 bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
4254 bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
4255 bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
4256 /* DX10 sampling ops */
4257 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
4258 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
4259 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
4260 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
4261 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
4262 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
4263 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
4264 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
4265 bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
4266 bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
4267 bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
4268
4269 bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
4270 bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
4271 bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
4272
4273 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
4274 bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
4275 bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
4276 bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
4277 bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
4278 bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
4279 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
4280 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
4281 bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
4282 bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;
4283
4284 if (gs_iface) {
      /* There's no specific default for this because it should always
       * be set, but apps using ext_geometry_shader4 quite often forgot
       * to set it, so we fall back to MAX_VERTEX_VARYING from that
       * spec.  We could debug_assert if it's not set instead, but
       * that's a lot uglier. */
4290 uint max_output_vertices;
4291
4292 /* inputs are always indirect with gs */
4293 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4294 bld.gs_iface = gs_iface;
4295 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
4296 bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
4297 bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
4298
4299 max_output_vertices =
4300 info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
4301 if (!max_output_vertices)
4302 max_output_vertices = 32;
4303
4304 bld.max_output_vertices_vec =
4305 lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
4306 max_output_vertices);
4307 }
4308
4309 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
4310
4311 bld.system_values = *system_values;
4312
4313 lp_build_tgsi_llvm(&bld.bld_base, tokens);
4314
   if (0) {
      /* Debug aid: dump the TGSI source followed by the generated LLVM IR. */
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("--- TGSI --------------------------------------\n");
      tgsi_dump(tokens, 0);
      debug_printf("--- LLVM IR -----------------------------------\n");
      lp_debug_dump_value(function);
      debug_printf("------------------------------------------------\n");
   }
4323
   if (0) {
      /* Debug aid: dump the whole LLVM module. */
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);
   }
4330 lp_exec_mask_fini(&bld.exec_mask);
4331 }