gallivm: move mask_vec function up higher so it can be reused.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "util/u_prim.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_info.h"
48 #include "tgsi/tgsi_parse.h"
49 #include "tgsi/tgsi_util.h"
50 #include "tgsi/tgsi_scan.h"
51 #include "tgsi/tgsi_strings.h"
52 #include "lp_bld_tgsi_action.h"
53 #include "lp_bld_type.h"
54 #include "lp_bld_const.h"
55 #include "lp_bld_arit.h"
56 #include "lp_bld_bitarit.h"
57 #include "lp_bld_gather.h"
58 #include "lp_bld_init.h"
59 #include "lp_bld_logic.h"
60 #include "lp_bld_swizzle.h"
61 #include "lp_bld_flow.h"
62 #include "lp_bld_quad.h"
63 #include "lp_bld_tgsi.h"
64 #include "lp_bld_limits.h"
65 #include "lp_bld_debug.h"
66 #include "lp_bld_printf.h"
67 #include "lp_bld_sample.h"
68 #include "lp_bld_struct.h"
69
70 /* SM 4.0 says that subroutines can nest 32 deep and
71 * we need one more for our main function */
72 #define LP_MAX_NUM_FUNCS 33
73
74 #define DUMP_GS_EMITS 0
75
76 /*
77 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
78 * instruction.
79 *
80 * TODO:
81 * - take execution masks into consideration
82 * - debug control-flow instructions
83 */
84 #define DEBUG_EXECUTION 0
85
86
87 /*
88 * Emit code to print a register value.
89 */
90 static void
91 emit_dump_reg(struct gallivm_state *gallivm,
92 unsigned file,
93 unsigned index,
94 unsigned chan,
95 LLVMValueRef value)
96 {
97 char buf[32];
98
99 util_snprintf(buf, sizeof buf, " %s[%u].%c = ",
100 tgsi_file_name(file),
101 index, "xyzw"[chan]);
102
103 lp_build_print_value(gallivm, buf, value);
104 }
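
/*
 * For illustration (a sketch; the exact value formatting is whatever
 * lp_build_print_value emits), dumping channel 0 of the first temporary
 * register produces a line along the lines of:
 *
 *    TEMP[0].x = <one value per SoA lane>
 */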
105
106 /*
107 * Return the context for the current function.
108 * (always 'main' if the shader doesn't make any function calls)
109 */
110 static inline struct function_ctx *
111 func_ctx(struct lp_exec_mask *mask)
112 {
113 assert(mask->function_stack_size > 0);
114 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
115 return &mask->function_stack[mask->function_stack_size - 1];
116 }
117
118 /*
119 * Returns true if we're in a loop.
120 * The check is global: it returns true even if there's no loop
121 * inside the current function, as long as we were inside a loop
122 * in one of the functions from which this one was called.
123 */
124 static inline boolean
125 mask_has_loop(struct lp_exec_mask *mask)
126 {
127 int i;
128 for (i = mask->function_stack_size - 1; i >= 0; --i) {
129 const struct function_ctx *ctx = &mask->function_stack[i];
130 if (ctx->loop_stack_size > 0)
131 return TRUE;
132 }
133 return FALSE;
134 }
135
136 static LLVMValueRef
137 mask_vec(struct lp_build_tgsi_context *bld_base)
138 {
139 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
140 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
141 struct lp_exec_mask *exec_mask = &bld->exec_mask;
142
143 if (!exec_mask->has_mask) {
144 return lp_build_mask_value(bld->mask);
145 }
146 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
147 exec_mask->exec_mask, "");
148 }
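
/*
 * In other words (a sketch, per lane i):
 *
 *    live[i] = mask_value[i] & exec_mask[i]
 *
 * where mask_value comes from the caller-provided lp_build_mask_context
 * (e.g. fragment coverage/kill) and exec_mask from TGSI control flow;
 * the AND is skipped entirely while no control flow is active.
 */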
149
150
151 /*
152 * Returns true if we're inside a switch statement.
153 * The check is global: it returns true even if there's no switch
154 * inside the current function, as long as we were inside a switch
155 * in one of the functions from which this one was called.
156 */
157 static inline boolean
158 mask_has_switch(struct lp_exec_mask *mask)
159 {
160 int i;
161 for (i = mask->function_stack_size - 1; i >= 0; --i) {
162 const struct function_ctx *ctx = &mask->function_stack[i];
163 if (ctx->switch_stack_size > 0)
164 return TRUE;
165 }
166 return FALSE;
167 }
168
169 /*
170 * Returns true if we're inside a conditional.
171 * The check is global: it returns true even if there's no conditional
172 * inside the current function, as long as we were inside a conditional
173 * in one of the functions from which this one was called.
174 */
175 static inline boolean
176 mask_has_cond(struct lp_exec_mask *mask)
177 {
178 int i;
179 for (i = mask->function_stack_size - 1; i >= 0; --i) {
180 const struct function_ctx *ctx = &mask->function_stack[i];
181 if (ctx->cond_stack_size > 0)
182 return TRUE;
183 }
184 return FALSE;
185 }
186
187
188 /*
189 * Initialize a function context at the specified index.
190 */
191 static void
192 lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
193 {
194 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
195 LLVMBuilderRef builder = mask->bld->gallivm->builder;
196 struct function_ctx *ctx = &mask->function_stack[function_idx];
197
198 ctx->cond_stack_size = 0;
199 ctx->loop_stack_size = 0;
200 ctx->switch_stack_size = 0;
201
202 if (function_idx == 0) {
203 ctx->ret_mask = mask->ret_mask;
204 }
205
206 ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
207 int_type, "looplimiter");
208 LLVMBuildStore(
209 builder,
210 LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
211 ctx->loop_limiter);
212 }
213
214 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
215 {
216 mask->bld = bld;
217 mask->has_mask = FALSE;
218 mask->ret_in_main = FALSE;
219 /* For the main function */
220 mask->function_stack_size = 1;
221
222 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
223 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
224 mask->cond_mask = mask->switch_mask =
225 LLVMConstAllOnes(mask->int_vec_type);
226
227 mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
228 sizeof(mask->function_stack[0]));
229 lp_exec_mask_function_init(mask, 0);
230 }
231
232 static void
233 lp_exec_mask_fini(struct lp_exec_mask *mask)
234 {
235 FREE(mask->function_stack);
236 }
237
238 static void lp_exec_mask_update(struct lp_exec_mask *mask)
239 {
240 LLVMBuilderRef builder = mask->bld->gallivm->builder;
241 boolean has_loop_mask = mask_has_loop(mask);
242 boolean has_cond_mask = mask_has_cond(mask);
243 boolean has_switch_mask = mask_has_switch(mask);
244 boolean has_ret_mask = mask->function_stack_size > 1 ||
245 mask->ret_in_main;
246
247 if (has_loop_mask) {
248 /* for loops we need to update the entire mask at runtime */
249 LLVMValueRef tmp;
250 assert(mask->break_mask);
251 tmp = LLVMBuildAnd(builder,
252 mask->cont_mask,
253 mask->break_mask,
254 "maskcb");
255 mask->exec_mask = LLVMBuildAnd(builder,
256 mask->cond_mask,
257 tmp,
258 "maskfull");
259 } else
260 mask->exec_mask = mask->cond_mask;
261
262 if (has_switch_mask) {
263 mask->exec_mask = LLVMBuildAnd(builder,
264 mask->exec_mask,
265 mask->switch_mask,
266 "switchmask");
267 }
268
269 if (has_ret_mask) {
270 mask->exec_mask = LLVMBuildAnd(builder,
271 mask->exec_mask,
272 mask->ret_mask,
273 "callmask");
274 }
275
276 mask->has_mask = (has_cond_mask ||
277 has_loop_mask ||
278 has_switch_mask ||
279 has_ret_mask);
280 }
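
/*
 * A worked illustration with hypothetical lane values (0 = off, ~0 = on):
 * inside a loop with cond_mask = {~0,~0,0,0}, cont_mask = {~0,0,~0,~0}
 * and break_mask = {~0,~0,~0,0}, the combined exec_mask comes out as
 * {~0,0,0,0} - a lane only executes if it passes every active mask.
 */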
281
282 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
283 LLVMValueRef val)
284 {
285 LLVMBuilderRef builder = mask->bld->gallivm->builder;
286 struct function_ctx *ctx = func_ctx(mask);
287
288 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
289 ctx->cond_stack_size++;
290 return;
291 }
292 if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
293 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
294 }
295 ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
296 assert(LLVMTypeOf(val) == mask->int_vec_type);
297 mask->cond_mask = LLVMBuildAnd(builder,
298 mask->cond_mask,
299 val,
300 "");
301 lp_exec_mask_update(mask);
302 }
303
304 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
305 {
306 LLVMBuilderRef builder = mask->bld->gallivm->builder;
307 struct function_ctx *ctx = func_ctx(mask);
308 LLVMValueRef prev_mask;
309 LLVMValueRef inv_mask;
310
311 assert(ctx->cond_stack_size);
312 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
313 return;
314 prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
315 if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
316 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
317 }
318
319 inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
320
321 mask->cond_mask = LLVMBuildAnd(builder,
322 inv_mask,
323 prev_mask, "");
324 lp_exec_mask_update(mask);
325 }
326
327 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
328 {
329 struct function_ctx *ctx = func_ctx(mask);
330 assert(ctx->cond_stack_size);
331 --ctx->cond_stack_size;
332 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
333 return;
334 mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
335 lp_exec_mask_update(mask);
336 }
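
/*
 * Together these three helpers implement structured IF/ELSE/ENDIF
 * (a sketch, with 'prev' being the mask saved on the cond stack):
 *
 *    IF c  -> cond_push:   cond_mask = prev & c
 *    ELSE  -> cond_invert: cond_mask = ~cond_mask & prev
 *    ENDIF -> cond_pop:    cond_mask = prev
 */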
337
338 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
339 {
340 LLVMBuilderRef builder = mask->bld->gallivm->builder;
341 struct function_ctx *ctx = func_ctx(mask);
342
343 if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
344 ++ctx->loop_stack_size;
345 return;
346 }
347
348 ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
349 ctx->break_type;
350 ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;
351
352 ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
353 ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
354 ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
355 ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
356 ++ctx->loop_stack_size;
357
358 ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
359 LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
360
361 ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
362
363 LLVMBuildBr(builder, ctx->loop_block);
364 LLVMPositionBuilderAtEnd(builder, ctx->loop_block);
365
366 mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");
367
368 lp_exec_mask_update(mask);
369 }
370
371 static void lp_exec_break(struct lp_exec_mask *mask,
372 struct lp_build_tgsi_context * bld_base)
373 {
374 LLVMBuilderRef builder = mask->bld->gallivm->builder;
375 struct function_ctx *ctx = func_ctx(mask);
376
377 if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
378 LLVMValueRef exec_mask = LLVMBuildNot(builder,
379 mask->exec_mask,
380 "break");
381
382 mask->break_mask = LLVMBuildAnd(builder,
383 mask->break_mask,
384 exec_mask, "break_full");
385 }
386 else {
387 enum tgsi_opcode opcode =
388 bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
389 boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
390 opcode == TGSI_OPCODE_CASE);
391
392
393 if (ctx->switch_in_default) {
394 /*
395 * stop default execution, but only if this is an unconditional break.
396 * (The condition here is not perfect, since dead code after a break
397 * is allowed, but it should be sufficient: false negatives are just
398 * unoptimized - so we don't have to pre-evaluate that).
399 */
400 if (break_always && ctx->switch_pc) {
401 bld_base->pc = ctx->switch_pc;
402 return;
403 }
404 }
405
406 if (break_always) {
407 mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
408 }
409 else {
410 LLVMValueRef exec_mask = LLVMBuildNot(builder,
411 mask->exec_mask,
412 "break");
413 mask->switch_mask = LLVMBuildAnd(builder,
414 mask->switch_mask,
415 exec_mask, "break_switch");
416 }
417 }
418
419 lp_exec_mask_update(mask);
420 }
421
422 static void lp_exec_continue(struct lp_exec_mask *mask)
423 {
424 LLVMBuilderRef builder = mask->bld->gallivm->builder;
425 LLVMValueRef exec_mask = LLVMBuildNot(builder,
426 mask->exec_mask,
427 "");
428
429 mask->cont_mask = LLVMBuildAnd(builder,
430 mask->cont_mask,
431 exec_mask, "");
432
433 lp_exec_mask_update(mask);
434 }
435
436
437 static void lp_exec_endloop(struct gallivm_state *gallivm,
438 struct lp_exec_mask *mask)
439 {
440 LLVMBuilderRef builder = mask->bld->gallivm->builder;
441 struct function_ctx *ctx = func_ctx(mask);
442 LLVMBasicBlockRef endloop;
443 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
444 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
445 mask->bld->type.width *
446 mask->bld->type.length);
447 LLVMValueRef i1cond, i2cond, icond, limiter;
448
449 assert(mask->break_mask);
450
451
452 assert(ctx->loop_stack_size);
453 if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
454 --ctx->loop_stack_size;
455 return;
456 }
457
458 /*
459 * Restore the cont_mask, but don't pop
460 */
461 mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
462 lp_exec_mask_update(mask);
463
464 /*
465 * Unlike the continue mask, the break_mask must be preserved across loop
466 * iterations
467 */
468 LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
469
470 /* Decrement the loop limiter */
471 limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");
472
473 limiter = LLVMBuildSub(
474 builder,
475 limiter,
476 LLVMConstInt(int_type, 1, false),
477 "");
478
479 LLVMBuildStore(builder, limiter, ctx->loop_limiter);
480
481 /* i1cond = (mask != 0) */
482 i1cond = LLVMBuildICmp(
483 builder,
484 LLVMIntNE,
485 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
486 LLVMConstNull(reg_type), "i1cond");
487
488 /* i2cond = (looplimiter > 0) */
489 i2cond = LLVMBuildICmp(
490 builder,
491 LLVMIntSGT,
492 limiter,
493 LLVMConstNull(int_type), "i2cond");
494
495 /* if( i1cond && i2cond ) */
496 icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
497
498 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
499
500 LLVMBuildCondBr(builder,
501 icond, ctx->loop_block, endloop);
502
503 LLVMPositionBuilderAtEnd(builder, endloop);
504
505 assert(ctx->loop_stack_size);
506 --ctx->loop_stack_size;
507 mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
508 mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
509 ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
510 ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
511 ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
512 ctx->switch_stack_size];
513
514 lp_exec_mask_update(mask);
515 }
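
/*
 * The emitted loop thus looks roughly like this (a sketch):
 *
 *    store break_mask -> break_var
 *  bgnloop:
 *    break_mask = load break_var
 *    ... loop body, lanes masked via exec_mask ...
 *    store break_mask -> break_var
 *    limiter = limiter - 1
 *    if (exec_mask != 0 && limiter > 0) goto bgnloop
 *  endloop:
 */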
516
517 static void lp_exec_switch(struct lp_exec_mask *mask,
518 LLVMValueRef switchval)
519 {
520 struct function_ctx *ctx = func_ctx(mask);
521
522 if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
523 ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
524 ctx->switch_stack_size++;
525 return;
526 }
527
528 ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
529 ctx->break_type;
530 ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
531
532 ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
533 ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
534 ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
535 ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
536 ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
537 ctx->switch_stack_size++;
538
539 mask->switch_mask = LLVMConstNull(mask->int_vec_type);
540 ctx->switch_val = switchval;
541 ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
542 ctx->switch_in_default = false;
543 ctx->switch_pc = 0;
544
545 lp_exec_mask_update(mask);
546 }
547
548 static void lp_exec_endswitch(struct lp_exec_mask *mask,
549 struct lp_build_tgsi_context * bld_base)
550 {
551 LLVMBuilderRef builder = mask->bld->gallivm->builder;
552 struct function_ctx *ctx = func_ctx(mask);
553
554 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
555 ctx->switch_stack_size--;
556 return;
557 }
558
559 /* check if there's a deferred default; if so, do it now */
560 if (ctx->switch_pc && !ctx->switch_in_default) {
561 LLVMValueRef prevmask, defaultmask;
562 unsigned tmp_pc;
563 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
564 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
565 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
566 ctx->switch_in_default = true;
567
568 lp_exec_mask_update(mask);
569
570 assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
571 TGSI_OPCODE_DEFAULT);
572
573 tmp_pc = bld_base->pc;
574 bld_base->pc = ctx->switch_pc;
575 /*
576 * re-purpose switch_pc to point to here again, since we stop execution of
577 * the deferred default after the next break.
578 */
579 ctx->switch_pc = tmp_pc - 1;
580
581 return;
582 }
583
584 else if (ctx->switch_pc && ctx->switch_in_default) {
585 assert(bld_base->pc == ctx->switch_pc + 1);
586 }
587
588 ctx->switch_stack_size--;
589 mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
590 ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
591 ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
592 ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
593 ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
594
595 ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
596
597 lp_exec_mask_update(mask);
598 }
599
600 static void lp_exec_case(struct lp_exec_mask *mask,
601 LLVMValueRef caseval)
602 {
603 LLVMBuilderRef builder = mask->bld->gallivm->builder;
604 struct function_ctx *ctx = func_ctx(mask);
605
606 LLVMValueRef casemask, prevmask;
607
608 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
609 return;
610 }
611
612 /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
613 if (!ctx->switch_in_default) {
614 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
615 casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
616 ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
617 ctx->switch_mask_default, "sw_default_mask");
618 casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
619 mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
620
621 lp_exec_mask_update(mask);
622 }
623 }
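
/*
 * Per lane this amounts to (a sketch):
 *
 *    switch_mask = (switch_mask | (caseval == switch_val)) & prev
 *
 * with prev being the mask live when the switch was entered, while
 * switch_mask_default accumulates every case value matched so far, so
 * that a deferred default can later execute the complement of it.
 */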
624
625 /*
626 * Analyse the default statement in a switch.
627 * \return true if default is the last statement, false otherwise
628 * \param default_pc_start contains the pc of the instruction to jump to
629 * if default wasn't last but there's no
630 * fallthrough into default.
631 */
632 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
633 struct lp_build_tgsi_context * bld_base,
634 int *default_pc_start)
635 {
636 unsigned pc = bld_base->pc;
637 struct function_ctx *ctx = func_ctx(mask);
638 int curr_switch_stack = ctx->switch_stack_size;
639
640 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
641 return false;
642 }
643
644 /* skip over case statements which are together with default */
645 while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
646 pc++;
647 }
648
649 while (pc != ~0u && pc < bld_base->num_instructions) {
650 enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
651 switch (opcode) {
652 case TGSI_OPCODE_CASE:
653 if (curr_switch_stack == ctx->switch_stack_size) {
654 *default_pc_start = pc - 1;
655 return false;
656 }
657 break;
658 case TGSI_OPCODE_SWITCH:
659 curr_switch_stack++;
660 break;
661 case TGSI_OPCODE_ENDSWITCH:
662 if (curr_switch_stack == ctx->switch_stack_size) {
663 *default_pc_start = pc - 1;
664 return true;
665 }
666 curr_switch_stack--;
667 break;
668 default:
669 ; /* nothing */
670 }
671 pc++;
672 }
673 /* should never arrive here */
674 assert(0);
675 return true;
676 }
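
/*
 * For illustration (hypothetical TGSI), with
 *
 *    SWITCH; CASE 0; ...; BRK; DEFAULT; ...; ENDSWITCH
 *
 * default is last and this returns true, whereas with
 *
 *    SWITCH; DEFAULT; ...; BRK; CASE 0; ...; ENDSWITCH
 *
 * it returns false and *default_pc_start points just before the CASE.
 */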
677
678 static void lp_exec_default(struct lp_exec_mask *mask,
679 struct lp_build_tgsi_context * bld_base)
680 {
681 LLVMBuilderRef builder = mask->bld->gallivm->builder;
682 struct function_ctx *ctx = func_ctx(mask);
683
684 int default_exec_pc;
685 boolean default_is_last;
686
687 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
688 return;
689 }
690
691 /*
692 * This is a messy opcode, because it may not always be at the end and
693 * there can be fallthrough in and out of it.
694 */
695
696 default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
697 /*
698 * If it is the last statement in the switch (note that case statements
699 * appearing "at the same time" as default don't change that) everything
700 * is just fine: update the switch mask and go on. This means we can
701 * handle default with fallthrough INTO it without overhead, if it is last.
702 */
703 if (default_is_last) {
704 LLVMValueRef prevmask, defaultmask;
705 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
706 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
707 defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
708 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
709 ctx->switch_in_default = true;
710
711 lp_exec_mask_update(mask);
712 }
713 else {
714 /*
715 * Technically, "case" immediately before default isn't really a
716 * fallthrough, however we still have to count it as such, since we
717 * have already updated the masks.
718 * If that happens in practice, we could add a switch optimizer pass
719 * which just gets rid of all case statements appearing together with
720 * default (or do the switch analysis at switch start time instead).
721 */
722 enum tgsi_opcode opcode =
723 bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
724 boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
725 opcode != TGSI_OPCODE_SWITCH);
726 /*
727 * If it is not the last statement and there was no fallthrough into it,
728 * we record the PC and continue execution at the next case (again, those
729 * cases encountered at the same time don't count). At endswitch
730 * time, we update the switch mask and go back to execute the code we
731 * skipped, until the next break (possibly re-executing some code with a
732 * changed mask if there was a fallthrough out of default).
733 * Finally, if it is not the last statement and there was a fallthrough into
734 * it, do the same as in the former case, except instead of skipping the
735 * code just execute it without updating the mask, then go back and re-execute.
736 */
737 ctx->switch_pc = bld_base->pc;
738 if (!ft_into) {
739 bld_base->pc = default_exec_pc;
740 }
741 }
742 }
743
744
745 /* Stores val into the address pointed to by dst_ptr.
746 * mask->exec_mask is used to figure out which elements of val
747 * should be stored into the address
748 * (0 means don't store this element, ~0 means do store).
749 */
750 static void lp_exec_mask_store(struct lp_exec_mask *mask,
751 struct lp_build_context *bld_store,
752 LLVMValueRef val,
753 LLVMValueRef dst_ptr)
754 {
755 LLVMBuilderRef builder = mask->bld->gallivm->builder;
756 LLVMValueRef exec_mask = mask->has_mask ? mask->exec_mask : NULL;
757
758 assert(lp_check_value(bld_store->type, val));
759 assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
760 assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val) ||
761 LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(dst_ptr))) == LLVMArrayTypeKind);
762
763 if (exec_mask) {
764 LLVMValueRef res, dst;
765
766 dst = LLVMBuildLoad(builder, dst_ptr, "");
767 res = lp_build_select(bld_store, exec_mask, val, dst);
768 LLVMBuildStore(builder, res, dst_ptr);
769 } else
770 LLVMBuildStore(builder, val, dst_ptr);
771 }
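
/*
 * Conceptually (a sketch), per lane i this is:
 *
 *    dst_ptr[i] = exec_mask[i] ? val[i] : dst_ptr[i]
 *
 * realized as the load / lp_build_select / store sequence above.
 */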
772
773 static void lp_exec_mask_call(struct lp_exec_mask *mask,
774 int func,
775 int *pc)
776 {
777 if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
778 return;
779 }
780
781 lp_exec_mask_function_init(mask, mask->function_stack_size);
782 mask->function_stack[mask->function_stack_size].pc = *pc;
783 mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
784 mask->function_stack_size++;
785 *pc = func;
786 }
787
788 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
789 {
790 LLVMBuilderRef builder = mask->bld->gallivm->builder;
791 struct function_ctx *ctx = func_ctx(mask);
792 LLVMValueRef exec_mask;
793
794 if (ctx->cond_stack_size == 0 &&
795 ctx->loop_stack_size == 0 &&
796 ctx->switch_stack_size == 0 &&
797 mask->function_stack_size == 1) {
798 /* returning from main() */
799 *pc = -1;
800 return;
801 }
802
803 if (mask->function_stack_size == 1) {
804 /*
805 * This requires special handling since we need to ensure
806 * we don't drop the mask even if we have no call stack
807 * (e.g. after a ret in a if clause after the endif)
808 */
809 mask->ret_in_main = TRUE;
810 }
811
812 exec_mask = LLVMBuildNot(builder,
813 mask->exec_mask,
814 "ret");
815
816 mask->ret_mask = LLVMBuildAnd(builder,
817 mask->ret_mask,
818 exec_mask, "ret_full");
819
820 lp_exec_mask_update(mask);
821 }
822
823 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
824 {
825 }
826
827 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
828 {
829 struct function_ctx *ctx;
830
831 assert(mask->function_stack_size > 1);
832 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
833
834 ctx = func_ctx(mask);
835 mask->function_stack_size--;
836
837 *pc = ctx->pc;
838 mask->ret_mask = ctx->ret_mask;
839
840 lp_exec_mask_update(mask);
841 }
842
843
844 static LLVMValueRef
845 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
846 unsigned file,
847 int index,
848 unsigned chan)
849 {
850 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
851 LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
852 LLVMValueRef var_of_array;
853
854 switch (file) {
855 case TGSI_FILE_TEMPORARY:
856 array_of_vars = bld->temps;
857 var_of_array = bld->temps_array;
858 break;
859 case TGSI_FILE_OUTPUT:
860 array_of_vars = bld->outputs;
861 var_of_array = bld->outputs_array;
862 break;
863 default:
864 assert(0);
865 return NULL;
866 }
867
868 assert(chan < 4);
869
870 if (bld->indirect_files & (1 << file)) {
871 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
872 if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
873 LLVMValueRef gep[2];
874 gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
875 gep[1] = lindex;
876 return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
877 } else {
878 return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
879 }
880 }
881 else {
882 assert(index <= bld->bld_base.info->file_max[file]);
883 return array_of_vars[index][chan];
884 }
885 }
886
887
888 /**
889 * Return pointer to a temporary register channel (src or dest).
890 * Note that indirect addressing cannot be handled here.
891 * \param index which temporary register
892 * \param chan which channel of the temp register.
893 */
894 LLVMValueRef
895 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
896 unsigned index,
897 unsigned chan)
898 {
899 return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
900 }
901
902 /**
903 * Return pointer to an output register channel (src or dest).
904 * Note that indirect addressing cannot be handled here.
905 * \param index which output register
906 * \param chan which channel of the output register.
907 */
908 LLVMValueRef
909 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
910 unsigned index,
911 unsigned chan)
912 {
913 return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
914 }
915
916 /*
917 * If we have indirect addressing in outputs, copy our alloca array
918 * to the output slots specified by the caller, to make sure
919 * our outputs are delivered consistently via the same interface.
920 */
921 static void
922 gather_outputs(struct lp_build_tgsi_soa_context * bld)
923 {
924 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
925 unsigned index, chan;
926 assert(bld->bld_base.info->num_outputs <=
927 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
928 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
929 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
930 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
931 }
932 }
933 }
934 }
935
936 /**
937 * Gather vector.
938 * XXX the lp_build_gather() function should be capable of doing this
939 * with a little work.
940 */
941 static LLVMValueRef
942 build_gather(struct lp_build_tgsi_context *bld_base,
943 LLVMValueRef base_ptr,
944 LLVMValueRef indexes,
945 LLVMValueRef overflow_mask,
946 LLVMValueRef indexes2)
947 {
948 struct gallivm_state *gallivm = bld_base->base.gallivm;
949 LLVMBuilderRef builder = gallivm->builder;
950 struct lp_build_context *uint_bld = &bld_base->uint_bld;
951 struct lp_build_context *bld = &bld_base->base;
952 LLVMValueRef res;
953 unsigned i;
954
955 if (indexes2)
956 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
957 else
958 res = bld->undef;
959 /*
960 * overflow_mask is a vector telling us which channels
961 * in the vector overflowed. We use the overflow behavior for
962 * constant buffers which is defined as:
963 * Out of bounds access to constant buffer returns 0 in all
964 * components. Out of bounds behavior is always with respect
965 * to the size of the buffer bound at that slot.
966 */
967
968 if (overflow_mask) {
969 /*
970 * We avoid per-element control flow here (also due to llvm going crazy,
971 * though I suspect it's better anyway since overflow is likely rare).
972 * Note that since we still fetch from buffers even if num_elements was
973 * zero (in this case we'll fetch from index zero) the jit func callers
974 * MUST provide valid fake constant buffers of size 4x32 (the values do
975 * not matter), otherwise we'd still need (not per element though)
976 * control flow.
977 */
978 indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
979 if (indexes2)
980 indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
981 }
982
983 /*
984 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
985 */
986 for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
987 LLVMValueRef si, di;
988 LLVMValueRef index;
989 LLVMValueRef scalar_ptr, scalar;
990
991 di = lp_build_const_int32(bld->gallivm, i);
992 if (indexes2)
993 si = lp_build_const_int32(bld->gallivm, i >> 1);
994 else
995 si = di;
996
997 if (indexes2 && (i & 1)) {
998 index = LLVMBuildExtractElement(builder,
999 indexes2, si, "");
1000 } else {
1001 index = LLVMBuildExtractElement(builder,
1002 indexes, si, "");
1003 }
1004 scalar_ptr = LLVMBuildGEP(builder, base_ptr,
1005 &index, 1, "gather_ptr");
1006 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
1007
1008 res = LLVMBuildInsertElement(builder, res, scalar, di, "");
1009 }
1010
1011 if (overflow_mask) {
1012 if (indexes2) {
1013 res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
1014 overflow_mask = LLVMBuildSExt(builder, overflow_mask,
1015 bld_base->dbl_bld.int_vec_type, "");
1016 res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
1017 bld_base->dbl_bld.zero, res);
1018 } else
1019 res = lp_build_select(bld, overflow_mask, bld->zero, res);
1020 }
1021
1022 return res;
1023 }
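
/*
 * Conceptually (a sketch), per lane i this computes:
 *
 *    res[i] = overflow_mask[i] ? 0 : base_ptr[indexes[i]]
 *
 * except that overflowing lanes are first redirected to index 0 and the
 * result zeroed afterwards, avoiding per-element control flow.
 */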
1024
1025
1026 /**
1027 * Scatter/store vector.
1028 */
1029 static void
1030 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
1031 LLVMValueRef base_ptr,
1032 LLVMValueRef indexes,
1033 LLVMValueRef values,
1034 struct lp_exec_mask *mask)
1035 {
1036 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1037 LLVMBuilderRef builder = gallivm->builder;
1038 unsigned i;
1039 LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
1040
1041 /*
1042 * Loop over elements of index_vec, store scalar value.
1043 */
1044 for (i = 0; i < bld->bld_base.base.type.length; i++) {
1045 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1046 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
1047 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
1048 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
1049 LLVMValueRef scalar_pred = pred ?
1050 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
1051
1052 if (0)
1053 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
1054 ii, val, index, scalar_ptr);
1055
1056 if (scalar_pred) {
1057 LLVMValueRef real_val, dst_val;
1058 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
1059 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
1060 LLVMBuildStore(builder, real_val, scalar_ptr);
1061 }
1062 else {
1063 LLVMBuildStore(builder, val, scalar_ptr);
1064 }
1065 }
1066 }
1067
1068
1069 /**
1070 * Read the current value of the ADDR register, convert the floats to
1071 * ints, add the base index and return the vector of offsets.
1072 * The offsets will be used to index into the constant buffer or
1073 * temporary register file.
1074 */
1075 static LLVMValueRef
1076 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
1077 unsigned reg_file, unsigned reg_index,
1078 const struct tgsi_ind_register *indirect_reg,
1079 int index_limit)
1080 {
1081 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1082 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
1083 /* use the component of the address register selected by the swizzle */
1084 unsigned swizzle = indirect_reg->Swizzle;
1085 LLVMValueRef base;
1086 LLVMValueRef rel;
1087 LLVMValueRef max_index;
1088 LLVMValueRef index;
1089
1090 assert(bld->indirect_files & (1 << reg_file));
1091
1092 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
1093
1094 assert(swizzle < 4);
1095 switch (indirect_reg->File) {
1096 case TGSI_FILE_ADDRESS:
1097 rel = LLVMBuildLoad(builder,
1098 bld->addr[indirect_reg->Index][swizzle],
1099 "load addr reg");
1100 /* ADDR LLVM values already have LLVM integer type. */
1101 break;
1102 case TGSI_FILE_TEMPORARY:
1103 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
1104 rel = LLVMBuildLoad(builder, rel, "load temp reg");
1105 /* TEMP LLVM values always have LLVM float type, but for indirection, the
1106 * value actually stored is expected to be an integer */
1107 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
1108 break;
1109 default:
1110 assert(0);
1111 rel = uint_bld->zero;
1112 }
1113
1114 index = lp_build_add(uint_bld, base, rel);
1115
1116 /*
1117 * emit_fetch_constant handles constant buffer overflow so this code
1118 * is pointless for them.
1119 * Furthermore the D3D10 spec in section 6.5 says:
1120 * If the constant buffer bound to a slot is larger than the size
1121 * declared in the shader for that slot, implementations are allowed
1122 * to return incorrect data (not necessarily 0) for indices that are
1123 * larger than the declared size but smaller than the buffer size.
1124 */
1125 if (reg_file != TGSI_FILE_CONSTANT) {
1126 assert(index_limit >= 0);
1127 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
1128 uint_bld->type, index_limit);
1129
1130 assert(!uint_bld->type.sign);
1131 index = lp_build_min(uint_bld, index, max_index);
1132 }
1133
1134 return index;
1135 }
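
/*
 * Worked example (hypothetical values, 4-wide vectors): for
 * TEMP[2 + ADDR[0].x] with reg_index 2 and rel = {0,1,3,7} read from the
 * address register, index = {2,3,5,9}; with index_limit 5 this is then
 * clamped to {2,3,5,5} (constants skip the clamp, as explained above).
 */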
1136
1137 static struct lp_build_context *
1138 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
1139 enum tgsi_opcode_type stype)
1140 {
1141 struct lp_build_context *bld_fetch;
1142
1143 switch (stype) {
1144 case TGSI_TYPE_FLOAT:
1145 case TGSI_TYPE_UNTYPED:
1146 bld_fetch = &bld_base->base;
1147 break;
1148 case TGSI_TYPE_UNSIGNED:
1149 bld_fetch = &bld_base->uint_bld;
1150 break;
1151 case TGSI_TYPE_SIGNED:
1152 bld_fetch = &bld_base->int_bld;
1153 break;
1154 case TGSI_TYPE_DOUBLE:
1155 bld_fetch = &bld_base->dbl_bld;
1156 break;
1157 case TGSI_TYPE_UNSIGNED64:
1158 bld_fetch = &bld_base->uint64_bld;
1159 break;
1160 case TGSI_TYPE_SIGNED64:
1161 bld_fetch = &bld_base->int64_bld;
1162 break;
1163 case TGSI_TYPE_VOID:
1164 default:
1165 assert(0);
1166 bld_fetch = NULL;
1167 break;
1168 }
1169 return bld_fetch;
1170 }
1171
1172 static LLVMValueRef
1173 get_soa_array_offsets(struct lp_build_context *uint_bld,
1174 LLVMValueRef indirect_index,
1175 unsigned chan_index,
1176 boolean need_perelement_offset)
1177 {
1178 struct gallivm_state *gallivm = uint_bld->gallivm;
1179 LLVMValueRef chan_vec =
1180 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
1181 LLVMValueRef length_vec =
1182 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
1183 LLVMValueRef index_vec;
1184
1185 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1186 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1187 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1188 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1189
1190 if (need_perelement_offset) {
1191 LLVMValueRef pixel_offsets;
1192 unsigned i;
1193 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1194 pixel_offsets = uint_bld->undef;
1195 for (i = 0; i < uint_bld->type.length; i++) {
1196 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1197 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
1198 ii, ii, "");
1199 }
1200 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1201 }
1202 return index_vec;
1203 }
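
/*
 * Worked example (hypothetical, length = 4): indirect_index = {1,1,2,2}
 * and chan_index = 2 give index_vec = {(1*4+2)*4, ..., (2*4+2)*4} =
 * {24,24,40,40}; with need_perelement_offset the pixel offsets {0,1,2,3}
 * are added on top, yielding {24,25,42,43}.
 */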
1204
1205 static LLVMValueRef
1206 emit_fetch_constant(
1207 struct lp_build_tgsi_context * bld_base,
1208 const struct tgsi_full_src_register * reg,
1209 enum tgsi_opcode_type stype,
1210 unsigned swizzle_in)
1211 {
1212 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1213 struct gallivm_state *gallivm = bld_base->base.gallivm;
1214 LLVMBuilderRef builder = gallivm->builder;
1215 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1216 unsigned dimension = 0;
1217 LLVMValueRef consts_ptr;
1218 LLVMValueRef num_consts;
1219 LLVMValueRef res;
1220 unsigned swizzle = swizzle_in & 0xffff;
1221
1222 /* XXX: Handle fetching xyzw components as a vector */
1223 assert(swizzle != ~0u);
1224
1225 if (reg->Register.Dimension) {
1226 assert(!reg->Dimension.Indirect);
1227 dimension = reg->Dimension.Index;
1228 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
1229 }
1230
1231 consts_ptr = bld->consts[dimension];
1232 num_consts = bld->consts_sizes[dimension];
1233
1234 if (reg->Register.Indirect) {
1235 LLVMValueRef indirect_index;
1236 LLVMValueRef swizzle_vec =
1237 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
1238 LLVMValueRef index_vec; /* index into the const buffer */
1239 LLVMValueRef overflow_mask;
1240 LLVMValueRef index_vec2 = NULL;
1241
1242 indirect_index = get_indirect_index(bld,
1243 reg->Register.File,
1244 reg->Register.Index,
1245 &reg->Indirect,
1246 bld->bld_base.info->file_max[reg->Register.File]);
1247
1248 /* All fetches are from the same constant buffer, so
1249 * we need to propagate the size to a vector to do a
1250 * vector comparison */
1251 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
1252 /* Construct a boolean vector telling us which channels
1253 * overflow the bound constant buffer */
1254 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
1255 indirect_index, num_consts);
1256
1257 /* index_vec = indirect_index * 4 + swizzle */
1258 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1259 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1260
1261 if (tgsi_type_is_64bit(stype)) {
1262 LLVMValueRef swizzle_vec2;
1263 swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
1264 index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
1265 index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
1266 }
1267 /* Gather values from the constant buffer */
1268 res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
1269 }
1270 else {
1271 LLVMValueRef index; /* index into the const buffer */
1272 LLVMValueRef scalar, scalar_ptr;
1273 struct lp_build_context *bld_broad = &bld_base->base;
1274 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1275
1276 scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
1277 &index, 1, "");
1278
1279 if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
1280
1281 LLVMValueRef scalar2, scalar2_ptr;
1282 LLVMValueRef shuffles[2];
1283 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));
1284
1285 scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
1286 &index, 1, "");
1287
1288 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
1289 scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
1290 shuffles[0] = lp_build_const_int32(gallivm, 0);
1291 shuffles[1] = lp_build_const_int32(gallivm, 1);
1292
1293 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
1294 res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
1295 res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
1296 } else {
1297 if (stype == TGSI_TYPE_DOUBLE) {
1298 LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
1299 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
1300 bld_broad = &bld_base->dbl_bld;
1301 } else if (stype == TGSI_TYPE_UNSIGNED64) {
1302 LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
1303 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
1304 bld_broad = &bld_base->uint64_bld;
1305 } else if (stype == TGSI_TYPE_SIGNED64) {
1306 LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
1307 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
1308 bld_broad = &bld_base->int64_bld;
1309 }
1310 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
1311 res = lp_build_broadcast_scalar(bld_broad, scalar);
1312 }
1313
1314 }
1315
1316 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
1317 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1318 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1319 }
1320
1321 return res;
1322 }
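
/*
 * E.g. (a sketch) a non-indirect fetch of CONST[3].y loads the scalar at
 * consts_ptr[3*4 + 1] and broadcasts it to all lanes, while an indirect
 * fetch goes through build_gather with per-lane indices and the overflow
 * handling described above.
 */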
1323
1324 /**
1325 * Fetch 64-bit values from two separate channels.
1326 * 64-bit values are stored split across two channels, like xy and zw.
1327 * This function creates a set of vec_length*2 floats,
1328 * extracts the values from the two channels,
1329 * puts them in the correct place, then casts to vec_length 64-bits.
1330 */
1331 static LLVMValueRef
1332 emit_fetch_64bit(
1333 struct lp_build_tgsi_context * bld_base,
1334 enum tgsi_opcode_type stype,
1335 LLVMValueRef input,
1336 LLVMValueRef input2)
1337 {
1338 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1339 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1340 LLVMBuilderRef builder = gallivm->builder;
1341 LLVMValueRef res;
1342 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1343 int i;
1344 LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
1345 int len = bld_base->base.type.length * 2;
1346 assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
1347
1348 for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
1349 shuffles[i] = lp_build_const_int32(gallivm, i / 2);
1350 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
1351 }
1352 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
1353
1354 return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1355 }
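
/*
 * For example (hypothetical, length = 4) the shuffle mask is
 * {0,4,1,5,2,6,3,7}, interleaving input = {x0,x1,x2,x3} and
 * input2 = {y0,y1,y2,y3} into {x0,y0,x1,y1,x2,y2,x3,y3}, which the final
 * bitcast then reinterprets as four 64-bit values.
 */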
1356
1357 static LLVMValueRef
1358 emit_fetch_immediate(
1359 struct lp_build_tgsi_context * bld_base,
1360 const struct tgsi_full_src_register * reg,
1361 enum tgsi_opcode_type stype,
1362 unsigned swizzle_in)
1363 {
1364 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1365 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1366 LLVMBuilderRef builder = gallivm->builder;
1367 LLVMValueRef res = NULL;
1368 unsigned swizzle = swizzle_in & 0xffff;
1369
1370 if (bld->use_immediates_array || reg->Register.Indirect) {
1371 LLVMValueRef imms_array;
1372 LLVMTypeRef fptr_type;
1373
1374 /* cast imms_array pointer to float* */
1375 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1376 imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
1377
1378 if (reg->Register.Indirect) {
1379 LLVMValueRef indirect_index;
1380 LLVMValueRef index_vec; /* index into the immediate register array */
1381 LLVMValueRef index_vec2 = NULL;
1382 indirect_index = get_indirect_index(bld,
1383 reg->Register.File,
1384 reg->Register.Index,
1385 &reg->Indirect,
1386 bld->bld_base.info->file_max[reg->Register.File]);
1387 /*
1388 * Unlike for other reg classes, adding pixel offsets is unnecessary -
1389 * immediates are stored as full vectors (FIXME??? - might be better
1390 * to store them the same as constants) but all elements are the same
1391 * in any case.
1392 */
1393 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1394 indirect_index,
1395 swizzle,
1396 FALSE);
1397 if (tgsi_type_is_64bit(stype))
1398 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1399 indirect_index,
1400 swizzle_in >> 16,
1401 FALSE);
1402 /* Gather values from the immediate register array */
1403 res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
1404 } else {
1405 LLVMValueRef gep[2];
1406 gep[0] = lp_build_const_int32(gallivm, 0);
1407 gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1408 LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
1409 bld->imms_array, gep, 2, "");
1410 res = LLVMBuildLoad(builder, imms_ptr, "");
1411
1412 if (tgsi_type_is_64bit(stype)) {
1413 LLVMValueRef imms_ptr2;
1414 LLVMValueRef res2;
1415 gep[1] = lp_build_const_int32(gallivm,
1416 reg->Register.Index * 4 + (swizzle_in >> 16));
1417 imms_ptr2 = LLVMBuildGEP(builder,
1418 bld->imms_array, gep, 2, "");
1419 res2 = LLVMBuildLoad(builder, imms_ptr2, "");
1420 res = emit_fetch_64bit(bld_base, stype, res, res2);
1421 }
1422 }
1423 }
1424 else {
1425 res = bld->immediates[reg->Register.Index][swizzle];
1426 if (tgsi_type_is_64bit(stype))
1427 res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
1428 }
1429
1430 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1431 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1432 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1433 }
1434 return res;
1435 }
1436
1437 static LLVMValueRef
1438 emit_fetch_input(
1439 struct lp_build_tgsi_context * bld_base,
1440 const struct tgsi_full_src_register * reg,
1441 enum tgsi_opcode_type stype,
1442 unsigned swizzle_in)
1443 {
1444 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1445 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1446 LLVMBuilderRef builder = gallivm->builder;
1447 LLVMValueRef res;
1448 unsigned swizzle = swizzle_in & 0xffff;
1449
1450 if (reg->Register.Indirect) {
1451 LLVMValueRef indirect_index;
1452 LLVMValueRef index_vec; /* index into the input reg array */
1453 LLVMValueRef index_vec2 = NULL;
1454 LLVMValueRef inputs_array;
1455 LLVMTypeRef fptr_type;
1456
1457 indirect_index = get_indirect_index(bld,
1458 reg->Register.File,
1459 reg->Register.Index,
1460 &reg->Indirect,
1461 bld->bld_base.info->file_max[reg->Register.File]);
1462
1463 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1464 indirect_index,
1465 swizzle,
1466 TRUE);
1467 if (tgsi_type_is_64bit(stype)) {
1468 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1469 indirect_index,
1470 swizzle_in >> 16,
1471 TRUE);
1472 }
1473 /* cast inputs_array pointer to float* */
1474 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1475 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1476
1477 /* Gather values from the input register array */
1478 res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
1479 } else {
1480 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1481 LLVMValueRef lindex = lp_build_const_int32(gallivm,
1482 reg->Register.Index * 4 + swizzle);
1483 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1484 bld->inputs_array, &lindex, 1, "");
1485
1486 res = LLVMBuildLoad(builder, input_ptr, "");
1487 if (tgsi_type_is_64bit(stype)) {
1488 LLVMValueRef lindex1;
1489 LLVMValueRef input_ptr2;
1490 LLVMValueRef res2;
1491
1492 lindex1 = lp_build_const_int32(gallivm,
1493 reg->Register.Index * 4 + (swizzle_in >> 16));
1494 input_ptr2 = LLVMBuildGEP(builder,
1495 bld->inputs_array, &lindex1, 1, "");
1496 res2 = LLVMBuildLoad(builder, input_ptr2, "");
1497 res = emit_fetch_64bit(bld_base, stype, res, res2);
1498 }
1499 }
1500 else {
1501 res = bld->inputs[reg->Register.Index][swizzle];
1502 if (tgsi_type_is_64bit(stype))
1503 res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
1504 }
1505 }
1506
1507 assert(res);
1508
1509 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1510 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1511 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1512 }
1513
1514 return res;
1515 }
1516
1517
1518 static LLVMValueRef
1519 emit_fetch_gs_input(
1520 struct lp_build_tgsi_context * bld_base,
1521 const struct tgsi_full_src_register * reg,
1522 enum tgsi_opcode_type stype,
1523 unsigned swizzle_in)
1524 {
1525 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1526 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1527 const struct tgsi_shader_info *info = bld->bld_base.info;
1528 LLVMBuilderRef builder = gallivm->builder;
1529 LLVMValueRef attrib_index = NULL;
1530 LLVMValueRef vertex_index = NULL;
1531 unsigned swizzle = swizzle_in & 0xffff;
1532 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1533 LLVMValueRef res;
1534
1535 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1536 /* This is really a system value, not a regular input */
1537 assert(!reg->Register.Indirect);
1538 assert(!reg->Dimension.Indirect);
1539 res = bld->system_values.prim_id;
1540 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1541 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1542 }
1543 return res;
1544 }
1545
1546 if (reg->Register.Indirect) {
1547 /*
1548 * XXX: this is possibly not quite the right value, since file_max may be
1549 * larger than the max attrib index, due to it being the max of declared
1550 * inputs AND the max vertices per prim (which is 6 for tri adj).
1551 * It should however be safe to use (since we always allocate
1552 * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1553 */
1554 int index_limit = info->file_max[reg->Register.File];
1555 attrib_index = get_indirect_index(bld,
1556 reg->Register.File,
1557 reg->Register.Index,
1558 &reg->Indirect,
1559 index_limit);
1560 } else {
1561 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1562 }
1563
1564 if (reg->Dimension.Indirect) {
1565 /*
1566 * A fixed 6 should do as well (which is what we allocate).
1567 */
1568 int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
1569 vertex_index = get_indirect_index(bld,
1570 reg->Register.File,
1571 reg->Dimension.Index,
1572 &reg->DimIndirect,
1573 index_limit);
1574 } else {
1575 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1576 }
1577
1578 res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1579 reg->Dimension.Indirect,
1580 vertex_index,
1581 reg->Register.Indirect,
1582 attrib_index,
1583 swizzle_index);
1584
1585 assert(res);
1586 if (tgsi_type_is_64bit(stype)) {
1587 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1588 LLVMValueRef res2;
1589 res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1590 reg->Dimension.Indirect,
1591 vertex_index,
1592 reg->Register.Indirect,
1593 attrib_index,
1594 swizzle_index);
1595 assert(res2);
1596 res = emit_fetch_64bit(bld_base, stype, res, res2);
1597 } else if (stype == TGSI_TYPE_UNSIGNED) {
1598 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1599 } else if (stype == TGSI_TYPE_SIGNED) {
1600 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1601 }
1602
1603 return res;
1604 }
1605
1606 static LLVMValueRef
1607 emit_fetch_temporary(
1608 struct lp_build_tgsi_context * bld_base,
1609 const struct tgsi_full_src_register * reg,
1610 enum tgsi_opcode_type stype,
1611 unsigned swizzle_in)
1612 {
1613 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1614 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1615 LLVMBuilderRef builder = gallivm->builder;
1616 LLVMValueRef res;
1617 unsigned swizzle = swizzle_in & 0xffff;
1618
1619 if (reg->Register.Indirect) {
1620 LLVMValueRef indirect_index;
1621 LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */
1622 LLVMValueRef temps_array;
1623 LLVMTypeRef fptr_type;
1624
1625 indirect_index = get_indirect_index(bld,
1626 reg->Register.File,
1627 reg->Register.Index,
1628 &reg->Indirect,
1629 bld->bld_base.info->file_max[reg->Register.File]);
1630
1631 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1632 indirect_index,
1633 swizzle,
1634 TRUE);
1635 if (tgsi_type_is_64bit(stype)) {
1636 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1637 indirect_index,
1638 swizzle_in >> 16,
1639 TRUE);
1640 }
1641
1642 /* cast temps_array pointer to float* */
1643 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1644 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1645
1646 /* Gather values from the temporary register array */
1647 res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
1648 }
1649 else {
1650 LLVMValueRef temp_ptr;
1651 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1652 res = LLVMBuildLoad(builder, temp_ptr, "");
1653
1654 if (tgsi_type_is_64bit(stype)) {
1655 LLVMValueRef temp_ptr2, res2;
1656
1657 temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
1658 res2 = LLVMBuildLoad(builder, temp_ptr2, "");
1659 res = emit_fetch_64bit(bld_base, stype, res, res2);
1660 }
1661 }
1662
1663 if (stype == TGSI_TYPE_SIGNED ||
1664 stype == TGSI_TYPE_UNSIGNED ||
1665 stype == TGSI_TYPE_DOUBLE ||
1666 stype == TGSI_TYPE_SIGNED64 ||
1667 stype == TGSI_TYPE_UNSIGNED64) {
1668 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1669 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1670 }
1671
1672 return res;
1673 }
1674
1675 static LLVMValueRef
1676 emit_fetch_system_value(
1677 struct lp_build_tgsi_context * bld_base,
1678 const struct tgsi_full_src_register * reg,
1679 enum tgsi_opcode_type stype,
1680 unsigned swizzle_in)
1681 {
1682 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1683 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1684 const struct tgsi_shader_info *info = bld->bld_base.info;
1685 LLVMBuilderRef builder = gallivm->builder;
1686 LLVMValueRef res;
1687 enum tgsi_opcode_type atype; // Actual type of the value
1688
1689 assert(!reg->Register.Indirect);
1690
1691 switch (info->system_value_semantic_name[reg->Register.Index]) {
1692 case TGSI_SEMANTIC_INSTANCEID:
1693 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1694 atype = TGSI_TYPE_UNSIGNED;
1695 break;
1696
1697 case TGSI_SEMANTIC_VERTEXID:
1698 res = bld->system_values.vertex_id;
1699 atype = TGSI_TYPE_UNSIGNED;
1700 break;
1701
1702 case TGSI_SEMANTIC_VERTEXID_NOBASE:
1703 res = bld->system_values.vertex_id_nobase;
1704 atype = TGSI_TYPE_UNSIGNED;
1705 break;
1706
1707 case TGSI_SEMANTIC_BASEVERTEX:
1708 res = bld->system_values.basevertex;
1709 atype = TGSI_TYPE_UNSIGNED;
1710 break;
1711
1712 case TGSI_SEMANTIC_PRIMID:
1713 res = bld->system_values.prim_id;
1714 atype = TGSI_TYPE_UNSIGNED;
1715 break;
1716
1717 case TGSI_SEMANTIC_INVOCATIONID:
1718 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1719 atype = TGSI_TYPE_UNSIGNED;
1720 break;
1721
1722 default:
1723 assert(!"unexpected semantic in emit_fetch_system_value");
1724 res = bld_base->base.zero;
1725 atype = TGSI_TYPE_FLOAT;
1726 break;
1727 }
1728
1729 if (atype != stype) {
1730 if (stype == TGSI_TYPE_FLOAT) {
1731 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1732 } else if (stype == TGSI_TYPE_UNSIGNED) {
1733 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1734 } else if (stype == TGSI_TYPE_SIGNED) {
1735 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1736 }
1737 }
1738
1739 return res;
1740 }
1741
1742 /**
1743 * Register fetch with derivatives.
1744 */
1745 static void
1746 emit_fetch_deriv(
1747 struct lp_build_tgsi_soa_context *bld,
1748 LLVMValueRef src,
1749 LLVMValueRef *res,
1750 LLVMValueRef *ddx,
1751 LLVMValueRef *ddy)
1752 {
1753 if (res)
1754 *res = src;
1755
1756 /* TODO: use interpolation coeffs for inputs */
1757
1758 if (ddx)
1759 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1760
1761 if (ddy)
1762 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1763 }
1764
1765 /**
1766 * Store an array of vec-length 64-bit values into two arrays of vec_length floats,
1767 * i.e.
1768 * value is d0, d1, d2, d3 etc.
1769 * each 64-bit value is split into two 32-bit pieces x, y
1770 * which get stored into the separate channels as:
1771 * chan_ptr = d0.x, d1.x, d2.x, d3.x
1772 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1773 */
1774 static void
1775 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1776 LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1777 LLVMValueRef value)
1778 {
1779 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1780 struct gallivm_state *gallivm = bld_base->base.gallivm;
1781 LLVMBuilderRef builder = gallivm->builder;
1782 struct lp_build_context *float_bld = &bld_base->base;
1783 unsigned i;
1784 LLVMValueRef temp, temp2;
1785 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1786 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1787
1788 for (i = 0; i < bld_base->base.type.length; i++) {
1789 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1790 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1791 }
1792
1793 temp = LLVMBuildShuffleVector(builder, value,
1794 LLVMGetUndef(LLVMTypeOf(value)),
1795 LLVMConstVector(shuffles,
1796 bld_base->base.type.length),
1797 "");
1798 temp2 = LLVMBuildShuffleVector(builder, value,
1799 LLVMGetUndef(LLVMTypeOf(value)),
1800 LLVMConstVector(shuffles2,
1801 bld_base->base.type.length),
1802 "");
1803
1804 lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1805 lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1806 }
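/*
 * Illustrative sketch (an addition, not part of the original file): for a
 * 4-wide SoA vector the shuffles above pick the even and odd 32-bit lanes:
 *   value = { d0.x, d0.y, d1.x, d1.y, d2.x, d2.y, d3.x, d3.y }
 *   temp  = { d0.x, d1.x, d2.x, d3.x }   (shuffles  = 0,2,4,6)
 *   temp2 = { d0.y, d1.y, d2.y, d3.y }   (shuffles2 = 1,3,5,7)
 * A minimal scalar equivalent, assuming a 4-wide vector and <stdint.h>:
 */
static inline void
split_64bit_soa_sketch(const uint32_t value[8],
                       uint32_t piece_x[4], uint32_t piece_y[4])
{
   unsigned i;
   for (i = 0; i < 4; i++) {
      piece_x[i] = value[i * 2];     /* shuffles[i]  = i * 2     */
      piece_y[i] = value[i * 2 + 1]; /* shuffles2[i] = i * 2 + 1 */
   }
}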
1807
1808 /**
1809 * Register store.
1810 */
1811 static void
1812 emit_store_chan(
1813 struct lp_build_tgsi_context *bld_base,
1814 const struct tgsi_full_instruction *inst,
1815 unsigned index,
1816 unsigned chan_index,
1817 LLVMValueRef value)
1818 {
1819 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1820 struct gallivm_state *gallivm = bld_base->base.gallivm;
1821 LLVMBuilderRef builder = gallivm->builder;
1822 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1823 struct lp_build_context *float_bld = &bld_base->base;
1824 struct lp_build_context *int_bld = &bld_base->int_bld;
1825 LLVMValueRef indirect_index = NULL;
1826 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1827
1828 /*
1829 * Apply saturation.
1830 *
1831 * The value is always assumed to be float.
1832 */
1833 if (inst->Instruction.Saturate) {
1834 assert(dtype == TGSI_TYPE_FLOAT ||
1835 dtype == TGSI_TYPE_UNTYPED);
1836 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1837 value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1838 }
1839
1840 if (reg->Register.Indirect) {
1841 /*
1842 * Currently mesa/st doesn't generate indirect stores
1843 * to 64-bit values; it normally uses MOV to do indirect stores.
1844 */
1845 assert(!tgsi_type_is_64bit(dtype));
1846 indirect_index = get_indirect_index(bld,
1847 reg->Register.File,
1848 reg->Register.Index,
1849 &reg->Indirect,
1850 bld->bld_base.info->file_max[reg->Register.File]);
1851 } else {
1852 assert(reg->Register.Index <=
1853 bld_base->info->file_max[reg->Register.File]);
1854 }
1855
1856 if (DEBUG_EXECUTION) {
1857 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1858 }
1859
1860 switch( reg->Register.File ) {
1861 case TGSI_FILE_OUTPUT:
1862 /* Outputs are always stored as floats */
1863 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1864
1865 if (reg->Register.Indirect) {
1866 LLVMValueRef index_vec; /* indexes into the output registers */
1867 LLVMValueRef outputs_array;
1868 LLVMTypeRef fptr_type;
1869
1870 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1871 indirect_index,
1872 chan_index,
1873 TRUE);
1874
1875 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1876 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1877
1878 /* Scatter store values into output registers */
1879 emit_mask_scatter(bld, outputs_array, index_vec, value,
1880 &bld->exec_mask);
1881 }
1882 else {
1883 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1884 chan_index);
1885
1886 if (tgsi_type_is_64bit(dtype)) {
1887 LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1888 chan_index + 1);
1889 emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1890 value);
1891 } else
1892 lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1893 }
1894 break;
1895
1896 case TGSI_FILE_TEMPORARY:
1897 /* Temporaries are always stored as floats */
1898 if (!tgsi_type_is_64bit(dtype))
1899 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1900 else
1901 value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1902
1903 if (reg->Register.Indirect) {
1904 LLVMValueRef index_vec; /* indexes into the temp registers */
1905 LLVMValueRef temps_array;
1906 LLVMTypeRef fptr_type;
1907
1908 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1909 indirect_index,
1910 chan_index,
1911 TRUE);
1912
1913 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1914 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1915
1916 /* Scatter store values into temp registers */
1917 emit_mask_scatter(bld, temps_array, index_vec, value,
1918 &bld->exec_mask);
1919 }
1920 else {
1921 LLVMValueRef temp_ptr;
1922 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1923
1924 if (tgsi_type_is_64bit(dtype)) {
1925 LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1926 reg->Register.Index,
1927 chan_index + 1);
1928 emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1929 value);
1930 }
1931 else
1932 lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1933 }
1934 break;
1935
1936 case TGSI_FILE_ADDRESS:
1937 assert(dtype == TGSI_TYPE_SIGNED);
1938 assert(LLVMTypeOf(value) == int_bld->vec_type);
1939 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1940 lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1941 bld->addr[reg->Register.Index][chan_index]);
1942 break;
1943
1944 default:
1945 assert( 0 );
1946 }
1947
1948 (void)dtype;
1949 }
1950
1951 /*
1952 * Called at the beginning of the translation of each TGSI instruction, to
1953 * emit some debug code.
1954 */
1955 static void
1956 emit_debug(
1957 struct lp_build_tgsi_context * bld_base,
1958 const struct tgsi_full_instruction * inst,
1959 const struct tgsi_opcode_info * info)
1960
1961 {
1962 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1963
1964 if (DEBUG_EXECUTION) {
1965 /*
1966 * Dump the TGSI instruction.
1967 */
1968
1969 struct gallivm_state *gallivm = bld_base->base.gallivm;
1970 char buf[512];
1971 buf[0] = '$';
1972 buf[1] = ' ';
1973 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1974 lp_build_printf(gallivm, buf);
1975
1976 /* Dump the execution mask.
1977 */
1978 if (bld->exec_mask.has_mask) {
1979 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
1980 }
1981 }
1982 }
1983
1984 static void
1985 emit_store(
1986 struct lp_build_tgsi_context * bld_base,
1987 const struct tgsi_full_instruction * inst,
1988 const struct tgsi_opcode_info * info,
1989 unsigned index,
1990 LLVMValueRef dst[4])
1991
1992 {
1993 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1994
1995 unsigned writemask = inst->Dst[index].Register.WriteMask;
1996 while (writemask) {
1997 unsigned chan_index = u_bit_scan(&writemask);
1998 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1999 continue;
2000 emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
2001 }
2002 }
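/*
 * Note on the loop above (added illustration): u_bit_scan() from
 * util/u_math.h pops the lowest set bit of the writemask, roughly
 * (assuming ffs() from <strings.h>):
 */
static inline int
u_bit_scan_sketch(unsigned *mask)
{
   const int i = ffs(*mask) - 1; /* index of the lowest set bit */
   *mask &= ~(1u << i);          /* clear it for the next iteration */
   return i;
}
/*
 * For 64-bit dtypes the odd channels (y, w) are the high halves of the
 * x/z pairs, so they are skipped; the store at channel 0 or 2 covers
 * both 32-bit halves via emit_store_64bit_chan().
 */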
2003
2004 static unsigned
2005 tgsi_to_pipe_tex_target(unsigned tgsi_target)
2006 {
2007 switch (tgsi_target) {
2008 case TGSI_TEXTURE_BUFFER:
2009 return PIPE_BUFFER;
2010 case TGSI_TEXTURE_1D:
2011 case TGSI_TEXTURE_SHADOW1D:
2012 return PIPE_TEXTURE_1D;
2013 case TGSI_TEXTURE_2D:
2014 case TGSI_TEXTURE_SHADOW2D:
2015 case TGSI_TEXTURE_2D_MSAA:
2016 return PIPE_TEXTURE_2D;
2017 case TGSI_TEXTURE_3D:
2018 return PIPE_TEXTURE_3D;
2019 case TGSI_TEXTURE_CUBE:
2020 case TGSI_TEXTURE_SHADOWCUBE:
2021 return PIPE_TEXTURE_CUBE;
2022 case TGSI_TEXTURE_RECT:
2023 case TGSI_TEXTURE_SHADOWRECT:
2024 return PIPE_TEXTURE_RECT;
2025 case TGSI_TEXTURE_1D_ARRAY:
2026 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2027 return PIPE_TEXTURE_1D_ARRAY;
2028 case TGSI_TEXTURE_2D_ARRAY:
2029 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2030 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2031 return PIPE_TEXTURE_2D_ARRAY;
2032 case TGSI_TEXTURE_CUBE_ARRAY:
2033 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2034 return PIPE_TEXTURE_CUBE_ARRAY;
2035 default:
2036 assert(0);
2037 return PIPE_BUFFER;
2038 }
2039 }
2040
2041
2042 static enum lp_sampler_lod_property
2043 lp_build_lod_property(
2044 struct lp_build_tgsi_context *bld_base,
2045 const struct tgsi_full_instruction *inst,
2046 unsigned src_op)
2047 {
2048 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2049 enum lp_sampler_lod_property lod_property;
2050
2051 /*
2052 * Not much we can do here. We could try catching inputs declared
2053 * with constant interpolation, but it's probably not worth it - since
2054 * for TEX opcodes as well as FETCH/LD the lod comes from the same reg
2055 * as the coords, it could only work for SAMPLE/TXQ/SVIEWINFO, just
2056 * like the constant/immediate recognition below.
2057 * What would be of more value is recognizing temps holding
2058 * broadcast scalars, but there's no way we can do that.
2059 * Tried asking llvm but without any success (using LLVMIsConstant,
2060 * even though this isn't exactly what we'd need); even something as
2061 * simple as
2062 * IMM[0] UINT32 (0,-1,0,0)
2063 * MOV TEMP[0] IMM[0].yyyy
2064 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2065 * doesn't work.
2066 * This means there's ZERO chance this will ever catch a scalar lod
2067 * with traditional tex opcodes as well as texel fetches, since the lod
2068 * comes from the same reg as the coords (except maybe some test shaders using constant coords).
2069 * There's at least hope for sample opcodes as well as size queries.
2070 */
2071 if (reg->Register.File == TGSI_FILE_CONSTANT ||
2072 reg->Register.File == TGSI_FILE_IMMEDIATE) {
2073 lod_property = LP_SAMPLER_LOD_SCALAR;
2074 }
2075 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2076 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2077 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2078 }
2079 else {
2080 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2081 }
2082 }
2083 else {
2084 /* never use scalar (per-quad) lod, the results are just too wrong. */
2085 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2086 }
2087 return lod_property;
2088 }
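/*
 * Summary of the choice above (added note): a constant or immediate lod
 * source yields LP_SAMPLER_LOD_SCALAR; fragment shaders get per-quad lod
 * (or per-element when GALLIVM_PERF_NO_QUAD_LOD is set); all other shader
 * stages get per-element lod, since per-quad values would be wrong there.
 */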
2089
2090
2091 /**
2092 * High-level instruction translators.
2093 */
2094
2095 static void
2096 emit_tex( struct lp_build_tgsi_soa_context *bld,
2097 const struct tgsi_full_instruction *inst,
2098 enum lp_build_tex_modifier modifier,
2099 LLVMValueRef *texel,
2100 unsigned sampler_reg,
2101 enum lp_sampler_op_type sampler_op)
2102 {
2103 unsigned unit = inst->Src[sampler_reg].Register.Index;
2104 LLVMValueRef oow = NULL;
2105 LLVMValueRef lod = NULL;
2106 LLVMValueRef coords[5];
2107 LLVMValueRef offsets[3] = { NULL };
2108 struct lp_derivatives derivs;
2109 struct lp_sampler_params params;
2110 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2111 unsigned num_derivs, num_offsets, i;
2112 unsigned shadow_coord = 0;
2113 unsigned layer_coord = 0;
2114 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2115
2116 memset(&params, 0, sizeof(params));
2117
2118 if (!bld->sampler) {
2119 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2120 for (i = 0; i < 4; i++) {
2121 texel[i] = bld->bld_base.base.undef;
2122 }
2123 return;
2124 }
2125
2126 switch (inst->Texture.Texture) {
2127 case TGSI_TEXTURE_1D_ARRAY:
2128 layer_coord = 1;
2129 /* fallthrough */
2130 case TGSI_TEXTURE_1D:
2131 num_offsets = 1;
2132 num_derivs = 1;
2133 break;
2134 case TGSI_TEXTURE_2D_ARRAY:
2135 layer_coord = 2;
2136 /* fallthrough */
2137 case TGSI_TEXTURE_2D:
2138 case TGSI_TEXTURE_RECT:
2139 num_offsets = 2;
2140 num_derivs = 2;
2141 break;
2142 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2143 layer_coord = 1;
2144 /* fallthrough */
2145 case TGSI_TEXTURE_SHADOW1D:
2146 shadow_coord = 2;
2147 num_offsets = 1;
2148 num_derivs = 1;
2149 break;
2150 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2151 layer_coord = 2;
2152 shadow_coord = 3;
2153 num_offsets = 2;
2154 num_derivs = 2;
2155 break;
2156 case TGSI_TEXTURE_SHADOW2D:
2157 case TGSI_TEXTURE_SHADOWRECT:
2158 shadow_coord = 2;
2159 num_offsets = 2;
2160 num_derivs = 2;
2161 break;
2162 case TGSI_TEXTURE_CUBE:
2163 num_offsets = 2;
2164 num_derivs = 3;
2165 break;
2166 case TGSI_TEXTURE_3D:
2167 num_offsets = 3;
2168 num_derivs = 3;
2169 break;
2170 case TGSI_TEXTURE_SHADOWCUBE:
2171 shadow_coord = 3;
2172 num_offsets = 2;
2173 num_derivs = 3;
2174 break;
2175 case TGSI_TEXTURE_CUBE_ARRAY:
2176 num_offsets = 2;
2177 num_derivs = 3;
2178 layer_coord = 3;
2179 break;
2180 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2181 num_offsets = 2;
2182 num_derivs = 3;
2183 layer_coord = 3;
2184 shadow_coord = 4; /* shadow coord comes from a separate reg (src1) */
2185 break;
2186 case TGSI_TEXTURE_2D_MSAA:
2187 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2188 default:
2189 assert(0);
2190 return;
2191 }
2192
2193 /* Note lod and especially projected are illegal in a LOT of cases */
2194 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2195 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2196 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2197 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2198 /* note that shadow cube array with bias/explicit lod does not exist */
2199 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2200 }
2201 else {
2202 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2203 }
2204 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2205 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2206 }
2207 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2208 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2209 }
2210 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2211 }
2212
2213 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2214 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2215 oow = lp_build_rcp(&bld->bld_base.base, oow);
2216 }
2217
2218 for (i = 0; i < num_derivs; i++) {
2219 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2220 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2221 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2222 }
2223 for (i = num_derivs; i < 5; i++) {
2224 coords[i] = bld->bld_base.base.undef;
2225 }
2226
2227 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2228 if (layer_coord) {
2229 if (layer_coord == 3) {
2230 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2231 }
2232 else {
2233 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2234 }
2235 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2236 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2237 }
2238 /* Shadow coord always occupies the 5th slot. */
2239 if (shadow_coord) {
2240 sample_key |= LP_SAMPLER_SHADOW;
2241 if (shadow_coord == 4) {
2242 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2243 }
2244 else {
2245 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2246 }
2247 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2248 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2249 }
2250
2251 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2252 unsigned dim;
2253 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2254 for (dim = 0; dim < num_derivs; ++dim) {
2255 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2256 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2257 }
2258 params.derivs = &derivs;
2259 /*
2260 * could also check all src regs if constant but I doubt such
2261 * cases exist in practice.
2262 */
2263 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2264 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2265 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2266 }
2267 else {
2268 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2269 }
2270 }
2271 else {
2272 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2273 }
2274 }
2275 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2276
2277 /* we don't handle the 4 offset version of tg4 */
2278 if (inst->Texture.NumOffsets == 1) {
2279 unsigned dim;
2280 sample_key |= LP_SAMPLER_OFFSETS;
2281 for (dim = 0; dim < num_offsets; dim++) {
2282 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2283 }
2284 }
2285
2286 params.type = bld->bld_base.base.type;
2287 params.sample_key = sample_key;
2288 params.texture_index = unit;
2289 params.sampler_index = unit;
2290 params.context_ptr = bld->context_ptr;
2291 params.thread_data_ptr = bld->thread_data_ptr;
2292 params.coords = coords;
2293 params.offsets = offsets;
2294 params.lod = lod;
2295 params.texel = texel;
2296
2297 bld->sampler->emit_tex_sample(bld->sampler,
2298 bld->bld_base.base.gallivm,
2299 &params);
2300 }
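/*
 * Slot convention used by the coords array above (added summary; the
 * enum below is a hypothetical naming for illustration, not part of the
 * gallivm API):
 */
enum tex_coord_slot_sketch {
   TEX_COORD_S = 0,          /* first texture coordinate */
   TEX_COORD_T = 1,
   TEX_COORD_R = 2,          /* also the layer for 1D/2D arrays */
   TEX_COORD_CUBE_LAYER = 3, /* layer for cube map arrays */
   TEX_COORD_SHADOW = 4      /* shadow comparison value */
};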
2301
2302 static void
2303 emit_sample(struct lp_build_tgsi_soa_context *bld,
2304 const struct tgsi_full_instruction *inst,
2305 enum lp_build_tex_modifier modifier,
2306 boolean compare,
2307 enum lp_sampler_op_type sample_type,
2308 LLVMValueRef *texel)
2309 {
2310 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2311 unsigned texture_unit, sampler_unit;
2312 LLVMValueRef lod = NULL;
2313 LLVMValueRef coords[5];
2314 LLVMValueRef offsets[3] = { NULL };
2315 struct lp_derivatives derivs;
2316 struct lp_sampler_params params;
2317 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2318
2319 unsigned num_offsets, num_derivs, i;
2320 unsigned layer_coord = 0;
2321 unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2322
2323 memset(&params, 0, sizeof(params));
2324
2325 if (!bld->sampler) {
2326 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2327 for (i = 0; i < 4; i++) {
2328 texel[i] = bld->bld_base.base.undef;
2329 }
2330 return;
2331 }
2332
2333 /*
2334 * Unlike old-style tex opcodes, the texture/sampler indices
2335 * always come from src1 and src2 respectively.
2336 */
2337 texture_unit = inst->Src[1].Register.Index;
2338 sampler_unit = inst->Src[2].Register.Index;
2339
2340 /*
2341 * Note inst->Texture.Texture will contain the number of offsets;
2342 * however, the target information is NOT there and comes from the
2343 * declared sampler views instead.
2344 */
2345 switch (bld->sv[texture_unit].Resource) {
2346 case TGSI_TEXTURE_1D:
2347 num_offsets = 1;
2348 num_derivs = 1;
2349 break;
2350 case TGSI_TEXTURE_1D_ARRAY:
2351 layer_coord = 1;
2352 num_offsets = 1;
2353 num_derivs = 1;
2354 break;
2355 case TGSI_TEXTURE_2D:
2356 case TGSI_TEXTURE_RECT:
2357 num_offsets = 2;
2358 num_derivs = 2;
2359 break;
2360 case TGSI_TEXTURE_2D_ARRAY:
2361 layer_coord = 2;
2362 num_offsets = 2;
2363 num_derivs = 2;
2364 break;
2365 case TGSI_TEXTURE_CUBE:
2366 num_offsets = 2;
2367 num_derivs = 3;
2368 break;
2369 case TGSI_TEXTURE_3D:
2370 num_offsets = 3;
2371 num_derivs = 3;
2372 break;
2373 case TGSI_TEXTURE_CUBE_ARRAY:
2374 layer_coord = 3;
2375 num_offsets = 2;
2376 num_derivs = 3;
2377 break;
2378 default:
2379 assert(0);
2380 return;
2381 }
2382
2383 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2384 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2385 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2386 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2387 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2388 }
2389 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2390 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2391 }
2392 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2393 }
2394 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2395 /* XXX might be better to explicitly pass the level zero information */
2396 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2397 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2398 }
2399
2400 for (i = 0; i < num_derivs; i++) {
2401 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2402 }
2403 for (i = num_derivs; i < 5; i++) {
2404 coords[i] = bld->bld_base.base.undef;
2405 }
2406
2407 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2408 if (layer_coord) {
2409 if (layer_coord == 3)
2410 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2411 else
2412 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2413 }
2414 /* Shadow coord always occupies the 5th slot. */
2415 if (compare) {
2416 sample_key |= LP_SAMPLER_SHADOW;
2417 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2418 }
2419
2420 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2421 unsigned dim;
2422 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2423 for (dim = 0; dim < num_derivs; ++dim) {
2424 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2425 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2426 }
2427 params.derivs = &derivs;
2428 /*
2429 * could also check all src regs if constant but I doubt such
2430 * cases exist in practice.
2431 */
2432 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2433 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2434 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2435 }
2436 else {
2437 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2438 }
2439 }
2440 else {
2441 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2442 }
2443 }
2444
2445 /* some advanced gather instructions (txgo) would require 4 offsets */
2446 if (inst->Texture.NumOffsets == 1) {
2447 unsigned dim;
2448 sample_key |= LP_SAMPLER_OFFSETS;
2449 for (dim = 0; dim < num_offsets; dim++) {
2450 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2451 }
2452 }
2453 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2454
2455 params.type = bld->bld_base.base.type;
2456 params.sample_key = sample_key;
2457 params.texture_index = texture_unit;
2458 params.sampler_index = sampler_unit;
2459 params.context_ptr = bld->context_ptr;
2460 params.thread_data_ptr = bld->thread_data_ptr;
2461 params.coords = coords;
2462 params.offsets = offsets;
2463 params.lod = lod;
2464 params.texel = texel;
2465
2466 bld->sampler->emit_tex_sample(bld->sampler,
2467 bld->bld_base.base.gallivm,
2468 &params);
2469
2470 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2471 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2472 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2473 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2474 unsigned char swizzles[4];
2475 swizzles[0] = inst->Src[1].Register.SwizzleX;
2476 swizzles[1] = inst->Src[1].Register.SwizzleY;
2477 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2478 swizzles[3] = inst->Src[1].Register.SwizzleW;
2479
2480 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2481 }
2482 }
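/*
 * Operand layout assumed by the fetches above (added summary): src0
 * holds the coords, src1 the sampler view (whose swizzle is applied to
 * the result), src2 the sampler state; src3 carries the lod/bias
 * (SAMPLE_L/SAMPLE_B), the comparison value (SAMPLE_C/SAMPLE_C_LZ) or
 * ddx (SAMPLE_D), and src4 carries ddy for SAMPLE_D.
 */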
2483
2484 static void
2485 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2486 const struct tgsi_full_instruction *inst,
2487 LLVMValueRef *texel,
2488 boolean is_samplei)
2489 {
2490 unsigned unit, target;
2491 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2492 LLVMValueRef explicit_lod = NULL;
2493 LLVMValueRef coords[5];
2494 LLVMValueRef offsets[3] = { NULL };
2495 struct lp_sampler_params params;
2496 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2497 unsigned dims, i;
2498 unsigned layer_coord = 0;
2499 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2500
2501 memset(&params, 0, sizeof(params));
2502
2503 if (!bld->sampler) {
2504 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2505 for (i = 0; i < 4; i++) {
2506 texel[i] = coord_undef;
2507 }
2508 return;
2509 }
2510
2511 unit = inst->Src[1].Register.Index;
2512
2513 if (is_samplei) {
2514 target = bld->sv[unit].Resource;
2515 }
2516 else {
2517 target = inst->Texture.Texture;
2518 }
2519
2520 switch (target) {
2521 case TGSI_TEXTURE_1D:
2522 case TGSI_TEXTURE_BUFFER:
2523 dims = 1;
2524 break;
2525 case TGSI_TEXTURE_1D_ARRAY:
2526 layer_coord = 1;
2527 dims = 1;
2528 break;
2529 case TGSI_TEXTURE_2D:
2530 case TGSI_TEXTURE_RECT:
2531 case TGSI_TEXTURE_2D_MSAA:
2532 dims = 2;
2533 break;
2534 case TGSI_TEXTURE_2D_ARRAY:
2535 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2536 layer_coord = 2;
2537 dims = 2;
2538 break;
2539 case TGSI_TEXTURE_3D:
2540 dims = 3;
2541 break;
2542 default:
2543 assert(0);
2544 return;
2545 }
2546
2547 /* we always have an explicit lod, except for buffers and msaa targets? */
2548 if (target != TGSI_TEXTURE_BUFFER &&
2549 target != TGSI_TEXTURE_2D_MSAA &&
2550 target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
2551 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2552 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2553 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2554 }
2555 /*
2556 * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2557 * would be the sample index.
2558 */
2559
2560 for (i = 0; i < dims; i++) {
2561 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2562 }
2563 /* we never use more than 3 coords here, but emit_fetch_texel copies all 5 anyway */
2564 for (i = dims; i < 5; i++) {
2565 coords[i] = coord_undef;
2566 }
2567 if (layer_coord)
2568 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2569
2570 if (inst->Texture.NumOffsets == 1) {
2571 unsigned dim;
2572 sample_key |= LP_SAMPLER_OFFSETS;
2573 for (dim = 0; dim < dims; dim++) {
2574 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2575 }
2576 }
2577 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2578
2579 params.type = bld->bld_base.base.type;
2580 params.sample_key = sample_key;
2581 params.texture_index = unit;
2582 /*
2583 * The sampler is not actually used; set it to 0 so it won't exceed
2584 * PIPE_MAX_SAMPLERS and trigger assertions with d3d10, where the
2585 * sampler view number can exceed this.
2586 */
2587 params.sampler_index = 0;
2588 params.context_ptr = bld->context_ptr;
2589 params.thread_data_ptr = bld->thread_data_ptr;
2590 params.coords = coords;
2591 params.offsets = offsets;
2592 params.derivs = NULL;
2593 params.lod = explicit_lod;
2594 params.texel = texel;
2595
2596 bld->sampler->emit_tex_sample(bld->sampler,
2597 bld->bld_base.base.gallivm,
2598 &params);
2599
2600 if (is_samplei &&
2601 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2602 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2603 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2604 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2605 unsigned char swizzles[4];
2606 swizzles[0] = inst->Src[1].Register.SwizzleX;
2607 swizzles[1] = inst->Src[1].Register.SwizzleY;
2608 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2609 swizzles[3] = inst->Src[1].Register.SwizzleW;
2610
2611 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2612 }
2613 }
2614
2615 static void
2616 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2617 const struct tgsi_full_instruction *inst,
2618 LLVMValueRef *sizes_out,
2619 boolean is_sviewinfo)
2620 {
2621 LLVMValueRef explicit_lod;
2622 enum lp_sampler_lod_property lod_property;
2623 unsigned has_lod;
2624 unsigned i;
2625 unsigned unit = inst->Src[1].Register.Index;
2626 unsigned target, pipe_target;
2627 struct lp_sampler_size_query_params params;
2628
2629 if (is_sviewinfo) {
2630 target = bld->sv[unit].Resource;
2631 }
2632 else {
2633 target = inst->Texture.Texture;
2634 }
2635 switch (target) {
2636 case TGSI_TEXTURE_BUFFER:
2637 case TGSI_TEXTURE_RECT:
2638 case TGSI_TEXTURE_SHADOWRECT:
2639 has_lod = 0;
2640 break;
2641 default:
2642 has_lod = 1;
2643 break;
2644 }
2645
2646 if (!bld->sampler) {
2647 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2648 for (i = 0; i < 4; i++)
2649 sizes_out[i] = bld->bld_base.int_bld.undef;
2650 return;
2651 }
2652
2653 if (has_lod) {
2654 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2655 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2656 }
2657 else {
2658 explicit_lod = NULL;
2659 lod_property = LP_SAMPLER_LOD_SCALAR;
2660 }
2661
2662
2663 pipe_target = tgsi_to_pipe_tex_target(target);
2664
2665 params.int_type = bld->bld_base.int_bld.type;
2666 params.texture_unit = unit;
2667 params.target = pipe_target;
2668 params.context_ptr = bld->context_ptr;
2669 params.is_sviewinfo = TRUE;
2670 params.lod_property = lod_property;
2671 params.explicit_lod = explicit_lod;
2672 params.sizes_out = sizes_out;
2673
2674 bld->sampler->emit_size_query(bld->sampler,
2675 bld->bld_base.base.gallivm,
2676 &params);
2677 }
2678
2679 static boolean
2680 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2681 int pc)
2682 {
2683 unsigned i;
2684
2685 for (i = 0; i < 5; i++) {
2686 enum tgsi_opcode opcode;
2687
2688 if (pc + i >= bld->bld_base.info->num_instructions)
2689 return TRUE;
2690
2691 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2692
2693 if (opcode == TGSI_OPCODE_END)
2694 return TRUE;
2695
2696 if (opcode == TGSI_OPCODE_TEX ||
2697 opcode == TGSI_OPCODE_TXP ||
2698 opcode == TGSI_OPCODE_TXD ||
2699 opcode == TGSI_OPCODE_TXB ||
2700 opcode == TGSI_OPCODE_TXL ||
2701 opcode == TGSI_OPCODE_TXF ||
2702 opcode == TGSI_OPCODE_TXQ ||
2703 opcode == TGSI_OPCODE_TEX2 ||
2704 opcode == TGSI_OPCODE_TXB2 ||
2705 opcode == TGSI_OPCODE_TXL2 ||
2706 opcode == TGSI_OPCODE_SAMPLE ||
2707 opcode == TGSI_OPCODE_SAMPLE_B ||
2708 opcode == TGSI_OPCODE_SAMPLE_C ||
2709 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2710 opcode == TGSI_OPCODE_SAMPLE_D ||
2711 opcode == TGSI_OPCODE_SAMPLE_I ||
2712 opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2713 opcode == TGSI_OPCODE_SAMPLE_L ||
2714 opcode == TGSI_OPCODE_SVIEWINFO ||
2715 opcode == TGSI_OPCODE_CAL ||
2716 opcode == TGSI_OPCODE_IF ||
2717 opcode == TGSI_OPCODE_UIF ||
2718 opcode == TGSI_OPCODE_BGNLOOP ||
2719 opcode == TGSI_OPCODE_SWITCH)
2720 return FALSE;
2721 }
2722
2723 return TRUE;
2724 }
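/*
 * Rationale (added note): lp_build_mask_check() emits an early-exit
 * branch taken when all lanes are dead. If only a handful of cheap
 * instructions remain before END, that branch likely costs more than it
 * saves, so KILL skips it; expensive texturing or control-flow opcodes
 * in the lookahead window force the check instead.
 */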
2725
2726
2727
2728 /**
2729 * Kill fragment if any of the src register values are negative.
2730 */
2731 static void
2732 emit_kill_if(
2733 struct lp_build_tgsi_soa_context *bld,
2734 const struct tgsi_full_instruction *inst,
2735 int pc)
2736 {
2737 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2738 const struct tgsi_full_src_register *reg = &inst->Src[0];
2739 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2740 LLVMValueRef mask;
2741 unsigned chan_index;
2742
2743 memset(&terms, 0, sizeof terms);
2744
2745 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2746 unsigned swizzle;
2747
2748 /* Unswizzle channel */
2749 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2750
2751 /* Check if the component has not already been tested. */
2752 assert(swizzle < TGSI_NUM_CHANNELS);
2753 if( !terms[swizzle] )
2754 /* TODO: change the comparison operator instead of setting the sign */
2755 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2756 }
2757
2758 mask = NULL;
2759 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2760 if(terms[chan_index]) {
2761 LLVMValueRef chan_mask;
2762
2763 /*
2764 * If term < 0 then mask = 0 else mask = ~0.
2765 */
2766 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2767
2768 if(mask)
2769 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2770 else
2771 mask = chan_mask;
2772 }
2773 }
2774
2775 if (bld->exec_mask.has_mask) {
2776 LLVMValueRef invmask;
2777 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2778 mask = LLVMBuildOr(builder, mask, invmask, "");
2779 }
2780
2781 lp_build_mask_update(bld->mask, mask);
2782 if (!near_end_of_shader(bld, pc))
2783 lp_build_mask_check(bld->mask);
2784 }
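/*
 * Scalar model of the masking above (added sketch, one lane at a time):
 */
static inline uint32_t
kill_if_lane_sketch(float term, uint32_t exec_mask_lane)
{
   /* PIPE_FUNC_GEQUAL: lanes with a negative term get mask 0 (killed) */
   uint32_t chan_mask = (term >= 0.0f) ? ~0u : 0u;
   /* lanes that aren't currently executing must not be killed */
   return chan_mask | ~exec_mask_lane;
}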
2785
2786
2787 /**
2788 * Unconditional fragment kill.
2789 * The only predication is the execution mask which will apply if
2790 * we're inside a loop or conditional.
2791 */
2792 static void
2793 emit_kill(struct lp_build_tgsi_soa_context *bld,
2794 int pc)
2795 {
2796 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2797 LLVMValueRef mask;
2798
2799 /* For those channels which are "alive", disable fragment shader
2800 * execution.
2801 */
2802 if (bld->exec_mask.has_mask) {
2803 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2804 }
2805 else {
2806 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2807 mask = zero;
2808 }
2809
2810 lp_build_mask_update(bld->mask, mask);
2811
2812 if (!near_end_of_shader(bld, pc))
2813 lp_build_mask_check(bld->mask);
2814 }
2815
2816
2817 /**
2818 * Emit code which will dump the value of all the temporary registers
2819 * to stdout.
2820 */
2821 static void
2822 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2823 unsigned file)
2824 {
2825 const struct tgsi_shader_info *info = bld->bld_base.info;
2826 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2827 LLVMBuilderRef builder = gallivm->builder;
2828 LLVMValueRef reg_ptr;
2829 int index;
2830 int max_index = info->file_max[file];
2831
2832 /*
2833 * Some register files, particularly constants, can be very large,
2834 * and dumping everything could make this unusably slow.
2835 */
2836 max_index = MIN2(max_index, 32);
2837
2838 for (index = 0; index <= max_index; index++) {
2839 LLVMValueRef res;
2840 unsigned mask;
2841 int chan;
2842
2843 if (index < 8 * sizeof(unsigned) &&
2844 (info->file_mask[file] & (1u << index)) == 0) {
2845 /* This was not declared. */
2846 continue;
2847 }
2848
2849 if (file == TGSI_FILE_INPUT) {
2850 mask = info->input_usage_mask[index];
2851 } else {
2852 mask = TGSI_WRITEMASK_XYZW;
2853 }
2854
2855 for (chan = 0; chan < 4; chan++) {
2856 if ((mask & (1 << chan)) == 0) {
2857 /* This channel is not used. */
2858 continue;
2859 }
2860
2861 if (file == TGSI_FILE_CONSTANT) {
2862 struct tgsi_full_src_register reg;
2863 memset(&reg, 0, sizeof reg);
2864 reg.Register.File = file;
2865 reg.Register.Index = index;
2866 reg.Register.SwizzleX = 0;
2867 reg.Register.SwizzleY = 1;
2868 reg.Register.SwizzleZ = 2;
2869 reg.Register.SwizzleW = 3;
2870
2871 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2872 if (!res) {
2873 continue;
2874 }
2875 } else if (file == TGSI_FILE_INPUT) {
2876 res = bld->inputs[index][chan];
2877 if (!res) {
2878 continue;
2879 }
2880 } else if (file == TGSI_FILE_TEMPORARY) {
2881 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2882 assert(reg_ptr);
2883 res = LLVMBuildLoad(builder, reg_ptr, "");
2884 } else if (file == TGSI_FILE_OUTPUT) {
2885 reg_ptr = lp_get_output_ptr(bld, index, chan);
2886 assert(reg_ptr);
2887 res = LLVMBuildLoad(builder, reg_ptr, "");
2888 } else {
2889 assert(0);
2890 continue;
2891 }
2892
2893 emit_dump_reg(gallivm, file, index, chan, res);
2894 }
2895 }
2896 }
2897
2898
2899
2900 void
2901 lp_emit_declaration_soa(
2902 struct lp_build_tgsi_context *bld_base,
2903 const struct tgsi_full_declaration *decl)
2904 {
2905 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2906 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2907 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2908 const unsigned first = decl->Range.First;
2909 const unsigned last = decl->Range.Last;
2910 unsigned idx, i;
2911
2912 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2913
2914 switch (decl->Declaration.File) {
2915 case TGSI_FILE_TEMPORARY:
2916 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2917 assert(last < LP_MAX_INLINED_TEMPS);
2918 for (idx = first; idx <= last; ++idx) {
2919 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2920 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2921 }
2922 }
2923 break;
2924
2925 case TGSI_FILE_OUTPUT:
2926 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2927 for (idx = first; idx <= last; ++idx) {
2928 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2929 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2930 vec_type, "output");
2931 }
2932 }
2933 break;
2934
2935 case TGSI_FILE_ADDRESS:
2936 /* ADDR registers are only allocated with an integer LLVM IR type,
2937 * as they are guaranteed to always hold integers.
2938 * XXX: Not sure if this exception is worthwhile (or the whole idea of
2939 * an ADDR register for that matter).
2940 */
2941 assert(last < LP_MAX_TGSI_ADDRS);
2942 for (idx = first; idx <= last; ++idx) {
2943 assert(idx < LP_MAX_TGSI_ADDRS);
2944 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2945 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2946 }
2947 break;
2948
2949 case TGSI_FILE_SAMPLER_VIEW:
2950 /*
2951 * The target stored here MUST match whatever is actually
2952 * in the bound sampler views (what about return type?).
2953 */
2954 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2955 for (idx = first; idx <= last; ++idx) {
2956 bld->sv[idx] = decl->SamplerView;
2957 }
2958 break;
2959
2960 case TGSI_FILE_CONSTANT:
2961 {
2962 /*
2963 * We could trivially fetch the per-buffer pointer when fetching the
2964 * constant, relying on llvm to figure out it's always the same pointer
2965 * anyway. However, doing so results in a huge (more than factor of 10)
2966 * slowdown in llvm compilation times for some (but not all) shaders
2967 * (more specifically, the IR optimization spends way more time in
2968 * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
2969 */
2970 unsigned idx2D = decl->Dim.Index2D;
2971 LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2972 assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2973 bld->consts[idx2D] =
2974 lp_build_array_get(gallivm, bld->consts_ptr, index2D);
2975 bld->consts_sizes[idx2D] =
2976 lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
2977 }
2978 break;
2979
2980 default:
2981 /* don't need to declare other vars */
2982 break;
2983 }
2984 }
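/*
 * Added note: for files that are not indirectly addressed, every
 * register channel gets its own vec-wide alloca (bld->temps[idx][chan],
 * bld->outputs[idx][chan], ...), i.e. one LLVM vector per channel in
 * classic SoA style. Indirectly addressed files use the flat arrays
 * handled by get_soa_array_offsets() in the fetch/store paths instead.
 */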
2985
2986
2987 void lp_emit_immediate_soa(
2988 struct lp_build_tgsi_context *bld_base,
2989 const struct tgsi_full_immediate *imm)
2990 {
2991 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2992 struct gallivm_state * gallivm = bld_base->base.gallivm;
2993 LLVMValueRef imms[4];
2994 unsigned i;
2995 const uint size = imm->Immediate.NrTokens - 1;
2996 assert(size <= 4);
2997 switch (imm->Immediate.DataType) {
2998 case TGSI_IMM_FLOAT32:
2999 for( i = 0; i < size; ++i )
3000 imms[i] =
3001 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
3002
3003 break;
3004 case TGSI_IMM_FLOAT64:
3005 case TGSI_IMM_UINT64:
3006 case TGSI_IMM_INT64:
3007 case TGSI_IMM_UINT32:
3008 for( i = 0; i < size; ++i ) {
3009 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
3010 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3011 }
3012
3013 break;
3014 case TGSI_IMM_INT32:
3015 for( i = 0; i < size; ++i ) {
3016 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3017 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3018 }
3019
3020 break;
3021 }
3022 for( i = size; i < 4; ++i )
3023 imms[i] = bld_base->base.undef;
3024
3025 if (bld->use_immediates_array) {
3026 unsigned index = bld->num_immediates;
3027 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3028 LLVMBuilderRef builder = gallivm->builder;
3029 LLVMValueRef gep[2];
3030 gep[0] = lp_build_const_int32(gallivm, 0);
3031
3032 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3033 for (i = 0; i < 4; ++i ) {
3034 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3035 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3036 bld->imms_array, gep, 2, "");
3037 LLVMBuildStore(builder, imms[i], imm_ptr);
3038 }
3039 } else {
3040 /* simply copy the immediate values into the next immediates[] slot */
3041 unsigned i;
3042 assert(imm->Immediate.NrTokens - 1 <= 4);
3043 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3044
3045 for(i = 0; i < 4; ++i )
3046 bld->immediates[bld->num_immediates][i] = imms[i];
3047
3048 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3049 unsigned index = bld->num_immediates;
3050 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3051 LLVMBuilderRef builder = gallivm->builder;
3052 LLVMValueRef gep[2];
3053 gep[0] = lp_build_const_int32(gallivm, 0);
3054 for (i = 0; i < 4; ++i ) {
3055 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3056 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3057 bld->imms_array, gep, 2, "");
3058 LLVMBuildStore(builder,
3059 bld->immediates[index][i],
3060 imm_ptr);
3061 }
3062 }
3063 }
3064
3065 bld->num_immediates++;
3066 }
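/*
 * Layout of the indirect immediates array implied by the GEPs above
 * (added illustration): immediate-major, channel-minor, one vec-wide
 * element per slot:
 *   imms_array = [ imm0.x, imm0.y, imm0.z, imm0.w, imm1.x, ... ]
 * A hypothetical helper for the flat index:
 */
static inline unsigned
imm_slot_sketch(unsigned imm_index, unsigned chan)
{
   return imm_index * 4 + chan; /* matches "index * 4 + i" above */
}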
3067
3068 static void
3069 ddx_emit(
3070 const struct lp_build_tgsi_action * action,
3071 struct lp_build_tgsi_context * bld_base,
3072 struct lp_build_emit_data * emit_data)
3073 {
3074 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3075
3076 emit_fetch_deriv(bld, emit_data->args[0], NULL,
3077 &emit_data->output[emit_data->chan], NULL);
3078 }
3079
3080 static void
3081 ddy_emit(
3082 const struct lp_build_tgsi_action * action,
3083 struct lp_build_tgsi_context * bld_base,
3084 struct lp_build_emit_data * emit_data)
3085 {
3086 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3087
3088 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3089 &emit_data->output[emit_data->chan]);
3090 }
3091
3092 static void
3093 kill_emit(
3094 const struct lp_build_tgsi_action * action,
3095 struct lp_build_tgsi_context * bld_base,
3096 struct lp_build_emit_data * emit_data)
3097 {
3098 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3099
3100 emit_kill(bld, bld_base->pc - 1);
3101 }
3102
3103 static void
3104 kill_if_emit(
3105 const struct lp_build_tgsi_action * action,
3106 struct lp_build_tgsi_context * bld_base,
3107 struct lp_build_emit_data * emit_data)
3108 {
3109 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3110
3111 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3112 }
3113
3114 static void
3115 tex_emit(
3116 const struct lp_build_tgsi_action * action,
3117 struct lp_build_tgsi_context * bld_base,
3118 struct lp_build_emit_data * emit_data)
3119 {
3120 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3121
3122 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3123 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3124 }
3125
3126 static void
3127 tex2_emit(
3128 const struct lp_build_tgsi_action * action,
3129 struct lp_build_tgsi_context * bld_base,
3130 struct lp_build_emit_data * emit_data)
3131 {
3132 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3133
3134 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3135 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3136 }
3137
3138 static void
3139 txb_emit(
3140 const struct lp_build_tgsi_action * action,
3141 struct lp_build_tgsi_context * bld_base,
3142 struct lp_build_emit_data * emit_data)
3143 {
3144 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3145
3146 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3147 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3148 }
3149
3150 static void
3151 txb2_emit(
3152 const struct lp_build_tgsi_action * action,
3153 struct lp_build_tgsi_context * bld_base,
3154 struct lp_build_emit_data * emit_data)
3155 {
3156 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3157
3158 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3159 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3160 }
3161
3162 static void
3163 txd_emit(
3164 const struct lp_build_tgsi_action * action,
3165 struct lp_build_tgsi_context * bld_base,
3166 struct lp_build_emit_data * emit_data)
3167 {
3168 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3169
3170 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3171 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3172 }
3173
3174 static void
3175 txl_emit(
3176 const struct lp_build_tgsi_action * action,
3177 struct lp_build_tgsi_context * bld_base,
3178 struct lp_build_emit_data * emit_data)
3179 {
3180 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3181
3182 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3183 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3184 }
3185
3186 static void
3187 txl2_emit(
3188 const struct lp_build_tgsi_action * action,
3189 struct lp_build_tgsi_context * bld_base,
3190 struct lp_build_emit_data * emit_data)
3191 {
3192 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3193
3194 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3195 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3196 }
3197
3198 static void
3199 txp_emit(
3200 const struct lp_build_tgsi_action * action,
3201 struct lp_build_tgsi_context * bld_base,
3202 struct lp_build_emit_data * emit_data)
3203 {
3204 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3205
3206 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3207 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3208 }
3209
3210 static void
3211 tg4_emit(
3212 const struct lp_build_tgsi_action * action,
3213 struct lp_build_tgsi_context * bld_base,
3214 struct lp_build_emit_data * emit_data)
3215 {
3216 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3217
3218 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3219 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3220 }
3221
3222 static void
3223 lodq_emit(
3224 const struct lp_build_tgsi_action * action,
3225 struct lp_build_tgsi_context * bld_base,
3226 struct lp_build_emit_data * emit_data)
3227 {
3228 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3229
3230 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3231 emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3232 }
3233
3234 static void
3235 txq_emit(
3236 const struct lp_build_tgsi_action * action,
3237 struct lp_build_tgsi_context * bld_base,
3238 struct lp_build_emit_data * emit_data)
3239 {
3240 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3241
3242 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3243 }
3244
3245 static void
3246 txf_emit(
3247 const struct lp_build_tgsi_action * action,
3248 struct lp_build_tgsi_context * bld_base,
3249 struct lp_build_emit_data * emit_data)
3250 {
3251 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3252
3253 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3254 }
3255
3256 static void
3257 sample_i_emit(
3258 const struct lp_build_tgsi_action * action,
3259 struct lp_build_tgsi_context * bld_base,
3260 struct lp_build_emit_data * emit_data)
3261 {
3262 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3263
3264 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3265 }
3266
3267 static void
3268 sample_emit(
3269 const struct lp_build_tgsi_action * action,
3270 struct lp_build_tgsi_context * bld_base,
3271 struct lp_build_emit_data * emit_data)
3272 {
3273 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3274
3275 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3276 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3277 }
3278
3279 static void
3280 sample_b_emit(
3281 const struct lp_build_tgsi_action * action,
3282 struct lp_build_tgsi_context * bld_base,
3283 struct lp_build_emit_data * emit_data)
3284 {
3285 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3286
3287 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3288 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3289 }
3290
3291 static void
3292 sample_c_emit(
3293 const struct lp_build_tgsi_action * action,
3294 struct lp_build_tgsi_context * bld_base,
3295 struct lp_build_emit_data * emit_data)
3296 {
3297 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3298
3299 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3300 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3301 }
3302
3303 static void
3304 sample_c_lz_emit(
3305 const struct lp_build_tgsi_action * action,
3306 struct lp_build_tgsi_context * bld_base,
3307 struct lp_build_emit_data * emit_data)
3308 {
3309 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3310
3311 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3312 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3313 }
3314
3315 static void
3316 sample_d_emit(
3317 const struct lp_build_tgsi_action * action,
3318 struct lp_build_tgsi_context * bld_base,
3319 struct lp_build_emit_data * emit_data)
3320 {
3321 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3322
3323 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3324 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3325 }
3326
3327 static void
3328 sample_l_emit(
3329 const struct lp_build_tgsi_action * action,
3330 struct lp_build_tgsi_context * bld_base,
3331 struct lp_build_emit_data * emit_data)
3332 {
3333 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3334
3335 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3336 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3337 }
3338
3339 static void
3340 gather4_emit(
3341 const struct lp_build_tgsi_action * action,
3342 struct lp_build_tgsi_context * bld_base,
3343 struct lp_build_emit_data * emit_data)
3344 {
3345 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3346
3347 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3348 FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
3349 }
3350
3351 static void
3352 sviewinfo_emit(
3353 const struct lp_build_tgsi_action * action,
3354 struct lp_build_tgsi_context * bld_base,
3355 struct lp_build_emit_data * emit_data)
3356 {
3357 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3358
3359 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3360 }
3361
3362 static void
3363 lod_emit(
3364 const struct lp_build_tgsi_action * action,
3365 struct lp_build_tgsi_context * bld_base,
3366 struct lp_build_emit_data * emit_data)
3367 {
3368 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3369
3370 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3371 FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3372 }
3373
3374 static void
3375 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3376 LLVMValueRef ptr,
3377 LLVMValueRef mask)
3378 {
3379 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3380 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3381
3382 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3383
3384 LLVMBuildStore(builder, current_vec, ptr);
3385 }
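/*
 * Why a Sub works as an increment (added note): active lanes of an
 * execution mask are ~0, i.e. -1 as a signed integer, so subtracting
 * the mask adds exactly 1 to each active lane and leaves inactive
 * lanes untouched. Per-lane sketch:
 */
static inline int32_t
increment_by_mask_lane_sketch(int32_t counter, int32_t mask_lane)
{
   /* mask_lane is 0 (inactive) or -1 (active) */
   return counter - mask_lane;
}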
3386
3387 static void
3388 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3389 LLVMValueRef ptr,
3390 LLVMValueRef mask)
3391 {
3392 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3393 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3394
3395 current_vec = lp_build_select(&bld_base->uint_bld,
3396 mask,
3397 bld_base->uint_bld.zero,
3398 current_vec);
3399
3400 LLVMBuildStore(builder, current_vec, ptr);
3401 }
3402
3403 static LLVMValueRef
3404 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3405 LLVMValueRef current_mask_vec,
3406 LLVMValueRef total_emitted_vertices_vec)
3407 {
3408 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3409 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3410 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3411 total_emitted_vertices_vec,
3412 bld->max_output_vertices_vec);
3413
3414 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3415 }
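/*
 * Added note: lanes that have already emitted the declared maximum
 * number of vertices are masked off here, so any further EMITs on
 * those lanes become no-ops, matching GS semantics where vertices
 * past the declared maximum are silently dropped.
 */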
3416
3417 static void
3418 emit_vertex(
3419 const struct lp_build_tgsi_action * action,
3420 struct lp_build_tgsi_context * bld_base,
3421 struct lp_build_emit_data * emit_data)
3422 {
3423 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3424 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3425
3426 if (bld->gs_iface->emit_vertex) {
3427 LLVMValueRef mask = mask_vec(bld_base);
3428 LLVMValueRef total_emitted_vertices_vec =
3429 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3430 mask = clamp_mask_to_max_output_vertices(bld, mask,
3431 total_emitted_vertices_vec);
3432 gather_outputs(bld);
3433 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
3434 bld->outputs,
3435 total_emitted_vertices_vec);
3436 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3437 mask);
3438 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3439 mask);
3440 #if DUMP_GS_EMITS
3441 lp_build_print_value(bld->bld_base.base.gallivm,
3442 " +++ emit vertex masked ones = ",
3443 mask);
3444 lp_build_print_value(bld->bld_base.base.gallivm,
3445 " +++ emit vertex emitted = ",
3446 total_emitted_vertices_vec);
3447 #endif
3448 }
3449 }
3450
3451
3452 static void
3453 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
3454 LLVMValueRef mask)
3455 {
3456 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3457 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3458
3459 if (bld->gs_iface->end_primitive) {
3460 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3461 LLVMValueRef emitted_vertices_vec =
3462 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
3463 LLVMValueRef emitted_prims_vec =
3464 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3465
3466 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3467 emitted_vertices_vec,
3468 uint_bld->zero);
3469 /* We need to combine the current execution mask with the mask
3470 telling us which, if any, execution slots actually have
3471 unemitted primitives; this way we make sure that end_primitive
3472 executes only on the paths that have unflushed vertices. */
3473 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
3474
3475 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
3476 emitted_vertices_vec,
3477 emitted_prims_vec);
3478
3479 #if DUMP_GS_EMITS
3480 lp_build_print_value(bld->bld_base.base.gallivm,
3481 " +++ end prim masked ones = ",
3482 mask);
3483 lp_build_print_value(bld->bld_base.base.gallivm,
3484 " +++ end prim emitted verts1 = ",
3485 emitted_vertices_vec);
3486 lp_build_print_value(bld->bld_base.base.gallivm,
3487 " +++ end prim emitted prims1 = ",
3488 LLVMBuildLoad(builder,
3489 bld->emitted_prims_vec_ptr, ""));
3490 #endif
3491 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
3492 mask);
3493 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
3494 mask);
3495 #if DUMP_GS_EMITS
3496 lp_build_print_value(bld->bld_base.base.gallivm,
3497 " +++ end prim emitted verts2 = ",
3498 LLVMBuildLoad(builder,
3499 bld->emitted_vertices_vec_ptr, ""));
3500 #endif
3501 }
3502
3503 }
3504
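/*
 * TGSI_OPCODE_ENDPRIM: end the current primitive under the current
 * execution mask.
 */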
static void
end_primitive(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (bld->gs_iface->end_primitive) {
      LLVMValueRef mask = mask_vec(bld_base);
      end_primitive_masked(bld_base, mask);
   }
}

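/*
 * The emit callbacks below map the TGSI control-flow opcodes onto the
 * lp_exec_mask helpers: conditionals push, invert and pop a condition
 * mask (IF tests a float against zero, UIF an integer), loops and
 * switches maintain their break/continue masks, and CAL/RET drive the
 * subroutine call stack and program counter.
 */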
static void
cal_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
                     &bld_base->pc);
}

static void
ret_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
}

static void
brk_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_break(&bld->exec_mask, bld_base);
}

static void
if_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], bld->bld_base.base.zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

static void
uif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], uint_bld->zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

static void
case_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_case(&bld->exec_mask, emit_data->args[0]);
}

static void
default_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_default(&bld->exec_mask, bld_base);
}

static void
switch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
}

static void
endswitch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endswitch(&bld->exec_mask, bld_base);
}

static void
bgnloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_bgnloop(&bld->exec_mask);
}

static void
bgnsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_bgnsub(&bld->exec_mask);
}

static void
else_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_invert(&bld->exec_mask);
}

static void
endif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_pop(&bld->exec_mask);
}

static void
endloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
}

static void
endsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
}

static void
cont_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_continue(&bld->exec_mask);
}

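/*
 * Allocate everything the shader body will need: alloca arrays for the
 * indirectly addressed register files, and, for geometry shaders, the
 * zero-initialized per-lane vertex and primitive counters.
 */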
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      /* file_max is the highest register index used, so we need
       * (file_max + 1) * 4 channel slots.
       */
      unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
      bld->temps_array = lp_build_alloca_undef(gallivm,
                                               LLVMArrayType(bld_base->base.vec_type, array_size),
                                               "temp_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                 bld_base->base.vec_type, array_size,
                                                 "output_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
      bld->imms_array = lp_build_alloca_undef(gallivm,
                                              LLVMArrayType(bld_base->base.vec_type, array_size),
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs, we need to copy them into
    * our alloca array to be able to iterate over them. */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                                vec_type, array_size,
                                                "input_array");

      assert(bld_base->info->num_inputs
             <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}

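/*
 * Finish off the shader: geometry shaders flush any pending vertices and
 * report the final counts to the interface; other stages gather their
 * outputs from the alloca array if indirect addressing was used.
 */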
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* Implicit end_primitive, needed in case there are any unflushed
       * vertices in the cache.  Note we must not call end_primitive here
       * since the exec_mask is not valid at this point.
       */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      /* If we have indirect addressing in outputs, copy our alloca array
       * back to the output slots specified by the caller.
       */
      gather_outputs(bld);
   }
}

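/*
 * Translate the given TGSI token stream into LLVM IR, SoA style.
 *
 * A minimal usage sketch (hypothetical caller; the real ones, e.g. the
 * llvmpipe shader generators, set up the inputs/outputs arrays, sampler
 * and execution mask first, and several arguments may be NULL depending
 * on the shader stage):
 *
 *    struct lp_type type = lp_type_float_vec(32, 8 * 32);
 *    LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
 *    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
 *    ...
 *    lp_build_tgsi_soa(gallivm, tokens, type, mask,
 *                      consts_ptr, const_sizes_ptr, &system_values,
 *                      inputs, outputs, context_ptr, thread_data_ptr,
 *                      sampler, &info, NULL, ssbo_ptr, ssbo_sizes_ptr);
 */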
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  LLVMValueRef const_sizes_ptr,
                  const struct lp_bld_tgsi_system_values *system_values,
                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef context_ptr,
                  LLVMValueRef thread_data_ptr,
                  const struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info,
                  const struct lp_build_tgsi_gs_iface *gs_iface,
                  LLVMValueRef ssbo_ptr,
                  LLVMValueRef ssbo_sizes_ptr)
{
   struct lp_build_tgsi_soa_context bld;
   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   bld.mask = mask;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.const_sizes_ptr = const_sizes_ptr;
   bld.ssbo_ptr = ssbo_ptr;
   bld.ssbo_sizes_ptr = ssbo_sizes_ptr;
   bld.sampler = sampler;
   bld.bld_base.info = info;
   bld.indirect_files = info->indirect_files;
   bld.context_ptr = context_ptr;
   bld.thread_data_ptr = thread_data_ptr;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * them like indirect temporaries.
    */
   if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
   /*
    * For performance reasons immediates are always backed by a static
    * array, but if there are too many of them we have to fall back to a
    * dynamically allocated array.
    */
   bld.use_immediates_array =
      (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }

   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   bld.bld_base.emit_store = emit_store;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;

   if (gs_iface) {
      /* There's no default value for this since it should always be set,
       * but apps using ext_geometry_shader4 quite often forgot to set it,
       * so we use MAX_VERTEX_VARYING from that spec instead.  We could
       * debug_assert if it's not set, but that's a lot uglier. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
         info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32;

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *system_values;

   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);
   }
   lp_exec_mask_fini(&bld.exec_mask);
}