gallivm: add no-signed-zeros-fp-math option to lp_create_builder (v2)
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "tgsi/tgsi_strings.h"
51 #include "lp_bld_tgsi_action.h"
52 #include "lp_bld_type.h"
53 #include "lp_bld_const.h"
54 #include "lp_bld_arit.h"
55 #include "lp_bld_bitarit.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_init.h"
58 #include "lp_bld_logic.h"
59 #include "lp_bld_swizzle.h"
60 #include "lp_bld_flow.h"
61 #include "lp_bld_quad.h"
62 #include "lp_bld_tgsi.h"
63 #include "lp_bld_limits.h"
64 #include "lp_bld_debug.h"
65 #include "lp_bld_printf.h"
66 #include "lp_bld_sample.h"
67 #include "lp_bld_struct.h"
68
69 /* SM 4.0 says that subroutines can nest 32 deep and
70 * we need one more for our main function */
71 #define LP_MAX_NUM_FUNCS 33
72
73 #define DUMP_GS_EMITS 0
74
75 /*
76 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
77 * instruction.
78 *
79 * TODO:
80 * - take execution masks into consideration
81 * - debug control-flow instructions
82 */
83 #define DEBUG_EXECUTION 0
84
85
86 /*
87 * Emit code to print a register value.
88 */
89 static void
90 emit_dump_reg(struct gallivm_state *gallivm,
91 unsigned file,
92 unsigned index,
93 unsigned chan,
94 LLVMValueRef value)
95 {
96 char buf[32];
97
98 util_snprintf(buf, sizeof buf, " %s[%u].%c = ",
99 tgsi_file_name(file),
100 index, "xyzw"[chan]);
101
102 lp_build_print_value(gallivm, buf, value);
103 }
104
105 /*
106 * Return the context for the current function.
107 * (always 'main', if shader doesn't do any function calls)
108 */
109 static inline struct function_ctx *
110 func_ctx(struct lp_exec_mask *mask)
111 {
112 assert(mask->function_stack_size > 0);
113 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
114 return &mask->function_stack[mask->function_stack_size - 1];
115 }
116
117 /*
118 * Returns true if we're in a loop.
119 * It's global, meaning that it returns true even if there's
120 * no loop inside the current function, but we were inside
121 * a loop inside another function, from which this one was called.
122 */
123 static inline boolean
124 mask_has_loop(struct lp_exec_mask *mask)
125 {
126 int i;
127 for (i = mask->function_stack_size - 1; i >= 0; --i) {
128 const struct function_ctx *ctx = &mask->function_stack[i];
129 if (ctx->loop_stack_size > 0)
130 return TRUE;
131 }
132 return FALSE;
133 }
134
135 /*
136 * Returns true if we're inside a switch statement.
137 * It's global, meaning that it returns true even if there's
138 * no switch in the current function, but we were inside
139 * a switch inside another function, from which this one was called.
140 */
141 static inline boolean
142 mask_has_switch(struct lp_exec_mask *mask)
143 {
144 int i;
145 for (i = mask->function_stack_size - 1; i >= 0; --i) {
146 const struct function_ctx *ctx = &mask->function_stack[i];
147 if (ctx->switch_stack_size > 0)
148 return TRUE;
149 }
150 return FALSE;
151 }
152
153 /*
154 * Returns true if we're inside a conditional.
155 * It's global, meaning that it returns true even if there's
156 * no conditional in the current function, but we were inside
157 * a conditional inside another function, from which this one was called.
158 */
159 static inline boolean
160 mask_has_cond(struct lp_exec_mask *mask)
161 {
162 int i;
163 for (i = mask->function_stack_size - 1; i >= 0; --i) {
164 const struct function_ctx *ctx = &mask->function_stack[i];
165 if (ctx->cond_stack_size > 0)
166 return TRUE;
167 }
168 return FALSE;
169 }
170
171
172 /*
173 * Initialize a function context at the specified index.
174 */
175 static void
176 lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
177 {
178 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
179 LLVMBuilderRef builder = mask->bld->gallivm->builder;
180 struct function_ctx *ctx = &mask->function_stack[function_idx];
181
182 ctx->cond_stack_size = 0;
183 ctx->loop_stack_size = 0;
184 ctx->switch_stack_size = 0;
185
186 if (function_idx == 0) {
187 ctx->ret_mask = mask->ret_mask;
188 }
189
190 ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
191 int_type, "looplimiter");
192 LLVMBuildStore(
193 builder,
194 LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
195 ctx->loop_limiter);
196 }
197
198 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
199 {
200 mask->bld = bld;
201 mask->has_mask = FALSE;
202 mask->ret_in_main = FALSE;
203 /* For the main function */
204 mask->function_stack_size = 1;
205
206 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
207 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
208 mask->cond_mask = mask->switch_mask =
209 LLVMConstAllOnes(mask->int_vec_type);
210
211 mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
212 sizeof(mask->function_stack[0]));
213 lp_exec_mask_function_init(mask, 0);
214 }
215
216 static void
217 lp_exec_mask_fini(struct lp_exec_mask *mask)
218 {
219 FREE(mask->function_stack);
220 }
221
222 static void lp_exec_mask_update(struct lp_exec_mask *mask)
223 {
224 LLVMBuilderRef builder = mask->bld->gallivm->builder;
225 boolean has_loop_mask = mask_has_loop(mask);
226 boolean has_cond_mask = mask_has_cond(mask);
227 boolean has_switch_mask = mask_has_switch(mask);
228 boolean has_ret_mask = mask->function_stack_size > 1 ||
229 mask->ret_in_main;
230
231 if (has_loop_mask) {
232 /*for loops we need to update the entire mask at runtime */
233 LLVMValueRef tmp;
234 assert(mask->break_mask);
235 tmp = LLVMBuildAnd(builder,
236 mask->cont_mask,
237 mask->break_mask,
238 "maskcb");
239 mask->exec_mask = LLVMBuildAnd(builder,
240 mask->cond_mask,
241 tmp,
242 "maskfull");
243 } else
244 mask->exec_mask = mask->cond_mask;
245
246 if (has_switch_mask) {
247 mask->exec_mask = LLVMBuildAnd(builder,
248 mask->exec_mask,
249 mask->switch_mask,
250 "switchmask");
251 }
252
253 if (has_ret_mask) {
254 mask->exec_mask = LLVMBuildAnd(builder,
255 mask->exec_mask,
256 mask->ret_mask,
257 "callmask");
258 }
259
260 mask->has_mask = (has_cond_mask ||
261 has_loop_mask ||
262 has_switch_mask ||
263 has_ret_mask);
264 }
265
266 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
267 LLVMValueRef val)
268 {
269 LLVMBuilderRef builder = mask->bld->gallivm->builder;
270 struct function_ctx *ctx = func_ctx(mask);
271
272 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
273 ctx->cond_stack_size++;
274 return;
275 }
276 if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
277 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
278 }
279 ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
280 assert(LLVMTypeOf(val) == mask->int_vec_type);
281 mask->cond_mask = LLVMBuildAnd(builder,
282 mask->cond_mask,
283 val,
284 "");
285 lp_exec_mask_update(mask);
286 }
287
288 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
289 {
290 LLVMBuilderRef builder = mask->bld->gallivm->builder;
291 struct function_ctx *ctx = func_ctx(mask);
292 LLVMValueRef prev_mask;
293 LLVMValueRef inv_mask;
294
295 assert(ctx->cond_stack_size);
296 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
297 return;
298 prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
299 if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
300 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
301 }
302
303 inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
304
305 mask->cond_mask = LLVMBuildAnd(builder,
306 inv_mask,
307 prev_mask, "");
308 lp_exec_mask_update(mask);
309 }
310
311 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
312 {
313 struct function_ctx *ctx = func_ctx(mask);
314 assert(ctx->cond_stack_size);
315 --ctx->cond_stack_size;
316 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
317 return;
318 mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
319 lp_exec_mask_update(mask);
320 }
321
322 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
323 {
324 LLVMBuilderRef builder = mask->bld->gallivm->builder;
325 struct function_ctx *ctx = func_ctx(mask);
326
327 if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
328 ++ctx->loop_stack_size;
329 return;
330 }
331
332 ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
333 ctx->break_type;
334 ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;
335
336 ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
337 ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
338 ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
339 ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
340 ++ctx->loop_stack_size;
341
342 ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
343 LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
344
345 ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
346
347 LLVMBuildBr(builder, ctx->loop_block);
348 LLVMPositionBuilderAtEnd(builder, ctx->loop_block);
349
350 mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");
351
352 lp_exec_mask_update(mask);
353 }
354
355 static void lp_exec_break(struct lp_exec_mask *mask,
356 struct lp_build_tgsi_context * bld_base)
357 {
358 LLVMBuilderRef builder = mask->bld->gallivm->builder;
359 struct function_ctx *ctx = func_ctx(mask);
360
361 if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
362 LLVMValueRef exec_mask = LLVMBuildNot(builder,
363 mask->exec_mask,
364 "break");
365
366 mask->break_mask = LLVMBuildAnd(builder,
367 mask->break_mask,
368 exec_mask, "break_full");
369 }
370 else {
371 unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
372 boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
373 opcode == TGSI_OPCODE_CASE);
374
375
376 if (ctx->switch_in_default) {
377 /*
378 * stop default execution but only if this is an unconditional switch.
379 * (The condition here is not perfect since dead code after break is
380 * allowed but should be sufficient since false negatives are just
381 * unoptimized - so we don't have to pre-evaluate that).
382 */
383 if(break_always && ctx->switch_pc) {
384 bld_base->pc = ctx->switch_pc;
385 return;
386 }
387 }
388
389 if (break_always) {
390 mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
391 }
392 else {
393 LLVMValueRef exec_mask = LLVMBuildNot(builder,
394 mask->exec_mask,
395 "break");
396 mask->switch_mask = LLVMBuildAnd(builder,
397 mask->switch_mask,
398 exec_mask, "break_switch");
399 }
400 }
401
402 lp_exec_mask_update(mask);
403 }
404
405 static void lp_exec_break_condition(struct lp_exec_mask *mask,
406 LLVMValueRef cond)
407 {
408 LLVMBuilderRef builder = mask->bld->gallivm->builder;
409 struct function_ctx *ctx = func_ctx(mask);
410 LLVMValueRef cond_mask = LLVMBuildAnd(builder,
411 mask->exec_mask,
412 cond, "cond_mask");
413 cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");
414
415 if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
416 mask->break_mask = LLVMBuildAnd(builder,
417 mask->break_mask,
418 cond_mask, "breakc_full");
419 }
420 else {
421 mask->switch_mask = LLVMBuildAnd(builder,
422 mask->switch_mask,
423 cond_mask, "breakc_switch");
424 }
425
426 lp_exec_mask_update(mask);
427 }
428
429 static void lp_exec_continue(struct lp_exec_mask *mask)
430 {
431 LLVMBuilderRef builder = mask->bld->gallivm->builder;
432 LLVMValueRef exec_mask = LLVMBuildNot(builder,
433 mask->exec_mask,
434 "");
435
436 mask->cont_mask = LLVMBuildAnd(builder,
437 mask->cont_mask,
438 exec_mask, "");
439
440 lp_exec_mask_update(mask);
441 }
442
443
444 static void lp_exec_endloop(struct gallivm_state *gallivm,
445 struct lp_exec_mask *mask)
446 {
447 LLVMBuilderRef builder = mask->bld->gallivm->builder;
448 struct function_ctx *ctx = func_ctx(mask);
449 LLVMBasicBlockRef endloop;
450 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
451 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
452 mask->bld->type.width *
453 mask->bld->type.length);
454 LLVMValueRef i1cond, i2cond, icond, limiter;
455
456 assert(mask->break_mask);
457
458
459 assert(ctx->loop_stack_size);
460 if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
461 --ctx->loop_stack_size;
462 return;
463 }
464
465 /*
466 * Restore the cont_mask, but don't pop
467 */
468 mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
469 lp_exec_mask_update(mask);
470
471 /*
472 * Unlike the continue mask, the break_mask must be preserved across loop
473 * iterations
474 */
475 LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
476
477 /* Decrement the loop limiter */
478 limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");
479
480 limiter = LLVMBuildSub(
481 builder,
482 limiter,
483 LLVMConstInt(int_type, 1, false),
484 "");
485
486 LLVMBuildStore(builder, limiter, ctx->loop_limiter);
487
488 /* i1cond = (mask != 0) */
489 i1cond = LLVMBuildICmp(
490 builder,
491 LLVMIntNE,
492 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
493 LLVMConstNull(reg_type), "i1cond");
494
495 /* i2cond = (looplimiter > 0) */
496 i2cond = LLVMBuildICmp(
497 builder,
498 LLVMIntSGT,
499 limiter,
500 LLVMConstNull(int_type), "i2cond");
501
502 /* if( i1cond && i2cond ) */
503 icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
504
505 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
506
507 LLVMBuildCondBr(builder,
508 icond, ctx->loop_block, endloop);
509
510 LLVMPositionBuilderAtEnd(builder, endloop);
511
512 assert(ctx->loop_stack_size);
513 --ctx->loop_stack_size;
514 mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
515 mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
516 ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
517 ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
518 ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
519 ctx->switch_stack_size];
520
521 lp_exec_mask_update(mask);
522 }
523
524 static void lp_exec_switch(struct lp_exec_mask *mask,
525 LLVMValueRef switchval)
526 {
527 struct function_ctx *ctx = func_ctx(mask);
528
529 if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
530 ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
531 ctx->switch_stack_size++;
532 return;
533 }
534
535 ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
536 ctx->break_type;
537 ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
538
539 ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
540 ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
541 ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
542 ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
543 ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
544 ctx->switch_stack_size++;
545
546 mask->switch_mask = LLVMConstNull(mask->int_vec_type);
547 ctx->switch_val = switchval;
548 ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
549 ctx->switch_in_default = false;
550 ctx->switch_pc = 0;
551
552 lp_exec_mask_update(mask);
553 }
554
555 static void lp_exec_endswitch(struct lp_exec_mask *mask,
556 struct lp_build_tgsi_context * bld_base)
557 {
558 LLVMBuilderRef builder = mask->bld->gallivm->builder;
559 struct function_ctx *ctx = func_ctx(mask);
560
561 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
562 ctx->switch_stack_size--;
563 return;
564 }
565
566 /* check if there's deferred default if so do it now */
567 if (ctx->switch_pc && !ctx->switch_in_default) {
568 LLVMValueRef prevmask, defaultmask;
569 unsigned tmp_pc;
570 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
571 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
572 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
573 ctx->switch_in_default = true;
574
575 lp_exec_mask_update(mask);
576
577 assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
578 TGSI_OPCODE_DEFAULT);
579
580 tmp_pc = bld_base->pc;
581 bld_base->pc = ctx->switch_pc;
582 /*
583 * re-purpose switch_pc to point to here again, since we stop execution of
584 * the deferred default after next break.
585 */
586 ctx->switch_pc = tmp_pc - 1;
587
588 return;
589 }
590
591 else if (ctx->switch_pc && ctx->switch_in_default) {
592 assert(bld_base->pc == ctx->switch_pc + 1);
593 }
594
595 ctx->switch_stack_size--;
596 mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
597 ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
598 ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
599 ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
600 ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
601
602 ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
603
604 lp_exec_mask_update(mask);
605 }
606
607 static void lp_exec_case(struct lp_exec_mask *mask,
608 LLVMValueRef caseval)
609 {
610 LLVMBuilderRef builder = mask->bld->gallivm->builder;
611 struct function_ctx *ctx = func_ctx(mask);
612
613 LLVMValueRef casemask, prevmask;
614
615 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
616 return;
617 }
618
619 /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
620 if (!ctx->switch_in_default) {
621 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
622 casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
623 ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
624 ctx->switch_mask_default, "sw_default_mask");
625 casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
626 mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
627
628 lp_exec_mask_update(mask);
629 }
630 }
631
632 /*
633 * Analyse default statement in a switch.
634 * \return true if default is last statement, false otherwise
635 * \param default_pc_start contains pc of instruction to jump to
636 * if default wasn't last but there's no
637 * fallthrough into default.
638 */
639 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
640 struct lp_build_tgsi_context * bld_base,
641 int *default_pc_start)
642 {
643 unsigned pc = bld_base->pc;
644 struct function_ctx *ctx = func_ctx(mask);
645 int curr_switch_stack = ctx->switch_stack_size;
646
647 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
648 return false;
649 }
650
651 /* skip over case statements which are together with default */
652 while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
653 pc++;
654 }
655
656 while (pc != ~0u && pc < bld_base->num_instructions) {
657 unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
658 switch (opcode) {
659 case TGSI_OPCODE_CASE:
660 if (curr_switch_stack == ctx->switch_stack_size) {
661 *default_pc_start = pc - 1;
662 return false;
663 }
664 break;
665 case TGSI_OPCODE_SWITCH:
666 curr_switch_stack++;
667 break;
668 case TGSI_OPCODE_ENDSWITCH:
669 if (curr_switch_stack == ctx->switch_stack_size) {
670 *default_pc_start = pc - 1;
671 return true;
672 }
673 curr_switch_stack--;
674 break;
675 }
676 pc++;
677 }
678 /* should never arrive here */
679 assert(0);
680 return true;
681 }
682
683 static void lp_exec_default(struct lp_exec_mask *mask,
684 struct lp_build_tgsi_context * bld_base)
685 {
686 LLVMBuilderRef builder = mask->bld->gallivm->builder;
687 struct function_ctx *ctx = func_ctx(mask);
688
689 int default_exec_pc;
690 boolean default_is_last;
691
692 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
693 return;
694 }
695
696 /*
697 * This is a messy opcode, because it may not be always at the end and
698 * there can be fallthrough in and out of it.
699 */
700
701 default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
702 /*
703 * If it is last statement in switch (note that case statements appearing
704 * "at the same time" as default don't change that) everything is just fine,
705 * update switch mask and go on. This means we can handle default with
706 * fallthrough INTO it without overhead, if it is last.
707 */
708 if (default_is_last) {
709 LLVMValueRef prevmask, defaultmask;
710 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
711 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
712 defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
713 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
714 ctx->switch_in_default = true;
715
716 lp_exec_mask_update(mask);
717 }
718 else {
719 /*
720 * Technically, "case" immediately before default isn't really a
721 * fallthrough, however we still have to count them as such as we
722 * already have updated the masks.
723 * If that happens in practice could add a switch optimizer pass
724 * which just gets rid of all case statements appearing together with
725 * default (or could do switch analysis at switch start time instead).
726 */
727 unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
728 boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
729 opcode != TGSI_OPCODE_SWITCH);
730 /*
731 * If it is not last statement and there was no fallthrough into it,
732 * we record the PC and continue execution at next case (again, those
733 * case encountered at the same time don't count). At endswitch
734 * time, we update switchmask, and go back executing the code we skipped
735 * until the next break (possibly re-executing some code with changed mask
736 * if there was a fallthrough out of default).
737 * Finally, if it is not last statement and there was a fallthrough into it,
738 * do the same as with the former case, except instead of skipping the code
739 * just execute it without updating the mask, then go back and re-execute.
740 */
741 ctx->switch_pc = bld_base->pc;
742 if (!ft_into) {
743 bld_base->pc = default_exec_pc;
744 }
745 }
746 }
747
748
749 /* stores val into an address pointed to by dst_ptr.
750 * mask->exec_mask is used to figure out which bits of val
751 * should be stored into the address
752 * (0 means don't store this bit, 1 means do store).
753 */
754 static void lp_exec_mask_store(struct lp_exec_mask *mask,
755 struct lp_build_context *bld_store,
756 LLVMValueRef pred,
757 LLVMValueRef val,
758 LLVMValueRef dst_ptr)
759 {
760 LLVMBuilderRef builder = mask->bld->gallivm->builder;
761
762 assert(lp_check_value(bld_store->type, val));
763 assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
764 assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));
765
766 /* Mix the predicate and execution mask */
767 if (mask->has_mask) {
768 if (pred) {
769 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
770 } else {
771 pred = mask->exec_mask;
772 }
773 }
774
775 if (pred) {
776 LLVMValueRef res, dst;
777
778 dst = LLVMBuildLoad(builder, dst_ptr, "");
779 res = lp_build_select(bld_store, pred, val, dst);
780 LLVMBuildStore(builder, res, dst_ptr);
781 } else
782 LLVMBuildStore(builder, val, dst_ptr);
783 }
784
785 static void lp_exec_mask_call(struct lp_exec_mask *mask,
786 int func,
787 int *pc)
788 {
789 if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
790 return;
791 }
792
793 lp_exec_mask_function_init(mask, mask->function_stack_size);
794 mask->function_stack[mask->function_stack_size].pc = *pc;
795 mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
796 mask->function_stack_size++;
797 *pc = func;
798 }
799
800 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
801 {
802 LLVMBuilderRef builder = mask->bld->gallivm->builder;
803 struct function_ctx *ctx = func_ctx(mask);
804 LLVMValueRef exec_mask;
805
806 if (ctx->cond_stack_size == 0 &&
807 ctx->loop_stack_size == 0 &&
808 ctx->switch_stack_size == 0 &&
809 mask->function_stack_size == 1) {
810 /* returning from main() */
811 *pc = -1;
812 return;
813 }
814
815 if (mask->function_stack_size == 1) {
816 /*
817 * This requires special handling since we need to ensure
818 * we don't drop the mask even if we have no call stack
819 * (e.g. after a ret in a if clause after the endif)
820 */
821 mask->ret_in_main = TRUE;
822 }
823
824 exec_mask = LLVMBuildNot(builder,
825 mask->exec_mask,
826 "ret");
827
828 mask->ret_mask = LLVMBuildAnd(builder,
829 mask->ret_mask,
830 exec_mask, "ret_full");
831
832 lp_exec_mask_update(mask);
833 }
834
/*
 * Handle the beginning of a subroutine.
 * Intentionally a no-op.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
   (void) mask;
}
838
839 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
840 {
841 struct function_ctx *ctx;
842
843 assert(mask->function_stack_size > 1);
844 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
845
846 ctx = func_ctx(mask);
847 mask->function_stack_size--;
848
849 *pc = ctx->pc;
850 mask->ret_mask = ctx->ret_mask;
851
852 lp_exec_mask_update(mask);
853 }
854
855
856 static LLVMValueRef
857 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
858 unsigned file,
859 int index,
860 unsigned chan)
861 {
862 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
863 LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
864 LLVMValueRef var_of_array;
865
866 switch (file) {
867 case TGSI_FILE_TEMPORARY:
868 array_of_vars = bld->temps;
869 var_of_array = bld->temps_array;
870 break;
871 case TGSI_FILE_OUTPUT:
872 array_of_vars = bld->outputs;
873 var_of_array = bld->outputs_array;
874 break;
875 default:
876 assert(0);
877 return NULL;
878 }
879
880 assert(chan < 4);
881
882 if (bld->indirect_files & (1 << file)) {
883 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
884 return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
885 }
886 else {
887 assert(index <= bld->bld_base.info->file_max[file]);
888 return array_of_vars[index][chan];
889 }
890 }
891
892
893 /**
894 * Return pointer to a temporary register channel (src or dest).
895 * Note that indirect addressing cannot be handled here.
896 * \param index which temporary register
897 * \param chan which channel of the temp register.
898 */
899 LLVMValueRef
900 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
901 unsigned index,
902 unsigned chan)
903 {
904 return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
905 }
906
907 /**
908 * Return pointer to a output register channel (src or dest).
909 * Note that indirect addressing cannot be handled here.
910 * \param index which output register
911 * \param chan which channel of the output register.
912 */
913 LLVMValueRef
914 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
915 unsigned index,
916 unsigned chan)
917 {
918 return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
919 }
920
921 /*
922 * If we have indirect addressing in outputs copy our alloca array
923 * to the outputs slots specified by the caller to make sure
924 * our outputs are delivered consistently via the same interface.
925 */
926 static void
927 gather_outputs(struct lp_build_tgsi_soa_context * bld)
928 {
929 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
930 unsigned index, chan;
931 assert(bld->bld_base.info->num_outputs <=
932 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
933 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
934 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
935 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
936 }
937 }
938 }
939 }
940
941 /**
942 * Gather vector.
943 * XXX the lp_build_gather() function should be capable of doing this
944 * with a little work.
945 */
946 static LLVMValueRef
947 build_gather(struct lp_build_tgsi_context *bld_base,
948 LLVMValueRef base_ptr,
949 LLVMValueRef indexes,
950 LLVMValueRef overflow_mask,
951 LLVMValueRef indexes2)
952 {
953 struct gallivm_state *gallivm = bld_base->base.gallivm;
954 LLVMBuilderRef builder = gallivm->builder;
955 struct lp_build_context *uint_bld = &bld_base->uint_bld;
956 struct lp_build_context *bld = &bld_base->base;
957 LLVMValueRef res;
958 unsigned i;
959
960 if (indexes2)
961 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
962 else
963 res = bld->undef;
964 /*
965 * overflow_mask is a vector telling us which channels
966 * in the vector overflowed. We use the overflow behavior for
967 * constant buffers which is defined as:
968 * Out of bounds access to constant buffer returns 0 in all
969 * components. Out of bounds behavior is always with respect
970 * to the size of the buffer bound at that slot.
971 */
972
973 if (overflow_mask) {
974 /*
975 * We avoid per-element control flow here (also due to llvm going crazy,
976 * though I suspect it's better anyway since overflow is likely rare).
977 * Note that since we still fetch from buffers even if num_elements was
978 * zero (in this case we'll fetch from index zero) the jit func callers
979 * MUST provide valid fake constant buffers of size 4x32 (the values do
980 * not matter), otherwise we'd still need (not per element though)
981 * control flow.
982 */
983 indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
984 if (indexes2)
985 indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
986 }
987
988 /*
989 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
990 */
991 for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
992 LLVMValueRef si, di;
993 LLVMValueRef index;
994 LLVMValueRef scalar_ptr, scalar;
995
996 di = lp_build_const_int32(bld->gallivm, i);
997 if (indexes2)
998 si = lp_build_const_int32(bld->gallivm, i >> 1);
999 else
1000 si = di;
1001
1002 if (indexes2 && (i & 1)) {
1003 index = LLVMBuildExtractElement(builder,
1004 indexes2, si, "");
1005 } else {
1006 index = LLVMBuildExtractElement(builder,
1007 indexes, si, "");
1008 }
1009 scalar_ptr = LLVMBuildGEP(builder, base_ptr,
1010 &index, 1, "gather_ptr");
1011 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
1012
1013 res = LLVMBuildInsertElement(builder, res, scalar, di, "");
1014 }
1015
1016 if (overflow_mask) {
1017 if (indexes2) {
1018 res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
1019 overflow_mask = LLVMBuildSExt(builder, overflow_mask,
1020 bld_base->dbl_bld.int_vec_type, "");
1021 res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
1022 bld_base->dbl_bld.zero, res);
1023 } else
1024 res = lp_build_select(bld, overflow_mask, bld->zero, res);
1025 }
1026
1027 return res;
1028 }
1029
1030
1031 /**
1032 * Scatter/store vector.
1033 */
1034 static void
1035 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
1036 LLVMValueRef base_ptr,
1037 LLVMValueRef indexes,
1038 LLVMValueRef values,
1039 struct lp_exec_mask *mask,
1040 LLVMValueRef pred)
1041 {
1042 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1043 LLVMBuilderRef builder = gallivm->builder;
1044 unsigned i;
1045
1046 /* Mix the predicate and execution mask */
1047 if (mask->has_mask) {
1048 if (pred) {
1049 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
1050 }
1051 else {
1052 pred = mask->exec_mask;
1053 }
1054 }
1055
1056 /*
1057 * Loop over elements of index_vec, store scalar value.
1058 */
1059 for (i = 0; i < bld->bld_base.base.type.length; i++) {
1060 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1061 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
1062 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
1063 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
1064 LLVMValueRef scalar_pred = pred ?
1065 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
1066
1067 if (0)
1068 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
1069 ii, val, index, scalar_ptr);
1070
1071 if (scalar_pred) {
1072 LLVMValueRef real_val, dst_val;
1073 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
1074 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
1075 LLVMBuildStore(builder, real_val, scalar_ptr);
1076 }
1077 else {
1078 LLVMBuildStore(builder, val, scalar_ptr);
1079 }
1080 }
1081 }
1082
1083
1084 /**
1085 * Read the current value of the ADDR register, convert the floats to
1086 * ints, add the base index and return the vector of offsets.
1087 * The offsets will be used to index into the constant buffer or
1088 * temporary register file.
1089 */
1090 static LLVMValueRef
1091 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
1092 unsigned reg_file, unsigned reg_index,
1093 const struct tgsi_ind_register *indirect_reg)
1094 {
1095 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1096 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
1097 /* always use X component of address register */
1098 unsigned swizzle = indirect_reg->Swizzle;
1099 LLVMValueRef base;
1100 LLVMValueRef rel;
1101 LLVMValueRef max_index;
1102 LLVMValueRef index;
1103
1104 assert(bld->indirect_files & (1 << reg_file));
1105
1106 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
1107
1108 assert(swizzle < 4);
1109 switch (indirect_reg->File) {
1110 case TGSI_FILE_ADDRESS:
1111 rel = LLVMBuildLoad(builder,
1112 bld->addr[indirect_reg->Index][swizzle],
1113 "load addr reg");
1114 /* ADDR LLVM values already have LLVM integer type. */
1115 break;
1116 case TGSI_FILE_TEMPORARY:
1117 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
1118 rel = LLVMBuildLoad(builder, rel, "load temp reg");
1119 /* TEMP LLVM values always have LLVM float type, but for indirection, the
1120 * value actually stored is expected to be an integer */
1121 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
1122 break;
1123 default:
1124 assert(0);
1125 rel = uint_bld->zero;
1126 }
1127
1128 index = lp_build_add(uint_bld, base, rel);
1129
1130 /*
1131 * emit_fetch_constant handles constant buffer overflow so this code
1132 * is pointless for them.
1133 * Furthermore the D3D10 spec in section 6.5 says:
1134 * If the constant buffer bound to a slot is larger than the size
1135 * declared in the shader for that slot, implementations are allowed
1136 * to return incorrect data (not necessarily 0) for indices that are
1137 * larger than the declared size but smaller than the buffer size.
1138 */
1139 if (reg_file != TGSI_FILE_CONSTANT) {
1140 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
1141 uint_bld->type,
1142 bld->bld_base.info->file_max[reg_file]);
1143
1144 assert(!uint_bld->type.sign);
1145 index = lp_build_min(uint_bld, index, max_index);
1146 }
1147
1148 return index;
1149 }
1150
1151 static struct lp_build_context *
1152 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
1153 enum tgsi_opcode_type stype)
1154 {
1155 struct lp_build_context *bld_fetch;
1156
1157 switch (stype) {
1158 case TGSI_TYPE_FLOAT:
1159 case TGSI_TYPE_UNTYPED:
1160 bld_fetch = &bld_base->base;
1161 break;
1162 case TGSI_TYPE_UNSIGNED:
1163 bld_fetch = &bld_base->uint_bld;
1164 break;
1165 case TGSI_TYPE_SIGNED:
1166 bld_fetch = &bld_base->int_bld;
1167 break;
1168 case TGSI_TYPE_DOUBLE:
1169 bld_fetch = &bld_base->dbl_bld;
1170 break;
1171 case TGSI_TYPE_UNSIGNED64:
1172 bld_fetch = &bld_base->uint64_bld;
1173 break;
1174 case TGSI_TYPE_SIGNED64:
1175 bld_fetch = &bld_base->int64_bld;
1176 break;
1177 case TGSI_TYPE_VOID:
1178 default:
1179 assert(0);
1180 bld_fetch = NULL;
1181 break;
1182 }
1183 return bld_fetch;
1184 }
1185
1186 static LLVMValueRef
1187 get_soa_array_offsets(struct lp_build_context *uint_bld,
1188 LLVMValueRef indirect_index,
1189 unsigned chan_index,
1190 boolean need_perelement_offset)
1191 {
1192 struct gallivm_state *gallivm = uint_bld->gallivm;
1193 LLVMValueRef chan_vec =
1194 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
1195 LLVMValueRef length_vec =
1196 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
1197 LLVMValueRef index_vec;
1198
1199 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1200 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1201 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1202 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1203
1204 if (need_perelement_offset) {
1205 LLVMValueRef pixel_offsets;
1206 unsigned i;
1207 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1208 pixel_offsets = uint_bld->undef;
1209 for (i = 0; i < uint_bld->type.length; i++) {
1210 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1211 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
1212 ii, ii, "");
1213 }
1214 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1215 }
1216 return index_vec;
1217 }
1218
1219 static LLVMValueRef
1220 emit_fetch_constant(
1221 struct lp_build_tgsi_context * bld_base,
1222 const struct tgsi_full_src_register * reg,
1223 enum tgsi_opcode_type stype,
1224 unsigned swizzle)
1225 {
1226 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1227 struct gallivm_state *gallivm = bld_base->base.gallivm;
1228 LLVMBuilderRef builder = gallivm->builder;
1229 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1230 unsigned dimension = 0;
1231 LLVMValueRef consts_ptr;
1232 LLVMValueRef num_consts;
1233 LLVMValueRef res;
1234
1235 /* XXX: Handle fetching xyzw components as a vector */
1236 assert(swizzle != ~0u);
1237
1238 if (reg->Register.Dimension) {
1239 assert(!reg->Dimension.Indirect);
1240 dimension = reg->Dimension.Index;
1241 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
1242 }
1243
1244 consts_ptr = bld->consts[dimension];
1245 num_consts = bld->consts_sizes[dimension];
1246
1247 if (reg->Register.Indirect) {
1248 LLVMValueRef indirect_index;
1249 LLVMValueRef swizzle_vec =
1250 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
1251 LLVMValueRef index_vec; /* index into the const buffer */
1252 LLVMValueRef overflow_mask;
1253 LLVMValueRef index_vec2 = NULL;
1254
1255 indirect_index = get_indirect_index(bld,
1256 reg->Register.File,
1257 reg->Register.Index,
1258 &reg->Indirect);
1259
1260 /* All fetches are from the same constant buffer, so
1261 * we need to propagate the size to a vector to do a
1262 * vector comparison */
1263 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
1264 /* Construct a boolean vector telling us which channels
1265 * overflow the bound constant buffer */
1266 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
1267 indirect_index, num_consts);
1268
1269 /* index_vec = indirect_index * 4 + swizzle */
1270 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1271 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1272
1273 if (tgsi_type_is_64bit(stype)) {
1274 LLVMValueRef swizzle_vec2;
1275 swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1);
1276 index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
1277 index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
1278 }
1279 /* Gather values from the constant buffer */
1280 res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
1281 }
1282 else {
1283 LLVMValueRef index; /* index into the const buffer */
1284 LLVMValueRef scalar, scalar_ptr;
1285 struct lp_build_context *bld_broad = &bld_base->base;
1286 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1287
1288 scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
1289 &index, 1, "");
1290 if (stype == TGSI_TYPE_DOUBLE) {
1291 LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
1292 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
1293 bld_broad = &bld_base->dbl_bld;
1294 } else if (stype == TGSI_TYPE_UNSIGNED64) {
1295 LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
1296 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
1297 bld_broad = &bld_base->uint64_bld;
1298 } else if (stype == TGSI_TYPE_SIGNED64) {
1299 LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
1300 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
1301 bld_broad = &bld_base->int64_bld;
1302 }
1303 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
1304 res = lp_build_broadcast_scalar(bld_broad, scalar);
1305 }
1306
1307 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
1308 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1309 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1310 }
1311
1312 return res;
1313 }
1314
1315 /**
1316 * Fetch 64-bit values from two separate channels.
1317 * 64-bit values are stored split across two channels, like xy and zw.
1318 * This function creates a set of 16 floats,
1319 * extracts the values from the two channels,
1320 * puts them in the correct place, then casts to 8 64-bits.
1321 */
1322 static LLVMValueRef
1323 emit_fetch_64bit(
1324 struct lp_build_tgsi_context * bld_base,
1325 enum tgsi_opcode_type stype,
1326 LLVMValueRef input,
1327 LLVMValueRef input2)
1328 {
1329 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1330 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1331 LLVMBuilderRef builder = gallivm->builder;
1332 LLVMValueRef res;
1333 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1334 int i;
1335 LLVMValueRef shuffles[16];
1336 int len = bld_base->base.type.length * 2;
1337 assert(len <= 16);
1338
1339 for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
1340 shuffles[i] = lp_build_const_int32(gallivm, i / 2);
1341 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
1342 }
1343 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
1344
1345 return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1346 }
1347
1348 static LLVMValueRef
1349 emit_fetch_immediate(
1350 struct lp_build_tgsi_context * bld_base,
1351 const struct tgsi_full_src_register * reg,
1352 enum tgsi_opcode_type stype,
1353 unsigned swizzle)
1354 {
1355 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1356 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1357 LLVMBuilderRef builder = gallivm->builder;
1358 LLVMValueRef res = NULL;
1359
1360 if (bld->use_immediates_array || reg->Register.Indirect) {
1361 LLVMValueRef imms_array;
1362 LLVMTypeRef fptr_type;
1363
1364 /* cast imms_array pointer to float* */
1365 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1366 imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
1367
1368 if (reg->Register.Indirect) {
1369 LLVMValueRef indirect_index;
1370 LLVMValueRef index_vec; /* index into the immediate register array */
1371 LLVMValueRef index_vec2 = NULL;
1372 indirect_index = get_indirect_index(bld,
1373 reg->Register.File,
1374 reg->Register.Index,
1375 &reg->Indirect);
1376 /*
1377 * Unlike for other reg classes, adding pixel offsets is unnecessary -
1378 * immediates are stored as full vectors (FIXME??? - might be better
1379 * to store them the same as constants) but all elements are the same
1380 * in any case.
1381 */
1382 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1383 indirect_index,
1384 swizzle,
1385 FALSE);
1386 if (tgsi_type_is_64bit(stype))
1387 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1388 indirect_index,
1389 swizzle + 1,
1390 FALSE);
1391 /* Gather values from the immediate register array */
1392 res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
1393 } else {
1394 LLVMValueRef lindex = lp_build_const_int32(gallivm,
1395 reg->Register.Index * 4 + swizzle);
1396 LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
1397 bld->imms_array, &lindex, 1, "");
1398 res = LLVMBuildLoad(builder, imms_ptr, "");
1399
1400 if (tgsi_type_is_64bit(stype)) {
1401 LLVMValueRef lindex1;
1402 LLVMValueRef imms_ptr2;
1403 LLVMValueRef res2;
1404
1405 lindex1 = lp_build_const_int32(gallivm,
1406 reg->Register.Index * 4 + swizzle + 1);
1407 imms_ptr2 = LLVMBuildGEP(builder,
1408 bld->imms_array, &lindex1, 1, "");
1409 res2 = LLVMBuildLoad(builder, imms_ptr2, "");
1410 res = emit_fetch_64bit(bld_base, stype, res, res2);
1411 }
1412 }
1413 }
1414 else {
1415 res = bld->immediates[reg->Register.Index][swizzle];
1416 if (tgsi_type_is_64bit(stype))
1417 res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
1418 }
1419
1420 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1421 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1422 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1423 }
1424 return res;
1425 }
1426
1427 static LLVMValueRef
1428 emit_fetch_input(
1429 struct lp_build_tgsi_context * bld_base,
1430 const struct tgsi_full_src_register * reg,
1431 enum tgsi_opcode_type stype,
1432 unsigned swizzle)
1433 {
1434 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1435 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1436 LLVMBuilderRef builder = gallivm->builder;
1437 LLVMValueRef res;
1438
1439 if (reg->Register.Indirect) {
1440 LLVMValueRef indirect_index;
1441 LLVMValueRef index_vec; /* index into the input reg array */
1442 LLVMValueRef index_vec2 = NULL;
1443 LLVMValueRef inputs_array;
1444 LLVMTypeRef fptr_type;
1445
1446 indirect_index = get_indirect_index(bld,
1447 reg->Register.File,
1448 reg->Register.Index,
1449 &reg->Indirect);
1450
1451 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1452 indirect_index,
1453 swizzle,
1454 TRUE);
1455 if (tgsi_type_is_64bit(stype)) {
1456 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1457 indirect_index,
1458 swizzle + 1,
1459 TRUE);
1460 }
1461 /* cast inputs_array pointer to float* */
1462 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1463 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1464
1465 /* Gather values from the input register array */
1466 res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
1467 } else {
1468 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1469 LLVMValueRef lindex = lp_build_const_int32(gallivm,
1470 reg->Register.Index * 4 + swizzle);
1471 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1472 bld->inputs_array, &lindex, 1, "");
1473
1474 res = LLVMBuildLoad(builder, input_ptr, "");
1475 if (tgsi_type_is_64bit(stype)) {
1476 LLVMValueRef lindex1;
1477 LLVMValueRef input_ptr2;
1478 LLVMValueRef res2;
1479
1480 lindex1 = lp_build_const_int32(gallivm,
1481 reg->Register.Index * 4 + swizzle + 1);
1482 input_ptr2 = LLVMBuildGEP(builder,
1483 bld->inputs_array, &lindex1, 1, "");
1484 res2 = LLVMBuildLoad(builder, input_ptr2, "");
1485 res = emit_fetch_64bit(bld_base, stype, res, res2);
1486 }
1487 }
1488 else {
1489 res = bld->inputs[reg->Register.Index][swizzle];
1490 if (tgsi_type_is_64bit(stype))
1491 res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
1492 }
1493 }
1494
1495 assert(res);
1496
1497 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1498 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1499 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1500 }
1501
1502 return res;
1503 }
1504
1505
1506 static LLVMValueRef
1507 emit_fetch_gs_input(
1508 struct lp_build_tgsi_context * bld_base,
1509 const struct tgsi_full_src_register * reg,
1510 enum tgsi_opcode_type stype,
1511 unsigned swizzle)
1512 {
1513 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1514 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1515 const struct tgsi_shader_info *info = bld->bld_base.info;
1516 LLVMBuilderRef builder = gallivm->builder;
1517 LLVMValueRef attrib_index = NULL;
1518 LLVMValueRef vertex_index = NULL;
1519 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1520 LLVMValueRef res;
1521
1522 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1523 /* This is really a system value not a regular input */
1524 assert(!reg->Register.Indirect);
1525 assert(!reg->Dimension.Indirect);
1526 res = bld->system_values.prim_id;
1527 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1528 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1529 }
1530 return res;
1531 }
1532
1533 if (reg->Register.Indirect) {
1534 attrib_index = get_indirect_index(bld,
1535 reg->Register.File,
1536 reg->Register.Index,
1537 &reg->Indirect);
1538 } else {
1539 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1540 }
1541
1542 if (reg->Dimension.Indirect) {
1543 vertex_index = get_indirect_index(bld,
1544 reg->Register.File,
1545 reg->Dimension.Index,
1546 &reg->DimIndirect);
1547 } else {
1548 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1549 }
1550
1551 res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1552 reg->Dimension.Indirect,
1553 vertex_index,
1554 reg->Register.Indirect,
1555 attrib_index,
1556 swizzle_index);
1557
1558 assert(res);
1559 if (tgsi_type_is_64bit(stype)) {
1560 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle + 1);
1561 LLVMValueRef res2;
1562 res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1563 reg->Dimension.Indirect,
1564 vertex_index,
1565 reg->Register.Indirect,
1566 attrib_index,
1567 swizzle_index);
1568 assert(res2);
1569 res = emit_fetch_64bit(bld_base, stype, res, res2);
1570 } else if (stype == TGSI_TYPE_UNSIGNED) {
1571 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1572 } else if (stype == TGSI_TYPE_SIGNED) {
1573 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1574 }
1575
1576 return res;
1577 }
1578
1579 static LLVMValueRef
1580 emit_fetch_temporary(
1581 struct lp_build_tgsi_context * bld_base,
1582 const struct tgsi_full_src_register * reg,
1583 enum tgsi_opcode_type stype,
1584 unsigned swizzle)
1585 {
1586 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1587 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1588 LLVMBuilderRef builder = gallivm->builder;
1589 LLVMValueRef res;
1590
1591 if (reg->Register.Indirect) {
1592 LLVMValueRef indirect_index;
1593 LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */
1594 LLVMValueRef temps_array;
1595 LLVMTypeRef fptr_type;
1596
1597 indirect_index = get_indirect_index(bld,
1598 reg->Register.File,
1599 reg->Register.Index,
1600 &reg->Indirect);
1601
1602 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1603 indirect_index,
1604 swizzle,
1605 TRUE);
1606 if (tgsi_type_is_64bit(stype)) {
1607 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1608 indirect_index,
1609 swizzle + 1,
1610 TRUE);
1611 }
1612
1613 /* cast temps_array pointer to float* */
1614 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1615 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1616
1617 /* Gather values from the temporary register array */
1618 res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
1619 }
1620 else {
1621 LLVMValueRef temp_ptr;
1622 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1623 res = LLVMBuildLoad(builder, temp_ptr, "");
1624
1625 if (tgsi_type_is_64bit(stype)) {
1626 LLVMValueRef temp_ptr2, res2;
1627
1628 temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
1629 res2 = LLVMBuildLoad(builder, temp_ptr2, "");
1630 res = emit_fetch_64bit(bld_base, stype, res, res2);
1631 }
1632 }
1633
1634 if (stype == TGSI_TYPE_SIGNED ||
1635 stype == TGSI_TYPE_UNSIGNED ||
1636 stype == TGSI_TYPE_DOUBLE ||
1637 stype == TGSI_TYPE_SIGNED64 ||
1638 stype == TGSI_TYPE_UNSIGNED64) {
1639 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1640 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1641 }
1642
1643 return res;
1644 }
1645
1646 static LLVMValueRef
1647 emit_fetch_system_value(
1648 struct lp_build_tgsi_context * bld_base,
1649 const struct tgsi_full_src_register * reg,
1650 enum tgsi_opcode_type stype,
1651 unsigned swizzle)
1652 {
1653 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1654 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1655 const struct tgsi_shader_info *info = bld->bld_base.info;
1656 LLVMBuilderRef builder = gallivm->builder;
1657 LLVMValueRef res;
1658 enum tgsi_opcode_type atype; // Actual type of the value
1659
1660 assert(!reg->Register.Indirect);
1661
1662 switch (info->system_value_semantic_name[reg->Register.Index]) {
1663 case TGSI_SEMANTIC_INSTANCEID:
1664 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1665 atype = TGSI_TYPE_UNSIGNED;
1666 break;
1667
1668 case TGSI_SEMANTIC_VERTEXID:
1669 res = bld->system_values.vertex_id;
1670 atype = TGSI_TYPE_UNSIGNED;
1671 break;
1672
1673 case TGSI_SEMANTIC_VERTEXID_NOBASE:
1674 res = bld->system_values.vertex_id_nobase;
1675 atype = TGSI_TYPE_UNSIGNED;
1676 break;
1677
1678 case TGSI_SEMANTIC_BASEVERTEX:
1679 res = bld->system_values.basevertex;
1680 atype = TGSI_TYPE_UNSIGNED;
1681 break;
1682
1683 case TGSI_SEMANTIC_PRIMID:
1684 res = bld->system_values.prim_id;
1685 atype = TGSI_TYPE_UNSIGNED;
1686 break;
1687
1688 case TGSI_SEMANTIC_INVOCATIONID:
1689 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1690 atype = TGSI_TYPE_UNSIGNED;
1691 break;
1692
1693 default:
1694 assert(!"unexpected semantic in emit_fetch_system_value");
1695 res = bld_base->base.zero;
1696 atype = TGSI_TYPE_FLOAT;
1697 break;
1698 }
1699
1700 if (atype != stype) {
1701 if (stype == TGSI_TYPE_FLOAT) {
1702 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1703 } else if (stype == TGSI_TYPE_UNSIGNED) {
1704 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1705 } else if (stype == TGSI_TYPE_SIGNED) {
1706 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1707 }
1708 }
1709
1710 return res;
1711 }
1712
1713 /**
1714 * Register fetch with derivatives.
1715 */
1716 static void
1717 emit_fetch_deriv(
1718 struct lp_build_tgsi_soa_context *bld,
1719 LLVMValueRef src,
1720 LLVMValueRef *res,
1721 LLVMValueRef *ddx,
1722 LLVMValueRef *ddy)
1723 {
1724 if (res)
1725 *res = src;
1726
1727 /* TODO: use interpolation coeffs for inputs */
1728
1729 if (ddx)
1730 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1731
1732 if (ddy)
1733 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1734 }
1735
1736
1737 /**
1738 * Predicate.
1739 */
1740 static void
1741 emit_fetch_predicate(
1742 struct lp_build_tgsi_soa_context *bld,
1743 const struct tgsi_full_instruction *inst,
1744 LLVMValueRef *pred)
1745 {
1746 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1747 unsigned index;
1748 unsigned char swizzles[4];
1749 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
1750 LLVMValueRef value;
1751 unsigned chan;
1752
1753 if (!inst->Instruction.Predicate) {
1754 TGSI_FOR_EACH_CHANNEL( chan ) {
1755 pred[chan] = NULL;
1756 }
1757 return;
1758 }
1759
1760 swizzles[0] = inst->Predicate.SwizzleX;
1761 swizzles[1] = inst->Predicate.SwizzleY;
1762 swizzles[2] = inst->Predicate.SwizzleZ;
1763 swizzles[3] = inst->Predicate.SwizzleW;
1764
1765 index = inst->Predicate.Index;
1766 assert(index < LP_MAX_TGSI_PREDS);
1767
1768 TGSI_FOR_EACH_CHANNEL( chan ) {
1769 unsigned swizzle = swizzles[chan];
1770
1771 /*
1772 * Only fetch the predicate register channels that are actually listed
1773 * in the swizzles
1774 */
1775 if (!unswizzled[swizzle]) {
1776 value = LLVMBuildLoad(builder,
1777 bld->preds[index][swizzle], "");
1778
1779 /*
1780 * Convert the value to an integer mask.
1781 *
1782 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
1783 * is needlessly causing two comparisons due to storing the intermediate
1784 * result as float vector instead of an integer mask vector.
1785 */
1786 value = lp_build_compare(bld->bld_base.base.gallivm,
1787 bld->bld_base.base.type,
1788 PIPE_FUNC_NOTEQUAL,
1789 value,
1790 bld->bld_base.base.zero);
1791 if (inst->Predicate.Negate) {
1792 value = LLVMBuildNot(builder, value, "");
1793 }
1794
1795 unswizzled[swizzle] = value;
1796 } else {
1797 value = unswizzled[swizzle];
1798 }
1799
1800 pred[chan] = value;
1801 }
1802 }
1803
1804 /**
1805 * store an array of 8 64-bit into two arrays of 8 floats
1806 * i.e.
1807 * value is d0, d1, d2, d3 etc.
1808 * each 64-bit has high and low pieces x, y
1809 * so gets stored into the separate channels as:
1810 * chan_ptr = d0.x, d1.x, d2.x, d3.x
1811 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1812 */
1813 static void
1814 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1815 LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1816 LLVMValueRef pred,
1817 LLVMValueRef value)
1818 {
1819 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1820 struct gallivm_state *gallivm = bld_base->base.gallivm;
1821 LLVMBuilderRef builder = gallivm->builder;
1822 struct lp_build_context *float_bld = &bld_base->base;
1823 unsigned i;
1824 LLVMValueRef temp, temp2;
1825 LLVMValueRef shuffles[8];
1826 LLVMValueRef shuffles2[8];
1827
1828 for (i = 0; i < bld_base->base.type.length; i++) {
1829 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1830 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1831 }
1832
1833 temp = LLVMBuildShuffleVector(builder, value,
1834 LLVMGetUndef(LLVMTypeOf(value)),
1835 LLVMConstVector(shuffles,
1836 bld_base->base.type.length),
1837 "");
1838 temp2 = LLVMBuildShuffleVector(builder, value,
1839 LLVMGetUndef(LLVMTypeOf(value)),
1840 LLVMConstVector(shuffles2,
1841 bld_base->base.type.length),
1842 "");
1843
1844 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp, chan_ptr);
1845 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp2, chan_ptr2);
1846 }
1847
1848 /**
1849 * Register store.
1850 */
1851 static void
1852 emit_store_chan(
1853 struct lp_build_tgsi_context *bld_base,
1854 const struct tgsi_full_instruction *inst,
1855 unsigned index,
1856 unsigned chan_index,
1857 LLVMValueRef pred,
1858 LLVMValueRef value)
1859 {
1860 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1861 struct gallivm_state *gallivm = bld_base->base.gallivm;
1862 LLVMBuilderRef builder = gallivm->builder;
1863 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1864 struct lp_build_context *float_bld = &bld_base->base;
1865 struct lp_build_context *int_bld = &bld_base->int_bld;
1866 LLVMValueRef indirect_index = NULL;
1867 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
1868
1869 /*
1870 * Apply saturation.
1871 *
1872 * It is always assumed to be float.
1873 */
1874 if (inst->Instruction.Saturate) {
1875 assert(dtype == TGSI_TYPE_FLOAT ||
1876 dtype == TGSI_TYPE_UNTYPED);
1877 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1878 value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1879 }
1880
1881 if (reg->Register.Indirect) {
1882 /*
1883 * Currently the mesa/st doesn't generate indirect stores
1884 * to 64-bit values, it normally uses MOV to do indirect stores.
1885 */
1886 assert(!tgsi_type_is_64bit(dtype));
1887 indirect_index = get_indirect_index(bld,
1888 reg->Register.File,
1889 reg->Register.Index,
1890 &reg->Indirect);
1891 } else {
1892 assert(reg->Register.Index <=
1893 bld_base->info->file_max[reg->Register.File]);
1894 }
1895
1896 if (DEBUG_EXECUTION) {
1897 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1898 }
1899
1900 switch( reg->Register.File ) {
1901 case TGSI_FILE_OUTPUT:
1902 /* Outputs are always stored as floats */
1903 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1904
1905 if (reg->Register.Indirect) {
1906 LLVMValueRef index_vec; /* indexes into the output registers */
1907 LLVMValueRef outputs_array;
1908 LLVMTypeRef fptr_type;
1909
1910 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1911 indirect_index,
1912 chan_index,
1913 TRUE);
1914
1915 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1916 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1917
1918 /* Scatter store values into output registers */
1919 emit_mask_scatter(bld, outputs_array, index_vec, value,
1920 &bld->exec_mask, pred);
1921 }
1922 else {
1923 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1924 chan_index);
1925
1926 if (tgsi_type_is_64bit(dtype)) {
1927 LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1928 chan_index + 1);
1929 emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1930 pred, value);
1931 } else
1932 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
1933 }
1934 break;
1935
1936 case TGSI_FILE_TEMPORARY:
1937 /* Temporaries are always stored as floats */
1938 if (!tgsi_type_is_64bit(dtype))
1939 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1940 else
1941 value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1942
1943 if (reg->Register.Indirect) {
1944 LLVMValueRef index_vec; /* indexes into the temp registers */
1945 LLVMValueRef temps_array;
1946 LLVMTypeRef fptr_type;
1947
1948 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1949 indirect_index,
1950 chan_index,
1951 TRUE);
1952
1953 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1954 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1955
1956 /* Scatter store values into temp registers */
1957 emit_mask_scatter(bld, temps_array, index_vec, value,
1958 &bld->exec_mask, pred);
1959 }
1960 else {
1961 LLVMValueRef temp_ptr;
1962 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1963
1964 if (tgsi_type_is_64bit(dtype)) {
1965 LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1966 reg->Register.Index,
1967 chan_index + 1);
1968 emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1969 pred, value);
1970 }
1971 else
1972 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
1973 }
1974 break;
1975
1976 case TGSI_FILE_ADDRESS:
1977 assert(dtype == TGSI_TYPE_SIGNED);
1978 assert(LLVMTypeOf(value) == int_bld->vec_type);
1979 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1980 lp_exec_mask_store(&bld->exec_mask, int_bld, pred, value,
1981 bld->addr[reg->Register.Index][chan_index]);
1982 break;
1983
1984 case TGSI_FILE_PREDICATE:
1985 assert(LLVMTypeOf(value) == float_bld->vec_type);
1986 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1987 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value,
1988 bld->preds[reg->Register.Index][chan_index]);
1989 break;
1990
1991 default:
1992 assert( 0 );
1993 }
1994
1995 (void)dtype;
1996 }
1997
1998 /*
1999 * Called at the beginning of the translation of each TGSI instruction, to
2000 * emit some debug code.
2001 */
2002 static void
2003 emit_debug(
2004 struct lp_build_tgsi_context * bld_base,
2005 const struct tgsi_full_instruction * inst,
2006 const struct tgsi_opcode_info * info)
2007
2008 {
2009 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2010
2011 if (DEBUG_EXECUTION) {
2012 /*
2013 * Dump the TGSI instruction.
2014 */
2015
2016 struct gallivm_state *gallivm = bld_base->base.gallivm;
2017 char buf[512];
2018 buf[0] = '$';
2019 buf[1] = ' ';
2020 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
2021 lp_build_printf(gallivm, buf);
2022
2023 /* Dump the execution mask.
2024 */
2025 if (bld->exec_mask.has_mask) {
2026 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
2027 }
2028 }
2029 }
2030
2031 static void
2032 emit_store(
2033 struct lp_build_tgsi_context * bld_base,
2034 const struct tgsi_full_instruction * inst,
2035 const struct tgsi_opcode_info * info,
2036 LLVMValueRef dst[4])
2037
2038 {
2039 unsigned chan_index;
2040 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2041 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
2042 if(info->num_dst) {
2043 LLVMValueRef pred[TGSI_NUM_CHANNELS];
2044
2045 emit_fetch_predicate( bld, inst, pred );
2046
2047 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2048
2049 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
2050 continue;
2051 emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
2052 }
2053 }
2054 }
2055
2056 static unsigned
2057 tgsi_to_pipe_tex_target(unsigned tgsi_target)
2058 {
2059 switch (tgsi_target) {
2060 case TGSI_TEXTURE_BUFFER:
2061 return PIPE_BUFFER;
2062 case TGSI_TEXTURE_1D:
2063 case TGSI_TEXTURE_SHADOW1D:
2064 return PIPE_TEXTURE_1D;
2065 case TGSI_TEXTURE_2D:
2066 case TGSI_TEXTURE_SHADOW2D:
2067 case TGSI_TEXTURE_2D_MSAA:
2068 return PIPE_TEXTURE_2D;
2069 case TGSI_TEXTURE_3D:
2070 return PIPE_TEXTURE_3D;
2071 case TGSI_TEXTURE_CUBE:
2072 case TGSI_TEXTURE_SHADOWCUBE:
2073 return PIPE_TEXTURE_CUBE;
2074 case TGSI_TEXTURE_RECT:
2075 case TGSI_TEXTURE_SHADOWRECT:
2076 return PIPE_TEXTURE_RECT;
2077 case TGSI_TEXTURE_1D_ARRAY:
2078 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2079 return PIPE_TEXTURE_1D_ARRAY;
2080 case TGSI_TEXTURE_2D_ARRAY:
2081 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2082 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2083 return PIPE_TEXTURE_2D_ARRAY;
2084 case TGSI_TEXTURE_CUBE_ARRAY:
2085 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2086 return PIPE_TEXTURE_CUBE_ARRAY;
2087 default:
2088 assert(0);
2089 return PIPE_BUFFER;
2090 }
2091 }
2092
2093
2094 static enum lp_sampler_lod_property
2095 lp_build_lod_property(
2096 struct lp_build_tgsi_context *bld_base,
2097 const struct tgsi_full_instruction *inst,
2098 unsigned src_op)
2099 {
2100 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2101 enum lp_sampler_lod_property lod_property;
2102
2103 /*
2104 * Not much we can do here. We could try catching inputs declared
2105 * with constant interpolation but not sure it's worth it - since for
2106 * TEX opcodes as well as FETCH/LD the lod comes from same reg as
2107 * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
2108 * like the constant/immediate recognition below.
2109 * What seems to be of more value would be to recognize temps holding
2110 * broadcasted scalars but no way we can do it.
2111 * Tried asking llvm but without any success (using LLVMIsConstant
2112 * even though this isn't exactly what we'd need), even as simple as
2113 * IMM[0] UINT32 (0,-1,0,0)
2114 * MOV TEMP[0] IMM[0].yyyy
2115 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2116 * doesn't work.
2117 * This means there's ZERO chance this will ever catch a scalar lod
2118 * with traditional tex opcodes as well as texel fetches, since the lod
2119 * comes from the same reg as coords (except some test shaders using
2120 * constant coords maybe).
2121 * There's at least hope for sample opcodes as well as size queries.
2122 */
2123 if (reg->Register.File == TGSI_FILE_CONSTANT ||
2124 reg->Register.File == TGSI_FILE_IMMEDIATE) {
2125 lod_property = LP_SAMPLER_LOD_SCALAR;
2126 }
2127 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2128 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
2129 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2130 }
2131 else {
2132 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2133 }
2134 }
2135 else {
2136 /* never use scalar (per-quad) lod the results are just too wrong. */
2137 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2138 }
2139 return lod_property;
2140 }
2141
2142
2143 /**
2144 * High-level instruction translators.
2145 */
2146
2147 static void
2148 emit_tex( struct lp_build_tgsi_soa_context *bld,
2149 const struct tgsi_full_instruction *inst,
2150 enum lp_build_tex_modifier modifier,
2151 LLVMValueRef *texel,
2152 unsigned sampler_reg,
2153 enum lp_sampler_op_type sampler_op)
2154 {
2155 unsigned unit = inst->Src[sampler_reg].Register.Index;
2156 LLVMValueRef oow = NULL;
2157 LLVMValueRef lod = NULL;
2158 LLVMValueRef coords[5];
2159 LLVMValueRef offsets[3] = { NULL };
2160 struct lp_derivatives derivs;
2161 struct lp_sampler_params params;
2162 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2163 unsigned num_derivs, num_offsets, i;
2164 unsigned shadow_coord = 0;
2165 unsigned layer_coord = 0;
2166 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2167
2168 memset(&params, 0, sizeof(params));
2169
2170 if (!bld->sampler) {
2171 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2172 for (i = 0; i < 4; i++) {
2173 texel[i] = bld->bld_base.base.undef;
2174 }
2175 return;
2176 }
2177
2178 switch (inst->Texture.Texture) {
2179 case TGSI_TEXTURE_1D_ARRAY:
2180 layer_coord = 1;
2181 /* fallthrough */
2182 case TGSI_TEXTURE_1D:
2183 num_offsets = 1;
2184 num_derivs = 1;
2185 break;
2186 case TGSI_TEXTURE_2D_ARRAY:
2187 layer_coord = 2;
2188 /* fallthrough */
2189 case TGSI_TEXTURE_2D:
2190 case TGSI_TEXTURE_RECT:
2191 num_offsets = 2;
2192 num_derivs = 2;
2193 break;
2194 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2195 layer_coord = 1;
2196 /* fallthrough */
2197 case TGSI_TEXTURE_SHADOW1D:
2198 shadow_coord = 2;
2199 num_offsets = 1;
2200 num_derivs = 1;
2201 break;
2202 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2203 layer_coord = 2;
2204 shadow_coord = 3;
2205 num_offsets = 2;
2206 num_derivs = 2;
2207 break;
2208 case TGSI_TEXTURE_SHADOW2D:
2209 case TGSI_TEXTURE_SHADOWRECT:
2210 shadow_coord = 2;
2211 num_offsets = 2;
2212 num_derivs = 2;
2213 break;
2214 case TGSI_TEXTURE_CUBE:
2215 num_offsets = 2;
2216 num_derivs = 3;
2217 break;
2218 case TGSI_TEXTURE_3D:
2219 num_offsets = 3;
2220 num_derivs = 3;
2221 break;
2222 case TGSI_TEXTURE_SHADOWCUBE:
2223 shadow_coord = 3;
2224 num_offsets = 2;
2225 num_derivs = 3;
2226 break;
2227 case TGSI_TEXTURE_CUBE_ARRAY:
2228 num_offsets = 2;
2229 num_derivs = 3;
2230 layer_coord = 3;
2231 break;
2232 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2233 num_offsets = 2;
2234 num_derivs = 3;
2235 layer_coord = 3;
2236 shadow_coord = 4; /* shadow coord special different reg */
2237 break;
2238 case TGSI_TEXTURE_2D_MSAA:
2239 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2240 default:
2241 assert(0);
2242 return;
2243 }
2244
2245 /* Note lod and especially projected are illegal in a LOT of cases */
2246 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2247 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2248 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2249 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2250 /* note that shadow cube array with bias/explicit lod does not exist */
2251 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2252 }
2253 else {
2254 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2255 }
2256 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2257 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2258 }
2259 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2260 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2261 }
2262 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2263 }
2264
2265 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2266 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2267 oow = lp_build_rcp(&bld->bld_base.base, oow);
2268 }
2269
2270 for (i = 0; i < num_derivs; i++) {
2271 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2272 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2273 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2274 }
2275 for (i = num_derivs; i < 5; i++) {
2276 coords[i] = bld->bld_base.base.undef;
2277 }
2278
2279 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2280 if (layer_coord) {
2281 if (layer_coord == 3) {
2282 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2283 }
2284 else {
2285 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2286 }
2287 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2288 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2289 }
2290 /* Shadow coord occupies always 5th slot. */
2291 if (shadow_coord) {
2292 sample_key |= LP_SAMPLER_SHADOW;
2293 if (shadow_coord == 4) {
2294 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2295 }
2296 else {
2297 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2298 }
2299 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2300 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2301 }
2302
2303 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2304 unsigned dim;
2305 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2306 for (dim = 0; dim < num_derivs; ++dim) {
2307 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2308 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2309 }
2310 params.derivs = &derivs;
2311 /*
2312 * could also check all src regs if constant but I doubt such
2313 * cases exist in practice.
2314 */
2315 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2316 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
2317 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2318 }
2319 else {
2320 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2321 }
2322 }
2323 else {
2324 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2325 }
2326 }
2327 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2328
2329 /* we don't handle the 4 offset version of tg4 */
2330 if (inst->Texture.NumOffsets == 1) {
2331 unsigned dim;
2332 sample_key |= LP_SAMPLER_OFFSETS;
2333 for (dim = 0; dim < num_offsets; dim++) {
2334 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2335 }
2336 }
2337
2338 params.type = bld->bld_base.base.type;
2339 params.sample_key = sample_key;
2340 params.texture_index = unit;
2341 params.sampler_index = unit;
2342 params.context_ptr = bld->context_ptr;
2343 params.thread_data_ptr = bld->thread_data_ptr;
2344 params.coords = coords;
2345 params.offsets = offsets;
2346 params.lod = lod;
2347 params.texel = texel;
2348
2349 bld->sampler->emit_tex_sample(bld->sampler,
2350 bld->bld_base.base.gallivm,
2351 &params);
2352 }
2353
2354 static void
2355 emit_sample(struct lp_build_tgsi_soa_context *bld,
2356 const struct tgsi_full_instruction *inst,
2357 enum lp_build_tex_modifier modifier,
2358 boolean compare,
2359 LLVMValueRef *texel)
2360 {
2361 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2362 unsigned texture_unit, sampler_unit;
2363 LLVMValueRef lod = NULL;
2364 LLVMValueRef coords[5];
2365 LLVMValueRef offsets[3] = { NULL };
2366 struct lp_derivatives derivs;
2367 struct lp_sampler_params params;
2368 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2369
2370 unsigned num_offsets, num_derivs, i;
2371 unsigned layer_coord = 0;
2372 unsigned sample_key = LP_SAMPLER_OP_TEXTURE << LP_SAMPLER_OP_TYPE_SHIFT;
2373
2374 memset(&params, 0, sizeof(params));
2375
2376 if (!bld->sampler) {
2377 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2378 for (i = 0; i < 4; i++) {
2379 texel[i] = bld->bld_base.base.undef;
2380 }
2381 return;
2382 }
2383
2384 /*
2385 * unlike old-style tex opcodes the texture/sampler indices
2386 * always come from src1 and src2 respectively.
2387 */
2388 texture_unit = inst->Src[1].Register.Index;
2389 sampler_unit = inst->Src[2].Register.Index;
2390
2391 /*
2392 * Note inst->Texture.Texture will contain the number of offsets,
2393 * however the target information is NOT there and comes from the
2394 * declared sampler views instead.
2395 */
2396 switch (bld->sv[texture_unit].Resource) {
2397 case TGSI_TEXTURE_1D:
2398 num_offsets = 1;
2399 num_derivs = 1;
2400 break;
2401 case TGSI_TEXTURE_1D_ARRAY:
2402 layer_coord = 1;
2403 num_offsets = 1;
2404 num_derivs = 1;
2405 break;
2406 case TGSI_TEXTURE_2D:
2407 case TGSI_TEXTURE_RECT:
2408 num_offsets = 2;
2409 num_derivs = 2;
2410 break;
2411 case TGSI_TEXTURE_2D_ARRAY:
2412 layer_coord = 2;
2413 num_offsets = 2;
2414 num_derivs = 2;
2415 break;
2416 case TGSI_TEXTURE_CUBE:
2417 num_offsets = 2;
2418 num_derivs = 3;
2419 break;
2420 case TGSI_TEXTURE_3D:
2421 num_offsets = 3;
2422 num_derivs = 3;
2423 break;
2424 case TGSI_TEXTURE_CUBE_ARRAY:
2425 layer_coord = 3;
2426 num_offsets = 2;
2427 num_derivs = 3;
2428 break;
2429 default:
2430 assert(0);
2431 return;
2432 }
2433
2434 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2435 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2436 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2437 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2438 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2439 }
2440 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2441 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2442 }
2443 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2444 }
2445 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2446 /* XXX might be better to explicitly pass the level zero information */
2447 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2448 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2449 }
2450
2451 for (i = 0; i < num_derivs; i++) {
2452 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2453 }
2454 for (i = num_derivs; i < 5; i++) {
2455 coords[i] = bld->bld_base.base.undef;
2456 }
2457
2458 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2459 if (layer_coord) {
2460 if (layer_coord == 3)
2461 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2462 else
2463 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2464 }
2465 /* Shadow coord occupies always 5th slot. */
2466 if (compare) {
2467 sample_key |= LP_SAMPLER_SHADOW;
2468 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2469 }
2470
2471 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2472 unsigned dim;
2473 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2474 for (dim = 0; dim < num_derivs; ++dim) {
2475 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2476 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2477 }
2478 params.derivs = &derivs;
2479 /*
2480 * could also check all src regs if constant but I doubt such
2481 * cases exist in practice.
2482 */
2483 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2484 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
2485 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2486 }
2487 else {
2488 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2489 }
2490 }
2491 else {
2492 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2493 }
2494 }
2495
2496 /* some advanced gather instructions (txgo) would require 4 offsets */
2497 if (inst->Texture.NumOffsets == 1) {
2498 unsigned dim;
2499 sample_key |= LP_SAMPLER_OFFSETS;
2500 for (dim = 0; dim < num_offsets; dim++) {
2501 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2502 }
2503 }
2504 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2505
2506 params.type = bld->bld_base.base.type;
2507 params.sample_key = sample_key;
2508 params.texture_index = texture_unit;
2509 params.sampler_index = sampler_unit;
2510 params.context_ptr = bld->context_ptr;
2511 params.thread_data_ptr = bld->thread_data_ptr;
2512 params.coords = coords;
2513 params.offsets = offsets;
2514 params.lod = lod;
2515 params.texel = texel;
2516
2517 bld->sampler->emit_tex_sample(bld->sampler,
2518 bld->bld_base.base.gallivm,
2519 &params);
2520
2521 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2522 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2523 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2524 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2525 unsigned char swizzles[4];
2526 swizzles[0] = inst->Src[1].Register.SwizzleX;
2527 swizzles[1] = inst->Src[1].Register.SwizzleY;
2528 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2529 swizzles[3] = inst->Src[1].Register.SwizzleW;
2530
2531 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2532 }
2533 }
2534
2535 static void
2536 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2537 const struct tgsi_full_instruction *inst,
2538 LLVMValueRef *texel,
2539 boolean is_samplei)
2540 {
2541 unsigned unit, target;
2542 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2543 LLVMValueRef explicit_lod = NULL;
2544 LLVMValueRef coords[5];
2545 LLVMValueRef offsets[3] = { NULL };
2546 struct lp_sampler_params params;
2547 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2548 unsigned dims, i;
2549 unsigned layer_coord = 0;
2550 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2551
2552 memset(&params, 0, sizeof(params));
2553
2554 if (!bld->sampler) {
2555 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2556 for (i = 0; i < 4; i++) {
2557 texel[i] = coord_undef;
2558 }
2559 return;
2560 }
2561
2562 unit = inst->Src[1].Register.Index;
2563
2564 if (is_samplei) {
2565 target = bld->sv[unit].Resource;
2566 }
2567 else {
2568 target = inst->Texture.Texture;
2569 }
2570
2571 switch (target) {
2572 case TGSI_TEXTURE_1D:
2573 case TGSI_TEXTURE_BUFFER:
2574 dims = 1;
2575 break;
2576 case TGSI_TEXTURE_1D_ARRAY:
2577 layer_coord = 1;
2578 dims = 1;
2579 break;
2580 case TGSI_TEXTURE_2D:
2581 case TGSI_TEXTURE_RECT:
2582 case TGSI_TEXTURE_2D_MSAA:
2583 dims = 2;
2584 break;
2585 case TGSI_TEXTURE_2D_ARRAY:
2586 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2587 layer_coord = 2;
2588 dims = 2;
2589 break;
2590 case TGSI_TEXTURE_3D:
2591 dims = 3;
2592 break;
2593 default:
2594 assert(0);
2595 return;
2596 }
2597
2598 /* always have lod except for buffers and msaa targets ? */
2599 if (target != TGSI_TEXTURE_BUFFER &&
2600 target != TGSI_TEXTURE_2D_MSAA &&
2601 target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
2602 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2603 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2604 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2605 }
2606 /*
2607 * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2608 * would be the sample index.
2609 */
2610
2611 for (i = 0; i < dims; i++) {
2612 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2613 }
2614 /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
2615 for (i = dims; i < 5; i++) {
2616 coords[i] = coord_undef;
2617 }
2618 if (layer_coord)
2619 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2620
2621 if (inst->Texture.NumOffsets == 1) {
2622 unsigned dim;
2623 sample_key |= LP_SAMPLER_OFFSETS;
2624 for (dim = 0; dim < dims; dim++) {
2625 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2626 }
2627 }
2628 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2629
2630 params.type = bld->bld_base.base.type;
2631 params.sample_key = sample_key;
2632 params.texture_index = unit;
2633 /*
2634 * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
2635 * and trigger some assertions with d3d10 where the sampler view number
2636 * can exceed this.
2637 */
2638 params.sampler_index = 0;
2639 params.context_ptr = bld->context_ptr;
2640 params.thread_data_ptr = bld->thread_data_ptr;
2641 params.coords = coords;
2642 params.offsets = offsets;
2643 params.derivs = NULL;
2644 params.lod = explicit_lod;
2645 params.texel = texel;
2646
2647 bld->sampler->emit_tex_sample(bld->sampler,
2648 bld->bld_base.base.gallivm,
2649 &params);
2650
2651 if (is_samplei &&
2652 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2653 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2654 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2655 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2656 unsigned char swizzles[4];
2657 swizzles[0] = inst->Src[1].Register.SwizzleX;
2658 swizzles[1] = inst->Src[1].Register.SwizzleY;
2659 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2660 swizzles[3] = inst->Src[1].Register.SwizzleW;
2661
2662 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2663 }
2664 }
2665
2666 static void
2667 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2668 const struct tgsi_full_instruction *inst,
2669 LLVMValueRef *sizes_out,
2670 boolean is_sviewinfo)
2671 {
2672 LLVMValueRef explicit_lod;
2673 enum lp_sampler_lod_property lod_property;
2674 unsigned has_lod;
2675 unsigned i;
2676 unsigned unit = inst->Src[1].Register.Index;
2677 unsigned target, pipe_target;
2678 struct lp_sampler_size_query_params params;
2679
2680 if (is_sviewinfo) {
2681 target = bld->sv[unit].Resource;
2682 }
2683 else {
2684 target = inst->Texture.Texture;
2685 }
2686 switch (target) {
2687 case TGSI_TEXTURE_BUFFER:
2688 case TGSI_TEXTURE_RECT:
2689 case TGSI_TEXTURE_SHADOWRECT:
2690 has_lod = 0;
2691 break;
2692 default:
2693 has_lod = 1;
2694 break;
2695 }
2696
2697 if (!bld->sampler) {
2698 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2699 for (i = 0; i < 4; i++)
2700 sizes_out[i] = bld->bld_base.int_bld.undef;
2701 return;
2702 }
2703
2704 if (has_lod) {
2705 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2706 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2707 }
2708 else {
2709 explicit_lod = NULL;
2710 lod_property = LP_SAMPLER_LOD_SCALAR;
2711 }
2712
2713
2714 pipe_target = tgsi_to_pipe_tex_target(target);
2715
2716 params.int_type = bld->bld_base.int_bld.type;
2717 params.texture_unit = unit;
2718 params.target = pipe_target;
2719 params.context_ptr = bld->context_ptr;
2720 params.is_sviewinfo = TRUE;
2721 params.lod_property = lod_property;
2722 params.explicit_lod = explicit_lod;
2723 params.sizes_out = sizes_out;
2724
2725 bld->sampler->emit_size_query(bld->sampler,
2726 bld->bld_base.base.gallivm,
2727 &params);
2728 }
2729
2730 static boolean
2731 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2732 int pc)
2733 {
2734 unsigned i;
2735
2736 for (i = 0; i < 5; i++) {
2737 unsigned opcode;
2738
2739 if (pc + i >= bld->bld_base.info->num_instructions)
2740 return TRUE;
2741
2742 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2743
2744 if (opcode == TGSI_OPCODE_END)
2745 return TRUE;
2746
2747 if (opcode == TGSI_OPCODE_TEX ||
2748 opcode == TGSI_OPCODE_TXP ||
2749 opcode == TGSI_OPCODE_TXD ||
2750 opcode == TGSI_OPCODE_TXB ||
2751 opcode == TGSI_OPCODE_TXL ||
2752 opcode == TGSI_OPCODE_TXF ||
2753 opcode == TGSI_OPCODE_TXQ ||
2754 opcode == TGSI_OPCODE_TEX2 ||
2755 opcode == TGSI_OPCODE_TXB2 ||
2756 opcode == TGSI_OPCODE_TXL2 ||
2757 opcode == TGSI_OPCODE_SAMPLE ||
2758 opcode == TGSI_OPCODE_SAMPLE_B ||
2759 opcode == TGSI_OPCODE_SAMPLE_C ||
2760 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2761 opcode == TGSI_OPCODE_SAMPLE_D ||
2762 opcode == TGSI_OPCODE_SAMPLE_I ||
2763 opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2764 opcode == TGSI_OPCODE_SAMPLE_L ||
2765 opcode == TGSI_OPCODE_SVIEWINFO ||
2766 opcode == TGSI_OPCODE_CAL ||
2767 opcode == TGSI_OPCODE_CALLNZ ||
2768 opcode == TGSI_OPCODE_IF ||
2769 opcode == TGSI_OPCODE_UIF ||
2770 opcode == TGSI_OPCODE_BGNLOOP ||
2771 opcode == TGSI_OPCODE_SWITCH)
2772 return FALSE;
2773 }
2774
2775 return TRUE;
2776 }
2777
2778
2779
2780 /**
2781 * Kill fragment if any of the src register values are negative.
2782 */
2783 static void
2784 emit_kill_if(
2785 struct lp_build_tgsi_soa_context *bld,
2786 const struct tgsi_full_instruction *inst,
2787 int pc)
2788 {
2789 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2790 const struct tgsi_full_src_register *reg = &inst->Src[0];
2791 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2792 LLVMValueRef mask;
2793 unsigned chan_index;
2794
2795 memset(&terms, 0, sizeof terms);
2796
2797 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2798 unsigned swizzle;
2799
2800 /* Unswizzle channel */
2801 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2802
2803 /* Check if the component has not been already tested. */
2804 assert(swizzle < TGSI_NUM_CHANNELS);
2805 if( !terms[swizzle] )
2806 /* TODO: change the comparison operator instead of setting the sign */
2807 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2808 }
2809
2810 mask = NULL;
2811 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2812 if(terms[chan_index]) {
2813 LLVMValueRef chan_mask;
2814
2815 /*
2816 * If term < 0 then mask = 0 else mask = ~0.
2817 */
2818 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2819
2820 if(mask)
2821 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2822 else
2823 mask = chan_mask;
2824 }
2825 }
2826
2827 if (bld->exec_mask.has_mask) {
2828 LLVMValueRef invmask;
2829 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2830 mask = LLVMBuildOr(builder, mask, invmask, "");
2831 }
2832
2833 lp_build_mask_update(bld->mask, mask);
2834 if (!near_end_of_shader(bld, pc))
2835 lp_build_mask_check(bld->mask);
2836 }
2837
2838
2839 /**
2840 * Unconditional fragment kill.
2841 * The only predication is the execution mask which will apply if
2842 * we're inside a loop or conditional.
2843 */
2844 static void
2845 emit_kill(struct lp_build_tgsi_soa_context *bld,
2846 int pc)
2847 {
2848 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2849 LLVMValueRef mask;
2850
2851 /* For those channels which are "alive", disable fragment shader
2852 * execution.
2853 */
2854 if (bld->exec_mask.has_mask) {
2855 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2856 }
2857 else {
2858 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2859 mask = zero;
2860 }
2861
2862 lp_build_mask_update(bld->mask, mask);
2863
2864 if (!near_end_of_shader(bld, pc))
2865 lp_build_mask_check(bld->mask);
2866 }
2867
2868
2869 /**
2870 * Emit code which will dump the value of all the temporary registers
2871 * to stdout.
2872 */
2873 static void
2874 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2875 unsigned file)
2876 {
2877 const struct tgsi_shader_info *info = bld->bld_base.info;
2878 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2879 LLVMBuilderRef builder = gallivm->builder;
2880 LLVMValueRef reg_ptr;
2881 int index;
2882 int max_index = info->file_max[file];
2883
2884 /*
2885 * Some register files, particularly constants, can be very large,
2886 * and dumping everything could make this unusably slow.
2887 */
2888 max_index = MIN2(max_index, 32);
2889
2890 for (index = 0; index <= max_index; index++) {
2891 LLVMValueRef res;
2892 unsigned mask;
2893 int chan;
2894
2895 if (index < 8 * sizeof(unsigned) &&
2896 (info->file_mask[file] & (1u << index)) == 0) {
2897 /* This was not declared.*/
2898 continue;
2899 }
2900
2901 if (file == TGSI_FILE_INPUT) {
2902 mask = info->input_usage_mask[index];
2903 } else {
2904 mask = TGSI_WRITEMASK_XYZW;
2905 }
2906
2907 for (chan = 0; chan < 4; chan++) {
2908 if ((mask & (1 << chan)) == 0) {
2909 /* This channel is not used.*/
2910 continue;
2911 }
2912
2913 if (file == TGSI_FILE_CONSTANT) {
2914 struct tgsi_full_src_register reg;
2915 memset(&reg, 0, sizeof reg);
2916 reg.Register.File = file;
2917 reg.Register.Index = index;
2918 reg.Register.SwizzleX = 0;
2919 reg.Register.SwizzleY = 1;
2920 reg.Register.SwizzleZ = 2;
2921 reg.Register.SwizzleW = 3;
2922
2923 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2924 if (!res) {
2925 continue;
2926 }
2927 } else if (file == TGSI_FILE_INPUT) {
2928 res = bld->inputs[index][chan];
2929 if (!res) {
2930 continue;
2931 }
2932 } else if (file == TGSI_FILE_TEMPORARY) {
2933 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2934 assert(reg_ptr);
2935 res = LLVMBuildLoad(builder, reg_ptr, "");
2936 } else if (file == TGSI_FILE_OUTPUT) {
2937 reg_ptr = lp_get_output_ptr(bld, index, chan);
2938 assert(reg_ptr);
2939 res = LLVMBuildLoad(builder, reg_ptr, "");
2940 } else {
2941 assert(0);
2942 continue;
2943 }
2944
2945 emit_dump_reg(gallivm, file, index, chan, res);
2946 }
2947 }
2948 }
2949
2950
2951
2952 void
2953 lp_emit_declaration_soa(
2954 struct lp_build_tgsi_context *bld_base,
2955 const struct tgsi_full_declaration *decl)
2956 {
2957 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2958 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2959 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2960 const unsigned first = decl->Range.First;
2961 const unsigned last = decl->Range.Last;
2962 unsigned idx, i;
2963
2964 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2965
2966 switch (decl->Declaration.File) {
2967 case TGSI_FILE_TEMPORARY:
2968 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2969 assert(last < LP_MAX_INLINED_TEMPS);
2970 for (idx = first; idx <= last; ++idx) {
2971 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2972 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2973 }
2974 }
2975 break;
2976
2977 case TGSI_FILE_OUTPUT:
2978 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2979 for (idx = first; idx <= last; ++idx) {
2980 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2981 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2982 vec_type, "output");
2983 }
2984 }
2985 break;
2986
2987 case TGSI_FILE_ADDRESS:
2988 /* ADDR registers are only allocated with an integer LLVM IR type,
2989 * as they are guaranteed to always have integers.
2990 * XXX: Not sure if this exception is worthwhile (or the whole idea of
2991 * an ADDR register for that matter).
2992 */
2993 assert(last < LP_MAX_TGSI_ADDRS);
2994 for (idx = first; idx <= last; ++idx) {
2995 assert(idx < LP_MAX_TGSI_ADDRS);
2996 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2997 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2998 }
2999 break;
3000
3001 case TGSI_FILE_PREDICATE:
3002 assert(last < LP_MAX_TGSI_PREDS);
3003 for (idx = first; idx <= last; ++idx) {
3004 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
3005 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
3006 "predicate");
3007 }
3008 break;
3009
3010 case TGSI_FILE_SAMPLER_VIEW:
3011 /*
3012 * The target stored here MUST match whatever there actually
3013 * is in the set sampler views (what about return type?).
3014 */
3015 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3016 for (idx = first; idx <= last; ++idx) {
3017 bld->sv[idx] = decl->SamplerView;
3018 }
3019 break;
3020
3021 case TGSI_FILE_CONSTANT:
3022 {
3023 /*
3024 * We could trivially fetch the per-buffer pointer when fetching the
3025 * constant, relying on llvm to figure out it's always the same pointer
3026 * anyway. However, doing so results in a huge (more than factor of 10)
3027 * slowdown in llvm compilation times for some (but not all) shaders
3028 * (more specifically, the IR optimization spends way more time in
3029 * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
3030 */
3031 unsigned idx2D = decl->Dim.Index2D;
3032 LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
3033 assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
3034 bld->consts[idx2D] =
3035 lp_build_array_get(gallivm, bld->consts_ptr, index2D);
3036 bld->consts_sizes[idx2D] =
3037 lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
3038 }
3039 break;
3040
3041 default:
3042 /* don't need to declare other vars */
3043 break;
3044 }
3045 }
3046
3047
3048 void lp_emit_immediate_soa(
3049 struct lp_build_tgsi_context *bld_base,
3050 const struct tgsi_full_immediate *imm)
3051 {
3052 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3053 struct gallivm_state * gallivm = bld_base->base.gallivm;
3054 LLVMValueRef imms[4];
3055 unsigned i;
3056 const uint size = imm->Immediate.NrTokens - 1;
3057 assert(size <= 4);
3058 switch (imm->Immediate.DataType) {
3059 case TGSI_IMM_FLOAT32:
3060 for( i = 0; i < size; ++i )
3061 imms[i] =
3062 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
3063
3064 break;
3065 case TGSI_IMM_FLOAT64:
3066 case TGSI_IMM_UINT64:
3067 case TGSI_IMM_INT64:
3068 case TGSI_IMM_UINT32:
3069 for( i = 0; i < size; ++i ) {
3070 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
3071 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3072 }
3073
3074 break;
3075 case TGSI_IMM_INT32:
3076 for( i = 0; i < size; ++i ) {
3077 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3078 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3079 }
3080
3081 break;
3082 }
3083 for( i = size; i < 4; ++i )
3084 imms[i] = bld_base->base.undef;
3085
3086 if (bld->use_immediates_array) {
3087 unsigned index = bld->num_immediates;
3088 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3089 LLVMBuilderRef builder = gallivm->builder;
3090
3091 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3092 for (i = 0; i < 4; ++i ) {
3093 LLVMValueRef lindex = lp_build_const_int32(
3094 bld->bld_base.base.gallivm, index * 4 + i);
3095 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3096 bld->imms_array, &lindex, 1, "");
3097 LLVMBuildStore(builder, imms[i], imm_ptr);
3098 }
3099 } else {
3100 /* simply copy the immediate values into the next immediates[] slot */
3101 unsigned i;
3102 assert(imm->Immediate.NrTokens - 1 <= 4);
3103 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3104
3105 for(i = 0; i < 4; ++i )
3106 bld->immediates[bld->num_immediates][i] = imms[i];
3107
3108 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3109 unsigned index = bld->num_immediates;
3110 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3111 LLVMBuilderRef builder = gallivm->builder;
3112 for (i = 0; i < 4; ++i ) {
3113 LLVMValueRef lindex = lp_build_const_int32(
3114 bld->bld_base.base.gallivm, index * 4 + i);
3115 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3116 bld->imms_array, &lindex, 1, "");
3117 LLVMBuildStore(builder,
3118 bld->immediates[index][i],
3119 imm_ptr);
3120 }
3121 }
3122 }
3123
3124 bld->num_immediates++;
3125 }
3126
3127 static void
3128 ddx_emit(
3129 const struct lp_build_tgsi_action * action,
3130 struct lp_build_tgsi_context * bld_base,
3131 struct lp_build_emit_data * emit_data)
3132 {
3133 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3134
3135 emit_fetch_deriv(bld, emit_data->args[0], NULL,
3136 &emit_data->output[emit_data->chan], NULL);
3137 }
3138
3139 static void
3140 ddy_emit(
3141 const struct lp_build_tgsi_action * action,
3142 struct lp_build_tgsi_context * bld_base,
3143 struct lp_build_emit_data * emit_data)
3144 {
3145 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3146
3147 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3148 &emit_data->output[emit_data->chan]);
3149 }
3150
3151 static void
3152 kill_emit(
3153 const struct lp_build_tgsi_action * action,
3154 struct lp_build_tgsi_context * bld_base,
3155 struct lp_build_emit_data * emit_data)
3156 {
3157 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3158
3159 emit_kill(bld, bld_base->pc - 1);
3160 }
3161
3162 static void
3163 kill_if_emit(
3164 const struct lp_build_tgsi_action * action,
3165 struct lp_build_tgsi_context * bld_base,
3166 struct lp_build_emit_data * emit_data)
3167 {
3168 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3169
3170 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3171 }
3172
3173 static void
3174 tex_emit(
3175 const struct lp_build_tgsi_action * action,
3176 struct lp_build_tgsi_context * bld_base,
3177 struct lp_build_emit_data * emit_data)
3178 {
3179 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3180
3181 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3182 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3183 }
3184
3185 static void
3186 tex2_emit(
3187 const struct lp_build_tgsi_action * action,
3188 struct lp_build_tgsi_context * bld_base,
3189 struct lp_build_emit_data * emit_data)
3190 {
3191 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3192
3193 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3194 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3195 }
3196
3197 static void
3198 txb_emit(
3199 const struct lp_build_tgsi_action * action,
3200 struct lp_build_tgsi_context * bld_base,
3201 struct lp_build_emit_data * emit_data)
3202 {
3203 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3204
3205 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3206 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3207 }
3208
3209 static void
3210 txb2_emit(
3211 const struct lp_build_tgsi_action * action,
3212 struct lp_build_tgsi_context * bld_base,
3213 struct lp_build_emit_data * emit_data)
3214 {
3215 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3216
3217 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3218 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3219 }
3220
3221 static void
3222 txd_emit(
3223 const struct lp_build_tgsi_action * action,
3224 struct lp_build_tgsi_context * bld_base,
3225 struct lp_build_emit_data * emit_data)
3226 {
3227 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3228
3229 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3230 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3231 }
3232
3233 static void
3234 txl_emit(
3235 const struct lp_build_tgsi_action * action,
3236 struct lp_build_tgsi_context * bld_base,
3237 struct lp_build_emit_data * emit_data)
3238 {
3239 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3240
3241 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3242 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3243 }
3244
3245 static void
3246 txl2_emit(
3247 const struct lp_build_tgsi_action * action,
3248 struct lp_build_tgsi_context * bld_base,
3249 struct lp_build_emit_data * emit_data)
3250 {
3251 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3252
3253 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3254 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3255 }
3256
3257 static void
3258 txp_emit(
3259 const struct lp_build_tgsi_action * action,
3260 struct lp_build_tgsi_context * bld_base,
3261 struct lp_build_emit_data * emit_data)
3262 {
3263 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3264
3265 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3266 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3267 }
3268
3269 static void
3270 tg4_emit(
3271 const struct lp_build_tgsi_action * action,
3272 struct lp_build_tgsi_context * bld_base,
3273 struct lp_build_emit_data * emit_data)
3274 {
3275 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3276
3277 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3278 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3279 }
3280
3281 static void
3282 txq_emit(
3283 const struct lp_build_tgsi_action * action,
3284 struct lp_build_tgsi_context * bld_base,
3285 struct lp_build_emit_data * emit_data)
3286 {
3287 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3288
3289 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3290 }
3291
3292 static void
3293 txf_emit(
3294 const struct lp_build_tgsi_action * action,
3295 struct lp_build_tgsi_context * bld_base,
3296 struct lp_build_emit_data * emit_data)
3297 {
3298 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3299
3300 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3301 }
3302
3303 static void
3304 sample_i_emit(
3305 const struct lp_build_tgsi_action * action,
3306 struct lp_build_tgsi_context * bld_base,
3307 struct lp_build_emit_data * emit_data)
3308 {
3309 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3310
3311 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3312 }
3313
3314 static void
3315 sample_emit(
3316 const struct lp_build_tgsi_action * action,
3317 struct lp_build_tgsi_context * bld_base,
3318 struct lp_build_emit_data * emit_data)
3319 {
3320 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3321
3322 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3323 FALSE, emit_data->output);
3324 }
3325
3326 static void
3327 sample_b_emit(
3328 const struct lp_build_tgsi_action * action,
3329 struct lp_build_tgsi_context * bld_base,
3330 struct lp_build_emit_data * emit_data)
3331 {
3332 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3333
3334 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3335 FALSE, emit_data->output);
3336 }
3337
3338 static void
3339 sample_c_emit(
3340 const struct lp_build_tgsi_action * action,
3341 struct lp_build_tgsi_context * bld_base,
3342 struct lp_build_emit_data * emit_data)
3343 {
3344 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3345
3346 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3347 TRUE, emit_data->output);
3348 }
3349
3350 static void
3351 sample_c_lz_emit(
3352 const struct lp_build_tgsi_action * action,
3353 struct lp_build_tgsi_context * bld_base,
3354 struct lp_build_emit_data * emit_data)
3355 {
3356 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3357
3358 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3359 TRUE, emit_data->output);
3360 }
3361
3362 static void
3363 sample_d_emit(
3364 const struct lp_build_tgsi_action * action,
3365 struct lp_build_tgsi_context * bld_base,
3366 struct lp_build_emit_data * emit_data)
3367 {
3368 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3369
3370 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3371 FALSE, emit_data->output);
3372 }
3373
3374 static void
3375 sample_l_emit(
3376 const struct lp_build_tgsi_action * action,
3377 struct lp_build_tgsi_context * bld_base,
3378 struct lp_build_emit_data * emit_data)
3379 {
3380 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3381
3382 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3383 FALSE, emit_data->output);
3384 }
3385
3386 static void
3387 sviewinfo_emit(
3388 const struct lp_build_tgsi_action * action,
3389 struct lp_build_tgsi_context * bld_base,
3390 struct lp_build_emit_data * emit_data)
3391 {
3392 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3393
3394 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3395 }
3396
3397 static LLVMValueRef
3398 mask_vec(struct lp_build_tgsi_context *bld_base)
3399 {
3400 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3401 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3402 struct lp_exec_mask *exec_mask = &bld->exec_mask;
3403
3404 if (!exec_mask->has_mask) {
3405 return lp_build_mask_value(bld->mask);
3406 }
3407 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
3408 exec_mask->exec_mask, "");
3409 }
3410
3411 static void
3412 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3413 LLVMValueRef ptr,
3414 LLVMValueRef mask)
3415 {
3416 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3417 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3418
3419 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3420
3421 LLVMBuildStore(builder, current_vec, ptr);
3422 }
3423
3424 static void
3425 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3426 LLVMValueRef ptr,
3427 LLVMValueRef mask)
3428 {
3429 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3430 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3431
3432 current_vec = lp_build_select(&bld_base->uint_bld,
3433 mask,
3434 bld_base->uint_bld.zero,
3435 current_vec);
3436
3437 LLVMBuildStore(builder, current_vec, ptr);
3438 }
3439
3440 static LLVMValueRef
3441 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3442 LLVMValueRef current_mask_vec,
3443 LLVMValueRef total_emitted_vertices_vec)
3444 {
3445 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3446 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3447 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3448 total_emitted_vertices_vec,
3449 bld->max_output_vertices_vec);
3450
3451 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3452 }
3453
3454 static void
3455 emit_vertex(
3456 const struct lp_build_tgsi_action * action,
3457 struct lp_build_tgsi_context * bld_base,
3458 struct lp_build_emit_data * emit_data)
3459 {
3460 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3461 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3462
3463 if (bld->gs_iface->emit_vertex) {
3464 LLVMValueRef mask = mask_vec(bld_base);
3465 LLVMValueRef total_emitted_vertices_vec =
3466 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3467 mask = clamp_mask_to_max_output_vertices(bld, mask,
3468 total_emitted_vertices_vec);
3469 gather_outputs(bld);
3470 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
3471 bld->outputs,
3472 total_emitted_vertices_vec);
3473 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3474 mask);
3475 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3476 mask);
3477 #if DUMP_GS_EMITS
3478 lp_build_print_value(bld->bld_base.base.gallivm,
3479 " +++ emit vertex masked ones = ",
3480 mask);
3481 lp_build_print_value(bld->bld_base.base.gallivm,
3482 " +++ emit vertex emitted = ",
3483 total_emitted_vertices_vec);
3484 #endif
3485 }
3486 }
3487
3488
3489 static void
3490 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
3491 LLVMValueRef mask)
3492 {
3493 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3494 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3495
3496 if (bld->gs_iface->end_primitive) {
3497 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3498 LLVMValueRef emitted_vertices_vec =
3499 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
3500 LLVMValueRef emitted_prims_vec =
3501 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3502
3503 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3504 emitted_vertices_vec,
3505 uint_bld->zero);
3506 /* We need to combine the current execution mask with the mask
3507 telling us which, if any, execution slots actually have
3508 unemitted primitives, this way we make sure that end_primitives
3509 executes only on the paths that have unflushed vertices */
3510 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
3511
3512 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
3513 emitted_vertices_vec,
3514 emitted_prims_vec);
3515
3516 #if DUMP_GS_EMITS
3517 lp_build_print_value(bld->bld_base.base.gallivm,
3518 " +++ end prim masked ones = ",
3519 mask);
3520 lp_build_print_value(bld->bld_base.base.gallivm,
3521 " +++ end prim emitted verts1 = ",
3522 emitted_vertices_vec);
3523 lp_build_print_value(bld->bld_base.base.gallivm,
3524 " +++ end prim emitted prims1 = ",
3525 LLVMBuildLoad(builder,
3526 bld->emitted_prims_vec_ptr, ""));
3527 #endif
3528 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
3529 mask);
3530 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
3531 mask);
3532 #if DUMP_GS_EMITS
3533 lp_build_print_value(bld->bld_base.base.gallivm,
3534 " +++ end prim emitted verts2 = ",
3535 LLVMBuildLoad(builder,
3536 bld->emitted_vertices_vec_ptr, ""));
3537 #endif
3538 }
3539
3540 }
3541
3542 static void
3543 end_primitive(
3544 const struct lp_build_tgsi_action * action,
3545 struct lp_build_tgsi_context * bld_base,
3546 struct lp_build_emit_data * emit_data)
3547 {
3548 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3549
3550 if (bld->gs_iface->end_primitive) {
3551 LLVMValueRef mask = mask_vec(bld_base);
3552 end_primitive_masked(bld_base, mask);
3553 }
3554 }
3555
3556 static void
3557 cal_emit(
3558 const struct lp_build_tgsi_action * action,
3559 struct lp_build_tgsi_context * bld_base,
3560 struct lp_build_emit_data * emit_data)
3561 {
3562 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3563
3564 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
3565 &bld_base->pc);
3566 }
3567
3568 static void
3569 ret_emit(
3570 const struct lp_build_tgsi_action * action,
3571 struct lp_build_tgsi_context * bld_base,
3572 struct lp_build_emit_data * emit_data)
3573 {
3574 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3575
3576 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
3577 }
3578
3579 static void
3580 brk_emit(
3581 const struct lp_build_tgsi_action * action,
3582 struct lp_build_tgsi_context * bld_base,
3583 struct lp_build_emit_data * emit_data)
3584 {
3585 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3586
3587 lp_exec_break(&bld->exec_mask, bld_base);
3588 }
3589
3590 static void
3591 breakc_emit(
3592 const struct lp_build_tgsi_action * action,
3593 struct lp_build_tgsi_context * bld_base,
3594 struct lp_build_emit_data * emit_data)
3595 {
3596 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3597 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3598 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3599 LLVMValueRef unsigned_cond =
3600 LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
3601 LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3602 unsigned_cond,
3603 uint_bld->zero);
3604
3605 lp_exec_break_condition(&bld->exec_mask, cond);
3606 }
3607
3608 static void
3609 if_emit(
3610 const struct lp_build_tgsi_action * action,
3611 struct lp_build_tgsi_context * bld_base,
3612 struct lp_build_emit_data * emit_data)
3613 {
3614 LLVMValueRef tmp;
3615 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3616
3617 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
3618 emit_data->args[0], bld->bld_base.base.zero);
3619 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3620 }
3621
3622 static void
3623 uif_emit(
3624 const struct lp_build_tgsi_action * action,
3625 struct lp_build_tgsi_context * bld_base,
3626 struct lp_build_emit_data * emit_data)
3627 {
3628 LLVMValueRef tmp;
3629 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3630 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3631
3632 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3633 emit_data->args[0], uint_bld->zero);
3634 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3635 }
3636
3637 static void
3638 case_emit(
3639 const struct lp_build_tgsi_action * action,
3640 struct lp_build_tgsi_context * bld_base,
3641 struct lp_build_emit_data * emit_data)
3642 {
3643 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3644
3645 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
3646 }
3647
3648 static void
3649 default_emit(
3650 const struct lp_build_tgsi_action * action,
3651 struct lp_build_tgsi_context * bld_base,
3652 struct lp_build_emit_data * emit_data)
3653 {
3654 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3655
3656 lp_exec_default(&bld->exec_mask, bld_base);
3657 }
3658
3659 static void
3660 switch_emit(
3661 const struct lp_build_tgsi_action * action,
3662 struct lp_build_tgsi_context * bld_base,
3663 struct lp_build_emit_data * emit_data)
3664 {
3665 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3666
3667 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
3668 }
3669
3670 static void
3671 endswitch_emit(
3672 const struct lp_build_tgsi_action * action,
3673 struct lp_build_tgsi_context * bld_base,
3674 struct lp_build_emit_data * emit_data)
3675 {
3676 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3677
3678 lp_exec_endswitch(&bld->exec_mask, bld_base);
3679 }
3680
3681 static void
3682 bgnloop_emit(
3683 const struct lp_build_tgsi_action * action,
3684 struct lp_build_tgsi_context * bld_base,
3685 struct lp_build_emit_data * emit_data)
3686 {
3687 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3688
3689 lp_exec_bgnloop(&bld->exec_mask);
3690 }
3691
3692 static void
3693 bgnsub_emit(
3694 const struct lp_build_tgsi_action * action,
3695 struct lp_build_tgsi_context * bld_base,
3696 struct lp_build_emit_data * emit_data)
3697 {
3698 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3699
3700 lp_exec_mask_bgnsub(&bld->exec_mask);
3701 }
3702
3703 static void
3704 else_emit(
3705 const struct lp_build_tgsi_action * action,
3706 struct lp_build_tgsi_context * bld_base,
3707 struct lp_build_emit_data * emit_data)
3708 {
3709 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3710
3711 lp_exec_mask_cond_invert(&bld->exec_mask);
3712 }
3713
3714 static void
3715 endif_emit(
3716 const struct lp_build_tgsi_action * action,
3717 struct lp_build_tgsi_context * bld_base,
3718 struct lp_build_emit_data * emit_data)
3719 {
3720 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3721
3722 lp_exec_mask_cond_pop(&bld->exec_mask);
3723 }
3724
3725 static void
3726 endloop_emit(
3727 const struct lp_build_tgsi_action * action,
3728 struct lp_build_tgsi_context * bld_base,
3729 struct lp_build_emit_data * emit_data)
3730 {
3731 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3732
3733 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
3734 }
3735
3736 static void
3737 endsub_emit(
3738 const struct lp_build_tgsi_action * action,
3739 struct lp_build_tgsi_context * bld_base,
3740 struct lp_build_emit_data * emit_data)
3741 {
3742 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3743
3744 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
3745 }
3746
3747 static void
3748 cont_emit(
3749 const struct lp_build_tgsi_action * action,
3750 struct lp_build_tgsi_context * bld_base,
3751 struct lp_build_emit_data * emit_data)
3752 {
3753 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3754
3755 lp_exec_continue(&bld->exec_mask);
3756 }
3757
3758 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
3759 {
3760 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3761 struct gallivm_state * gallivm = bld_base->base.gallivm;
3762
3763 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
3764 LLVMValueRef array_size =
3765 lp_build_const_int32(gallivm,
3766 bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
3767 bld->temps_array = lp_build_array_alloca(gallivm,
3768 bld_base->base.vec_type, array_size,
3769 "temp_array");
3770 }
3771
3772 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
3773 LLVMValueRef array_size =
3774 lp_build_const_int32(gallivm,
3775 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
3776 bld->outputs_array = lp_build_array_alloca(gallivm,
3777 bld_base->base.vec_type, array_size,
3778 "output_array");
3779 }
3780
3781 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3782 LLVMValueRef array_size =
3783 lp_build_const_int32(gallivm,
3784 bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
3785 bld->imms_array = lp_build_array_alloca(gallivm,
3786 bld_base->base.vec_type, array_size,
3787 "imms_array");
3788 }
3789
3790 /* If we have indirect addressing in inputs we need to copy them into
3791 * our alloca array to be able to iterate over them */
3792 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
3793 unsigned index, chan;
3794 LLVMTypeRef vec_type = bld_base->base.vec_type;
3795 LLVMValueRef array_size = lp_build_const_int32(gallivm,
3796 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
3797 bld->inputs_array = lp_build_array_alloca(gallivm,
3798 vec_type, array_size,
3799 "input_array");
3800
3801 assert(bld_base->info->num_inputs
3802 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
3803
3804 for (index = 0; index < bld_base->info->num_inputs; ++index) {
3805 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
3806 LLVMValueRef lindex =
3807 lp_build_const_int32(gallivm, index * 4 + chan);
3808 LLVMValueRef input_ptr =
3809 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
3810 &lindex, 1, "");
3811 LLVMValueRef value = bld->inputs[index][chan];
3812 if (value)
3813 LLVMBuildStore(gallivm->builder, value, input_ptr);
3814 }
3815 }
3816 }
3817
3818 if (bld->gs_iface) {
3819 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
3820 bld->emitted_prims_vec_ptr =
3821 lp_build_alloca(gallivm,
3822 uint_bld->vec_type,
3823 "emitted_prims_ptr");
3824 bld->emitted_vertices_vec_ptr =
3825 lp_build_alloca(gallivm,
3826 uint_bld->vec_type,
3827 "emitted_vertices_ptr");
3828 bld->total_emitted_vertices_vec_ptr =
3829 lp_build_alloca(gallivm,
3830 uint_bld->vec_type,
3831 "total_emitted_vertices_ptr");
3832
3833 LLVMBuildStore(gallivm->builder, uint_bld->zero,
3834 bld->emitted_prims_vec_ptr);
3835 LLVMBuildStore(gallivm->builder, uint_bld->zero,
3836 bld->emitted_vertices_vec_ptr);
3837 LLVMBuildStore(gallivm->builder, uint_bld->zero,
3838 bld->total_emitted_vertices_vec_ptr);
3839 }
3840
3841 if (DEBUG_EXECUTION) {
3842 lp_build_printf(gallivm, "\n");
3843 emit_dump_file(bld, TGSI_FILE_CONSTANT);
3844 if (!bld->gs_iface)
3845 emit_dump_file(bld, TGSI_FILE_INPUT);
3846 }
3847 }
3848
3849 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
3850 {
3851 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3852 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3853
3854 if (DEBUG_EXECUTION) {
3855 /* for debugging */
3856 if (0) {
3857 emit_dump_file(bld, TGSI_FILE_TEMPORARY);
3858 }
3859 emit_dump_file(bld, TGSI_FILE_OUTPUT);
3860 lp_build_printf(bld_base->base.gallivm, "\n");
3861 }
3862
3863 /* If we have indirect addressing in outputs we need to copy our alloca array
3864 * to the outputs slots specified by the caller */
3865 if (bld->gs_iface) {
3866 LLVMValueRef total_emitted_vertices_vec;
3867 LLVMValueRef emitted_prims_vec;
3868 /* implicit end_primitives, needed in case there are any unflushed
3869 vertices in the cache. Note must not call end_primitive here
3870 since the exec_mask is not valid at this point. */
3871 end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
3872
3873 total_emitted_vertices_vec =
3874 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3875 emitted_prims_vec =
3876 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3877
3878 bld->gs_iface->gs_epilogue(bld->gs_iface,
3879 &bld->bld_base,
3880 total_emitted_vertices_vec,
3881 emitted_prims_vec);
3882 } else {
3883 gather_outputs(bld);
3884 }
3885 }
3886
3887 void
3888 lp_build_tgsi_soa(struct gallivm_state *gallivm,
3889 const struct tgsi_token *tokens,
3890 struct lp_type type,
3891 struct lp_build_mask_context *mask,
3892 LLVMValueRef consts_ptr,
3893 LLVMValueRef const_sizes_ptr,
3894 const struct lp_bld_tgsi_system_values *system_values,
3895 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
3896 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
3897 LLVMValueRef context_ptr,
3898 LLVMValueRef thread_data_ptr,
3899 struct lp_build_sampler_soa *sampler,
3900 const struct tgsi_shader_info *info,
3901 const struct lp_build_tgsi_gs_iface *gs_iface)
3902 {
3903 struct lp_build_tgsi_soa_context bld;
3904
3905 struct lp_type res_type;
3906
3907 assert(type.length <= LP_MAX_VECTOR_LENGTH);
3908 memset(&res_type, 0, sizeof res_type);
3909 res_type.width = type.width;
3910 res_type.length = type.length;
3911 res_type.sign = 1;
3912
3913 /* Setup build context */
3914 memset(&bld, 0, sizeof bld);
3915 lp_build_context_init(&bld.bld_base.base, gallivm, type);
3916 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
3917 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
3918 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
3919 {
3920 struct lp_type dbl_type;
3921 dbl_type = type;
3922 dbl_type.width *= 2;
3923 lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
3924 }
3925 {
3926 struct lp_type uint64_type;
3927 uint64_type = lp_uint_type(type);
3928 uint64_type.width *= 2;
3929 lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
3930 }
3931 {
3932 struct lp_type int64_type;
3933 int64_type = lp_int_type(type);
3934 int64_type.width *= 2;
3935 lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
3936 }
3937 bld.mask = mask;
3938 bld.inputs = inputs;
3939 bld.outputs = outputs;
3940 bld.consts_ptr = consts_ptr;
3941 bld.const_sizes_ptr = const_sizes_ptr;
3942 bld.sampler = sampler;
3943 bld.bld_base.info = info;
3944 bld.indirect_files = info->indirect_files;
3945 bld.context_ptr = context_ptr;
3946 bld.thread_data_ptr = thread_data_ptr;
3947
3948 /*
3949 * If the number of temporaries is rather large then we just
3950 * allocate them as an array right from the start and treat
3951 * like indirect temporaries.
3952 */
3953 if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
3954 bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
3955 }
3956 /*
3957 * For performance reason immediates are always backed in a static
3958 * array, but if their number is too great, we have to use just
3959 * a dynamically allocated array.
3960 */
3961 bld.use_immediates_array =
3962 (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
3963 if (bld.use_immediates_array) {
3964 bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
3965 }
3966
3967
3968 bld.bld_base.soa = TRUE;
3969 bld.bld_base.emit_debug = emit_debug;
3970 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
3971 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
3972 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
3973 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
3974 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
3975 bld.bld_base.emit_store = emit_store;
3976
3977 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
3978 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
3979
3980 bld.bld_base.emit_prologue = emit_prologue;
3981 bld.bld_base.emit_epilogue = emit_epilogue;
3982
3983 /* Set opcode actions */
3984 lp_set_default_actions_cpu(&bld.bld_base);
3985
3986 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
3987 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
3988 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
3989 bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
3990 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
3991 bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
3992 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
3993 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
3994 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
3995 bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
3996 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
3997 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
3998 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
3999 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
4000 bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
4001 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
4002 bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
4003 bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
4004 bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
4005 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
4006 bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
4007 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
4008 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
4009 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
4010 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
4011 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
4012 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
4013 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
4014 bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
4015 bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
4016 bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
4017 bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
4018 /* DX10 sampling ops */
4019 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
4020 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
4021 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
4022 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
4023 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
4024 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
4025 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
4026 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
4027 bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
4028
4029 if (gs_iface) {
4030 /* There's no specific value for this because it should always
4031 * be set, but apps using ext_geometry_shader4 quite often
4032 * were forgetting so we're using MAX_VERTEX_VARYING from
4033 * that spec even though we could debug_assert if it's not
4034 * set, but that's a lot uglier. */
4035 uint max_output_vertices;
4036
4037 /* inputs are always indirect with gs */
4038 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4039 bld.gs_iface = gs_iface;
4040 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
4041 bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
4042 bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
4043
4044 max_output_vertices =
4045 info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
4046 if (!max_output_vertices)
4047 max_output_vertices = 32;
4048
4049 bld.max_output_vertices_vec =
4050 lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
4051 max_output_vertices);
4052 }
4053
4054 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
4055
4056 bld.system_values = *system_values;
4057
4058 lp_build_tgsi_llvm(&bld.bld_base, tokens);
4059
4060 if (0) {
4061 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
4062 LLVMValueRef function = LLVMGetBasicBlockParent(block);
4063 debug_printf("11111111111111111111111111111 \n");
4064 tgsi_dump(tokens, 0);
4065 lp_debug_dump_value(function);
4066 debug_printf("2222222222222222222222222222 \n");
4067 }
4068
4069 if (0) {
4070 LLVMModuleRef module = LLVMGetGlobalParent(
4071 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
4072 LLVMDumpModule(module);
4073
4074 }
4075 lp_exec_mask_fini(&bld.exec_mask);
4076 }