e5d0293b8f9cb53a6f7ee84d8ad720797d84a673
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "tgsi/tgsi_strings.h"
51 #include "lp_bld_tgsi_action.h"
52 #include "lp_bld_type.h"
53 #include "lp_bld_const.h"
54 #include "lp_bld_arit.h"
55 #include "lp_bld_bitarit.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_init.h"
58 #include "lp_bld_logic.h"
59 #include "lp_bld_swizzle.h"
60 #include "lp_bld_flow.h"
61 #include "lp_bld_quad.h"
62 #include "lp_bld_tgsi.h"
63 #include "lp_bld_limits.h"
64 #include "lp_bld_debug.h"
65 #include "lp_bld_printf.h"
66 #include "lp_bld_sample.h"
67 #include "lp_bld_struct.h"
68
69 /* SM 4.0 says that subroutines can nest 32 deep and
70 * we need one more for our main function */
71 #define LP_MAX_NUM_FUNCS 33
72
73 #define DUMP_GS_EMITS 0
74
75 /*
76 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
77 * instruction.
78 *
79 * TODO:
80 * - take execution masks in consideration
81 * - debug control-flow instructions
82 */
83 #define DEBUG_EXECUTION 0
84
85
86 /*
87 * Emit code to print a register value.
88 */
89 static void
90 emit_dump_reg(struct gallivm_state *gallivm,
91 unsigned file,
92 unsigned index,
93 unsigned chan,
94 LLVMValueRef value)
95 {
96 char buf[32];
97
98 util_snprintf(buf, sizeof buf, " %s[%u].%c = ",
99 tgsi_file_name(file),
100 index, "xyzw"[chan]);
101
102 lp_build_print_value(gallivm, buf, value);
103 }
104
105 /*
106 * Return the context for the current function.
107 * (always 'main', if shader doesn't do any function calls)
108 */
109 static inline struct function_ctx *
110 func_ctx(struct lp_exec_mask *mask)
111 {
112 assert(mask->function_stack_size > 0);
113 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
114 return &mask->function_stack[mask->function_stack_size - 1];
115 }
116
117 /*
118 * Returns true if we're in a loop.
119 * It's global, meaning that it returns true even if there's
120 * no loop inside the current function, but we were inside
121 * a loop inside another function, from which this one was called.
122 */
123 static inline boolean
124 mask_has_loop(struct lp_exec_mask *mask)
125 {
126 int i;
127 for (i = mask->function_stack_size - 1; i >= 0; --i) {
128 const struct function_ctx *ctx = &mask->function_stack[i];
129 if (ctx->loop_stack_size > 0)
130 return TRUE;
131 }
132 return FALSE;
133 }
134
135 /*
136 * Returns true if we're inside a switch statement.
137 * It's global, meaning that it returns true even if there's
138 * no switch in the current function, but we were inside
139 * a switch inside another function, from which this one was called.
140 */
141 static inline boolean
142 mask_has_switch(struct lp_exec_mask *mask)
143 {
144 int i;
145 for (i = mask->function_stack_size - 1; i >= 0; --i) {
146 const struct function_ctx *ctx = &mask->function_stack[i];
147 if (ctx->switch_stack_size > 0)
148 return TRUE;
149 }
150 return FALSE;
151 }
152
153 /*
154 * Returns true if we're inside a conditional.
155 * It's global, meaning that it returns true even if there's
156 * no conditional in the current function, but we were inside
157 * a conditional inside another function, from which this one was called.
158 */
159 static inline boolean
160 mask_has_cond(struct lp_exec_mask *mask)
161 {
162 int i;
163 for (i = mask->function_stack_size - 1; i >= 0; --i) {
164 const struct function_ctx *ctx = &mask->function_stack[i];
165 if (ctx->cond_stack_size > 0)
166 return TRUE;
167 }
168 return FALSE;
169 }
170
171
172 /*
173 * Initialize a function context at the specified index.
174 */
/*
 * Initialize a function context at the specified index of the function stack.
 * Resets all per-function control-flow stacks and allocates the loop-iteration
 * guard variable.
 */
static void
lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
{
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = &mask->function_stack[function_idx];

   ctx->cond_stack_size = 0;
   ctx->loop_stack_size = 0;
   ctx->switch_stack_size = 0;

   if (function_idx == 0) {
      /* The main function inherits the return mask set up by
       * lp_exec_mask_init; subroutine contexts get theirs saved/restored
       * in lp_exec_mask_call/endsub.
       */
      ctx->ret_mask = mask->ret_mask;
   }

   /* Per-function runtime counter that caps loop iterations at
    * LP_MAX_TGSI_LOOP_ITERATIONS, so malformed shaders can't hang the JIT
    * code (see lp_exec_endloop, which decrements and tests it).
    */
   ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
                                       int_type, "looplimiter");
   LLVMBuildStore(
      builder,
      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
      ctx->loop_limiter);
}
197
/*
 * Initialize the execution-mask state: all lanes active (all-ones masks),
 * an empty function stack with a context for 'main', and no active
 * control-flow constructs.
 */
static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->ret_in_main = FALSE;
   /* For the main function */
   mask->function_stack_size = 1;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   /* Every partial mask starts out all-ones, i.e. all SoA lanes enabled. */
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
         mask->cond_mask = mask->switch_mask =
         LLVMConstAllOnes(mask->int_vec_type);

   /* Freed in lp_exec_mask_fini. */
   mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
                                 sizeof(mask->function_stack[0]));
   lp_exec_mask_function_init(mask, 0);
}
215
/*
 * Release the function stack allocated by lp_exec_mask_init.
 */
static void
lp_exec_mask_fini(struct lp_exec_mask *mask)
{
   FREE(mask->function_stack);
}
221
/*
 * Recompute exec_mask as the conjunction of all currently relevant partial
 * masks (conditional, loop break/continue, switch, return), and update
 * has_mask accordingly. Must be called whenever any partial mask changes.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   boolean has_loop_mask = mask_has_loop(mask);
   boolean has_cond_mask = mask_has_cond(mask);
   boolean has_switch_mask = mask_has_switch(mask);
   /* The return mask matters inside any subroutine, or in main once a
    * conditional RET was seen there (ret_in_main).
    */
   boolean has_ret_mask = mask->function_stack_size > 1 ||
         mask->ret_in_main;

   if (has_loop_mask) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (has_switch_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->switch_mask,
                                     "switchmask");
   }

   if (has_ret_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   /* has_mask tells stores whether exec_mask must be honoured at all. */
   mask->has_mask = (has_cond_mask ||
                     has_loop_mask ||
                     has_switch_mask ||
                     has_ret_mask);
}
265
/*
 * Enter an IF block: save the current conditional mask on the stack and
 * AND the new condition 'val' into it.
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
      /* Nesting too deep: keep counting pushes so pops stay balanced,
       * but emit no code for this level.
       */
      ctx->cond_stack_size++;
      return;
   }
   if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
      /* Outermost conditional in main: mask must still be all-ones. */
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}
287
/*
 * Handle an ELSE: flip the current conditional mask within the enclosing
 * (saved) mask, so lanes that failed the IF condition become active.
 */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(ctx->cond_stack_size);
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      /* This level was skipped by cond_push due to over-deep nesting. */
      return;
   prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
   if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");

   /* Restrict the inverted mask to lanes active before the IF. */
   mask->cond_mask = LLVMBuildAnd(builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}
310
/*
 * Leave an IF/ELSE block (ENDIF): restore the conditional mask saved by
 * the matching cond_push.
 */
static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   struct function_ctx *ctx = func_ctx(mask);
   assert(ctx->cond_stack_size);
   --ctx->cond_stack_size;
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      /* This level was only counted, never emitted (over-deep nesting). */
      return;
   mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
   lp_exec_mask_update(mask);
}
321
/*
 * Begin a loop (BGNLOOP): save the enclosing loop state, allocate the
 * break-mask variable, and start the loop's basic block.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
      /* Too deep: only track depth so ENDLOOP stays balanced. */
      ++ctx->loop_stack_size;
      return;
   }

   /* break_type is shared between loops and switches, so its stack is
    * indexed by the combined nesting depth.
    */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   /* Save the enclosing loop's state before overwriting it. */
   ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
   ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
   ++ctx->loop_stack_size;

   /* break_mask lives in memory so it survives across loop iterations
    * (reloaded below and stored back at ENDLOOP).
    */
   ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, ctx->loop_block);
   LLVMPositionBuilderAtEnd(builder, ctx->loop_block);

   mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");

   lp_exec_mask_update(mask);
}
354
/*
 * Handle a BRK instruction: disable the currently active lanes in either
 * the loop break mask or the switch mask, depending on which construct
 * the break belongs to.
 */
static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      /* Lanes executing the break leave the loop: clear them in break_mask. */
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      /* Break inside a switch. If the break is immediately followed by
       * CASE or ENDSWITCH it is unconditional for all lanes.
       */
      unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);


      if (ctx->switch_in_default) {
         /*
          * stop default execution but only if this is an unconditional switch.
          * (The condition here is not perfect since dead code after break is
          * allowed but should be sufficient since false negatives are just
          * unoptimized - so we don't have to pre-evaluate that).
          */
         if(break_always && ctx->switch_pc) {
            /* Jump back to where the deferred default left off. */
            bld_base->pc = ctx->switch_pc;
            return;
         }
      }

      if (break_always) {
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}
404
405 static void lp_exec_continue(struct lp_exec_mask *mask)
406 {
407 LLVMBuilderRef builder = mask->bld->gallivm->builder;
408 LLVMValueRef exec_mask = LLVMBuildNot(builder,
409 mask->exec_mask,
410 "");
411
412 mask->cont_mask = LLVMBuildAnd(builder,
413 mask->cont_mask,
414 exec_mask, "");
415
416 lp_exec_mask_update(mask);
417 }
418
419
/*
 * End a loop (ENDLOOP): emit the back edge, guarded by both the runtime
 * execution mask and the iteration limiter, then restore the enclosing
 * loop's state.
 */
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   /* Integer wide enough to view the whole mask vector as one scalar,
    * for a cheap "any lane active" test.
    */
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);


   assert(ctx->loop_stack_size);
   if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      /* This level was skipped at BGNLOOP due to over-deep nesting. */
      --ctx->loop_stack_size;
      return;
   }

   /*
    * Restore the cont_mask, but don't pop
    */
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, ctx->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   /* Loop back while any lane is active and the limiter hasn't expired. */
   LLVMBuildCondBr(builder,
                   icond, ctx->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   /* Pop and restore the enclosing loop's saved state. */
   assert(ctx->loop_stack_size);
   --ctx->loop_stack_size;
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
   mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
   ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
   ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
         ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
499
/*
 * Begin a SWITCH statement: save the enclosing switch state and start
 * with an all-zero switch mask (no case matched yet).
 */
static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      /* Too deep: only track depth so ENDSWITCH stays balanced. */
      ctx->switch_stack_size++;
      return;
   }

   /* break_type is shared with loops; its stack uses combined depth. */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   /* Save the enclosing switch's state before overwriting it. */
   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size++;

   /* No lanes execute until a CASE matches (or default is entered). */
   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}
530
/*
 * End a SWITCH statement: if a default block was deferred (it wasn't the
 * last statement and wasn't fallen into), execute it now by rewinding the
 * pc; otherwise restore the enclosing switch state.
 */
static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      /* This level was skipped at SWITCH due to over-deep nesting. */
      ctx->switch_stack_size--;
      return;
   }

   /* check if there's deferred default if so do it now */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* Default runs for lanes no case claimed, within the outer mask. */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * re-purpose switch_pc to point to here again, since we stop execution of
       * the deferred default after next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   }

   else if (ctx->switch_pc && ctx->switch_in_default) {
      /* Returning here after re-executing the deferred default. */
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   /* Pop and restore the enclosing switch's saved state. */
   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
582
/*
 * Handle a CASE statement: enable lanes whose switch value equals
 * 'caseval' (plus lanes falling through from the previous case), and
 * record matched lanes so default can later exclude them.
 */
static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   LLVMValueRef casemask, prevmask;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (!ctx->switch_in_default) {
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
      /* Accumulate every matched lane; default covers the complement. */
      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
                                             ctx->switch_mask_default, "sw_default_mask");
      /* OR in the previous switch mask to implement fallthrough. */
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}
607
608 /*
609 * Analyse default statement in a switch.
610 * \return true if default is last statement, false otherwise
611 * \param default_pc_start contains pc of instruction to jump to
612 * if default wasn't last but there's no
613 * fallthrough into default.
614 */
/*
 * Analyse default statement in a switch.
 * \return true if default is last statement, false otherwise
 * \param default_pc_start contains pc of instruction to jump to
 *                         if default wasn't last but there's no
 *                         fallthrough into default.
 */
static boolean default_analyse_is_last(struct lp_exec_mask *mask,
                                       struct lp_build_tgsi_context * bld_base,
                                       int *default_pc_start)
{
   unsigned pc = bld_base->pc;
   struct function_ctx *ctx = func_ctx(mask);
   /* Track nesting so CASE/ENDSWITCH of inner switches are ignored. */
   int curr_switch_stack = ctx->switch_stack_size;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return false;
   }

   /* skip over case statements which are together with default */
   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
      pc++;
   }

   while (pc != ~0u && pc < bld_base->num_instructions) {
      unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
      switch (opcode) {
      case TGSI_OPCODE_CASE:
         if (curr_switch_stack == ctx->switch_stack_size) {
            /* A case at our level follows: default is not last. */
            *default_pc_start = pc - 1;
            return false;
         }
         break;
      case TGSI_OPCODE_SWITCH:
         curr_switch_stack++;
         break;
      case TGSI_OPCODE_ENDSWITCH:
         if (curr_switch_stack == ctx->switch_stack_size) {
            /* Our switch ends with no further cases: default is last. */
            *default_pc_start = pc - 1;
            return true;
         }
         curr_switch_stack--;
         break;
      }
      pc++;
   }
   /* should never arrive here */
   assert(0);
   return true;
}
658
/*
 * Handle a DEFAULT statement. If default is the last statement of the
 * switch it is executed inline; otherwise its execution is deferred to
 * ENDSWITCH time (see lp_exec_endswitch) by recording the pc.
 */
static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc;
   boolean default_is_last;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not be always at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is last statement in switch (note that case statements appearing
    * "at the same time" as default don't change that) everything is just fine,
    * update switch mask and go on. This means we can handle default with
    * fallthrough INTO it without overhead, if it is last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* Lanes not claimed by any case, plus fallthrough lanes. */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, "case" immediately before default isn't really a
       * fallthrough, however we still have to count them as such as we
       * already have updated the masks.
       * If that happens in practice could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do switch analysis at switch start time instead).
       */
      unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not last statement and there was no fallthrough into it,
       * we record the PC and continue execution at next case (again, those
       * case encountered at the same time don't count). At endswitch
       * time, we update switchmask, and go back executing the code we skipped
       * until the next break (possibly re-executing some code with changed mask
       * if there was a fallthrough out of default).
       * Finally, if it is not last statement and there was a fallthrough into it,
       * do the same as with the former case, except instead of skipping the code
       * just execute it without updating the mask, then go back and re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         bld_base->pc = default_exec_pc;
      }
   }
}
723
724
725 /* stores val into an address pointed to by dst_ptr.
726 * mask->exec_mask is used to figure out which bits of val
727 * should be stored into the address
728 * (0 means don't store this bit, 1 means do store).
729 */
/* stores val into an address pointed to by dst_ptr.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               struct lp_build_context *bld_store,
                               LLVMValueRef val,
                               LLVMValueRef dst_ptr)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   /* NULL exec_mask means all lanes active -> plain unconditional store. */
   LLVMValueRef exec_mask = mask->has_mask ? mask->exec_mask : NULL;

   assert(lp_check_value(bld_store->type, val));
   assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));

   if (exec_mask) {
      LLVMValueRef res, dst;

      /* Read-modify-write: merge new lanes over the old contents. */
      dst = LLVMBuildLoad(builder, dst_ptr, "");
      res = lp_build_select(bld_store, exec_mask, val, dst);
      LLVMBuildStore(builder, res, dst_ptr);
   } else
      LLVMBuildStore(builder, val, dst_ptr);
}
751
/*
 * Handle a subroutine CAL: push a fresh function context (saving the
 * return pc and ret mask) and jump to the callee by rewriting *pc.
 * Calls beyond LP_MAX_NUM_FUNCS deep are silently ignored.
 */
static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
      return;
   }

   lp_exec_mask_function_init(mask, mask->function_stack_size);
   mask->function_stack[mask->function_stack_size].pc = *pc;
   mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
   mask->function_stack_size++;
   *pc = func;
}
766
/*
 * Handle a RET instruction. An unconditional return from main terminates
 * translation (*pc = -1); otherwise the currently active lanes are cleared
 * from the return mask so they skip the rest of the function.
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef exec_mask;

   if (ctx->cond_stack_size == 0 &&
       ctx->loop_stack_size == 0 &&
       ctx->switch_stack_size == 0 &&
       mask->function_stack_size == 1) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->function_stack_size == 1) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in a if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}
801
/*
 * Handle BGNSUB: intentionally a no-op — all per-call state is set up by
 * lp_exec_mask_call when the subroutine is entered.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
805
/*
 * Handle ENDSUB: pop the current function context, restoring the caller's
 * pc and return mask saved by lp_exec_mask_call.
 */
static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   struct function_ctx *ctx;

   /* Must be inside a subroutine, never main's own context. */
   assert(mask->function_stack_size > 1);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);

   ctx = func_ctx(mask);
   mask->function_stack_size--;

   *pc = ctx->pc;
   mask->ret_mask = ctx->ret_mask;

   lp_exec_mask_update(mask);
}
821
822
/*
 * Return a pointer to the alloca backing channel 'chan' of register
 * 'index' in the given register file (TEMPORARY or OUTPUT only).
 * For indirectly-addressed files the registers live in one flat array
 * and the pointer is computed with a GEP; otherwise each channel has
 * its own variable.
 */
static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned file,
             int index,
             unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
   LLVMValueRef var_of_array;

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      array_of_vars = bld->temps;
      var_of_array = bld->temps_array;
      break;
   case TGSI_FILE_OUTPUT:
      array_of_vars = bld->outputs;
      var_of_array = bld->outputs_array;
      break;
   default:
      assert(0);
      return NULL;
   }

   assert(chan < 4);

   if (bld->indirect_files & (1 << file)) {
      /* Flat layout: reg 'index' channel 'chan' sits at index*4 + chan. */
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
      return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
   }
   else {
      assert(index <= bld->bld_base.info->file_max[file]);
      return array_of_vars[index][chan];
   }
}
858
859
860 /**
861 * Return pointer to a temporary register channel (src or dest).
862 * Note that indirect addressing cannot be handled here.
863 * \param index which temporary register
864 * \param chan which channel of the temp register.
865 */
LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
                    unsigned index,
                    unsigned chan)
{
   /* Thin wrapper over get_file_ptr for the TEMPORARY file. */
   return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
}
873
874 /**
875 * Return pointer to a output register channel (src or dest).
876 * Note that indirect addressing cannot be handled here.
877 * \param index which output register
878 * \param chan which channel of the output register.
879 */
LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
                  unsigned index,
                  unsigned chan)
{
   /* Thin wrapper over get_file_ptr for the OUTPUT file. */
   return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
}
887
888 /*
889 * If we have indirect addressing in outputs copy our alloca array
890 * to the outputs slots specified by the caller to make sure
891 * our outputs are delivered consistently via the same interface.
892 */
893 static void
894 gather_outputs(struct lp_build_tgsi_soa_context * bld)
895 {
896 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
897 unsigned index, chan;
898 assert(bld->bld_base.info->num_outputs <=
899 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
900 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
901 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
902 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
903 }
904 }
905 }
906 }
907
908 /**
909 * Gather vector.
910 * XXX the lp_build_gather() function should be capable of doing this
911 * with a little work.
912 */
/**
 * Gather vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 *
 * Loads one scalar per lane from base_ptr at the per-lane offsets in
 * 'indexes'. If indexes2 is non-NULL a double-width gather is done
 * (two 32-bit halves per lane, e.g. for doubles). overflow_mask, if
 * given, zeroes out lanes that indexed out of bounds.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_context *bld_base,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld = &bld_base->base;
   LLVMValueRef res;
   unsigned i;

   if (indexes2)
      /* Double-width result: twice as many float elements per lane. */
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
   else
      res = bld->undef;
   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (indexes2)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
      LLVMValueRef si, di;
      LLVMValueRef index;
      LLVMValueRef scalar_ptr, scalar;

      /* di: destination slot; si: source lane (halved for double-width). */
      di = lp_build_const_int32(bld->gallivm, i);
      if (indexes2)
         si = lp_build_const_int32(bld->gallivm, i >> 1);
      else
         si = di;

      if (indexes2 && (i & 1)) {
         /* Odd destination slots take the second (high) index vector. */
         index = LLVMBuildExtractElement(builder,
                                         indexes2, si, "");
      } else {
         index = LLVMBuildExtractElement(builder,
                                         indexes, si, "");
      }
      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   }

   if (overflow_mask) {
      if (indexes2) {
         /* View the result as doubles so select zeroes whole 64-bit lanes. */
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                       bld_base->dbl_bld.int_vec_type, "");
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
                               bld_base->dbl_bld.zero, res);
      } else
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}
996
997
998 /**
999 * Scatter/store vector.
1000 */
/**
 * Scatter/store vector.
 *
 * Stores each lane of 'values' to base_ptr at the per-lane offsets in
 * 'indexes', honouring mask->exec_mask per lane (inactive lanes keep
 * the destination's previous contents).
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;
   /* NULL pred means all lanes active -> plain stores, no select. */
   LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      /* Debug aid: flip to 1 to trace each scattered element at runtime. */
      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         /* Predicated store: read-modify-write with a per-lane select. */
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}
1039
1040
/**
 * Read the current value of the ADDR register (or a TEMP used as an
 * address), convert the floats to ints, add the base index and return
 * the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 *
 * \param bld           TGSI->LLVM SoA translation context
 * \param reg_file      register file being indexed (TGSI_FILE_*)
 * \param reg_index     constant base index from the instruction
 * \param indirect_reg  the indirect register supplying the dynamic part
 * \return uint vector: base + relative index, clamped to the declared
 *         file size for non-constant files
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_ind_register *indirect_reg)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   /* use the swizzle-selected component of the indirect register */
   unsigned swizzle = indirect_reg->Swizzle;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   /* caller must have declared this file as indirectly addressable */
   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   switch (indirect_reg->File) {
   case TGSI_FILE_ADDRESS:
      rel = LLVMBuildLoad(builder,
                          bld->addr[indirect_reg->Index][swizzle],
                          "load addr reg");
      /* ADDR LLVM values already have LLVM integer type. */
      break;
   case TGSI_FILE_TEMPORARY:
      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
      rel = LLVMBuildLoad(builder, rel, "load temp reg");
      /* TEMP LLVM values always have LLVM float type, but for indirection, the
       * value actually stored is expected to be an integer */
      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
      break;
   default:
      assert(0);
      rel = uint_bld->zero;
   }

   index = lp_build_add(uint_bld, base, rel);

   /*
    * emit_fetch_constant handles constant buffer overflow so this code
    * is pointless for them.
    * Furthermore the D3D10 spec in section 6.5 says:
    * If the constant buffer bound to a slot is larger than the size
    * declared in the shader for that slot, implementations are allowed
    * to return incorrect data (not necessarily 0) for indices that are
    * larger than the declared size but smaller than the buffer size.
    */
   if (reg_file != TGSI_FILE_CONSTANT) {
      /* clamp to the highest declared register so out-of-bounds indirect
       * accesses read garbage-but-safe data instead of faulting */
      max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                         uint_bld->type,
                                         bld->bld_base.info->file_max[reg_file]);

      assert(!uint_bld->type.sign);
      index = lp_build_min(uint_bld, index, max_index);
   }

   return index;
}
1107
1108 static struct lp_build_context *
1109 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
1110 enum tgsi_opcode_type stype)
1111 {
1112 struct lp_build_context *bld_fetch;
1113
1114 switch (stype) {
1115 case TGSI_TYPE_FLOAT:
1116 case TGSI_TYPE_UNTYPED:
1117 bld_fetch = &bld_base->base;
1118 break;
1119 case TGSI_TYPE_UNSIGNED:
1120 bld_fetch = &bld_base->uint_bld;
1121 break;
1122 case TGSI_TYPE_SIGNED:
1123 bld_fetch = &bld_base->int_bld;
1124 break;
1125 case TGSI_TYPE_DOUBLE:
1126 bld_fetch = &bld_base->dbl_bld;
1127 break;
1128 case TGSI_TYPE_UNSIGNED64:
1129 bld_fetch = &bld_base->uint64_bld;
1130 break;
1131 case TGSI_TYPE_SIGNED64:
1132 bld_fetch = &bld_base->int64_bld;
1133 break;
1134 case TGSI_TYPE_VOID:
1135 default:
1136 assert(0);
1137 bld_fetch = NULL;
1138 break;
1139 }
1140 return bld_fetch;
1141 }
1142
1143 static LLVMValueRef
1144 get_soa_array_offsets(struct lp_build_context *uint_bld,
1145 LLVMValueRef indirect_index,
1146 unsigned chan_index,
1147 boolean need_perelement_offset)
1148 {
1149 struct gallivm_state *gallivm = uint_bld->gallivm;
1150 LLVMValueRef chan_vec =
1151 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
1152 LLVMValueRef length_vec =
1153 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
1154 LLVMValueRef index_vec;
1155
1156 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1157 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1158 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1159 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1160
1161 if (need_perelement_offset) {
1162 LLVMValueRef pixel_offsets;
1163 unsigned i;
1164 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1165 pixel_offsets = uint_bld->undef;
1166 for (i = 0; i < uint_bld->type.length; i++) {
1167 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1168 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
1169 ii, ii, "");
1170 }
1171 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1172 }
1173 return index_vec;
1174 }
1175
/**
 * Fetch one channel from a constant buffer register.
 *
 * \param bld_base  TGSI->LLVM translation context
 * \param reg       source register; may be 2D (Dimension selects the
 *                  constant buffer slot) and/or indirectly addressed
 * \param stype     TGSI type of the value being fetched
 * \param swizzle   channel index (0..3) to fetch
 * \return vector of fetched values, bitcast to the build context type
 *         matching \p stype
 */
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0u);

   if (reg->Register.Dimension) {
      /* 2D reference: second dimension selects which constant buffer */
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      /* Indirect case: per-lane indices, needs a gather with overflow
       * checking against the bound buffer size. */
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec; /* index into the const buffer */
      LLVMValueRef overflow_mask;
      LLVMValueRef index_vec2 = NULL;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                       indirect_index, num_consts);

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      if (tgsi_type_is_64bit(stype)) {
         /* 64-bit values span two channels; build indices for the second */
         LLVMValueRef swizzle_vec2;
         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1);
         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
      }
      /* Gather values from the constant buffer */
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
   }
   else {
      /* Direct case: single scalar load broadcast to all lanes. */
      LLVMValueRef index; /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;
      struct lp_build_context *bld_broad = &bld_base->base;
      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");
      /* For 64-bit types, reinterpret the float pointer so a single load
       * pulls in both 32-bit halves at once. */
      if (stype == TGSI_TYPE_DOUBLE) {
         LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
         bld_broad = &bld_base->dbl_bld;
      } else if (stype == TGSI_TYPE_UNSIGNED64) {
         LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
         bld_broad = &bld_base->uint64_bld;
      } else if (stype == TGSI_TYPE_SIGNED64) {
         LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
         bld_broad = &bld_base->int64_bld;
      }
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
      res = lp_build_broadcast_scalar(bld_broad, scalar);
   }

   /* Reinterpret the (float-typed) result as the requested type. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
1271
1272 /**
1273 * Fetch 64-bit values from two separate channels.
1274 * 64-bit values are stored split across two channels, like xy and zw.
1275 * This function creates a set of 16 floats,
1276 * extracts the values from the two channels,
1277 * puts them in the correct place, then casts to 8 64-bits.
1278 */
1279 static LLVMValueRef
1280 emit_fetch_64bit(
1281 struct lp_build_tgsi_context * bld_base,
1282 enum tgsi_opcode_type stype,
1283 LLVMValueRef input,
1284 LLVMValueRef input2)
1285 {
1286 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1287 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1288 LLVMBuilderRef builder = gallivm->builder;
1289 LLVMValueRef res;
1290 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1291 int i;
1292 LLVMValueRef shuffles[16];
1293 int len = bld_base->base.type.length * 2;
1294 assert(len <= 16);
1295
1296 for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
1297 shuffles[i] = lp_build_const_int32(gallivm, i / 2);
1298 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
1299 }
1300 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
1301
1302 return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1303 }
1304
/**
 * Fetch one channel of an immediate register.
 *
 * Immediates normally live in bld->immediates[][]; when the shader uses
 * indirect addressing on them (or use_immediates_array is set) they are
 * instead kept in the imms_array alloca and fetched through loads/gathers.
 *
 * \param stype   TGSI type of the value; 64-bit types combine two channels
 * \param swizzle channel index (0..3) to fetch
 */
static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec; /* index into the immediate register array */
         LLVMValueRef index_vec2 = NULL;
         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);
         if (tgsi_type_is_64bit(stype))
            /* second channel for the high halves of 64-bit values */
            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                               indirect_index,
                                               swizzle + 1,
                                               FALSE);
         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
      } else {
         /* Direct access into the imms_array alloca. */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                    reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
                                              bld->imms_array, &lindex, 1, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");

         if (tgsi_type_is_64bit(stype)) {
            /* load the adjacent channel and combine the two 32-bit halves */
            LLVMValueRef lindex1;
            LLVMValueRef imms_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + swizzle + 1);
            imms_ptr2 = LLVMBuildGEP(builder,
                                     bld->imms_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
   }
   else {
      /* Fast path: immediates pre-broadcast into LLVM vector values. */
      res = bld->immediates[reg->Register.Index][swizzle];
      if (tgsi_type_is_64bit(stype))
         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
   }

   /* Reinterpret the float-typed result as the requested type. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}
1383
/**
 * Fetch one channel of a shader input register.
 *
 * Inputs normally live in bld->inputs[][]; when the input file is
 * indirectly addressable they are kept in the inputs_array alloca and
 * fetched through loads/gathers instead.
 *
 * \param stype   TGSI type of the value; 64-bit types combine two channels
 * \param swizzle channel index (0..3) to fetch
 */
static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec; /* index into the input reg array */
      LLVMValueRef index_vec2 = NULL;
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* per-element offsets: each lane addresses its own SoA slot */
      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         /* second channel for the high halves of 64-bit values */
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle + 1,
                                            TRUE);
      }
      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         /* Direct access, but inputs live in the alloca array because the
          * file is indirectly addressed elsewhere in the shader. */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                    reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");

         res = LLVMBuildLoad(builder, input_ptr, "");
         if (tgsi_type_is_64bit(stype)) {
            /* load the adjacent channel and combine the 32-bit halves */
            LLVMValueRef lindex1;
            LLVMValueRef input_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + swizzle + 1);
            input_ptr2 = LLVMBuildGEP(builder,
                                      bld->inputs_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, input_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
      else {
         /* Fast path: inputs pre-loaded into LLVM vector values. */
         res = bld->inputs[reg->Register.Index][swizzle];
         if (tgsi_type_is_64bit(stype))
            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
      }
   }

   assert(res);

   /* Reinterpret the float-typed result as the requested type. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
1461
1462
1463 static LLVMValueRef
1464 emit_fetch_gs_input(
1465 struct lp_build_tgsi_context * bld_base,
1466 const struct tgsi_full_src_register * reg,
1467 enum tgsi_opcode_type stype,
1468 unsigned swizzle)
1469 {
1470 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1471 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1472 const struct tgsi_shader_info *info = bld->bld_base.info;
1473 LLVMBuilderRef builder = gallivm->builder;
1474 LLVMValueRef attrib_index = NULL;
1475 LLVMValueRef vertex_index = NULL;
1476 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1477 LLVMValueRef res;
1478
1479 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1480 /* This is really a system value not a regular input */
1481 assert(!reg->Register.Indirect);
1482 assert(!reg->Dimension.Indirect);
1483 res = bld->system_values.prim_id;
1484 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1485 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1486 }
1487 return res;
1488 }
1489
1490 if (reg->Register.Indirect) {
1491 attrib_index = get_indirect_index(bld,
1492 reg->Register.File,
1493 reg->Register.Index,
1494 &reg->Indirect);
1495 } else {
1496 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1497 }
1498
1499 if (reg->Dimension.Indirect) {
1500 vertex_index = get_indirect_index(bld,
1501 reg->Register.File,
1502 reg->Dimension.Index,
1503 &reg->DimIndirect);
1504 } else {
1505 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1506 }
1507
1508 res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1509 reg->Dimension.Indirect,
1510 vertex_index,
1511 reg->Register.Indirect,
1512 attrib_index,
1513 swizzle_index);
1514
1515 assert(res);
1516 if (tgsi_type_is_64bit(stype)) {
1517 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle + 1);
1518 LLVMValueRef res2;
1519 res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1520 reg->Dimension.Indirect,
1521 vertex_index,
1522 reg->Register.Indirect,
1523 attrib_index,
1524 swizzle_index);
1525 assert(res2);
1526 res = emit_fetch_64bit(bld_base, stype, res, res2);
1527 } else if (stype == TGSI_TYPE_UNSIGNED) {
1528 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1529 } else if (stype == TGSI_TYPE_SIGNED) {
1530 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1531 }
1532
1533 return res;
1534 }
1535
/**
 * Fetch one channel of a temporary register.
 *
 * Temporaries are stored as floats; indirect addressing goes through the
 * temps_array alloca with a per-lane gather, direct access loads from the
 * per-channel pointer.  64-bit types combine two consecutive channels.
 *
 * \param stype   TGSI type of the value being fetched
 * \param swizzle channel index (0..3) to fetch
 */
static LLVMValueRef
emit_fetch_temporary(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */
      LLVMValueRef temps_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* per-element offsets: each lane addresses its own SoA slot */
      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         /* second channel for the high halves of 64-bit values */
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle + 1,
                                            TRUE);
      }

      /* cast temps_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

      /* Gather values from the temporary register array */
      res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
   }
   else {
      LLVMValueRef temp_ptr;
      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
      res = LLVMBuildLoad(builder, temp_ptr, "");

      if (tgsi_type_is_64bit(stype)) {
         /* load the adjacent channel and combine the 32-bit halves */
         LLVMValueRef temp_ptr2, res2;

         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
         res2 = LLVMBuildLoad(builder, temp_ptr2, "");
         res = emit_fetch_64bit(bld_base, stype, res, res2);
      }
   }

   /* Reinterpret the float-typed result as the requested type. */
   if (stype == TGSI_TYPE_SIGNED ||
       stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE ||
       stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
1602
/**
 * Fetch a system value register (instance id, vertex id, primitive id, ...).
 *
 * Each semantic records the actual type of the stored value in 'atype';
 * if the instruction expects a different type the result is bitcast at
 * the end.  Indirect addressing of system values is not supported.
 */
static LLVMValueRef
emit_fetch_system_value(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   enum tgsi_opcode_type atype; // Actual type of the value

   assert(!reg->Register.Indirect);

   switch (info->system_value_semantic_name[reg->Register.Index]) {
   case TGSI_SEMANTIC_INSTANCEID:
      /* scalar in system_values; broadcast to all lanes */
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID:
      /* already a per-lane vector */
      res = bld->system_values.vertex_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID_NOBASE:
      res = bld->system_values.vertex_id_nobase;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BASEVERTEX:
      res = bld->system_values.basevertex;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_PRIMID:
      res = bld->system_values.prim_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_INVOCATIONID:
      /* scalar in system_values; broadcast to all lanes */
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   default:
      assert(!"unexpected semantic in emit_fetch_system_value");
      res = bld_base->base.zero;
      atype = TGSI_TYPE_FLOAT;
      break;
   }

   /* Bitcast to the type the instruction expects, if different. */
   if (atype != stype) {
      if (stype == TGSI_TYPE_FLOAT) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      } else if (stype == TGSI_TYPE_UNSIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
      } else if (stype == TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
      }
   }

   return res;
}
1669
1670 /**
1671 * Register fetch with derivatives.
1672 */
1673 static void
1674 emit_fetch_deriv(
1675 struct lp_build_tgsi_soa_context *bld,
1676 LLVMValueRef src,
1677 LLVMValueRef *res,
1678 LLVMValueRef *ddx,
1679 LLVMValueRef *ddy)
1680 {
1681 if (res)
1682 *res = src;
1683
1684 /* TODO: use interpolation coeffs for inputs */
1685
1686 if (ddx)
1687 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1688
1689 if (ddy)
1690 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1691 }
1692
1693 /**
1694 * store an array of 8 64-bit into two arrays of 8 floats
1695 * i.e.
1696 * value is d0, d1, d2, d3 etc.
1697 * each 64-bit has high and low pieces x, y
1698 * so gets stored into the separate channels as:
1699 * chan_ptr = d0.x, d1.x, d2.x, d3.x
1700 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1701 */
1702 static void
1703 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1704 LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1705 LLVMValueRef value)
1706 {
1707 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1708 struct gallivm_state *gallivm = bld_base->base.gallivm;
1709 LLVMBuilderRef builder = gallivm->builder;
1710 struct lp_build_context *float_bld = &bld_base->base;
1711 unsigned i;
1712 LLVMValueRef temp, temp2;
1713 LLVMValueRef shuffles[8];
1714 LLVMValueRef shuffles2[8];
1715
1716 for (i = 0; i < bld_base->base.type.length; i++) {
1717 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1718 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1719 }
1720
1721 temp = LLVMBuildShuffleVector(builder, value,
1722 LLVMGetUndef(LLVMTypeOf(value)),
1723 LLVMConstVector(shuffles,
1724 bld_base->base.type.length),
1725 "");
1726 temp2 = LLVMBuildShuffleVector(builder, value,
1727 LLVMGetUndef(LLVMTypeOf(value)),
1728 LLVMConstVector(shuffles2,
1729 bld_base->base.type.length),
1730 "");
1731
1732 lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1733 lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1734 }
1735
/**
 * Register store.
 *
 * Stores one channel of a destination register, applying saturation and
 * the current execution mask.  Supports OUTPUT, TEMPORARY and ADDRESS
 * files; indirect destinations use a masked scatter.
 *
 * \param index       destination operand index within the instruction
 * \param chan_index  channel (0..3) being written
 * \param value       vector value to store
 */
static void
emit_store_chan(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   unsigned chan_index,
   LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   struct lp_build_context *float_bld = &bld_base->base;
   struct lp_build_context *int_bld = &bld_base->int_bld;
   LLVMValueRef indirect_index = NULL;
   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);

   /*
    * Apply saturation.
    *
    * It is always assumed to be float.
    */
   if (inst->Instruction.Saturate) {
      assert(dtype == TGSI_TYPE_FLOAT ||
             dtype == TGSI_TYPE_UNTYPED);
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      value = lp_build_clamp_zero_one_nanzero(float_bld, value);
   }

   if (reg->Register.Indirect) {
      /*
       * Currently the mesa/st doesn't generate indirect stores
       * to 64-bit values, it normally uses MOV to do indirect stores.
       */
      assert(!tgsi_type_is_64bit(dtype));
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <=
             bld_base->info->file_max[reg->Register.File]);
   }

   if (DEBUG_EXECUTION) {
      emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
   }

   switch( reg->Register.File ) {
   case TGSI_FILE_OUTPUT:
      /* Outputs are always stored as floats */
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef index_vec; /* indexes into the output registers */
         LLVMValueRef outputs_array;
         LLVMTypeRef fptr_type;

         /* per-lane offsets into the SoA output array */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           chan_index,
                                           TRUE);

         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");

         /* Scatter store values into output registers */
         emit_mask_scatter(bld, outputs_array, index_vec, value,
                           &bld->exec_mask);
      }
      else {
         LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
                                                  chan_index);

         if (tgsi_type_is_64bit(dtype)) {
            /* 64-bit values are split across channel pairs */
            LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
                                                      chan_index + 1);
            emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
                                  value);
         } else
            lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
      }
      break;

   case TGSI_FILE_TEMPORARY:
      /* Temporaries are always stored as floats */
      if (!tgsi_type_is_64bit(dtype))
         value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      else
         /* 64-bit value viewed as a double-length float vector */
         value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");

      if (reg->Register.Indirect) {
         LLVMValueRef index_vec; /* indexes into the temp registers */
         LLVMValueRef temps_array;
         LLVMTypeRef fptr_type;

         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           chan_index,
                                           TRUE);

         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

         /* Scatter store values into temp registers */
         emit_mask_scatter(bld, temps_array, index_vec, value,
                           &bld->exec_mask);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);

         if (tgsi_type_is_64bit(dtype)) {
            /* 64-bit values are split across channel pairs */
            LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
                                                         reg->Register.Index,
                                                         chan_index + 1);
            emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
                                  value);
         }
         else
            lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
      }
      break;

   case TGSI_FILE_ADDRESS:
      assert(dtype == TGSI_TYPE_SIGNED);
      assert(LLVMTypeOf(value) == int_bld->vec_type);
      /* bitcast is a no-op given the assert above; kept for safety */
      value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, int_bld, value,
                         bld->addr[reg->Register.Index][chan_index]);
      break;

   default:
      assert( 0 );
   }

   (void)dtype;
}
1877
/*
 * Called at the beginning of the translation of each TGSI instruction, to
 * emit some debug code.
 */
static void
emit_debug(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_instruction * inst,
   const struct tgsi_opcode_info * info)

{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (DEBUG_EXECUTION) {
      /*
       * Dump the TGSI instruction.
       */

      struct gallivm_state *gallivm = bld_base->base.gallivm;
      char buf[512];
      buf[0] = '$';
      buf[1] = ' ';
      tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
      /* NOTE(review): buf is used directly as the printf format string;
       * this assumes tgsi_dump_instruction_str() output never contains
       * '%' characters — confirm against tgsi_dump. */
      lp_build_printf(gallivm, buf);

      /* Dump the execution mask.
       */
      if (bld->exec_mask.has_mask) {
         lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
      }
   }
}
1910
1911 static void
1912 emit_store(
1913 struct lp_build_tgsi_context * bld_base,
1914 const struct tgsi_full_instruction * inst,
1915 const struct tgsi_opcode_info * info,
1916 unsigned index,
1917 LLVMValueRef dst[4])
1918
1919 {
1920 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1921
1922 unsigned writemask = inst->Dst[index].Register.WriteMask;
1923 while (writemask) {
1924 unsigned chan_index = u_bit_scan(&writemask);
1925 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1926 continue;
1927 emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1928 }
1929 }
1930
1931 static unsigned
1932 tgsi_to_pipe_tex_target(unsigned tgsi_target)
1933 {
1934 switch (tgsi_target) {
1935 case TGSI_TEXTURE_BUFFER:
1936 return PIPE_BUFFER;
1937 case TGSI_TEXTURE_1D:
1938 case TGSI_TEXTURE_SHADOW1D:
1939 return PIPE_TEXTURE_1D;
1940 case TGSI_TEXTURE_2D:
1941 case TGSI_TEXTURE_SHADOW2D:
1942 case TGSI_TEXTURE_2D_MSAA:
1943 return PIPE_TEXTURE_2D;
1944 case TGSI_TEXTURE_3D:
1945 return PIPE_TEXTURE_3D;
1946 case TGSI_TEXTURE_CUBE:
1947 case TGSI_TEXTURE_SHADOWCUBE:
1948 return PIPE_TEXTURE_CUBE;
1949 case TGSI_TEXTURE_RECT:
1950 case TGSI_TEXTURE_SHADOWRECT:
1951 return PIPE_TEXTURE_RECT;
1952 case TGSI_TEXTURE_1D_ARRAY:
1953 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1954 return PIPE_TEXTURE_1D_ARRAY;
1955 case TGSI_TEXTURE_2D_ARRAY:
1956 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1957 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1958 return PIPE_TEXTURE_2D_ARRAY;
1959 case TGSI_TEXTURE_CUBE_ARRAY:
1960 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1961 return PIPE_TEXTURE_CUBE_ARRAY;
1962 default:
1963 assert(0);
1964 return PIPE_BUFFER;
1965 }
1966 }
1967
1968
1969 static enum lp_sampler_lod_property
1970 lp_build_lod_property(
1971 struct lp_build_tgsi_context *bld_base,
1972 const struct tgsi_full_instruction *inst,
1973 unsigned src_op)
1974 {
1975 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
1976 enum lp_sampler_lod_property lod_property;
1977
1978 /*
1979 * Not much we can do here. We could try catching inputs declared
1980 * with constant interpolation but not sure it's worth it - since for
1981 * TEX opcodes as well as FETCH/LD the lod comes from same reg as
1982 * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
1983 * like the constant/immediate recognition below.
1984 * What seems to be of more value would be to recognize temps holding
1985 * broadcasted scalars but no way we can do it.
1986 * Tried asking llvm but without any success (using LLVMIsConstant
1987 * even though this isn't exactly what we'd need), even as simple as
1988 * IMM[0] UINT32 (0,-1,0,0)
1989 * MOV TEMP[0] IMM[0].yyyy
1990 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
1991 * doesn't work.
1992 * This means there's ZERO chance this will ever catch a scalar lod
1993 * with traditional tex opcodes as well as texel fetches, since the lod
1994 * comes from the same reg as coords (except some test shaders using
1995 * constant coords maybe).
1996 * There's at least hope for sample opcodes as well as size queries.
1997 */
1998 if (reg->Register.File == TGSI_FILE_CONSTANT ||
1999 reg->Register.File == TGSI_FILE_IMMEDIATE) {
2000 lod_property = LP_SAMPLER_LOD_SCALAR;
2001 }
2002 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2003 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
2004 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2005 }
2006 else {
2007 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2008 }
2009 }
2010 else {
2011 /* never use scalar (per-quad) lod the results are just too wrong. */
2012 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2013 }
2014 return lod_property;
2015 }
2016
2017
2018 /**
2019 * High-level instruction translators.
2020 */
2021
/**
 * Translate a traditional texturing opcode (TEX/TXB/TXL/TXD/TXP variants)
 * into one request to the sampler code generator.
 *
 * \param inst         the TGSI instruction being translated
 * \param modifier     bias / explicit lod / derivative / projection variant
 * \param texel        receives the four resulting texel channel values
 * \param sampler_reg  index of the source operand holding the unit
 *                     (texture unit and sampler unit are the same for
 *                     these opcodes)
 * \param sampler_op   operation type, folded into the sample key
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel,
          unsigned sampler_reg,
          enum lp_sampler_op_type sampler_op)
{
   unsigned unit = inst->Src[sampler_reg].Register.Index;
   LLVMValueRef oow = NULL;
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned num_derivs, num_offsets, i;
   unsigned shadow_coord = 0;
   unsigned layer_coord = 0;
   unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      /* Return undef texels so translation can continue. */
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * Per-target layout: number of offset and derivative channels, and
    * which coord slots carry the layer and shadow-reference values.
    */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      /* fallthrough */
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_SHADOW1D:
      shadow_coord = 2;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      layer_coord = 2;
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      shadow_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE:
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      shadow_coord = 4; /* shadow coord special different reg */
      break;
   case TGSI_TEXTURE_2D_MSAA:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
   default:
      /* MSAA targets are not handled by the TEX-style path. */
      assert(0);
      return;
   }

   /* Note lod and especially projected are illegal in a LOT of cases */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
          inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
         /* note that shadow cube array with bias/explicit lod does not exist */
         /* bias/lod comes from src1.x since src0 is fully used by coords */
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         /* bias/lod shares the coord register: src0.w */
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      /* 1/w, multiplied into coords (and layer/shadow) below */
      oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      oow = lp_build_rcp(&bld->bld_base.base, oow);
   }

   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
   }
   /* Unused coord slots are left undef. */
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3) {
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      else {
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
   }
   /* Shadow coord occupies always 5th slot. */
   if (shadow_coord) {
      sample_key |= LP_SAMPLER_SHADOW;
      if (shadow_coord == 4) {
         /* shadow cube array: reference value lives in src1.x */
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      /* TXD: derivatives come from src1 (ddx) and src2 (ddy) */
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   /* we don't handle the 4 offset version of tg4 */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }

   /* Texture and sampler share the same unit for TEX-style opcodes. */
   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   params.sampler_index = unit;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);
}
2228
/**
 * Translate an SM4-style SAMPLE* opcode, where the texture and sampler
 * units come from separate operands (src1 / src2) and the texture target
 * comes from the declared sampler view rather than the instruction.
 *
 * \param inst         the TGSI instruction being translated
 * \param modifier     bias / explicit lod / lod-zero / derivative variant
 * \param compare      true for comparison (shadow) sampling; the
 *                     reference value then comes from src3.x
 * \param sample_type  operation type, folded into the sample key
 * \param texel        receives the four resulting texel channel values
 */
static void
emit_sample(struct lp_build_tgsi_soa_context *bld,
            const struct tgsi_full_instruction *inst,
            enum lp_build_tex_modifier modifier,
            boolean compare,
            enum lp_sampler_op_type sample_type,
            LLVMValueRef *texel)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   unsigned texture_unit, sampler_unit;
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   unsigned num_offsets, num_derivs, i;
   unsigned layer_coord = 0;
   unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      /* Return undef texels so translation can continue. */
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * unlike old-style tex opcodes the texture/sampler indices
    * always come from src1 and src2 respectively.
    */
   texture_unit = inst->Src[1].Register.Index;
   sampler_unit = inst->Src[2].Register.Index;

   /*
    * Note inst->Texture.Texture will contain the number of offsets,
    * however the target information is NOT there and comes from the
    * declared sampler views instead.
    */
   switch (bld->sv[texture_unit].Resource) {
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      layer_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   default:
      assert(0);
      return;
   }

   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      /* bias/lod value lives in src3.x */
      lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
      /* XXX might be better to explicitly pass the level zero information */
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
   }

   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   /* Unused coord slots are left undef. */
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3)
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      else
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
   }
   /* Shadow coord occupies always 5th slot. */
   if (compare) {
      /* comparison reference value comes from src3.x */
      sample_key |= LP_SAMPLER_SHADOW;
      coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      /* SAMPLE_D: derivatives come from src3 (ddx) and src4 (ddy) */
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }

   /* some advanced gather instructions (txgo) would require 4 offsets */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = texture_unit;
   params.sampler_index = sampler_unit;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   /* Apply the swizzle encoded on the resource operand (src1), if any. */
   if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
       inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
       inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
       inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
2410
/**
 * Translate a texel fetch (TXF / SAMPLE_I): an unfiltered read with
 * integer coordinates and, where applicable, an explicit lod.
 *
 * \param inst        the TGSI instruction being translated
 * \param texel       receives the four fetched channel values
 * \param is_samplei  true for SAMPLE_I (target comes from the declared
 *                    sampler view and the src1 swizzle is applied),
 *                    false for TXF (target from the instruction)
 */
static void
emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
                   const struct tgsi_full_instruction *inst,
                   LLVMValueRef *texel,
                   boolean is_samplei)
{
   unsigned unit, target;
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned dims, i;
   unsigned layer_coord = 0;
   unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      /* Return undef texels so translation can continue. */
      for (i = 0; i < 4; i++) {
         texel[i] = coord_undef;
      }
      return;
   }

   unit = inst->Src[1].Register.Index;

   if (is_samplei) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }

   /* Per-target coordinate count and layer coord slot. */
   switch (target) {
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_BUFFER:
      dims = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      dims = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_2D_MSAA:
      dims = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      layer_coord = 2;
      dims = 2;
      break;
   case TGSI_TEXTURE_3D:
      dims = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* always have lod except for buffers and msaa targets ? */
   if (target != TGSI_TEXTURE_BUFFER &&
       target != TGSI_TEXTURE_2D_MSAA &&
       target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      /* lod shares the coord register: src0.w */
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   /*
    * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
    * would be the sample index.
    */

   for (i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
   for (i = dims; i < 5; i++) {
      coords[i] = coord_undef;
   }
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);

   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < dims; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   /*
    * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
    * and trigger some assertions with d3d10 where the sampler view number
    * can exceed this.
    */
   params.sampler_index = 0;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.derivs = NULL;
   params.lod = explicit_lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   /* SAMPLE_I also applies the swizzle encoded on the resource operand. */
   if (is_samplei &&
       (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
2541
/**
 * Translate a texture size query (TXQ / SVIEWINFO).
 *
 * \param inst          the TGSI instruction being translated
 * \param sizes_out     receives the integer size per channel
 * \param is_sviewinfo  true for SVIEWINFO (target from the declared
 *                      sampler view), false for TXQ (target from the
 *                      instruction)
 */
static void
emit_size_query( struct lp_build_tgsi_soa_context *bld,
                 const struct tgsi_full_instruction *inst,
                 LLVMValueRef *sizes_out,
                 boolean is_sviewinfo)
{
   LLVMValueRef explicit_lod;
   enum lp_sampler_lod_property lod_property;
   unsigned has_lod;
   unsigned i;
   unsigned unit = inst->Src[1].Register.Index;
   unsigned target, pipe_target;
   struct lp_sampler_size_query_params params;

   if (is_sviewinfo) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }
   /* Buffers and rect textures are not mipmapped: no lod operand. */
   switch (target) {
   case TGSI_TEXTURE_BUFFER:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_SHADOWRECT:
      has_lod = 0;
      break;
   default:
      has_lod = 1;
      break;
   }

   if (!bld->sampler) {
      _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
      /* Return undef sizes so translation can continue. */
      for (i = 0; i < 4; i++)
         sizes_out[i] = bld->bld_base.int_bld.undef;
      return;
   }

   if (has_lod) {
      /* the lod to query is in src0.x */
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else {
      explicit_lod = NULL;
      lod_property = LP_SAMPLER_LOD_SCALAR;
   }


   pipe_target = tgsi_to_pipe_tex_target(target);

   params.int_type = bld->bld_base.int_bld.type;
   params.texture_unit = unit;
   params.target = pipe_target;
   params.context_ptr = bld->context_ptr;
   /*
    * NOTE(review): the is_sviewinfo parameter is ignored here and the
    * flag is hard-coded TRUE even for TXQ -- confirm this is intentional.
    */
   params.is_sviewinfo = TRUE;
   params.lod_property = lod_property;
   params.explicit_lod = explicit_lod;
   params.sizes_out = sizes_out;

   bld->sampler->emit_size_query(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);
}
2605
2606 static boolean
2607 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2608 int pc)
2609 {
2610 unsigned i;
2611
2612 for (i = 0; i < 5; i++) {
2613 unsigned opcode;
2614
2615 if (pc + i >= bld->bld_base.info->num_instructions)
2616 return TRUE;
2617
2618 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2619
2620 if (opcode == TGSI_OPCODE_END)
2621 return TRUE;
2622
2623 if (opcode == TGSI_OPCODE_TEX ||
2624 opcode == TGSI_OPCODE_TXP ||
2625 opcode == TGSI_OPCODE_TXD ||
2626 opcode == TGSI_OPCODE_TXB ||
2627 opcode == TGSI_OPCODE_TXL ||
2628 opcode == TGSI_OPCODE_TXF ||
2629 opcode == TGSI_OPCODE_TXQ ||
2630 opcode == TGSI_OPCODE_TEX2 ||
2631 opcode == TGSI_OPCODE_TXB2 ||
2632 opcode == TGSI_OPCODE_TXL2 ||
2633 opcode == TGSI_OPCODE_SAMPLE ||
2634 opcode == TGSI_OPCODE_SAMPLE_B ||
2635 opcode == TGSI_OPCODE_SAMPLE_C ||
2636 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2637 opcode == TGSI_OPCODE_SAMPLE_D ||
2638 opcode == TGSI_OPCODE_SAMPLE_I ||
2639 opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2640 opcode == TGSI_OPCODE_SAMPLE_L ||
2641 opcode == TGSI_OPCODE_SVIEWINFO ||
2642 opcode == TGSI_OPCODE_CAL ||
2643 opcode == TGSI_OPCODE_IF ||
2644 opcode == TGSI_OPCODE_UIF ||
2645 opcode == TGSI_OPCODE_BGNLOOP ||
2646 opcode == TGSI_OPCODE_SWITCH)
2647 return FALSE;
2648 }
2649
2650 return TRUE;
2651 }
2652
2653
2654
2655 /**
2656 * Kill fragment if any of the src register values are negative.
2657 */
static void
emit_kill_if(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   int pc)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[TGSI_NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   /* Fetch each distinct (unswizzled) source component exactly once. */
   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < TGSI_NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
   }

   /* AND together the per-channel survival masks. */
   mask = NULL;
   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);

         if(mask)
            mask = LLVMBuildAnd(builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   if (bld->exec_mask.has_mask) {
      LLVMValueRef invmask;
      /* Lanes that were already inactive must stay alive in 'mask':
       * only currently-executing lanes can be killed here. */
      invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
      mask = LLVMBuildOr(builder, mask, invmask, "");
   }

   lp_build_mask_update(bld->mask, mask);
   /* Emit an early-out check unless the shader ends soon anyway. */
   if (!near_end_of_shader(bld, pc))
      lp_build_mask_check(bld->mask);
}
2712
2713
2714 /**
2715 * Unconditional fragment kill.
2716 * The only predication is the execution mask which will apply if
2717 * we're inside a loop or conditional.
2718 */
2719 static void
2720 emit_kill(struct lp_build_tgsi_soa_context *bld,
2721 int pc)
2722 {
2723 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2724 LLVMValueRef mask;
2725
2726 /* For those channels which are "alive", disable fragment shader
2727 * execution.
2728 */
2729 if (bld->exec_mask.has_mask) {
2730 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2731 }
2732 else {
2733 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2734 mask = zero;
2735 }
2736
2737 lp_build_mask_update(bld->mask, mask);
2738
2739 if (!near_end_of_shader(bld, pc))
2740 lp_build_mask_check(bld->mask);
2741 }
2742
2743
2744 /**
2745 * Emit code which will dump the value of all the temporary registers
2746 * to stdout.
2747 */
static void
emit_dump_file(struct lp_build_tgsi_soa_context *bld,
               unsigned file)
{
   const struct tgsi_shader_info *info = bld->bld_base.info;
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef reg_ptr;
   int index;
   int max_index = info->file_max[file];

   /*
    * Some register files, particularly constants, can be very large,
    * and dumping everything could make this unusably slow.
    */
   max_index = MIN2(max_index, 32);

   for (index = 0; index <= max_index; index++) {
      LLVMValueRef res;
      unsigned mask;
      int chan;

      /* file_mask only covers the first 8*sizeof(unsigned) registers. */
      if (index < 8 * sizeof(unsigned) &&
          (info->file_mask[file] & (1u << index)) == 0) {
         /* This was not declared.*/
         continue;
      }

      if (file == TGSI_FILE_INPUT) {
         /* only dump the channels the shader actually reads */
         mask = info->input_usage_mask[index];
      } else {
         mask = TGSI_WRITEMASK_XYZW;
      }

      for (chan = 0; chan < 4; chan++) {
         if ((mask & (1 << chan)) == 0) {
            /* This channel is not used.*/
            continue;
         }

         /* Fetch the channel value: constants via the generic fetch
          * callback, inputs from the precomputed array, temps/outputs
          * by loading their allocas. */
         if (file == TGSI_FILE_CONSTANT) {
            struct tgsi_full_src_register reg;
            memset(&reg, 0, sizeof reg);
            reg.Register.File = file;
            reg.Register.Index = index;
            reg.Register.SwizzleX = 0;
            reg.Register.SwizzleY = 1;
            reg.Register.SwizzleZ = 2;
            reg.Register.SwizzleW = 3;

            res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_INPUT) {
            res = bld->inputs[index][chan];
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_TEMPORARY) {
            reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else if (file == TGSI_FILE_OUTPUT) {
            reg_ptr = lp_get_output_ptr(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else {
            assert(0);
            continue;
         }

         emit_dump_reg(gallivm, file, index, chan, res);
      }
   }
}
2824
2825
2826
/**
 * Process one TGSI declaration: allocate storage for temporary, output
 * and address registers, record declared sampler-view targets, and
 * fetch the per-buffer constant pointers/sizes.
 */
void
lp_emit_declaration_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_declaration *decl)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
   const unsigned first = decl->Range.First;
   const unsigned last = decl->Range.Last;
   unsigned idx, i;

   assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);

   switch (decl->Declaration.File) {
   case TGSI_FILE_TEMPORARY:
      /* one alloca per channel, unless temps are addressed indirectly
       * (then they live in a single array allocated elsewhere) */
      if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
         assert(last < LP_MAX_INLINED_TEMPS);
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
         }
      }
      break;

   case TGSI_FILE_OUTPUT:
      if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->outputs[idx][i] = lp_build_alloca(gallivm,
                                                      vec_type, "output");
         }
      }
      break;

   case TGSI_FILE_ADDRESS:
      /* ADDR registers are only allocated with an integer LLVM IR type,
       * as they are guaranteed to always have integers.
       * XXX: Not sure if this exception is worthwhile (or the whole idea of
       * an ADDR register for that matter).
       */
      assert(last < LP_MAX_TGSI_ADDRS);
      for (idx = first; idx <= last; ++idx) {
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
      }
      break;

   case TGSI_FILE_SAMPLER_VIEW:
      /*
       * The target stored here MUST match whatever there actually
       * is in the set sampler views (what about return type?).
       */
      assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
      for (idx = first; idx <= last; ++idx) {
         bld->sv[idx] = decl->SamplerView;
      }
      break;

   case TGSI_FILE_CONSTANT:
   {
      /*
       * We could trivially fetch the per-buffer pointer when fetching the
       * constant, relying on llvm to figure out it's always the same pointer
       * anyway. However, doing so results in a huge (more than factor of 10)
       * slowdown in llvm compilation times for some (but not all) shaders
       * (more specifically, the IR optimization spends way more time in
       * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
       */
      unsigned idx2D = decl->Dim.Index2D;
      LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
      assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
      bld->consts[idx2D] =
         lp_build_array_get(gallivm, bld->consts_ptr, index2D);
      bld->consts_sizes[idx2D] =
         lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
   }
   break;

   default:
      /* don't need to declare other vars */
      break;
   }
}
2912
2913
2914 void lp_emit_immediate_soa(
2915 struct lp_build_tgsi_context *bld_base,
2916 const struct tgsi_full_immediate *imm)
2917 {
2918 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2919 struct gallivm_state * gallivm = bld_base->base.gallivm;
2920 LLVMValueRef imms[4];
2921 unsigned i;
2922 const uint size = imm->Immediate.NrTokens - 1;
2923 assert(size <= 4);
2924 switch (imm->Immediate.DataType) {
2925 case TGSI_IMM_FLOAT32:
2926 for( i = 0; i < size; ++i )
2927 imms[i] =
2928 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2929
2930 break;
2931 case TGSI_IMM_FLOAT64:
2932 case TGSI_IMM_UINT64:
2933 case TGSI_IMM_INT64:
2934 case TGSI_IMM_UINT32:
2935 for( i = 0; i < size; ++i ) {
2936 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
2937 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2938 }
2939
2940 break;
2941 case TGSI_IMM_INT32:
2942 for( i = 0; i < size; ++i ) {
2943 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
2944 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2945 }
2946
2947 break;
2948 }
2949 for( i = size; i < 4; ++i )
2950 imms[i] = bld_base->base.undef;
2951
2952 if (bld->use_immediates_array) {
2953 unsigned index = bld->num_immediates;
2954 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2955 LLVMBuilderRef builder = gallivm->builder;
2956
2957 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
2958 for (i = 0; i < 4; ++i ) {
2959 LLVMValueRef lindex = lp_build_const_int32(
2960 bld->bld_base.base.gallivm, index * 4 + i);
2961 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
2962 bld->imms_array, &lindex, 1, "");
2963 LLVMBuildStore(builder, imms[i], imm_ptr);
2964 }
2965 } else {
2966 /* simply copy the immediate values into the next immediates[] slot */
2967 unsigned i;
2968 assert(imm->Immediate.NrTokens - 1 <= 4);
2969 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
2970
2971 for(i = 0; i < 4; ++i )
2972 bld->immediates[bld->num_immediates][i] = imms[i];
2973
2974 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
2975 unsigned index = bld->num_immediates;
2976 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2977 LLVMBuilderRef builder = gallivm->builder;
2978 for (i = 0; i < 4; ++i ) {
2979 LLVMValueRef lindex = lp_build_const_int32(
2980 bld->bld_base.base.gallivm, index * 4 + i);
2981 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
2982 bld->imms_array, &lindex, 1, "");
2983 LLVMBuildStore(builder,
2984 bld->immediates[index][i],
2985 imm_ptr);
2986 }
2987 }
2988 }
2989
2990 bld->num_immediates++;
2991 }
2992
2993 static void
2994 ddx_emit(
2995 const struct lp_build_tgsi_action * action,
2996 struct lp_build_tgsi_context * bld_base,
2997 struct lp_build_emit_data * emit_data)
2998 {
2999 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3000
3001 emit_fetch_deriv(bld, emit_data->args[0], NULL,
3002 &emit_data->output[emit_data->chan], NULL);
3003 }
3004
3005 static void
3006 ddy_emit(
3007 const struct lp_build_tgsi_action * action,
3008 struct lp_build_tgsi_context * bld_base,
3009 struct lp_build_emit_data * emit_data)
3010 {
3011 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3012
3013 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3014 &emit_data->output[emit_data->chan]);
3015 }
3016
3017 static void
3018 kill_emit(
3019 const struct lp_build_tgsi_action * action,
3020 struct lp_build_tgsi_context * bld_base,
3021 struct lp_build_emit_data * emit_data)
3022 {
3023 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3024
3025 emit_kill(bld, bld_base->pc - 1);
3026 }
3027
3028 static void
3029 kill_if_emit(
3030 const struct lp_build_tgsi_action * action,
3031 struct lp_build_tgsi_context * bld_base,
3032 struct lp_build_emit_data * emit_data)
3033 {
3034 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3035
3036 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3037 }
3038
3039 static void
3040 tex_emit(
3041 const struct lp_build_tgsi_action * action,
3042 struct lp_build_tgsi_context * bld_base,
3043 struct lp_build_emit_data * emit_data)
3044 {
3045 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3046
3047 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3048 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3049 }
3050
3051 static void
3052 tex2_emit(
3053 const struct lp_build_tgsi_action * action,
3054 struct lp_build_tgsi_context * bld_base,
3055 struct lp_build_emit_data * emit_data)
3056 {
3057 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3058
3059 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3060 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3061 }
3062
3063 static void
3064 txb_emit(
3065 const struct lp_build_tgsi_action * action,
3066 struct lp_build_tgsi_context * bld_base,
3067 struct lp_build_emit_data * emit_data)
3068 {
3069 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3070
3071 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3072 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3073 }
3074
3075 static void
3076 txb2_emit(
3077 const struct lp_build_tgsi_action * action,
3078 struct lp_build_tgsi_context * bld_base,
3079 struct lp_build_emit_data * emit_data)
3080 {
3081 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3082
3083 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3084 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3085 }
3086
3087 static void
3088 txd_emit(
3089 const struct lp_build_tgsi_action * action,
3090 struct lp_build_tgsi_context * bld_base,
3091 struct lp_build_emit_data * emit_data)
3092 {
3093 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3094
3095 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3096 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3097 }
3098
3099 static void
3100 txl_emit(
3101 const struct lp_build_tgsi_action * action,
3102 struct lp_build_tgsi_context * bld_base,
3103 struct lp_build_emit_data * emit_data)
3104 {
3105 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3106
3107 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3108 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3109 }
3110
3111 static void
3112 txl2_emit(
3113 const struct lp_build_tgsi_action * action,
3114 struct lp_build_tgsi_context * bld_base,
3115 struct lp_build_emit_data * emit_data)
3116 {
3117 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3118
3119 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3120 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3121 }
3122
3123 static void
3124 txp_emit(
3125 const struct lp_build_tgsi_action * action,
3126 struct lp_build_tgsi_context * bld_base,
3127 struct lp_build_emit_data * emit_data)
3128 {
3129 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3130
3131 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3132 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3133 }
3134
3135 static void
3136 tg4_emit(
3137 const struct lp_build_tgsi_action * action,
3138 struct lp_build_tgsi_context * bld_base,
3139 struct lp_build_emit_data * emit_data)
3140 {
3141 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3142
3143 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3144 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3145 }
3146
3147 static void
3148 lodq_emit(
3149 const struct lp_build_tgsi_action * action,
3150 struct lp_build_tgsi_context * bld_base,
3151 struct lp_build_emit_data * emit_data)
3152 {
3153 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3154
3155 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3156 emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3157 }
3158
3159 static void
3160 txq_emit(
3161 const struct lp_build_tgsi_action * action,
3162 struct lp_build_tgsi_context * bld_base,
3163 struct lp_build_emit_data * emit_data)
3164 {
3165 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3166
3167 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3168 }
3169
3170 static void
3171 txf_emit(
3172 const struct lp_build_tgsi_action * action,
3173 struct lp_build_tgsi_context * bld_base,
3174 struct lp_build_emit_data * emit_data)
3175 {
3176 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3177
3178 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3179 }
3180
3181 static void
3182 sample_i_emit(
3183 const struct lp_build_tgsi_action * action,
3184 struct lp_build_tgsi_context * bld_base,
3185 struct lp_build_emit_data * emit_data)
3186 {
3187 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3188
3189 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3190 }
3191
3192 static void
3193 sample_emit(
3194 const struct lp_build_tgsi_action * action,
3195 struct lp_build_tgsi_context * bld_base,
3196 struct lp_build_emit_data * emit_data)
3197 {
3198 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3199
3200 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3201 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3202 }
3203
3204 static void
3205 sample_b_emit(
3206 const struct lp_build_tgsi_action * action,
3207 struct lp_build_tgsi_context * bld_base,
3208 struct lp_build_emit_data * emit_data)
3209 {
3210 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3211
3212 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3213 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3214 }
3215
3216 static void
3217 sample_c_emit(
3218 const struct lp_build_tgsi_action * action,
3219 struct lp_build_tgsi_context * bld_base,
3220 struct lp_build_emit_data * emit_data)
3221 {
3222 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3223
3224 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3225 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3226 }
3227
3228 static void
3229 sample_c_lz_emit(
3230 const struct lp_build_tgsi_action * action,
3231 struct lp_build_tgsi_context * bld_base,
3232 struct lp_build_emit_data * emit_data)
3233 {
3234 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3235
3236 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3237 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3238 }
3239
3240 static void
3241 sample_d_emit(
3242 const struct lp_build_tgsi_action * action,
3243 struct lp_build_tgsi_context * bld_base,
3244 struct lp_build_emit_data * emit_data)
3245 {
3246 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3247
3248 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3249 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3250 }
3251
3252 static void
3253 sample_l_emit(
3254 const struct lp_build_tgsi_action * action,
3255 struct lp_build_tgsi_context * bld_base,
3256 struct lp_build_emit_data * emit_data)
3257 {
3258 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3259
3260 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3261 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3262 }
3263
3264 static void
3265 gather4_emit(
3266 const struct lp_build_tgsi_action * action,
3267 struct lp_build_tgsi_context * bld_base,
3268 struct lp_build_emit_data * emit_data)
3269 {
3270 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3271
3272 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3273 FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
3274 }
3275
3276 static void
3277 sviewinfo_emit(
3278 const struct lp_build_tgsi_action * action,
3279 struct lp_build_tgsi_context * bld_base,
3280 struct lp_build_emit_data * emit_data)
3281 {
3282 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3283
3284 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3285 }
3286
3287 static LLVMValueRef
3288 mask_vec(struct lp_build_tgsi_context *bld_base)
3289 {
3290 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3291 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3292 struct lp_exec_mask *exec_mask = &bld->exec_mask;
3293
3294 if (!exec_mask->has_mask) {
3295 return lp_build_mask_value(bld->mask);
3296 }
3297 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
3298 exec_mask->exec_mask, "");
3299 }
3300
3301 static void
3302 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3303 LLVMValueRef ptr,
3304 LLVMValueRef mask)
3305 {
3306 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3307 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3308
3309 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3310
3311 LLVMBuildStore(builder, current_vec, ptr);
3312 }
3313
3314 static void
3315 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3316 LLVMValueRef ptr,
3317 LLVMValueRef mask)
3318 {
3319 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3320 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3321
3322 current_vec = lp_build_select(&bld_base->uint_bld,
3323 mask,
3324 bld_base->uint_bld.zero,
3325 current_vec);
3326
3327 LLVMBuildStore(builder, current_vec, ptr);
3328 }
3329
3330 static LLVMValueRef
3331 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3332 LLVMValueRef current_mask_vec,
3333 LLVMValueRef total_emitted_vertices_vec)
3334 {
3335 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3336 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3337 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3338 total_emitted_vertices_vec,
3339 bld->max_output_vertices_vec);
3340
3341 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3342 }
3343
/* TGSI_OPCODE_EMIT: emit one vertex from a geometry shader.
 * Gathers the current outputs, hands them to the gs interface, and
 * bumps the per-lane emitted-vertex counters for the lanes that are
 * both live and still below the declared vertex limit. */
static void
emit_vertex(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->emit_vertex) {
      /* live lanes, restricted to those under max_output_vertices */
      LLVMValueRef mask = mask_vec(bld_base);
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      mask = clamp_mask_to_max_output_vertices(bld, mask,
                                               total_emitted_vertices_vec);
      /* copy outputs from their alloca slots before handing them off */
      gather_outputs(bld);
      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                 bld->outputs,
                                 total_emitted_vertices_vec);
      /* bump both the per-primitive and the total vertex counters */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                mask);
      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
                                mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex emitted = ",
                           total_emitted_vertices_vec);
#endif
   }
}
3377
3378
/* Close the current output primitive for the lanes selected by mask.
 * Used by the ENDPRIM opcode (with the live-lane mask) and by the
 * epilogue (with the raw shader mask) for the implicit final flush. */
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      /* lanes with at least one vertex pending in the current primitive */
      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_vertices_vec,
                                               uint_bld->zero);
      /* We need to combine the current execution mask with the mask
         telling us which, if any, execution slots actually have
         unemitted primitives, this way we make sure that end_primitives
         executes only on the paths that have unflushed vertices */
      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");

      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
                                   emitted_vertices_vec,
                                   emitted_prims_vec);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      /* one more primitive done; restart its vertex count from zero */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                mask);
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}
3431
3432 static void
3433 end_primitive(
3434 const struct lp_build_tgsi_action * action,
3435 struct lp_build_tgsi_context * bld_base,
3436 struct lp_build_emit_data * emit_data)
3437 {
3438 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3439
3440 if (bld->gs_iface->end_primitive) {
3441 LLVMValueRef mask = mask_vec(bld_base);
3442 end_primitive_masked(bld_base, mask);
3443 }
3444 }
3445
3446 static void
3447 cal_emit(
3448 const struct lp_build_tgsi_action * action,
3449 struct lp_build_tgsi_context * bld_base,
3450 struct lp_build_emit_data * emit_data)
3451 {
3452 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3453
3454 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
3455 &bld_base->pc);
3456 }
3457
3458 static void
3459 ret_emit(
3460 const struct lp_build_tgsi_action * action,
3461 struct lp_build_tgsi_context * bld_base,
3462 struct lp_build_emit_data * emit_data)
3463 {
3464 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3465
3466 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
3467 }
3468
3469 static void
3470 brk_emit(
3471 const struct lp_build_tgsi_action * action,
3472 struct lp_build_tgsi_context * bld_base,
3473 struct lp_build_emit_data * emit_data)
3474 {
3475 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3476
3477 lp_exec_break(&bld->exec_mask, bld_base);
3478 }
3479
3480 static void
3481 if_emit(
3482 const struct lp_build_tgsi_action * action,
3483 struct lp_build_tgsi_context * bld_base,
3484 struct lp_build_emit_data * emit_data)
3485 {
3486 LLVMValueRef tmp;
3487 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3488
3489 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
3490 emit_data->args[0], bld->bld_base.base.zero);
3491 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3492 }
3493
3494 static void
3495 uif_emit(
3496 const struct lp_build_tgsi_action * action,
3497 struct lp_build_tgsi_context * bld_base,
3498 struct lp_build_emit_data * emit_data)
3499 {
3500 LLVMValueRef tmp;
3501 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3502 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3503
3504 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3505 emit_data->args[0], uint_bld->zero);
3506 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3507 }
3508
3509 static void
3510 case_emit(
3511 const struct lp_build_tgsi_action * action,
3512 struct lp_build_tgsi_context * bld_base,
3513 struct lp_build_emit_data * emit_data)
3514 {
3515 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3516
3517 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
3518 }
3519
3520 static void
3521 default_emit(
3522 const struct lp_build_tgsi_action * action,
3523 struct lp_build_tgsi_context * bld_base,
3524 struct lp_build_emit_data * emit_data)
3525 {
3526 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3527
3528 lp_exec_default(&bld->exec_mask, bld_base);
3529 }
3530
3531 static void
3532 switch_emit(
3533 const struct lp_build_tgsi_action * action,
3534 struct lp_build_tgsi_context * bld_base,
3535 struct lp_build_emit_data * emit_data)
3536 {
3537 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3538
3539 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
3540 }
3541
3542 static void
3543 endswitch_emit(
3544 const struct lp_build_tgsi_action * action,
3545 struct lp_build_tgsi_context * bld_base,
3546 struct lp_build_emit_data * emit_data)
3547 {
3548 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3549
3550 lp_exec_endswitch(&bld->exec_mask, bld_base);
3551 }
3552
3553 static void
3554 bgnloop_emit(
3555 const struct lp_build_tgsi_action * action,
3556 struct lp_build_tgsi_context * bld_base,
3557 struct lp_build_emit_data * emit_data)
3558 {
3559 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3560
3561 lp_exec_bgnloop(&bld->exec_mask);
3562 }
3563
3564 static void
3565 bgnsub_emit(
3566 const struct lp_build_tgsi_action * action,
3567 struct lp_build_tgsi_context * bld_base,
3568 struct lp_build_emit_data * emit_data)
3569 {
3570 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3571
3572 lp_exec_mask_bgnsub(&bld->exec_mask);
3573 }
3574
3575 static void
3576 else_emit(
3577 const struct lp_build_tgsi_action * action,
3578 struct lp_build_tgsi_context * bld_base,
3579 struct lp_build_emit_data * emit_data)
3580 {
3581 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3582
3583 lp_exec_mask_cond_invert(&bld->exec_mask);
3584 }
3585
3586 static void
3587 endif_emit(
3588 const struct lp_build_tgsi_action * action,
3589 struct lp_build_tgsi_context * bld_base,
3590 struct lp_build_emit_data * emit_data)
3591 {
3592 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3593
3594 lp_exec_mask_cond_pop(&bld->exec_mask);
3595 }
3596
3597 static void
3598 endloop_emit(
3599 const struct lp_build_tgsi_action * action,
3600 struct lp_build_tgsi_context * bld_base,
3601 struct lp_build_emit_data * emit_data)
3602 {
3603 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3604
3605 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
3606 }
3607
3608 static void
3609 endsub_emit(
3610 const struct lp_build_tgsi_action * action,
3611 struct lp_build_tgsi_context * bld_base,
3612 struct lp_build_emit_data * emit_data)
3613 {
3614 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3615
3616 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
3617 }
3618
3619 static void
3620 cont_emit(
3621 const struct lp_build_tgsi_action * action,
3622 struct lp_build_tgsi_context * bld_base,
3623 struct lp_build_emit_data * emit_data)
3624 {
3625 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3626
3627 lp_exec_continue(&bld->exec_mask);
3628 }
3629
/* Emit the shader prologue: allocate the dynamically-indexed register
 * arrays (temps/outputs/immediates/inputs) that indirect addressing
 * needs, set up the GS vertex/primitive counters, and optionally dump
 * the inputs for debugging. */
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   /* array sizes below are (file_max + 1) registers * 4 channels */
   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
      bld->temps_array = lp_build_array_alloca(gallivm,
                                               bld_base->base.vec_type, array_size,
                                               "temp_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                 bld_base->base.vec_type, array_size,
                                                 "output_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
      bld->imms_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                               vec_type, array_size,
                                               "input_array");

      assert(bld_base->info->num_inputs
                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            /* some channels may be absent; only store what we have */
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   if (bld->gs_iface) {
      /* allocate and zero the per-lane GS bookkeeping counters */
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}
3720
/* Emit the shader epilogue: optional debug dumps, then either flush any
 * pending GS vertices and invoke the gs epilogue, or (for non-GS
 * shaders) copy the outputs back into the caller-provided array. */
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   /* If we have indirect addressing in outputs we need to copy our alloca array
    * to the outputs slots specified by the caller */
   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* implicit end_primitives, needed in case there are any unflushed
         vertices in the cache. Note must not call end_primitive here
         since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      gather_outputs(bld);
   }
}
3758
3759 void
3760 lp_build_tgsi_soa(struct gallivm_state *gallivm,
3761 const struct tgsi_token *tokens,
3762 struct lp_type type,
3763 struct lp_build_mask_context *mask,
3764 LLVMValueRef consts_ptr,
3765 LLVMValueRef const_sizes_ptr,
3766 const struct lp_bld_tgsi_system_values *system_values,
3767 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
3768 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
3769 LLVMValueRef context_ptr,
3770 LLVMValueRef thread_data_ptr,
3771 struct lp_build_sampler_soa *sampler,
3772 const struct tgsi_shader_info *info,
3773 const struct lp_build_tgsi_gs_iface *gs_iface)
3774 {
3775 struct lp_build_tgsi_soa_context bld;
3776
3777 struct lp_type res_type;
3778
3779 assert(type.length <= LP_MAX_VECTOR_LENGTH);
3780 memset(&res_type, 0, sizeof res_type);
3781 res_type.width = type.width;
3782 res_type.length = type.length;
3783 res_type.sign = 1;
3784
3785 /* Setup build context */
3786 memset(&bld, 0, sizeof bld);
3787 lp_build_context_init(&bld.bld_base.base, gallivm, type);
3788 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
3789 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
3790 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
3791 {
3792 struct lp_type dbl_type;
3793 dbl_type = type;
3794 dbl_type.width *= 2;
3795 lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
3796 }
3797 {
3798 struct lp_type uint64_type;
3799 uint64_type = lp_uint_type(type);
3800 uint64_type.width *= 2;
3801 lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
3802 }
3803 {
3804 struct lp_type int64_type;
3805 int64_type = lp_int_type(type);
3806 int64_type.width *= 2;
3807 lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
3808 }
3809 bld.mask = mask;
3810 bld.inputs = inputs;
3811 bld.outputs = outputs;
3812 bld.consts_ptr = consts_ptr;
3813 bld.const_sizes_ptr = const_sizes_ptr;
3814 bld.sampler = sampler;
3815 bld.bld_base.info = info;
3816 bld.indirect_files = info->indirect_files;
3817 bld.context_ptr = context_ptr;
3818 bld.thread_data_ptr = thread_data_ptr;
3819
3820 /*
3821 * If the number of temporaries is rather large then we just
3822 * allocate them as an array right from the start and treat
3823 * like indirect temporaries.
3824 */
3825 if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
3826 bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
3827 }
3828 /*
3829 * For performance reason immediates are always backed in a static
3830 * array, but if their number is too great, we have to use just
3831 * a dynamically allocated array.
3832 */
3833 bld.use_immediates_array =
3834 (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
3835 if (bld.use_immediates_array) {
3836 bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
3837 }
3838
3839
3840 bld.bld_base.soa = TRUE;
3841 bld.bld_base.emit_debug = emit_debug;
3842 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
3843 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
3844 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
3845 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
3846 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
3847 bld.bld_base.emit_store = emit_store;
3848
3849 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
3850 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
3851
3852 bld.bld_base.emit_prologue = emit_prologue;
3853 bld.bld_base.emit_epilogue = emit_epilogue;
3854
3855 /* Set opcode actions */
3856 lp_set_default_actions_cpu(&bld.bld_base);
3857
3858 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
3859 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
3860 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
3861 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
3862 bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
3863 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
3864 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
3865 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
3866 bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
3867 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
3868 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
3869 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
3870 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
3871 bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
3872 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
3873 bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
3874 bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
3875 bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
3876 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
3877 bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
3878 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
3879 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
3880 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
3881 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
3882 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
3883 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
3884 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
3885 bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
3886 bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
3887 bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
3888 bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
3889 bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
3890 /* DX10 sampling ops */
3891 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
3892 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
3893 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
3894 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
3895 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
3896 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
3897 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
3898 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
3899 bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
3900 bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
3901
3902 if (gs_iface) {
3903 /* There's no specific value for this because it should always
3904 * be set, but apps using ext_geometry_shader4 quite often
3905 * were forgetting so we're using MAX_VERTEX_VARYING from
3906 * that spec even though we could debug_assert if it's not
3907 * set, but that's a lot uglier. */
3908 uint max_output_vertices;
3909
3910 /* inputs are always indirect with gs */
3911 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
3912 bld.gs_iface = gs_iface;
3913 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
3914 bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
3915 bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
3916
3917 max_output_vertices =
3918 info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
3919 if (!max_output_vertices)
3920 max_output_vertices = 32;
3921
3922 bld.max_output_vertices_vec =
3923 lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
3924 max_output_vertices);
3925 }
3926
3927 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
3928
3929 bld.system_values = *system_values;
3930
3931 lp_build_tgsi_llvm(&bld.bld_base, tokens);
3932
3933 if (0) {
3934 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
3935 LLVMValueRef function = LLVMGetBasicBlockParent(block);
3936 debug_printf("11111111111111111111111111111 \n");
3937 tgsi_dump(tokens, 0);
3938 lp_debug_dump_value(function);
3939 debug_printf("2222222222222222222222222222 \n");
3940 }
3941
3942 if (0) {
3943 LLVMModuleRef module = LLVMGetGlobalParent(
3944 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
3945 LLVMDumpModule(module);
3946
3947 }
3948 lp_exec_mask_fini(&bld.exec_mask);
3949 }