gallivm: Make it possible to disable some optimization shortcuts in release builds
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "tgsi/tgsi_strings.h"
51 #include "lp_bld_tgsi_action.h"
52 #include "lp_bld_type.h"
53 #include "lp_bld_const.h"
54 #include "lp_bld_arit.h"
55 #include "lp_bld_bitarit.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_init.h"
58 #include "lp_bld_logic.h"
59 #include "lp_bld_swizzle.h"
60 #include "lp_bld_flow.h"
61 #include "lp_bld_quad.h"
62 #include "lp_bld_tgsi.h"
63 #include "lp_bld_limits.h"
64 #include "lp_bld_debug.h"
65 #include "lp_bld_printf.h"
66 #include "lp_bld_sample.h"
67 #include "lp_bld_struct.h"
68
69 /* SM 4.0 says that subroutines can nest 32 deep and
70 * we need one more for our main function */
71 #define LP_MAX_NUM_FUNCS 33
72
73 #define DUMP_GS_EMITS 0
74
75 /*
76 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
77 * instruction.
78 *
79 * TODO:
80 * - take execution masks into consideration
81 * - debug control-flow instructions
82 */
83 #define DEBUG_EXECUTION 0
84
85
86 /*
87 * Emit code to print a register value.
88 */
89 static void
90 emit_dump_reg(struct gallivm_state *gallivm,
91 unsigned file,
92 unsigned index,
93 unsigned chan,
94 LLVMValueRef value)
95 {
96 char buf[32];
97
98 util_snprintf(buf, sizeof buf, " %s[%u].%c = ",
99 tgsi_file_name(file),
100 index, "xyzw"[chan]);
101
102 lp_build_print_value(gallivm, buf, value);
103 }
104
105 /*
106 * Return the context for the current function.
107 * (always 'main' if the shader doesn't make any function calls)
108 */
109 static inline struct function_ctx *
110 func_ctx(struct lp_exec_mask *mask)
111 {
112 assert(mask->function_stack_size > 0);
113 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
114 return &mask->function_stack[mask->function_stack_size - 1];
115 }
116
117 /*
118 * Returns true if we're in a loop.
119 * The check is global: it returns true even when the current
120 * function has no loop of its own, as long as some calling
121 * function was inside a loop when this one was called.
122 */
123 static inline boolean
124 mask_has_loop(struct lp_exec_mask *mask)
125 {
126 int i;
127 for (i = mask->function_stack_size - 1; i >= 0; --i) {
128 const struct function_ctx *ctx = &mask->function_stack[i];
129 if (ctx->loop_stack_size > 0)
130 return TRUE;
131 }
132 return FALSE;
133 }
134
135 /*
136 * Returns true if we're inside a switch statement.
137 * The check is global: it returns true even when the current
138 * function has no switch of its own, as long as some calling
139 * function was inside a switch when this one was called.
140 */
141 static inline boolean
142 mask_has_switch(struct lp_exec_mask *mask)
143 {
144 int i;
145 for (i = mask->function_stack_size - 1; i >= 0; --i) {
146 const struct function_ctx *ctx = &mask->function_stack[i];
147 if (ctx->switch_stack_size > 0)
148 return TRUE;
149 }
150 return FALSE;
151 }
152
153 /*
154 * Returns true if we're inside a conditional.
155 * The check is global: it returns true even when the current
156 * function has no conditional of its own, as long as some calling
157 * function was inside a conditional when this one was called.
158 */
159 static inline boolean
160 mask_has_cond(struct lp_exec_mask *mask)
161 {
162 int i;
163 for (i = mask->function_stack_size - 1; i >= 0; --i) {
164 const struct function_ctx *ctx = &mask->function_stack[i];
165 if (ctx->cond_stack_size > 0)
166 return TRUE;
167 }
168 return FALSE;
169 }
170
171
172 /*
173 * Initialize a function context at the specified index.
174 */
175 static void
176 lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
177 {
178 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
179 LLVMBuilderRef builder = mask->bld->gallivm->builder;
180 struct function_ctx *ctx = &mask->function_stack[function_idx];
181
182 ctx->cond_stack_size = 0;
183 ctx->loop_stack_size = 0;
184 ctx->switch_stack_size = 0;
185
186 if (function_idx == 0) {
187 ctx->ret_mask = mask->ret_mask;
188 }
189
190 ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
191 int_type, "looplimiter");
192 LLVMBuildStore(
193 builder,
194 LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
195 ctx->loop_limiter);
196 }
197
198 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
199 {
200 mask->bld = bld;
201 mask->has_mask = FALSE;
202 mask->ret_in_main = FALSE;
203 /* For the main function */
204 mask->function_stack_size = 1;
205
206 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
207 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
208 mask->cond_mask = mask->switch_mask =
209 LLVMConstAllOnes(mask->int_vec_type);
210
211 mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
212 sizeof(mask->function_stack[0]));
213 lp_exec_mask_function_init(mask, 0);
214 }
215
216 static void
217 lp_exec_mask_fini(struct lp_exec_mask *mask)
218 {
219 FREE(mask->function_stack);
220 }
221
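/*
 * Recompute exec_mask by AND-ing the component masks together.
 * Roughly: exec_mask = cond_mask & cont_mask & break_mask & switch_mask
 * & ret_mask, where each term is only folded in when the corresponding
 * construct is actually active, so straight-line code keeps
 * has_mask == FALSE and stores stay unpredicated.
 */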
222 static void lp_exec_mask_update(struct lp_exec_mask *mask)
223 {
224 LLVMBuilderRef builder = mask->bld->gallivm->builder;
225 boolean has_loop_mask = mask_has_loop(mask);
226 boolean has_cond_mask = mask_has_cond(mask);
227 boolean has_switch_mask = mask_has_switch(mask);
228 boolean has_ret_mask = mask->function_stack_size > 1 ||
229 mask->ret_in_main;
230
231 if (has_loop_mask) {
232 /* for loops we need to update the entire mask at runtime */
233 LLVMValueRef tmp;
234 assert(mask->break_mask);
235 tmp = LLVMBuildAnd(builder,
236 mask->cont_mask,
237 mask->break_mask,
238 "maskcb");
239 mask->exec_mask = LLVMBuildAnd(builder,
240 mask->cond_mask,
241 tmp,
242 "maskfull");
243 } else
244 mask->exec_mask = mask->cond_mask;
245
246 if (has_switch_mask) {
247 mask->exec_mask = LLVMBuildAnd(builder,
248 mask->exec_mask,
249 mask->switch_mask,
250 "switchmask");
251 }
252
253 if (has_ret_mask) {
254 mask->exec_mask = LLVMBuildAnd(builder,
255 mask->exec_mask,
256 mask->ret_mask,
257 "callmask");
258 }
259
260 mask->has_mask = (has_cond_mask ||
261 has_loop_mask ||
262 has_switch_mask ||
263 has_ret_mask);
264 }
265
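/*
 * IF: push the current conditional mask onto the stack and AND the
 * branch condition into it. Nesting beyond LP_MAX_TGSI_NESTING is only
 * counted (so pops stay balanced) but otherwise ignored.
 */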
266 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
267 LLVMValueRef val)
268 {
269 LLVMBuilderRef builder = mask->bld->gallivm->builder;
270 struct function_ctx *ctx = func_ctx(mask);
271
272 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
273 ctx->cond_stack_size++;
274 return;
275 }
276 if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
277 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
278 }
279 ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
280 assert(LLVMTypeOf(val) == mask->int_vec_type);
281 mask->cond_mask = LLVMBuildAnd(builder,
282 mask->cond_mask,
283 val,
284 "");
285 lp_exec_mask_update(mask);
286 }
287
288 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
289 {
290 LLVMBuilderRef builder = mask->bld->gallivm->builder;
291 struct function_ctx *ctx = func_ctx(mask);
292 LLVMValueRef prev_mask;
293 LLVMValueRef inv_mask;
294
295 assert(ctx->cond_stack_size);
296 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
297 return;
298 prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
299 if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
300 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
301 }
302
303 inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
304
305 mask->cond_mask = LLVMBuildAnd(builder,
306 inv_mask,
307 prev_mask, "");
308 lp_exec_mask_update(mask);
309 }
310
311 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
312 {
313 struct function_ctx *ctx = func_ctx(mask);
314 assert(ctx->cond_stack_size);
315 --ctx->cond_stack_size;
316 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
317 return;
318 mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
319 lp_exec_mask_update(mask);
320 }
321
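/*
 * BGNLOOP: save the enclosing loop state, allocate a fresh break
 * variable (break_var) and emit the "bgnloop" block that ENDLOOP will
 * branch back to.
 */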
322 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
323 {
324 LLVMBuilderRef builder = mask->bld->gallivm->builder;
325 struct function_ctx *ctx = func_ctx(mask);
326
327 if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
328 ++ctx->loop_stack_size;
329 return;
330 }
331
332 ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
333 ctx->break_type;
334 ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;
335
336 ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
337 ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
338 ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
339 ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
340 ++ctx->loop_stack_size;
341
342 ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
343 LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
344
345 ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
346
347 LLVMBuildBr(builder, ctx->loop_block);
348 LLVMPositionBuilderAtEnd(builder, ctx->loop_block);
349
350 mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");
351
352 lp_exec_mask_update(mask);
353 }
354
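/*
 * BRK: knock the currently active lanes out of either the break mask
 * (when breaking out of a loop) or the switch mask (when breaking out
 * of a switch), depending on which construct the break belongs to.
 */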
355 static void lp_exec_break(struct lp_exec_mask *mask,
356 struct lp_build_tgsi_context * bld_base)
357 {
358 LLVMBuilderRef builder = mask->bld->gallivm->builder;
359 struct function_ctx *ctx = func_ctx(mask);
360
361 if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
362 LLVMValueRef exec_mask = LLVMBuildNot(builder,
363 mask->exec_mask,
364 "break");
365
366 mask->break_mask = LLVMBuildAnd(builder,
367 mask->break_mask,
368 exec_mask, "break_full");
369 }
370 else {
371 enum tgsi_opcode opcode =
372 bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
373 boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
374 opcode == TGSI_OPCODE_CASE);
375
376
377 if (ctx->switch_in_default) {
378 /*
379 * Stop default execution, but only if this is an unconditional break.
380 * (The condition here is not perfect, since dead code after a break is
381 * allowed, but it should be sufficient since false negatives merely
382 * leave the code unoptimized - so we don't have to pre-evaluate that.)
383 */
384 if (break_always && ctx->switch_pc) {
385 bld_base->pc = ctx->switch_pc;
386 return;
387 }
388 }
389
390 if (break_always) {
391 mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
392 }
393 else {
394 LLVMValueRef exec_mask = LLVMBuildNot(builder,
395 mask->exec_mask,
396 "break");
397 mask->switch_mask = LLVMBuildAnd(builder,
398 mask->switch_mask,
399 exec_mask, "break_switch");
400 }
401 }
402
403 lp_exec_mask_update(mask);
404 }
405
406 static void lp_exec_continue(struct lp_exec_mask *mask)
407 {
408 LLVMBuilderRef builder = mask->bld->gallivm->builder;
409 LLVMValueRef exec_mask = LLVMBuildNot(builder,
410 mask->exec_mask,
411 "");
412
413 mask->cont_mask = LLVMBuildAnd(builder,
414 mask->cont_mask,
415 exec_mask, "");
416
417 lp_exec_mask_update(mask);
418 }
419
420
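/*
 * ENDLOOP: restore the continue mask, persist the break mask across
 * iterations via break_var, and branch back to the loop header while
 * any lane is still active and the loop limiter (which caps runaway
 * loops at LP_MAX_TGSI_LOOP_ITERATIONS) hasn't expired.
 */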
421 static void lp_exec_endloop(struct gallivm_state *gallivm,
422 struct lp_exec_mask *mask)
423 {
424 LLVMBuilderRef builder = mask->bld->gallivm->builder;
425 struct function_ctx *ctx = func_ctx(mask);
426 LLVMBasicBlockRef endloop;
427 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
428 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
429 mask->bld->type.width *
430 mask->bld->type.length);
431 LLVMValueRef i1cond, i2cond, icond, limiter;
432
433 assert(mask->break_mask);
434
435
436 assert(ctx->loop_stack_size);
437 if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
438 --ctx->loop_stack_size;
439 return;
440 }
441
442 /*
443 * Restore the cont_mask, but don't pop
444 */
445 mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
446 lp_exec_mask_update(mask);
447
448 /*
449 * Unlike the continue mask, the break_mask must be preserved across loop
450 * iterations
451 */
452 LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
453
454 /* Decrement the loop limiter */
455 limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");
456
457 limiter = LLVMBuildSub(
458 builder,
459 limiter,
460 LLVMConstInt(int_type, 1, false),
461 "");
462
463 LLVMBuildStore(builder, limiter, ctx->loop_limiter);
464
465 /* i1cond = (mask != 0) */
466 i1cond = LLVMBuildICmp(
467 builder,
468 LLVMIntNE,
469 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
470 LLVMConstNull(reg_type), "i1cond");
471
472 /* i2cond = (looplimiter > 0) */
473 i2cond = LLVMBuildICmp(
474 builder,
475 LLVMIntSGT,
476 limiter,
477 LLVMConstNull(int_type), "i2cond");
478
479 /* if( i1cond && i2cond ) */
480 icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
481
482 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
483
484 LLVMBuildCondBr(builder,
485 icond, ctx->loop_block, endloop);
486
487 LLVMPositionBuilderAtEnd(builder, endloop);
488
489 assert(ctx->loop_stack_size);
490 --ctx->loop_stack_size;
491 mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
492 mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
493 ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
494 ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
495 ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
496 ctx->switch_stack_size];
497
498 lp_exec_mask_update(mask);
499 }
500
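/*
 * SWITCH: save the enclosing switch state and start with an all-zero
 * switch mask; lanes only become active again once a CASE or DEFAULT
 * matches.
 */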
501 static void lp_exec_switch(struct lp_exec_mask *mask,
502 LLVMValueRef switchval)
503 {
504 struct function_ctx *ctx = func_ctx(mask);
505
506 if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
507 ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
508 ctx->switch_stack_size++;
509 return;
510 }
511
512 ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
513 ctx->break_type;
514 ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
515
516 ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
517 ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
518 ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
519 ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
520 ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
521 ctx->switch_stack_size++;
522
523 mask->switch_mask = LLVMConstNull(mask->int_vec_type);
524 ctx->switch_val = switchval;
525 ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
526 ctx->switch_in_default = false;
527 ctx->switch_pc = 0;
528
529 lp_exec_mask_update(mask);
530 }
531
532 static void lp_exec_endswitch(struct lp_exec_mask *mask,
533 struct lp_build_tgsi_context * bld_base)
534 {
535 LLVMBuilderRef builder = mask->bld->gallivm->builder;
536 struct function_ctx *ctx = func_ctx(mask);
537
538 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
539 ctx->switch_stack_size--;
540 return;
541 }
542
543 /* check if there's a deferred default; if so, do it now */
544 if (ctx->switch_pc && !ctx->switch_in_default) {
545 LLVMValueRef prevmask, defaultmask;
546 unsigned tmp_pc;
547 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
548 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
549 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
550 ctx->switch_in_default = true;
551
552 lp_exec_mask_update(mask);
553
554 assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
555 TGSI_OPCODE_DEFAULT);
556
557 tmp_pc = bld_base->pc;
558 bld_base->pc = ctx->switch_pc;
559 /*
560 * re-purpose switch_pc to point to here again, since we stop execution of
561 * the deferred default after the next break.
562 */
563 ctx->switch_pc = tmp_pc - 1;
564
565 return;
566 }
567
568 else if (ctx->switch_pc && ctx->switch_in_default) {
569 assert(bld_base->pc == ctx->switch_pc + 1);
570 }
571
572 ctx->switch_stack_size--;
573 mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
574 ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
575 ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
576 ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
577 ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
578
579 ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
580
581 lp_exec_mask_update(mask);
582 }
583
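/*
 * CASE: lanes where caseval equals the switch value join the active
 * switch mask (within the enclosing switch mask); the matches are also
 * accumulated into switch_mask_default so DEFAULT can later run on the
 * complement of all cases seen.
 */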
584 static void lp_exec_case(struct lp_exec_mask *mask,
585 LLVMValueRef caseval)
586 {
587 LLVMBuilderRef builder = mask->bld->gallivm->builder;
588 struct function_ctx *ctx = func_ctx(mask);
589
590 LLVMValueRef casemask, prevmask;
591
592 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
593 return;
594 }
595
596 /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
597 if (!ctx->switch_in_default) {
598 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
599 casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
600 ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
601 ctx->switch_mask_default, "sw_default_mask");
602 casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
603 mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
604
605 lp_exec_mask_update(mask);
606 }
607 }
608
609 /*
610 * Analyse the default statement in a switch.
611 * \return true if default is the last statement, false otherwise
612 * \param default_pc_start receives the pc of the instruction to jump to
613 * if default wasn't last and there's no
614 * fallthrough into default.
615 */
616 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
617 struct lp_build_tgsi_context * bld_base,
618 int *default_pc_start)
619 {
620 unsigned pc = bld_base->pc;
621 struct function_ctx *ctx = func_ctx(mask);
622 int curr_switch_stack = ctx->switch_stack_size;
623
624 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
625 return false;
626 }
627
628 /* skip over case statements which are together with default */
629 while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
630 pc++;
631 }
632
633 while (pc != ~0u && pc < bld_base->num_instructions) {
634 enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
635 switch (opcode) {
636 case TGSI_OPCODE_CASE:
637 if (curr_switch_stack == ctx->switch_stack_size) {
638 *default_pc_start = pc - 1;
639 return false;
640 }
641 break;
642 case TGSI_OPCODE_SWITCH:
643 curr_switch_stack++;
644 break;
645 case TGSI_OPCODE_ENDSWITCH:
646 if (curr_switch_stack == ctx->switch_stack_size) {
647 *default_pc_start = pc - 1;
648 return true;
649 }
650 curr_switch_stack--;
651 break;
652 default:
653 ; /* nothing */
654 }
655 pc++;
656 }
657 /* should never arrive here */
658 assert(0);
659 return true;
660 }
661
662 static void lp_exec_default(struct lp_exec_mask *mask,
663 struct lp_build_tgsi_context * bld_base)
664 {
665 LLVMBuilderRef builder = mask->bld->gallivm->builder;
666 struct function_ctx *ctx = func_ctx(mask);
667
668 int default_exec_pc;
669 boolean default_is_last;
670
671 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
672 return;
673 }
674
675 /*
676 * This is a messy opcode, because it may not always be at the end and
677 * there can be fallthrough in and out of it.
678 */
679
680 default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
681 /*
682 * If it is last statement in switch (note that case statements appearing
683 * "at the same time" as default don't change that) everything is just fine,
684 * update switch mask and go on. This means we can handle default with
685 * fallthrough INTO it without overhead, if it is last.
686 */
687 if (default_is_last) {
688 LLVMValueRef prevmask, defaultmask;
689 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
690 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
691 defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
692 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
693 ctx->switch_in_default = true;
694
695 lp_exec_mask_update(mask);
696 }
697 else {
698 /*
699 * Technically, "case" immediately before default isn't really a
700 * fallthrough; however we still have to count it as such since we
701 * have already updated the masks.
702 * If that happens in practice we could add a switch optimizer pass
703 * which just gets rid of all case statements appearing together with
704 * default (or could do switch analysis at switch start time instead).
705 */
706 enum tgsi_opcode opcode =
707 bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
708 boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
709 opcode != TGSI_OPCODE_SWITCH);
710 /*
711 * If it is not last statement and there was no fallthrough into it,
712 * we record the PC and continue execution at next case (again, those
713 * case encountered at the same time don't count). At endswitch
714 * time, we update switchmask, and go back executing the code we skipped
715 * until the next break (possibly re-executing some code with changed mask
716 * if there was a fallthrough out of default).
717 * Finally, if it is not last statement and there was a fallthrough into it,
718 * do the same as with the former case, except instead of skipping the code
719 * just execute it without updating the mask, then go back and re-execute.
720 */
721 ctx->switch_pc = bld_base->pc;
722 if (!ft_into) {
723 bld_base->pc = default_exec_pc;
724 }
725 }
726 }
727
728
729 /* Stores val into the address pointed to by dst_ptr.
730 * mask->exec_mask is used to figure out which lanes of val
731 * should be stored into the address
732 * (0 means don't store this lane, 1 means do store).
733 */
734 static void lp_exec_mask_store(struct lp_exec_mask *mask,
735 struct lp_build_context *bld_store,
736 LLVMValueRef val,
737 LLVMValueRef dst_ptr)
738 {
739 LLVMBuilderRef builder = mask->bld->gallivm->builder;
740 LLVMValueRef exec_mask = mask->has_mask ? mask->exec_mask : NULL;
741
742 assert(lp_check_value(bld_store->type, val));
743 assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
744 assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val) ||
745 LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(dst_ptr))) == LLVMArrayTypeKind);
746
747 if (exec_mask) {
748 LLVMValueRef res, dst;
749
750 dst = LLVMBuildLoad(builder, dst_ptr, "");
751 res = lp_build_select(bld_store, exec_mask, val, dst);
752 LLVMBuildStore(builder, res, dst_ptr);
753 } else
754 LLVMBuildStore(builder, val, dst_ptr);
755 }
756
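/*
 * CAL: push a fresh function context, remember the return pc and the
 * caller's return mask, then redirect the TGSI pc to the subroutine
 * body. Calls nested deeper than LP_MAX_NUM_FUNCS are ignored.
 */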
757 static void lp_exec_mask_call(struct lp_exec_mask *mask,
758 int func,
759 int *pc)
760 {
761 if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
762 return;
763 }
764
765 lp_exec_mask_function_init(mask, mask->function_stack_size);
766 mask->function_stack[mask->function_stack_size].pc = *pc;
767 mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
768 mask->function_stack_size++;
769 *pc = func;
770 }
771
772 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
773 {
774 LLVMBuilderRef builder = mask->bld->gallivm->builder;
775 struct function_ctx *ctx = func_ctx(mask);
776 LLVMValueRef exec_mask;
777
778 if (ctx->cond_stack_size == 0 &&
779 ctx->loop_stack_size == 0 &&
780 ctx->switch_stack_size == 0 &&
781 mask->function_stack_size == 1) {
782 /* returning from main() */
783 *pc = -1;
784 return;
785 }
786
787 if (mask->function_stack_size == 1) {
788 /*
789 * This requires special handling since we need to ensure
790 * we don't drop the mask even if we have no call stack
791 * (e.g. after a ret in a if clause after the endif)
792 */
793 mask->ret_in_main = TRUE;
794 }
795
796 exec_mask = LLVMBuildNot(builder,
797 mask->exec_mask,
798 "ret");
799
800 mask->ret_mask = LLVMBuildAnd(builder,
801 mask->ret_mask,
802 exec_mask, "ret_full");
803
804 lp_exec_mask_update(mask);
805 }
806
807 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
808 {
809 }
810
811 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
812 {
813 struct function_ctx *ctx;
814
815 assert(mask->function_stack_size > 1);
816 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
817
818 ctx = func_ctx(mask);
819 mask->function_stack_size--;
820
821 *pc = ctx->pc;
822 mask->ret_mask = ctx->ret_mask;
823
824 lp_exec_mask_update(mask);
825 }
826
827
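/*
 * Return a pointer to the storage backing one channel of a register.
 * Indirectly addressed files live in a single flat array indexed as
 * index * 4 + chan; otherwise each channel has its own scalar alloca.
 */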
828 static LLVMValueRef
829 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
830 unsigned file,
831 int index,
832 unsigned chan)
833 {
834 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
835 LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
836 LLVMValueRef var_of_array;
837
838 switch (file) {
839 case TGSI_FILE_TEMPORARY:
840 array_of_vars = bld->temps;
841 var_of_array = bld->temps_array;
842 break;
843 case TGSI_FILE_OUTPUT:
844 array_of_vars = bld->outputs;
845 var_of_array = bld->outputs_array;
846 break;
847 default:
848 assert(0);
849 return NULL;
850 }
851
852 assert(chan < 4);
853
854 if (bld->indirect_files & (1 << file)) {
855 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
856 if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
857 LLVMValueRef gep[2];
858 gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
859 gep[1] = lindex;
860 return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
861 } else {
862 return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
863 }
864 }
865 else {
866 assert(index <= bld->bld_base.info->file_max[file]);
867 return array_of_vars[index][chan];
868 }
869 }
870
871
872 /**
873 * Return pointer to a temporary register channel (src or dest).
874 * Note that indirect addressing cannot be handled here.
875 * \param index which temporary register
876 * \param chan which channel of the temp register.
877 */
878 LLVMValueRef
879 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
880 unsigned index,
881 unsigned chan)
882 {
883 return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
884 }
885
886 /**
887 * Return pointer to an output register channel (src or dest).
888 * Note that indirect addressing cannot be handled here.
889 * \param index which output register
890 * \param chan which channel of the output register.
891 */
892 LLVMValueRef
893 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
894 unsigned index,
895 unsigned chan)
896 {
897 return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
898 }
899
900 /*
901 * If we have indirect addressing in outputs, copy our alloca array
902 * to the output slots specified by the caller, to make sure
903 * our outputs are delivered consistently via the same interface.
904 */
905 static void
906 gather_outputs(struct lp_build_tgsi_soa_context * bld)
907 {
908 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
909 unsigned index, chan;
910 assert(bld->bld_base.info->num_outputs <=
911 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
912 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
913 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
914 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
915 }
916 }
917 }
918 }
919
920 /**
921 * Gather vector.
922 * XXX the lp_build_gather() function should be capable of doing this
923 * with a little work.
924 */
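/*
 * When indexes2 is non-NULL the result is a double-length float vector
 * whose even/odd elements come from indexes/indexes2 respectively;
 * 64-bit fetches use this and later bitcast the interleaved pair back
 * to vec_length 64-bit values.
 */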
925 static LLVMValueRef
926 build_gather(struct lp_build_tgsi_context *bld_base,
927 LLVMValueRef base_ptr,
928 LLVMValueRef indexes,
929 LLVMValueRef overflow_mask,
930 LLVMValueRef indexes2)
931 {
932 struct gallivm_state *gallivm = bld_base->base.gallivm;
933 LLVMBuilderRef builder = gallivm->builder;
934 struct lp_build_context *uint_bld = &bld_base->uint_bld;
935 struct lp_build_context *bld = &bld_base->base;
936 LLVMValueRef res;
937 unsigned i;
938
939 if (indexes2)
940 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
941 else
942 res = bld->undef;
943 /*
944 * overflow_mask is a vector telling us which channels
945 * in the vector overflowed. We use the overflow behavior for
946 * constant buffers which is defined as:
947 * Out of bounds access to constant buffer returns 0 in all
948 * components. Out of bounds behavior is always with respect
949 * to the size of the buffer bound at that slot.
950 */
951
952 if (overflow_mask) {
953 /*
954 * We avoid per-element control flow here (also due to llvm going crazy,
955 * though I suspect it's better anyway since overflow is likely rare).
956 * Note that since we still fetch from buffers even if num_elements was
957 * zero (in this case we'll fetch from index zero) the jit func callers
958 * MUST provide valid fake constant buffers of size 4x32 (the values do
959 * not matter), otherwise we'd still need (not per element though)
960 * control flow.
961 */
962 indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
963 if (indexes2)
964 indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
965 }
966
967 /*
968 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
969 */
970 for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
971 LLVMValueRef si, di;
972 LLVMValueRef index;
973 LLVMValueRef scalar_ptr, scalar;
974
975 di = lp_build_const_int32(bld->gallivm, i);
976 if (indexes2)
977 si = lp_build_const_int32(bld->gallivm, i >> 1);
978 else
979 si = di;
980
981 if (indexes2 && (i & 1)) {
982 index = LLVMBuildExtractElement(builder,
983 indexes2, si, "");
984 } else {
985 index = LLVMBuildExtractElement(builder,
986 indexes, si, "");
987 }
988 scalar_ptr = LLVMBuildGEP(builder, base_ptr,
989 &index, 1, "gather_ptr");
990 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
991
992 res = LLVMBuildInsertElement(builder, res, scalar, di, "");
993 }
994
995 if (overflow_mask) {
996 if (indexes2) {
997 res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
998 overflow_mask = LLVMBuildSExt(builder, overflow_mask,
999 bld_base->dbl_bld.int_vec_type, "");
1000 res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
1001 bld_base->dbl_bld.zero, res);
1002 } else
1003 res = lp_build_select(bld, overflow_mask, bld->zero, res);
1004 }
1005
1006 return res;
1007 }
1008
1009
1010 /**
1011 * Scatter/store vector.
1012 */
1013 static void
1014 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
1015 LLVMValueRef base_ptr,
1016 LLVMValueRef indexes,
1017 LLVMValueRef values,
1018 struct lp_exec_mask *mask)
1019 {
1020 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1021 LLVMBuilderRef builder = gallivm->builder;
1022 unsigned i;
1023 LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
1024
1025 /*
1026 * Loop over elements of index_vec, store scalar value.
1027 */
1028 for (i = 0; i < bld->bld_base.base.type.length; i++) {
1029 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1030 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
1031 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
1032 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
1033 LLVMValueRef scalar_pred = pred ?
1034 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
1035
1036 if (0)
1037 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
1038 ii, val, index, scalar_ptr);
1039
1040 if (scalar_pred) {
1041 LLVMValueRef real_val, dst_val;
1042 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
1043 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
1044 LLVMBuildStore(builder, real_val, scalar_ptr);
1045 }
1046 else {
1047 LLVMBuildStore(builder, val, scalar_ptr);
1048 }
1049 }
1050 }
1051
1052
1053 /**
1054 * Read the current value of the ADDR register, convert the floats to
1055 * ints, add the base index and return the vector of offsets.
1056 * The offsets will be used to index into the constant buffer or
1057 * temporary register file.
1058 */
1059 static LLVMValueRef
1060 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
1061 unsigned reg_file, unsigned reg_index,
1062 const struct tgsi_ind_register *indirect_reg)
1063 {
1064 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1065 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
1066 /* use the component of the address register selected by the swizzle */
1067 unsigned swizzle = indirect_reg->Swizzle;
1068 LLVMValueRef base;
1069 LLVMValueRef rel;
1070 LLVMValueRef max_index;
1071 LLVMValueRef index;
1072
1073 assert(bld->indirect_files & (1 << reg_file));
1074
1075 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
1076
1077 assert(swizzle < 4);
1078 switch (indirect_reg->File) {
1079 case TGSI_FILE_ADDRESS:
1080 rel = LLVMBuildLoad(builder,
1081 bld->addr[indirect_reg->Index][swizzle],
1082 "load addr reg");
1083 /* ADDR LLVM values already have LLVM integer type. */
1084 break;
1085 case TGSI_FILE_TEMPORARY:
1086 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
1087 rel = LLVMBuildLoad(builder, rel, "load temp reg");
1088 /* TEMP LLVM values always have LLVM float type, but for indirection, the
1089 * value actually stored is expected to be an integer */
1090 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
1091 break;
1092 default:
1093 assert(0);
1094 rel = uint_bld->zero;
1095 }
1096
1097 index = lp_build_add(uint_bld, base, rel);
1098
1099 /*
1100 * emit_fetch_constant handles constant buffer overflow so this code
1101 * is pointless for them.
1102 * Furthermore the D3D10 spec in section 6.5 says:
1103 * If the constant buffer bound to a slot is larger than the size
1104 * declared in the shader for that slot, implementations are allowed
1105 * to return incorrect data (not necessarily 0) for indices that are
1106 * larger than the declared size but smaller than the buffer size.
1107 */
1108 if (reg_file != TGSI_FILE_CONSTANT) {
1109 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
1110 uint_bld->type,
1111 bld->bld_base.info->file_max[reg_file]);
1112
1113 assert(!uint_bld->type.sign);
1114 index = lp_build_min(uint_bld, index, max_index);
1115 }
1116
1117 return index;
1118 }
1119
1120 static struct lp_build_context *
1121 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
1122 enum tgsi_opcode_type stype)
1123 {
1124 struct lp_build_context *bld_fetch;
1125
1126 switch (stype) {
1127 case TGSI_TYPE_FLOAT:
1128 case TGSI_TYPE_UNTYPED:
1129 bld_fetch = &bld_base->base;
1130 break;
1131 case TGSI_TYPE_UNSIGNED:
1132 bld_fetch = &bld_base->uint_bld;
1133 break;
1134 case TGSI_TYPE_SIGNED:
1135 bld_fetch = &bld_base->int_bld;
1136 break;
1137 case TGSI_TYPE_DOUBLE:
1138 bld_fetch = &bld_base->dbl_bld;
1139 break;
1140 case TGSI_TYPE_UNSIGNED64:
1141 bld_fetch = &bld_base->uint64_bld;
1142 break;
1143 case TGSI_TYPE_SIGNED64:
1144 bld_fetch = &bld_base->int64_bld;
1145 break;
1146 case TGSI_TYPE_VOID:
1147 default:
1148 assert(0);
1149 bld_fetch = NULL;
1150 break;
1151 }
1152 return bld_fetch;
1153 }
1154
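/*
 * Compute per-lane offsets into an SoA register array laid out as
 * reg0.x lanes, reg0.y lanes, ... i.e. offset = (reg * 4 + chan) * length.
 * With need_perelement_offset each lane additionally gets {0, 1, 2, ...}
 * added so every pixel addresses its own slot.
 */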
1155 static LLVMValueRef
1156 get_soa_array_offsets(struct lp_build_context *uint_bld,
1157 LLVMValueRef indirect_index,
1158 unsigned chan_index,
1159 boolean need_perelement_offset)
1160 {
1161 struct gallivm_state *gallivm = uint_bld->gallivm;
1162 LLVMValueRef chan_vec =
1163 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
1164 LLVMValueRef length_vec =
1165 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
1166 LLVMValueRef index_vec;
1167
1168 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1169 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1170 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1171 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1172
1173 if (need_perelement_offset) {
1174 LLVMValueRef pixel_offsets;
1175 unsigned i;
1176 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1177 pixel_offsets = uint_bld->undef;
1178 for (i = 0; i < uint_bld->type.length; i++) {
1179 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1180 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
1181 ii, ii, "");
1182 }
1183 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1184 }
1185 return index_vec;
1186 }
1187
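/*
 * Fetch from a constant buffer: the direct path broadcasts one scalar
 * load to all lanes; the indirect path gathers per lane and applies the
 * D3D10 out-of-bounds rule (reads past the bound size return zero).
 */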
1188 static LLVMValueRef
1189 emit_fetch_constant(
1190 struct lp_build_tgsi_context * bld_base,
1191 const struct tgsi_full_src_register * reg,
1192 enum tgsi_opcode_type stype,
1193 unsigned swizzle_in)
1194 {
1195 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1196 struct gallivm_state *gallivm = bld_base->base.gallivm;
1197 LLVMBuilderRef builder = gallivm->builder;
1198 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1199 unsigned dimension = 0;
1200 LLVMValueRef consts_ptr;
1201 LLVMValueRef num_consts;
1202 LLVMValueRef res;
1203 unsigned swizzle = swizzle_in & 0xffff;
1204
1205 /* XXX: Handle fetching xyzw components as a vector */
1206 assert(swizzle != ~0u);
1207
1208 if (reg->Register.Dimension) {
1209 assert(!reg->Dimension.Indirect);
1210 dimension = reg->Dimension.Index;
1211 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
1212 }
1213
1214 consts_ptr = bld->consts[dimension];
1215 num_consts = bld->consts_sizes[dimension];
1216
1217 if (reg->Register.Indirect) {
1218 LLVMValueRef indirect_index;
1219 LLVMValueRef swizzle_vec =
1220 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
1221 LLVMValueRef index_vec; /* index into the const buffer */
1222 LLVMValueRef overflow_mask;
1223 LLVMValueRef index_vec2 = NULL;
1224
1225 indirect_index = get_indirect_index(bld,
1226 reg->Register.File,
1227 reg->Register.Index,
1228 &reg->Indirect);
1229
1230 /* All fetches are from the same constant buffer, so
1231 * we need to propagate the size to a vector to do a
1232 * vector comparison */
1233 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
1234 /* Construct a boolean vector telling us which channels
1235 * overflow the bound constant buffer */
1236 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
1237 indirect_index, num_consts);
1238
1239 /* index_vec = indirect_index * 4 + swizzle */
1240 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1241 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1242
1243 if (tgsi_type_is_64bit(stype)) {
1244 LLVMValueRef swizzle_vec2;
1245 swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
1246 index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
1247 index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
1248 }
1249 /* Gather values from the constant buffer */
1250 res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
1251 }
1252 else {
1253 LLVMValueRef index; /* index into the const buffer */
1254 LLVMValueRef scalar, scalar_ptr;
1255 struct lp_build_context *bld_broad = &bld_base->base;
1256 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1257
1258 scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
1259 &index, 1, "");
1260
1261 if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
1262
1263 LLVMValueRef scalar2, scalar2_ptr;
1264 LLVMValueRef shuffles[2];
1265 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));
1266
1267 scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
1268 &index, 1, "");
1269
1270 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
1271 scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
1272 shuffles[0] = lp_build_const_int32(gallivm, 0);
1273 shuffles[1] = lp_build_const_int32(gallivm, 1);
1274
1275 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
1276 res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
1277 res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
1278 } else {
1279 if (stype == TGSI_TYPE_DOUBLE) {
1280 LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
1281 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
1282 bld_broad = &bld_base->dbl_bld;
1283 } else if (stype == TGSI_TYPE_UNSIGNED64) {
1284 LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
1285 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
1286 bld_broad = &bld_base->uint64_bld;
1287 } else if (stype == TGSI_TYPE_SIGNED64) {
1288 LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
1289 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
1290 bld_broad = &bld_base->int64_bld;
1291 }
1292 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
1293 res = lp_build_broadcast_scalar(bld_broad, scalar);
1294 }
1295
1296 }
1297
1298 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
1299 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1300 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1301 }
1302
1303 return res;
1304 }
1305
1306 /**
1307 * Fetch 64-bit values from two separate channels.
1308 * 64-bit values are stored split across two channels, like xy and zw.
1309 * This function creates a set of vec_length*2 floats,
1310 * extracts the values from the two channels,
1311 * puts them in the correct place, then casts to vec_length 64-bit values.
1312 */
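/*
 * (E.g. with a 4-wide vector the shuffle mask below is <0,4,1,5,2,6,3,7>,
 * interleaving element i of 'input' with element i of 'input2'.)
 */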
1313 static LLVMValueRef
1314 emit_fetch_64bit(
1315 struct lp_build_tgsi_context * bld_base,
1316 enum tgsi_opcode_type stype,
1317 LLVMValueRef input,
1318 LLVMValueRef input2)
1319 {
1320 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1321 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1322 LLVMBuilderRef builder = gallivm->builder;
1323 LLVMValueRef res;
1324 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1325 int i;
1326 LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
1327 int len = bld_base->base.type.length * 2;
1328 assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
1329
1330 for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
1331 shuffles[i] = lp_build_const_int32(gallivm, i / 2);
1332 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
1333 }
1334 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
1335
1336 return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1337 }
1338
1339 static LLVMValueRef
1340 emit_fetch_immediate(
1341 struct lp_build_tgsi_context * bld_base,
1342 const struct tgsi_full_src_register * reg,
1343 enum tgsi_opcode_type stype,
1344 unsigned swizzle_in)
1345 {
1346 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1347 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1348 LLVMBuilderRef builder = gallivm->builder;
1349 LLVMValueRef res = NULL;
1350 unsigned swizzle = swizzle_in & 0xffff;
1351
1352 if (bld->use_immediates_array || reg->Register.Indirect) {
1353 LLVMValueRef imms_array;
1354 LLVMTypeRef fptr_type;
1355
1356 /* cast imms_array pointer to float* */
1357 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1358 imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
1359
1360 if (reg->Register.Indirect) {
1361 LLVMValueRef indirect_index;
1362 LLVMValueRef index_vec; /* index into the immediate register array */
1363 LLVMValueRef index_vec2 = NULL;
1364 indirect_index = get_indirect_index(bld,
1365 reg->Register.File,
1366 reg->Register.Index,
1367 &reg->Indirect);
1368 /*
1369 * Unlike for other reg classes, adding pixel offsets is unnecessary -
1370 * immediates are stored as full vectors (FIXME??? - might be better
1371 * to store them the same as constants) but all elements are the same
1372 * in any case.
1373 */
1374 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1375 indirect_index,
1376 swizzle,
1377 FALSE);
1378 if (tgsi_type_is_64bit(stype))
1379 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1380 indirect_index,
1381 swizzle_in >> 16,
1382 FALSE);
1383 /* Gather values from the immediate register array */
1384 res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
1385 } else {
1386 LLVMValueRef gep[2];
1387 gep[0] = lp_build_const_int32(gallivm, 0);
1388 gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1389 LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
1390 bld->imms_array, gep, 2, "");
1391 res = LLVMBuildLoad(builder, imms_ptr, "");
1392
1393 if (tgsi_type_is_64bit(stype)) {
1394 LLVMValueRef imms_ptr2;
1395 LLVMValueRef res2;
1396 gep[1] = lp_build_const_int32(gallivm,
1397 reg->Register.Index * 4 + (swizzle_in >> 16));
1398 imms_ptr2 = LLVMBuildGEP(builder,
1399 bld->imms_array, gep, 2, "");
1400 res2 = LLVMBuildLoad(builder, imms_ptr2, "");
1401 res = emit_fetch_64bit(bld_base, stype, res, res2);
1402 }
1403 }
1404 }
1405 else {
1406 res = bld->immediates[reg->Register.Index][swizzle];
1407 if (tgsi_type_is_64bit(stype))
1408 res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
1409 }
1410
1411 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1412 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1413 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1414 }
1415 return res;
1416 }
1417
1418 static LLVMValueRef
1419 emit_fetch_input(
1420 struct lp_build_tgsi_context * bld_base,
1421 const struct tgsi_full_src_register * reg,
1422 enum tgsi_opcode_type stype,
1423 unsigned swizzle_in)
1424 {
1425 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1426 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1427 LLVMBuilderRef builder = gallivm->builder;
1428 LLVMValueRef res;
1429 unsigned swizzle = swizzle_in & 0xffff;
1430
1431 if (reg->Register.Indirect) {
1432 LLVMValueRef indirect_index;
1433 LLVMValueRef index_vec; /* index into the input reg array */
1434 LLVMValueRef index_vec2 = NULL;
1435 LLVMValueRef inputs_array;
1436 LLVMTypeRef fptr_type;
1437
1438 indirect_index = get_indirect_index(bld,
1439 reg->Register.File,
1440 reg->Register.Index,
1441 &reg->Indirect);
1442
1443 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1444 indirect_index,
1445 swizzle,
1446 TRUE);
1447 if (tgsi_type_is_64bit(stype)) {
1448 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1449 indirect_index,
1450 swizzle_in >> 16,
1451 TRUE);
1452 }
1453 /* cast inputs_array pointer to float* */
1454 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1455 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1456
1457 /* Gather values from the input register array */
1458 res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
1459 } else {
1460 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1461 LLVMValueRef lindex = lp_build_const_int32(gallivm,
1462 reg->Register.Index * 4 + swizzle);
1463 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1464 bld->inputs_array, &lindex, 1, "");
1465
1466 res = LLVMBuildLoad(builder, input_ptr, "");
1467 if (tgsi_type_is_64bit(stype)) {
1468 LLVMValueRef lindex1;
1469 LLVMValueRef input_ptr2;
1470 LLVMValueRef res2;
1471
1472 lindex1 = lp_build_const_int32(gallivm,
1473 reg->Register.Index * 4 + (swizzle_in >> 16));
1474 input_ptr2 = LLVMBuildGEP(builder,
1475 bld->inputs_array, &lindex1, 1, "");
1476 res2 = LLVMBuildLoad(builder, input_ptr2, "");
1477 res = emit_fetch_64bit(bld_base, stype, res, res2);
1478 }
1479 }
1480 else {
1481 res = bld->inputs[reg->Register.Index][swizzle];
1482 if (tgsi_type_is_64bit(stype))
1483 res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
1484 }
1485 }
1486
1487 assert(res);
1488
1489 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1490 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1491 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1492 }
1493
1494 return res;
1495 }
1496
1497
1498 static LLVMValueRef
1499 emit_fetch_gs_input(
1500 struct lp_build_tgsi_context * bld_base,
1501 const struct tgsi_full_src_register * reg,
1502 enum tgsi_opcode_type stype,
1503 unsigned swizzle_in)
1504 {
1505 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1506 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1507 const struct tgsi_shader_info *info = bld->bld_base.info;
1508 LLVMBuilderRef builder = gallivm->builder;
1509 LLVMValueRef attrib_index = NULL;
1510 LLVMValueRef vertex_index = NULL;
1511 unsigned swizzle = swizzle_in & 0xffff;
1512 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1513 LLVMValueRef res;
1514
1515 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1516 /* This is really a system value, not a regular input */
1517 assert(!reg->Register.Indirect);
1518 assert(!reg->Dimension.Indirect);
1519 res = bld->system_values.prim_id;
1520 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1521 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1522 }
1523 return res;
1524 }
1525
1526 if (reg->Register.Indirect) {
1527 attrib_index = get_indirect_index(bld,
1528 reg->Register.File,
1529 reg->Register.Index,
1530 &reg->Indirect);
1531 } else {
1532 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1533 }
1534
1535 if (reg->Dimension.Indirect) {
1536 vertex_index = get_indirect_index(bld,
1537 reg->Register.File,
1538 reg->Dimension.Index,
1539 &reg->DimIndirect);
1540 } else {
1541 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1542 }
1543
1544 res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1545 reg->Dimension.Indirect,
1546 vertex_index,
1547 reg->Register.Indirect,
1548 attrib_index,
1549 swizzle_index);
1550
1551 assert(res);
1552 if (tgsi_type_is_64bit(stype)) {
1553 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1554 LLVMValueRef res2;
1555 res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1556 reg->Dimension.Indirect,
1557 vertex_index,
1558 reg->Register.Indirect,
1559 attrib_index,
1560 swizzle_index);
1561 assert(res2);
1562 res = emit_fetch_64bit(bld_base, stype, res, res2);
1563 } else if (stype == TGSI_TYPE_UNSIGNED) {
1564 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1565 } else if (stype == TGSI_TYPE_SIGNED) {
1566 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1567 }
1568
1569 return res;
1570 }
1571
1572 static LLVMValueRef
1573 emit_fetch_temporary(
1574 struct lp_build_tgsi_context * bld_base,
1575 const struct tgsi_full_src_register * reg,
1576 enum tgsi_opcode_type stype,
1577 unsigned swizzle_in)
1578 {
1579 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1580 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1581 LLVMBuilderRef builder = gallivm->builder;
1582 LLVMValueRef res;
1583 unsigned swizzle = swizzle_in & 0xffff;
1584
1585 if (reg->Register.Indirect) {
1586 LLVMValueRef indirect_index;
1587 LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */
1588 LLVMValueRef temps_array;
1589 LLVMTypeRef fptr_type;
1590
1591 indirect_index = get_indirect_index(bld,
1592 reg->Register.File,
1593 reg->Register.Index,
1594 &reg->Indirect);
1595
1596 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1597 indirect_index,
1598 swizzle,
1599 TRUE);
1600 if (tgsi_type_is_64bit(stype)) {
1601 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1602 indirect_index,
1603 swizzle_in >> 16,
1604 TRUE);
1605 }
1606
1607 /* cast temps_array pointer to float* */
1608 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1609 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1610
1611 /* Gather values from the temporary register array */
1612 res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
1613 }
1614 else {
1615 LLVMValueRef temp_ptr;
1616 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1617 res = LLVMBuildLoad(builder, temp_ptr, "");
1618
1619 if (tgsi_type_is_64bit(stype)) {
1620 LLVMValueRef temp_ptr2, res2;
1621
1622 temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
1623 res2 = LLVMBuildLoad(builder, temp_ptr2, "");
1624 res = emit_fetch_64bit(bld_base, stype, res, res2);
1625 }
1626 }
1627
1628 if (stype == TGSI_TYPE_SIGNED ||
1629 stype == TGSI_TYPE_UNSIGNED ||
1630 stype == TGSI_TYPE_DOUBLE ||
1631 stype == TGSI_TYPE_SIGNED64 ||
1632 stype == TGSI_TYPE_UNSIGNED64) {
1633 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1634 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1635 }
1636
1637 return res;
1638 }
1639
1640 static LLVMValueRef
1641 emit_fetch_system_value(
1642 struct lp_build_tgsi_context * bld_base,
1643 const struct tgsi_full_src_register * reg,
1644 enum tgsi_opcode_type stype,
1645 unsigned swizzle_in)
1646 {
1647 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1648 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1649 const struct tgsi_shader_info *info = bld->bld_base.info;
1650 LLVMBuilderRef builder = gallivm->builder;
1651 LLVMValueRef res;
1652 enum tgsi_opcode_type atype; // Actual type of the value
1653
1654 assert(!reg->Register.Indirect);
1655
1656 switch (info->system_value_semantic_name[reg->Register.Index]) {
1657 case TGSI_SEMANTIC_INSTANCEID:
1658 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1659 atype = TGSI_TYPE_UNSIGNED;
1660 break;
1661
1662 case TGSI_SEMANTIC_VERTEXID:
1663 res = bld->system_values.vertex_id;
1664 atype = TGSI_TYPE_UNSIGNED;
1665 break;
1666
1667 case TGSI_SEMANTIC_VERTEXID_NOBASE:
1668 res = bld->system_values.vertex_id_nobase;
1669 atype = TGSI_TYPE_UNSIGNED;
1670 break;
1671
1672 case TGSI_SEMANTIC_BASEVERTEX:
1673 res = bld->system_values.basevertex;
1674 atype = TGSI_TYPE_UNSIGNED;
1675 break;
1676
1677 case TGSI_SEMANTIC_PRIMID:
1678 res = bld->system_values.prim_id;
1679 atype = TGSI_TYPE_UNSIGNED;
1680 break;
1681
1682 case TGSI_SEMANTIC_INVOCATIONID:
1683 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1684 atype = TGSI_TYPE_UNSIGNED;
1685 break;
1686
1687 default:
1688 assert(!"unexpected semantic in emit_fetch_system_value");
1689 res = bld_base->base.zero;
1690 atype = TGSI_TYPE_FLOAT;
1691 break;
1692 }
1693
1694 if (atype != stype) {
1695 if (stype == TGSI_TYPE_FLOAT) {
1696 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1697 } else if (stype == TGSI_TYPE_UNSIGNED) {
1698 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1699 } else if (stype == TGSI_TYPE_SIGNED) {
1700 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1701 }
1702 }
1703
1704 return res;
1705 }
1706
1707 /**
1708 * Register fetch with derivatives.
1709 */
1710 static void
1711 emit_fetch_deriv(
1712 struct lp_build_tgsi_soa_context *bld,
1713 LLVMValueRef src,
1714 LLVMValueRef *res,
1715 LLVMValueRef *ddx,
1716 LLVMValueRef *ddy)
1717 {
1718 if (res)
1719 *res = src;
1720
1721 /* TODO: use interpolation coeffs for inputs */
1722
1723 if (ddx)
1724 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1725
1726 if (ddy)
1727 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1728 }
1729
1730 /**
1731 * Store an array of vec_length 64-bit values into two arrays of vec_length floats,
1732 * i.e.
1733 * value is d0, d1, d2, d3 etc.
1734 * each 64-bit value has high and low pieces x, y,
1735 * which get stored into the separate channels as:
1736 * chan_ptr = d0.x, d1.x, d2.x, d3.x
1737 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1738 */
1739 static void
1740 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1741 LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1742 LLVMValueRef value)
1743 {
1744 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1745 struct gallivm_state *gallivm = bld_base->base.gallivm;
1746 LLVMBuilderRef builder = gallivm->builder;
1747 struct lp_build_context *float_bld = &bld_base->base;
1748 unsigned i;
1749 LLVMValueRef temp, temp2;
1750 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1751 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1752
1753 for (i = 0; i < bld_base->base.type.length; i++) {
1754 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1755 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1756 }
1757
1758 temp = LLVMBuildShuffleVector(builder, value,
1759 LLVMGetUndef(LLVMTypeOf(value)),
1760 LLVMConstVector(shuffles,
1761 bld_base->base.type.length),
1762 "");
1763 temp2 = LLVMBuildShuffleVector(builder, value,
1764 LLVMGetUndef(LLVMTypeOf(value)),
1765 LLVMConstVector(shuffles2,
1766 bld_base->base.type.length),
1767 "");
1768
1769 lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1770 lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1771 }
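/*
 * Worked example (illustrative only, not part of the build): with a
 * 4-wide float type, the bitcast 64-bit value arrives as an 8-element
 * float vector
 *   value     = d0.x d0.y d1.x d1.y d2.x d2.y d3.x d3.y
 * and the shuffle masks built above deinterleave it:
 *   shuffles  = {0, 2, 4, 6}  ->  temp  = d0.x d1.x d2.x d3.x
 *   shuffles2 = {1, 3, 5, 7}  ->  temp2 = d0.y d1.y d2.y d3.y
 * so chan_ptr receives the x pieces and chan_ptr2 the y pieces.
 */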
1772
1773 /**
1774 * Register store.
1775 */
1776 static void
1777 emit_store_chan(
1778 struct lp_build_tgsi_context *bld_base,
1779 const struct tgsi_full_instruction *inst,
1780 unsigned index,
1781 unsigned chan_index,
1782 LLVMValueRef value)
1783 {
1784 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1785 struct gallivm_state *gallivm = bld_base->base.gallivm;
1786 LLVMBuilderRef builder = gallivm->builder;
1787 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1788 struct lp_build_context *float_bld = &bld_base->base;
1789 struct lp_build_context *int_bld = &bld_base->int_bld;
1790 LLVMValueRef indirect_index = NULL;
1791 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1792
1793 /*
1794 * Apply saturation.
1795 *
1796 * The value to saturate is always assumed to be float.
1797 */
1798 if (inst->Instruction.Saturate) {
1799 assert(dtype == TGSI_TYPE_FLOAT ||
1800 dtype == TGSI_TYPE_UNTYPED);
1801 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1802 value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1803 }
1804
1805 if (reg->Register.Indirect) {
1806 /*
1807 * Currently mesa/st doesn't generate indirect stores
1808 * to 64-bit values; it normally uses MOV to do indirect stores.
1809 */
1810 assert(!tgsi_type_is_64bit(dtype));
1811 indirect_index = get_indirect_index(bld,
1812 reg->Register.File,
1813 reg->Register.Index,
1814 &reg->Indirect);
1815 } else {
1816 assert(reg->Register.Index <=
1817 bld_base->info->file_max[reg->Register.File]);
1818 }
1819
1820 if (DEBUG_EXECUTION) {
1821 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1822 }
1823
1824 switch( reg->Register.File ) {
1825 case TGSI_FILE_OUTPUT:
1826 /* Outputs are always stored as floats */
1827 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1828
1829 if (reg->Register.Indirect) {
1830 LLVMValueRef index_vec; /* indexes into the output registers */
1831 LLVMValueRef outputs_array;
1832 LLVMTypeRef fptr_type;
1833
1834 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1835 indirect_index,
1836 chan_index,
1837 TRUE);
1838
1839 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1840 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1841
1842 /* Scatter store values into output registers */
1843 emit_mask_scatter(bld, outputs_array, index_vec, value,
1844 &bld->exec_mask);
1845 }
1846 else {
1847 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1848 chan_index);
1849
1850 if (tgsi_type_is_64bit(dtype)) {
1851 LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1852 chan_index + 1);
1853 emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1854 value);
1855 } else
1856 lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1857 }
1858 break;
1859
1860 case TGSI_FILE_TEMPORARY:
1861 /* Temporaries are always stored as floats */
1862 if (!tgsi_type_is_64bit(dtype))
1863 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1864 else
1865 value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1866
1867 if (reg->Register.Indirect) {
1868 LLVMValueRef index_vec; /* indexes into the temp registers */
1869 LLVMValueRef temps_array;
1870 LLVMTypeRef fptr_type;
1871
1872 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1873 indirect_index,
1874 chan_index,
1875 TRUE);
1876
1877 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1878 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1879
1880 /* Scatter store values into temp registers */
1881 emit_mask_scatter(bld, temps_array, index_vec, value,
1882 &bld->exec_mask);
1883 }
1884 else {
1885 LLVMValueRef temp_ptr;
1886 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1887
1888 if (tgsi_type_is_64bit(dtype)) {
1889 LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1890 reg->Register.Index,
1891 chan_index + 1);
1892 emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1893 value);
1894 }
1895 else
1896 lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1897 }
1898 break;
1899
1900 case TGSI_FILE_ADDRESS:
1901 assert(dtype == TGSI_TYPE_SIGNED);
1902 assert(LLVMTypeOf(value) == int_bld->vec_type);
1903 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1904 lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1905 bld->addr[reg->Register.Index][chan_index]);
1906 break;
1907
1908 default:
1909 assert( 0 );
1910 }
1911
1912 (void)dtype;
1913 }
1914
1915 /*
1916 * Called at the beginning of the translation of each TGSI instruction, to
1917 * emit some debug code.
1918 */
1919 static void
1920 emit_debug(
1921 struct lp_build_tgsi_context * bld_base,
1922 const struct tgsi_full_instruction * inst,
1923 const struct tgsi_opcode_info * info)
1924
1925 {
1926 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1927
1928 if (DEBUG_EXECUTION) {
1929 /*
1930 * Dump the TGSI instruction.
1931 */
1932
1933 struct gallivm_state *gallivm = bld_base->base.gallivm;
1934 char buf[512];
1935 buf[0] = '$';
1936 buf[1] = ' ';
1937 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1938 lp_build_printf(gallivm, buf);
1939
1940 /* Dump the execution mask.
1941 */
1942 if (bld->exec_mask.has_mask) {
1943 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
1944 }
1945 }
1946 }
1947
1948 static void
1949 emit_store(
1950 struct lp_build_tgsi_context * bld_base,
1951 const struct tgsi_full_instruction * inst,
1952 const struct tgsi_opcode_info * info,
1953 unsigned index,
1954 LLVMValueRef dst[4])
1955
1956 {
1957 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1958
1959 unsigned writemask = inst->Dst[index].Register.WriteMask;
1960 while (writemask) {
1961 unsigned chan_index = u_bit_scan(&writemask);
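/* 64-bit values occupy two channels: emit_store_chan writes the pair
 * via chan_index + 1 as well, so skip the odd channels here.
 */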
1962 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1963 continue;
1964 emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1965 }
1966 }
1967
1968 static unsigned
1969 tgsi_to_pipe_tex_target(unsigned tgsi_target)
1970 {
1971 switch (tgsi_target) {
1972 case TGSI_TEXTURE_BUFFER:
1973 return PIPE_BUFFER;
1974 case TGSI_TEXTURE_1D:
1975 case TGSI_TEXTURE_SHADOW1D:
1976 return PIPE_TEXTURE_1D;
1977 case TGSI_TEXTURE_2D:
1978 case TGSI_TEXTURE_SHADOW2D:
1979 case TGSI_TEXTURE_2D_MSAA:
1980 return PIPE_TEXTURE_2D;
1981 case TGSI_TEXTURE_3D:
1982 return PIPE_TEXTURE_3D;
1983 case TGSI_TEXTURE_CUBE:
1984 case TGSI_TEXTURE_SHADOWCUBE:
1985 return PIPE_TEXTURE_CUBE;
1986 case TGSI_TEXTURE_RECT:
1987 case TGSI_TEXTURE_SHADOWRECT:
1988 return PIPE_TEXTURE_RECT;
1989 case TGSI_TEXTURE_1D_ARRAY:
1990 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1991 return PIPE_TEXTURE_1D_ARRAY;
1992 case TGSI_TEXTURE_2D_ARRAY:
1993 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1994 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1995 return PIPE_TEXTURE_2D_ARRAY;
1996 case TGSI_TEXTURE_CUBE_ARRAY:
1997 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1998 return PIPE_TEXTURE_CUBE_ARRAY;
1999 default:
2000 assert(0);
2001 return PIPE_BUFFER;
2002 }
2003 }
2004
2005
2006 static enum lp_sampler_lod_property
2007 lp_build_lod_property(
2008 struct lp_build_tgsi_context *bld_base,
2009 const struct tgsi_full_instruction *inst,
2010 unsigned src_op)
2011 {
2012 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2013 enum lp_sampler_lod_property lod_property;
2014
2015 /*
2016 * Not much we can do here. We could try catching inputs declared
2017 * with constant interpolation, but it's probably not worth it: for
2018 * TEX opcodes as well as FETCH/LD the lod comes from the same reg as
2019 * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO (just
2020 * like the constant/immediate recognition below).
2021 * What would be of more value is recognizing temps holding
2022 * broadcast scalars, but there is no way we can do that.
2023 * Asking llvm (via LLVMIsConstant, even though that isn't exactly
2024 * what we'd need) was no success either; even something as simple as
2025 * IMM[0] UINT32 (0,-1,0,0)
2026 * MOV TEMP[0] IMM[0].yyyy
2027 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2028 * isn't recognized.
2029 * This means there's ZERO chance this will ever catch a scalar lod
2030 * with traditional tex opcodes as well as texel fetches, since the lod
2031 * comes from the same reg as the coords (except maybe for some test
2032 * shaders using constant coords).
2033 * There's at least hope for sample opcodes as well as size queries.
2034 */
2035 if (reg->Register.File == TGSI_FILE_CONSTANT ||
2036 reg->Register.File == TGSI_FILE_IMMEDIATE) {
2037 lod_property = LP_SAMPLER_LOD_SCALAR;
2038 }
2039 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2040 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2041 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2042 }
2043 else {
2044 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2045 }
2046 }
2047 else {
2048 /* never use scalar (per-quad) lod; the results are just too wrong. */
2049 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2050 }
2051 return lod_property;
2052 }
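/*
 * Rough meaning of the returned property (a summary, not normative):
 *   LP_SAMPLER_LOD_SCALAR      - one lod value shared by the whole vector
 *   LP_SAMPLER_LOD_PER_QUAD    - one lod value per 2x2 quad
 *   LP_SAMPLER_LOD_PER_ELEMENT - an independent lod for every element
 * Callers pack the result into the sample key, as in
 *   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
 */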
2053
2054
2055 /**
2056 * High-level instruction translators.
2057 */
2058
2059 static void
2060 emit_tex( struct lp_build_tgsi_soa_context *bld,
2061 const struct tgsi_full_instruction *inst,
2062 enum lp_build_tex_modifier modifier,
2063 LLVMValueRef *texel,
2064 unsigned sampler_reg,
2065 enum lp_sampler_op_type sampler_op)
2066 {
2067 unsigned unit = inst->Src[sampler_reg].Register.Index;
2068 LLVMValueRef oow = NULL;
2069 LLVMValueRef lod = NULL;
2070 LLVMValueRef coords[5];
2071 LLVMValueRef offsets[3] = { NULL };
2072 struct lp_derivatives derivs;
2073 struct lp_sampler_params params;
2074 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2075 unsigned num_derivs, num_offsets, i;
2076 unsigned shadow_coord = 0;
2077 unsigned layer_coord = 0;
2078 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2079
2080 memset(&params, 0, sizeof(params));
2081
2082 if (!bld->sampler) {
2083 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2084 for (i = 0; i < 4; i++) {
2085 texel[i] = bld->bld_base.base.undef;
2086 }
2087 return;
2088 }
2089
2090 switch (inst->Texture.Texture) {
2091 case TGSI_TEXTURE_1D_ARRAY:
2092 layer_coord = 1;
2093 /* fallthrough */
2094 case TGSI_TEXTURE_1D:
2095 num_offsets = 1;
2096 num_derivs = 1;
2097 break;
2098 case TGSI_TEXTURE_2D_ARRAY:
2099 layer_coord = 2;
2100 /* fallthrough */
2101 case TGSI_TEXTURE_2D:
2102 case TGSI_TEXTURE_RECT:
2103 num_offsets = 2;
2104 num_derivs = 2;
2105 break;
2106 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2107 layer_coord = 1;
2108 /* fallthrough */
2109 case TGSI_TEXTURE_SHADOW1D:
2110 shadow_coord = 2;
2111 num_offsets = 1;
2112 num_derivs = 1;
2113 break;
2114 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2115 layer_coord = 2;
2116 shadow_coord = 3;
2117 num_offsets = 2;
2118 num_derivs = 2;
2119 break;
2120 case TGSI_TEXTURE_SHADOW2D:
2121 case TGSI_TEXTURE_SHADOWRECT:
2122 shadow_coord = 2;
2123 num_offsets = 2;
2124 num_derivs = 2;
2125 break;
2126 case TGSI_TEXTURE_CUBE:
2127 num_offsets = 2;
2128 num_derivs = 3;
2129 break;
2130 case TGSI_TEXTURE_3D:
2131 num_offsets = 3;
2132 num_derivs = 3;
2133 break;
2134 case TGSI_TEXTURE_SHADOWCUBE:
2135 shadow_coord = 3;
2136 num_offsets = 2;
2137 num_derivs = 3;
2138 break;
2139 case TGSI_TEXTURE_CUBE_ARRAY:
2140 num_offsets = 2;
2141 num_derivs = 3;
2142 layer_coord = 3;
2143 break;
2144 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2145 num_offsets = 2;
2146 num_derivs = 3;
2147 layer_coord = 3;
2148 shadow_coord = 4; /* shadow coord comes from a separate reg */
2149 break;
2150 case TGSI_TEXTURE_2D_MSAA:
2151 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2152 default:
2153 assert(0);
2154 return;
2155 }
2156
2157 /* Note lod and especially projected are illegal in a LOT of cases */
2158 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2159 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2160 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2161 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2162 /* note that shadow cube array with bias/explicit lod does not exist */
2163 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2164 }
2165 else {
2166 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2167 }
2168 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2169 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2170 }
2171 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2172 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2173 }
2174 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2175 }
2176
2177 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2178 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2179 oow = lp_build_rcp(&bld->bld_base.base, oow);
2180 }
2181
2182 for (i = 0; i < num_derivs; i++) {
2183 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2184 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2185 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2186 }
2187 for (i = num_derivs; i < 5; i++) {
2188 coords[i] = bld->bld_base.base.undef;
2189 }
2190
2191 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2192 if (layer_coord) {
2193 if (layer_coord == 3) {
2194 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2195 }
2196 else {
2197 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2198 }
2199 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2200 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2201 }
2202 /* Shadow coord always occupies the 5th slot. */
2203 if (shadow_coord) {
2204 sample_key |= LP_SAMPLER_SHADOW;
2205 if (shadow_coord == 4) {
2206 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2207 }
2208 else {
2209 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2210 }
2211 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2212 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2213 }
2214
2215 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2216 unsigned dim;
2217 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2218 for (dim = 0; dim < num_derivs; ++dim) {
2219 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2220 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2221 }
2222 params.derivs = &derivs;
2223 /*
2224 * We could also check whether all src regs are constant, but I
2225 * doubt such cases exist in practice.
2226 */
2227 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2228 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2229 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2230 }
2231 else {
2232 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2233 }
2234 }
2235 else {
2236 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2237 }
2238 }
2239 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2240
2241 /* we don't handle the 4 offset version of tg4 */
2242 if (inst->Texture.NumOffsets == 1) {
2243 unsigned dim;
2244 sample_key |= LP_SAMPLER_OFFSETS;
2245 for (dim = 0; dim < num_offsets; dim++) {
2246 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2247 }
2248 }
2249
2250 params.type = bld->bld_base.base.type;
2251 params.sample_key = sample_key;
2252 params.texture_index = unit;
2253 params.sampler_index = unit;
2254 params.context_ptr = bld->context_ptr;
2255 params.thread_data_ptr = bld->thread_data_ptr;
2256 params.coords = coords;
2257 params.offsets = offsets;
2258 params.lod = lod;
2259 params.texel = texel;
2260
2261 bld->sampler->emit_tex_sample(bld->sampler,
2262 bld->bld_base.base.gallivm,
2263 &params);
2264 }
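/*
 * Illustrative example (not from the source): a plain 2D texture fetch
 * such as
 *   TEX TEMP[0], IN[0], SAMP[0], 2D
 * reaches emit_tex with sampler_reg = 1 (so unit = 0), fetches two
 * coords from src 0 and leaves lod NULL, meaning the sampler code
 * computes an implicit lod from the derivatives.
 */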
2265
2266 static void
2267 emit_sample(struct lp_build_tgsi_soa_context *bld,
2268 const struct tgsi_full_instruction *inst,
2269 enum lp_build_tex_modifier modifier,
2270 boolean compare,
2271 enum lp_sampler_op_type sample_type,
2272 LLVMValueRef *texel)
2273 {
2274 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2275 unsigned texture_unit, sampler_unit;
2276 LLVMValueRef lod = NULL;
2277 LLVMValueRef coords[5];
2278 LLVMValueRef offsets[3] = { NULL };
2279 struct lp_derivatives derivs;
2280 struct lp_sampler_params params;
2281 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2282
2283 unsigned num_offsets, num_derivs, i;
2284 unsigned layer_coord = 0;
2285 unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2286
2287 memset(&params, 0, sizeof(params));
2288
2289 if (!bld->sampler) {
2290 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2291 for (i = 0; i < 4; i++) {
2292 texel[i] = bld->bld_base.base.undef;
2293 }
2294 return;
2295 }
2296
2297 /*
2298 * Unlike old-style tex opcodes, the texture/sampler indices
2299 * always come from src1 and src2 respectively.
2300 */
2301 texture_unit = inst->Src[1].Register.Index;
2302 sampler_unit = inst->Src[2].Register.Index;
2303
2304 /*
2305 * Note inst->Texture.Texture will contain the number of offsets;
2306 * however, the target information is NOT there and comes from the
2307 * declared sampler views instead.
2308 */
2309 switch (bld->sv[texture_unit].Resource) {
2310 case TGSI_TEXTURE_1D:
2311 num_offsets = 1;
2312 num_derivs = 1;
2313 break;
2314 case TGSI_TEXTURE_1D_ARRAY:
2315 layer_coord = 1;
2316 num_offsets = 1;
2317 num_derivs = 1;
2318 break;
2319 case TGSI_TEXTURE_2D:
2320 case TGSI_TEXTURE_RECT:
2321 num_offsets = 2;
2322 num_derivs = 2;
2323 break;
2324 case TGSI_TEXTURE_2D_ARRAY:
2325 layer_coord = 2;
2326 num_offsets = 2;
2327 num_derivs = 2;
2328 break;
2329 case TGSI_TEXTURE_CUBE:
2330 num_offsets = 2;
2331 num_derivs = 3;
2332 break;
2333 case TGSI_TEXTURE_3D:
2334 num_offsets = 3;
2335 num_derivs = 3;
2336 break;
2337 case TGSI_TEXTURE_CUBE_ARRAY:
2338 layer_coord = 3;
2339 num_offsets = 2;
2340 num_derivs = 3;
2341 break;
2342 default:
2343 assert(0);
2344 return;
2345 }
2346
2347 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2348 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2349 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2350 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2351 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2352 }
2353 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2354 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2355 }
2356 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2357 }
2358 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2359 /* XXX might be better to explicitly pass the level zero information */
2360 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2361 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2362 }
2363
2364 for (i = 0; i < num_derivs; i++) {
2365 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2366 }
2367 for (i = num_derivs; i < 5; i++) {
2368 coords[i] = bld->bld_base.base.undef;
2369 }
2370
2371 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2372 if (layer_coord) {
2373 if (layer_coord == 3)
2374 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2375 else
2376 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2377 }
2378 /* Shadow coord always occupies the 5th slot. */
2379 if (compare) {
2380 sample_key |= LP_SAMPLER_SHADOW;
2381 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2382 }
2383
2384 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2385 unsigned dim;
2386 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2387 for (dim = 0; dim < num_derivs; ++dim) {
2388 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2389 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2390 }
2391 params.derivs = &derivs;
2392 /*
2393 * We could also check whether all src regs are constant, but I
2394 * doubt such cases exist in practice.
2395 */
2396 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2397 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2398 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2399 }
2400 else {
2401 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2402 }
2403 }
2404 else {
2405 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2406 }
2407 }
2408
2409 /* some advanced gather instructions (txgo) would require 4 offsets */
2410 if (inst->Texture.NumOffsets == 1) {
2411 unsigned dim;
2412 sample_key |= LP_SAMPLER_OFFSETS;
2413 for (dim = 0; dim < num_offsets; dim++) {
2414 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2415 }
2416 }
2417 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2418
2419 params.type = bld->bld_base.base.type;
2420 params.sample_key = sample_key;
2421 params.texture_index = texture_unit;
2422 params.sampler_index = sampler_unit;
2423 params.context_ptr = bld->context_ptr;
2424 params.thread_data_ptr = bld->thread_data_ptr;
2425 params.coords = coords;
2426 params.offsets = offsets;
2427 params.lod = lod;
2428 params.texel = texel;
2429
2430 bld->sampler->emit_tex_sample(bld->sampler,
2431 bld->bld_base.base.gallivm,
2432 &params);
2433
2434 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2435 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2436 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2437 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2438 unsigned char swizzles[4];
2439 swizzles[0] = inst->Src[1].Register.SwizzleX;
2440 swizzles[1] = inst->Src[1].Register.SwizzleY;
2441 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2442 swizzles[3] = inst->Src[1].Register.SwizzleW;
2443
2444 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2445 }
2446 }
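/*
 * Illustrative example (not from the source): for an instruction like
 *   SAMPLE TEMP[0], IN[0], SVIEW[1].yxzw, SAMP[2]
 * texture_unit = 1 and sampler_unit = 2 come from src1/src2, and the
 * swizzle check above reorders the returned texel channels with
 * lp_build_swizzle_soa_inplace.
 */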
2447
2448 static void
2449 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2450 const struct tgsi_full_instruction *inst,
2451 LLVMValueRef *texel,
2452 boolean is_samplei)
2453 {
2454 unsigned unit, target;
2455 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2456 LLVMValueRef explicit_lod = NULL;
2457 LLVMValueRef coords[5];
2458 LLVMValueRef offsets[3] = { NULL };
2459 struct lp_sampler_params params;
2460 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2461 unsigned dims, i;
2462 unsigned layer_coord = 0;
2463 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2464
2465 memset(&params, 0, sizeof(params));
2466
2467 if (!bld->sampler) {
2468 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2469 for (i = 0; i < 4; i++) {
2470 texel[i] = coord_undef;
2471 }
2472 return;
2473 }
2474
2475 unit = inst->Src[1].Register.Index;
2476
2477 if (is_samplei) {
2478 target = bld->sv[unit].Resource;
2479 }
2480 else {
2481 target = inst->Texture.Texture;
2482 }
2483
2484 switch (target) {
2485 case TGSI_TEXTURE_1D:
2486 case TGSI_TEXTURE_BUFFER:
2487 dims = 1;
2488 break;
2489 case TGSI_TEXTURE_1D_ARRAY:
2490 layer_coord = 1;
2491 dims = 1;
2492 break;
2493 case TGSI_TEXTURE_2D:
2494 case TGSI_TEXTURE_RECT:
2495 case TGSI_TEXTURE_2D_MSAA:
2496 dims = 2;
2497 break;
2498 case TGSI_TEXTURE_2D_ARRAY:
2499 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2500 layer_coord = 2;
2501 dims = 2;
2502 break;
2503 case TGSI_TEXTURE_3D:
2504 dims = 3;
2505 break;
2506 default:
2507 assert(0);
2508 return;
2509 }
2510
2511 /* always have a lod, except for buffers and msaa targets? */
2512 if (target != TGSI_TEXTURE_BUFFER &&
2513 target != TGSI_TEXTURE_2D_MSAA &&
2514 target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
2515 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2516 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2517 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2518 }
2519 /*
2520 * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2521 * would be the sample index.
2522 */
2523
2524 for (i = 0; i < dims; i++) {
2525 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2526 }
2527 /* never use more than 3 coords here, but emit_fetch_texel copies all 5 anyway */
2528 for (i = dims; i < 5; i++) {
2529 coords[i] = coord_undef;
2530 }
2531 if (layer_coord)
2532 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2533
2534 if (inst->Texture.NumOffsets == 1) {
2535 unsigned dim;
2536 sample_key |= LP_SAMPLER_OFFSETS;
2537 for (dim = 0; dim < dims; dim++) {
2538 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2539 }
2540 }
2541 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2542
2543 params.type = bld->bld_base.base.type;
2544 params.sample_key = sample_key;
2545 params.texture_index = unit;
2546 /*
2547 * The sampler is not actually used; set it to 0 so it won't exceed
2548 * PIPE_MAX_SAMPLERS and trigger assertions with d3d10, where the
2549 * sampler view number can exceed this limit.
2550 */
2551 params.sampler_index = 0;
2552 params.context_ptr = bld->context_ptr;
2553 params.thread_data_ptr = bld->thread_data_ptr;
2554 params.coords = coords;
2555 params.offsets = offsets;
2556 params.derivs = NULL;
2557 params.lod = explicit_lod;
2558 params.texel = texel;
2559
2560 bld->sampler->emit_tex_sample(bld->sampler,
2561 bld->bld_base.base.gallivm,
2562 &params);
2563
2564 if (is_samplei &&
2565 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2566 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2567 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2568 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2569 unsigned char swizzles[4];
2570 swizzles[0] = inst->Src[1].Register.SwizzleX;
2571 swizzles[1] = inst->Src[1].Register.SwizzleY;
2572 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2573 swizzles[3] = inst->Src[1].Register.SwizzleW;
2574
2575 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2576 }
2577 }
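/*
 * Illustrative example (not from the source): a texel fetch like
 *   TXF TEMP[0], TEMP[1], SAMP[0], 2D
 * takes integer coords from src 0 .xy and the explicit lod from
 * src 0 .w; buffer and msaa targets skip the lod, as handled above.
 */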
2578
2579 static void
2580 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2581 const struct tgsi_full_instruction *inst,
2582 LLVMValueRef *sizes_out,
2583 boolean is_sviewinfo)
2584 {
2585 LLVMValueRef explicit_lod;
2586 enum lp_sampler_lod_property lod_property;
2587 unsigned has_lod;
2588 unsigned i;
2589 unsigned unit = inst->Src[1].Register.Index;
2590 unsigned target, pipe_target;
2591 struct lp_sampler_size_query_params params;
2592
2593 if (is_sviewinfo) {
2594 target = bld->sv[unit].Resource;
2595 }
2596 else {
2597 target = inst->Texture.Texture;
2598 }
2599 switch (target) {
2600 case TGSI_TEXTURE_BUFFER:
2601 case TGSI_TEXTURE_RECT:
2602 case TGSI_TEXTURE_SHADOWRECT:
2603 has_lod = 0;
2604 break;
2605 default:
2606 has_lod = 1;
2607 break;
2608 }
2609
2610 if (!bld->sampler) {
2611 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2612 for (i = 0; i < 4; i++)
2613 sizes_out[i] = bld->bld_base.int_bld.undef;
2614 return;
2615 }
2616
2617 if (has_lod) {
2618 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2619 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2620 }
2621 else {
2622 explicit_lod = NULL;
2623 lod_property = LP_SAMPLER_LOD_SCALAR;
2624 }
2625
2626
2627 pipe_target = tgsi_to_pipe_tex_target(target);
2628
2629 params.int_type = bld->bld_base.int_bld.type;
2630 params.texture_unit = unit;
2631 params.target = pipe_target;
2632 params.context_ptr = bld->context_ptr;
2633 params.is_sviewinfo = TRUE;
2634 params.lod_property = lod_property;
2635 params.explicit_lod = explicit_lod;
2636 params.sizes_out = sizes_out;
2637
2638 bld->sampler->emit_size_query(bld->sampler,
2639 bld->bld_base.base.gallivm,
2640 &params);
2641 }
2642
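/**
 * Return TRUE if the shader is near its end, i.e. no instruction that
 * still needs a live execution mask (texturing, calls, control flow)
 * occurs within the next few instructions. The KILL paths use this to
 * skip the mask check when nothing after the kill depends on it.
 */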
2643 static boolean
2644 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2645 int pc)
2646 {
2647 unsigned i;
2648
2649 for (i = 0; i < 5; i++) {
2650 enum tgsi_opcode opcode;
2651
2652 if (pc + i >= bld->bld_base.info->num_instructions)
2653 return TRUE;
2654
2655 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2656
2657 if (opcode == TGSI_OPCODE_END)
2658 return TRUE;
2659
2660 if (opcode == TGSI_OPCODE_TEX ||
2661 opcode == TGSI_OPCODE_TXP ||
2662 opcode == TGSI_OPCODE_TXD ||
2663 opcode == TGSI_OPCODE_TXB ||
2664 opcode == TGSI_OPCODE_TXL ||
2665 opcode == TGSI_OPCODE_TXF ||
2666 opcode == TGSI_OPCODE_TXQ ||
2667 opcode == TGSI_OPCODE_TEX2 ||
2668 opcode == TGSI_OPCODE_TXB2 ||
2669 opcode == TGSI_OPCODE_TXL2 ||
2670 opcode == TGSI_OPCODE_SAMPLE ||
2671 opcode == TGSI_OPCODE_SAMPLE_B ||
2672 opcode == TGSI_OPCODE_SAMPLE_C ||
2673 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2674 opcode == TGSI_OPCODE_SAMPLE_D ||
2675 opcode == TGSI_OPCODE_SAMPLE_I ||
2676 opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2677 opcode == TGSI_OPCODE_SAMPLE_L ||
2678 opcode == TGSI_OPCODE_SVIEWINFO ||
2679 opcode == TGSI_OPCODE_CAL ||
2680 opcode == TGSI_OPCODE_IF ||
2681 opcode == TGSI_OPCODE_UIF ||
2682 opcode == TGSI_OPCODE_BGNLOOP ||
2683 opcode == TGSI_OPCODE_SWITCH)
2684 return FALSE;
2685 }
2686
2687 return TRUE;
2688 }
2689
2690
2691
2692 /**
2693 * Kill fragment if any of the src register values are negative.
2694 */
2695 static void
2696 emit_kill_if(
2697 struct lp_build_tgsi_soa_context *bld,
2698 const struct tgsi_full_instruction *inst,
2699 int pc)
2700 {
2701 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2702 const struct tgsi_full_src_register *reg = &inst->Src[0];
2703 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2704 LLVMValueRef mask;
2705 unsigned chan_index;
2706
2707 memset(&terms, 0, sizeof terms);
2708
2709 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2710 unsigned swizzle;
2711
2712 /* Unswizzle channel */
2713 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2714
2715 /* Check that the component has not already been tested. */
2716 assert(swizzle < TGSI_NUM_CHANNELS);
2717 if( !terms[swizzle] )
2718 /* TODO: change the comparison operator instead of setting the sign */
2719 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2720 }
2721
2722 mask = NULL;
2723 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2724 if(terms[chan_index]) {
2725 LLVMValueRef chan_mask;
2726
2727 /*
2728 * If term < 0 then mask = 0 else mask = ~0.
2729 */
2730 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2731
2732 if(mask)
2733 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2734 else
2735 mask = chan_mask;
2736 }
2737 }
2738
2739 if (bld->exec_mask.has_mask) {
2740 LLVMValueRef invmask;
2741 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2742 mask = LLVMBuildOr(builder, mask, invmask, "");
2743 }
2744
2745 lp_build_mask_update(bld->mask, mask);
2746 if (!near_end_of_shader(bld, pc))
2747 lp_build_mask_check(bld->mask);
2748 }
2749
2750
2751 /**
2752 * Unconditional fragment kill.
2753 * The only predication is the execution mask, which will apply if
2754 * we're inside a loop or conditional.
2755 */
2756 static void
2757 emit_kill(struct lp_build_tgsi_soa_context *bld,
2758 int pc)
2759 {
2760 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2761 LLVMValueRef mask;
2762
2763 /* For those channels which are "alive", disable fragment shader
2764 * execution.
2765 */
2766 if (bld->exec_mask.has_mask) {
2767 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2768 }
2769 else {
2770 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2771 mask = zero;
2772 }
2773
2774 lp_build_mask_update(bld->mask, mask);
2775
2776 if (!near_end_of_shader(bld, pc))
2777 lp_build_mask_check(bld->mask);
2778 }
2779
2780
2781 /**
2782 * Emit code which will dump the values of all the registers in the
2783 * given file to stdout.
2784 */
2785 static void
2786 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2787 unsigned file)
2788 {
2789 const struct tgsi_shader_info *info = bld->bld_base.info;
2790 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2791 LLVMBuilderRef builder = gallivm->builder;
2792 LLVMValueRef reg_ptr;
2793 int index;
2794 int max_index = info->file_max[file];
2795
2796 /*
2797 * Some register files, particularly constants, can be very large,
2798 * and dumping everything could make this unusably slow.
2799 */
2800 max_index = MIN2(max_index, 32);
2801
2802 for (index = 0; index <= max_index; index++) {
2803 LLVMValueRef res;
2804 unsigned mask;
2805 int chan;
2806
2807 if (index < 8 * sizeof(unsigned) &&
2808 (info->file_mask[file] & (1u << index)) == 0) {
2809 /* This register was not declared. */
2810 continue;
2811 }
2812
2813 if (file == TGSI_FILE_INPUT) {
2814 mask = info->input_usage_mask[index];
2815 } else {
2816 mask = TGSI_WRITEMASK_XYZW;
2817 }
2818
2819 for (chan = 0; chan < 4; chan++) {
2820 if ((mask & (1 << chan)) == 0) {
2821 /* This channel is not used. */
2822 continue;
2823 }
2824
2825 if (file == TGSI_FILE_CONSTANT) {
2826 struct tgsi_full_src_register reg;
2827 memset(&reg, 0, sizeof reg);
2828 reg.Register.File = file;
2829 reg.Register.Index = index;
2830 reg.Register.SwizzleX = 0;
2831 reg.Register.SwizzleY = 1;
2832 reg.Register.SwizzleZ = 2;
2833 reg.Register.SwizzleW = 3;
2834
2835 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2836 if (!res) {
2837 continue;
2838 }
2839 } else if (file == TGSI_FILE_INPUT) {
2840 res = bld->inputs[index][chan];
2841 if (!res) {
2842 continue;
2843 }
2844 } else if (file == TGSI_FILE_TEMPORARY) {
2845 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2846 assert(reg_ptr);
2847 res = LLVMBuildLoad(builder, reg_ptr, "");
2848 } else if (file == TGSI_FILE_OUTPUT) {
2849 reg_ptr = lp_get_output_ptr(bld, index, chan);
2850 assert(reg_ptr);
2851 res = LLVMBuildLoad(builder, reg_ptr, "");
2852 } else {
2853 assert(0);
2854 continue;
2855 }
2856
2857 emit_dump_reg(gallivm, file, index, chan, res);
2858 }
2859 }
2860 }
2861
2862
2863
2864 void
2865 lp_emit_declaration_soa(
2866 struct lp_build_tgsi_context *bld_base,
2867 const struct tgsi_full_declaration *decl)
2868 {
2869 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2870 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2871 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2872 const unsigned first = decl->Range.First;
2873 const unsigned last = decl->Range.Last;
2874 unsigned idx, i;
2875
2876 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2877
2878 switch (decl->Declaration.File) {
2879 case TGSI_FILE_TEMPORARY:
2880 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2881 assert(last < LP_MAX_INLINED_TEMPS);
2882 for (idx = first; idx <= last; ++idx) {
2883 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2884 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2885 }
2886 }
2887 break;
2888
2889 case TGSI_FILE_OUTPUT:
2890 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2891 for (idx = first; idx <= last; ++idx) {
2892 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2893 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2894 vec_type, "output");
2895 }
2896 }
2897 break;
2898
2899 case TGSI_FILE_ADDRESS:
2900 /* ADDR registers are only allocated with an integer LLVM IR type,
2901 * as they are guaranteed to always hold integer values.
2902 * XXX: Not sure if this exception is worthwhile (or the whole idea of
2903 * an ADDR register for that matter).
2904 */
2905 assert(last < LP_MAX_TGSI_ADDRS);
2906 for (idx = first; idx <= last; ++idx) {
2907 assert(idx < LP_MAX_TGSI_ADDRS);
2908 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2909 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2910 }
2911 break;
2912
2913 case TGSI_FILE_SAMPLER_VIEW:
2914 /*
2915 * The target stored here MUST match whatever is actually
2916 * in the set sampler views (what about the return type?).
2917 */
2918 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2919 for (idx = first; idx <= last; ++idx) {
2920 bld->sv[idx] = decl->SamplerView;
2921 }
2922 break;
2923
2924 case TGSI_FILE_CONSTANT:
2925 {
2926 /*
2927 * We could trivially fetch the per-buffer pointer when fetching the
2928 * constant, relying on llvm to figure out it's always the same pointer
2929 * anyway. However, doing so results in a huge (more than a factor of 10)
2930 * slowdown in llvm compilation times for some (but not all) shaders
2931 * (more specifically, the IR optimization spends way more time in
2932 * DominatorTree::dominates). At least with llvm versions 3.1 and 3.3.
2933 */
2934 unsigned idx2D = decl->Dim.Index2D;
2935 LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2936 assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2937 bld->consts[idx2D] =
2938 lp_build_array_get(gallivm, bld->consts_ptr, index2D);
2939 bld->consts_sizes[idx2D] =
2940 lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
2941 }
2942 break;
2943
2944 default:
2945 /* don't need to declare other vars */
2946 break;
2947 }
2948 }
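/*
 * Example (illustrative only): a declaration such as
 *   DCL TEMP[0..3]
 * with no indirect addressing allocates one alloca of vec_type per
 * channel, i.e. 4 * TGSI_NUM_CHANNELS stack slots in bld->temps.
 */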
2949
2950
2951 void lp_emit_immediate_soa(
2952 struct lp_build_tgsi_context *bld_base,
2953 const struct tgsi_full_immediate *imm)
2954 {
2955 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2956 struct gallivm_state * gallivm = bld_base->base.gallivm;
2957 LLVMValueRef imms[4];
2958 unsigned i;
2959 const uint size = imm->Immediate.NrTokens - 1;
2960 assert(size <= 4);
2961 switch (imm->Immediate.DataType) {
2962 case TGSI_IMM_FLOAT32:
2963 for( i = 0; i < size; ++i )
2964 imms[i] =
2965 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2966
2967 break;
2968 case TGSI_IMM_FLOAT64:
2969 case TGSI_IMM_UINT64:
2970 case TGSI_IMM_INT64:
2971 case TGSI_IMM_UINT32:
2972 for( i = 0; i < size; ++i ) {
2973 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
2974 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2975 }
2976
2977 break;
2978 case TGSI_IMM_INT32:
2979 for( i = 0; i < size; ++i ) {
2980 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
2981 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2982 }
2983
2984 break;
2985 }
2986 for( i = size; i < 4; ++i )
2987 imms[i] = bld_base->base.undef;
2988
2989 if (bld->use_immediates_array) {
2990 unsigned index = bld->num_immediates;
2991 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2992 LLVMBuilderRef builder = gallivm->builder;
2993 LLVMValueRef gep[2];
2994 gep[0] = lp_build_const_int32(gallivm, 0);
2995
2996 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
2997 for (i = 0; i < 4; ++i ) {
2998 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
2999 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3000 bld->imms_array, gep, 2, "");
3001 LLVMBuildStore(builder, imms[i], imm_ptr);
3002 }
3003 } else {
3004 /* simply copy the immediate values into the next immediates[] slot */
3005 unsigned i;
3006 assert(imm->Immediate.NrTokens - 1 <= 4);
3007 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3008
3009 for(i = 0; i < 4; ++i )
3010 bld->immediates[bld->num_immediates][i] = imms[i];
3011
3012 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3013 unsigned index = bld->num_immediates;
3014 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3015 LLVMBuilderRef builder = gallivm->builder;
3016 LLVMValueRef gep[2];
3017 gep[0] = lp_build_const_int32(gallivm, 0);
3018 for (i = 0; i < 4; ++i ) {
3019 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3020 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3021 bld->imms_array, gep, 2, "");
3022 LLVMBuildStore(builder,
3023 bld->immediates[index][i],
3024 imm_ptr);
3025 }
3026 }
3027 }
3028
3029 bld->num_immediates++;
3030 }
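/*
 * Example (illustrative only): an immediate such as
 *   IMM[0] FLT32 {1.0000, 0.5000, 0.0000, 0.0000}
 * becomes four broadcast vectors of vec_length elements each; integer
 * immediates are built in the int/uint type and bitcast to the float
 * vec_type, as done above.
 */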
3031
3032 static void
3033 ddx_emit(
3034 const struct lp_build_tgsi_action * action,
3035 struct lp_build_tgsi_context * bld_base,
3036 struct lp_build_emit_data * emit_data)
3037 {
3038 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3039
3040 emit_fetch_deriv(bld, emit_data->args[0], NULL,
3041 &emit_data->output[emit_data->chan], NULL);
3042 }
3043
3044 static void
3045 ddy_emit(
3046 const struct lp_build_tgsi_action * action,
3047 struct lp_build_tgsi_context * bld_base,
3048 struct lp_build_emit_data * emit_data)
3049 {
3050 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3051
3052 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3053 &emit_data->output[emit_data->chan]);
3054 }
3055
3056 static void
3057 kill_emit(
3058 const struct lp_build_tgsi_action * action,
3059 struct lp_build_tgsi_context * bld_base,
3060 struct lp_build_emit_data * emit_data)
3061 {
3062 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3063
3064 emit_kill(bld, bld_base->pc - 1);
3065 }
3066
3067 static void
3068 kill_if_emit(
3069 const struct lp_build_tgsi_action * action,
3070 struct lp_build_tgsi_context * bld_base,
3071 struct lp_build_emit_data * emit_data)
3072 {
3073 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3074
3075 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3076 }
3077
3078 static void
3079 tex_emit(
3080 const struct lp_build_tgsi_action * action,
3081 struct lp_build_tgsi_context * bld_base,
3082 struct lp_build_emit_data * emit_data)
3083 {
3084 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3085
3086 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3087 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3088 }
3089
3090 static void
3091 tex2_emit(
3092 const struct lp_build_tgsi_action * action,
3093 struct lp_build_tgsi_context * bld_base,
3094 struct lp_build_emit_data * emit_data)
3095 {
3096 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3097
3098 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3099 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3100 }
3101
3102 static void
3103 txb_emit(
3104 const struct lp_build_tgsi_action * action,
3105 struct lp_build_tgsi_context * bld_base,
3106 struct lp_build_emit_data * emit_data)
3107 {
3108 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3109
3110 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3111 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3112 }
3113
3114 static void
3115 txb2_emit(
3116 const struct lp_build_tgsi_action * action,
3117 struct lp_build_tgsi_context * bld_base,
3118 struct lp_build_emit_data * emit_data)
3119 {
3120 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3121
3122 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3123 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3124 }
3125
3126 static void
3127 txd_emit(
3128 const struct lp_build_tgsi_action * action,
3129 struct lp_build_tgsi_context * bld_base,
3130 struct lp_build_emit_data * emit_data)
3131 {
3132 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3133
3134 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3135 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3136 }
3137
3138 static void
3139 txl_emit(
3140 const struct lp_build_tgsi_action * action,
3141 struct lp_build_tgsi_context * bld_base,
3142 struct lp_build_emit_data * emit_data)
3143 {
3144 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3145
3146 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3147 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3148 }
3149
3150 static void
3151 txl2_emit(
3152 const struct lp_build_tgsi_action * action,
3153 struct lp_build_tgsi_context * bld_base,
3154 struct lp_build_emit_data * emit_data)
3155 {
3156 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3157
3158 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3159 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3160 }
3161
3162 static void
3163 txp_emit(
3164 const struct lp_build_tgsi_action * action,
3165 struct lp_build_tgsi_context * bld_base,
3166 struct lp_build_emit_data * emit_data)
3167 {
3168 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3169
3170 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3171 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3172 }
3173
3174 static void
3175 tg4_emit(
3176 const struct lp_build_tgsi_action * action,
3177 struct lp_build_tgsi_context * bld_base,
3178 struct lp_build_emit_data * emit_data)
3179 {
3180 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3181
3182 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3183 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3184 }
3185
3186 static void
3187 lodq_emit(
3188 const struct lp_build_tgsi_action * action,
3189 struct lp_build_tgsi_context * bld_base,
3190 struct lp_build_emit_data * emit_data)
3191 {
3192 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3193
3194 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3195 emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3196 }
3197
3198 static void
3199 txq_emit(
3200 const struct lp_build_tgsi_action * action,
3201 struct lp_build_tgsi_context * bld_base,
3202 struct lp_build_emit_data * emit_data)
3203 {
3204 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3205
3206 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3207 }
3208
3209 static void
3210 txf_emit(
3211 const struct lp_build_tgsi_action * action,
3212 struct lp_build_tgsi_context * bld_base,
3213 struct lp_build_emit_data * emit_data)
3214 {
3215 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3216
3217 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3218 }
3219
3220 static void
3221 sample_i_emit(
3222 const struct lp_build_tgsi_action * action,
3223 struct lp_build_tgsi_context * bld_base,
3224 struct lp_build_emit_data * emit_data)
3225 {
3226 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3227
3228 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3229 }
3230
3231 static void
3232 sample_emit(
3233 const struct lp_build_tgsi_action * action,
3234 struct lp_build_tgsi_context * bld_base,
3235 struct lp_build_emit_data * emit_data)
3236 {
3237 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3238
3239 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3240 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3241 }
3242
3243 static void
3244 sample_b_emit(
3245 const struct lp_build_tgsi_action * action,
3246 struct lp_build_tgsi_context * bld_base,
3247 struct lp_build_emit_data * emit_data)
3248 {
3249 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3250
3251 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3252 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3253 }
3254
3255 static void
3256 sample_c_emit(
3257 const struct lp_build_tgsi_action * action,
3258 struct lp_build_tgsi_context * bld_base,
3259 struct lp_build_emit_data * emit_data)
3260 {
3261 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3262
3263 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3264 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3265 }
3266
3267 static void
3268 sample_c_lz_emit(
3269 const struct lp_build_tgsi_action * action,
3270 struct lp_build_tgsi_context * bld_base,
3271 struct lp_build_emit_data * emit_data)
3272 {
3273 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3274
3275 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3276 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3277 }
3278
3279 static void
3280 sample_d_emit(
3281 const struct lp_build_tgsi_action * action,
3282 struct lp_build_tgsi_context * bld_base,
3283 struct lp_build_emit_data * emit_data)
3284 {
3285 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3286
3287 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3288 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3289 }
3290
3291 static void
3292 sample_l_emit(
3293 const struct lp_build_tgsi_action * action,
3294 struct lp_build_tgsi_context * bld_base,
3295 struct lp_build_emit_data * emit_data)
3296 {
3297 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3298
3299 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3300 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3301 }
3302
3303 static void
3304 gather4_emit(
3305 const struct lp_build_tgsi_action * action,
3306 struct lp_build_tgsi_context * bld_base,
3307 struct lp_build_emit_data * emit_data)
3308 {
3309 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3310
3311 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3312 FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
3313 }
3314
3315 static void
3316 sviewinfo_emit(
3317 const struct lp_build_tgsi_action * action,
3318 struct lp_build_tgsi_context * bld_base,
3319 struct lp_build_emit_data * emit_data)
3320 {
3321 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3322
3323 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3324 }
3325
3326 static void
3327 lod_emit(
3328 const struct lp_build_tgsi_action * action,
3329 struct lp_build_tgsi_context * bld_base,
3330 struct lp_build_emit_data * emit_data)
3331 {
3332 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3333
3334 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3335 FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3336 }
3337
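/**
 * Return the effective execution mask: the shader's mask value ANDed
 * with the control-flow exec mask, when one is active.
 */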
3338 static LLVMValueRef
3339 mask_vec(struct lp_build_tgsi_context *bld_base)
3340 {
3341 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3342 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3343 struct lp_exec_mask *exec_mask = &bld->exec_mask;
3344
3345 if (!exec_mask->has_mask) {
3346 return lp_build_mask_value(bld->mask);
3347 }
3348 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
3349 exec_mask->exec_mask, "");
3350 }
3351
3352 static void
3353 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3354 LLVMValueRef ptr,
3355 LLVMValueRef mask)
3356 {
3357 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3358 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3359
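/* Active lanes in the mask are all ones, i.e. -1 as integers, so
 * subtracting the mask increments exactly the active lanes by one.
 */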
3360 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3361
3362 LLVMBuildStore(builder, current_vec, ptr);
3363 }
3364
3365 static void
3366 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3367 LLVMValueRef ptr,
3368 LLVMValueRef mask)
3369 {
3370 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3371 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3372
3373 current_vec = lp_build_select(&bld_base->uint_bld,
3374 mask,
3375 bld_base->uint_bld.zero,
3376 current_vec);
3377
3378 LLVMBuildStore(builder, current_vec, ptr);
3379 }
3380
3381 static LLVMValueRef
3382 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3383 LLVMValueRef current_mask_vec,
3384 LLVMValueRef total_emitted_vertices_vec)
3385 {
3386 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3387 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3388 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3389 total_emitted_vertices_vec,
3390 bld->max_output_vertices_vec);
3391
3392 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3393 }
3394
3395 static void
3396 emit_vertex(
3397 const struct lp_build_tgsi_action * action,
3398 struct lp_build_tgsi_context * bld_base,
3399 struct lp_build_emit_data * emit_data)
3400 {
3401 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3402 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3403
3404 if (bld->gs_iface->emit_vertex) {
3405 LLVMValueRef mask = mask_vec(bld_base);
3406 LLVMValueRef total_emitted_vertices_vec =
3407 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3408 mask = clamp_mask_to_max_output_vertices(bld, mask,
3409 total_emitted_vertices_vec);
3410 gather_outputs(bld);
3411 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
3412 bld->outputs,
3413 total_emitted_vertices_vec);
3414 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3415 mask);
3416 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3417 mask);
3418 #if DUMP_GS_EMITS
3419 lp_build_print_value(bld->bld_base.base.gallivm,
3420 " +++ emit vertex masked ones = ",
3421 mask);
3422 lp_build_print_value(bld->bld_base.base.gallivm,
3423 " +++ emit vertex emitted = ",
3424 total_emitted_vertices_vec);
3425 #endif
3426 }
3427 }
3428
3429
3430 static void
3431 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
3432 LLVMValueRef mask)
3433 {
3434 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3435 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3436
3437 if (bld->gs_iface->end_primitive) {
3438 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3439 LLVMValueRef emitted_vertices_vec =
3440 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
3441 LLVMValueRef emitted_prims_vec =
3442 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3443
3444 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3445 emitted_vertices_vec,
3446 uint_bld->zero);
3447 /* We need to combine the current execution mask with the mask
3448 telling us which, if any, execution slots actually have
3449 unemitted primitives; this way we make sure that end_primitive
3450 executes only on the paths that have unflushed vertices. */
3451 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
3452
3453 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
3454 emitted_vertices_vec,
3455 emitted_prims_vec);
3456
3457 #if DUMP_GS_EMITS
3458 lp_build_print_value(bld->bld_base.base.gallivm,
3459 " +++ end prim masked ones = ",
3460 mask);
3461 lp_build_print_value(bld->bld_base.base.gallivm,
3462 " +++ end prim emitted verts1 = ",
3463 emitted_vertices_vec);
3464 lp_build_print_value(bld->bld_base.base.gallivm,
3465 " +++ end prim emitted prims1 = ",
3466 LLVMBuildLoad(builder,
3467 bld->emitted_prims_vec_ptr, ""));
3468 #endif
3469 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
3470 mask);
3471 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
3472 mask);
3473 #if DUMP_GS_EMITS
3474 lp_build_print_value(bld->bld_base.base.gallivm,
3475 " +++ end prim emitted verts2 = ",
3476 LLVMBuildLoad(builder,
3477 bld->emitted_vertices_vec_ptr, ""));
3478 #endif
3479 }
3480
3481 }

static void
end_primitive(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (bld->gs_iface->end_primitive) {
      LLVMValueRef mask = mask_vec(bld_base);
      end_primitive_masked(bld_base, mask);
   }
}

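/*
 * The control-flow opcodes below don't compute any values; they only
 * manipulate the execution mask (and, for CAL/RET, the TGSI program
 * counter), so each handler is a thin wrapper around an lp_exec helper.
 */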
static void
cal_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
                     &bld_base->pc);
}

static void
ret_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
}

static void
brk_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_break(&bld->exec_mask, bld_base);
}

static void
if_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], bld->bld_base.base.zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

static void
uif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], uint_bld->zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

static void
case_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_case(&bld->exec_mask, emit_data->args[0]);
}

static void
default_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_default(&bld->exec_mask, bld_base);
}

static void
switch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
}

static void
endswitch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endswitch(&bld->exec_mask, bld_base);
}

static void
bgnloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_bgnloop(&bld->exec_mask);
}

static void
bgnsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_bgnsub(&bld->exec_mask);
}

static void
else_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_invert(&bld->exec_mask);
}

static void
endif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_pop(&bld->exec_mask);
}

static void
endloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
}

static void
endsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
}

static void
cont_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_continue(&bld->exec_mask);
}

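/*
 * Shader prologue: allocate the alloca-backed arrays for the register
 * files that are accessed indirectly, copy any already-computed inputs
 * into their array, and zero-initialize the geometry shader emit
 * counters.
 */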
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
      bld->temps_array = lp_build_alloca_undef(gallivm,
                                               LLVMArrayType(bld_base->base.vec_type, array_size),
                                               "temp_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                 bld_base->base.vec_type, array_size,
                                                 "output_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
      bld->imms_array = lp_build_alloca_undef(gallivm,
                                              LLVMArrayType(bld_base->base.vec_type, array_size),
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array so that they can be indexed dynamically */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                                vec_type, array_size,
                                                "input_array");

      assert(bld_base->info->num_inputs
             <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}

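/*
 * Shader epilogue: for geometry shaders, flush any unfinished primitive
 * and report the final vertex/primitive counts through the GS interface;
 * otherwise gather the computed outputs into the caller-visible output
 * slots.
 */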
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* Implicit end_primitive, needed in case there are any unflushed
       * vertices in the cache. Note we must not call end_primitive() here,
       * since the exec_mask is not valid at this point; use the shader's
       * overall mask instead. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      /* If we have indirect addressing in outputs we need to copy our
       * alloca array to the output slots specified by the caller */
      gather_outputs(bld);
   }
}

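/*
 * Main entry point: translate a TGSI token stream into LLVM IR in SoA
 * form, where each register channel holds a vector with one value per
 * shader invocation. The caller supplies the execution mask, constants,
 * inputs/outputs, the sampler interface and, for geometry shaders, the
 * GS interface.
 */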
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  LLVMValueRef const_sizes_ptr,
                  const struct lp_bld_tgsi_system_values *system_values,
                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef context_ptr,
                  LLVMValueRef thread_data_ptr,
                  const struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info,
                  const struct lp_build_tgsi_gs_iface *gs_iface)
{
   struct lp_build_tgsi_soa_context bld;

   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   bld.mask = mask;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.const_sizes_ptr = const_sizes_ptr;
   bld.sampler = sampler;
   bld.bld_base.info = info;
   bld.indirect_files = info->indirect_files;
   bld.context_ptr = context_ptr;
   bld.thread_data_ptr = thread_data_ptr;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat them
    * like indirect temporaries.
    */
   if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
   /*
    * For performance reasons immediates are always backed by a static
    * array, but if their number is too great we have to fall back to a
    * dynamically allocated array.
    */
   bld.use_immediates_array =
      (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }
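   /*
    * Illustrative note: file_max[] holds the highest register index
    * declared, so a shader whose highest TEMP index stays below
    * LP_MAX_INLINED_TEMPS keeps individual per-channel values, while one
    * more temporary flips the whole file onto the alloca-array path used
    * for indirect addressing; the same applies to immediates and
    * LP_MAX_INLINED_IMMEDIATES.
    */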

   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   bld.bld_base.emit_store = emit_store;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;

   if (gs_iface) {
      /* There's no default value for this property because it should
       * always be set, but apps using ext_geometry_shader4 quite often
       * forgot to set it, so use MAX_VERTEX_VARYING from that spec (32)
       * as the fallback. We could debug_assert() when it's missing
       * instead, but that would be a lot uglier. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
         info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32;

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *system_values;

   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("--- TGSI dump + generated LLVM IR begin ---\n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("--- TGSI dump + generated LLVM IR end ---\n");
   }

   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);
   }

   lp_exec_mask_fini(&bld.exec_mask);
}