/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * Copyright 2007-2008 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * TGSI to LLVM IR translation -- SoA.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 *
 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
 * Brian Paul, and others.
 */

#include "pipe/p_config.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_strings.h"
#include "lp_bld_tgsi_action.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_gather.h"
#include "lp_bld_init.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "lp_bld_sample.h"
#include "lp_bld_struct.h"

/* SM 4.0 says that subroutines can nest 32 deep and
 * we need one more for our main function */
#define LP_MAX_NUM_FUNCS 33

#define DUMP_GS_EMITS 0

/*
 * If non-zero, the generated LLVM IR will print intermediate results for
 * every TGSI instruction.
 *
 * TODO:
 * - take execution masks into consideration
 * - debug control-flow instructions
 */
#define DEBUG_EXECUTION 0


/*
 * Emit code to print a register value.
 */
static void
emit_dump_reg(struct gallivm_state *gallivm,
              unsigned file,
              unsigned index,
              unsigned chan,
              LLVMValueRef value)
{
   char buf[32];

   util_snprintf(buf, sizeof buf, " %s[%u].%c = ",
                 tgsi_file_name(file),
                 index, "xyzw"[chan]);

   lp_build_print_value(gallivm, buf, value);
}

/*
 * Return the context for the current function.
 * (always 'main', if the shader doesn't do any function calls)
 */
static inline struct function_ctx *
func_ctx(struct lp_exec_mask *mask)
{
   assert(mask->function_stack_size > 0);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
   return &mask->function_stack[mask->function_stack_size - 1];
}

/*
 * Returns true if we're in a loop.
 * It's global, meaning that it returns true even if there's
 * no loop inside the current function, but we were inside
 * a loop inside another function, from which this one was called.
 */
static inline boolean
mask_has_loop(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->loop_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}

/*
 * Returns true if we're inside a switch statement.
 * It's global, meaning that it returns true even if there's
 * no switch in the current function, but we were inside
 * a switch inside another function, from which this one was called.
 */
static inline boolean
mask_has_switch(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->switch_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}

/*
 * Returns true if we're inside a conditional.
 * It's global, meaning that it returns true even if there's
 * no conditional in the current function, but we were inside
 * a conditional inside another function, from which this one was called.
 */
static inline boolean
mask_has_cond(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->cond_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}


/*
 * Initialize a function context at the specified index.
 */
static void
lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
{
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = &mask->function_stack[function_idx];

   ctx->cond_stack_size = 0;
   ctx->loop_stack_size = 0;
   ctx->switch_stack_size = 0;

   if (function_idx == 0) {
      ctx->ret_mask = mask->ret_mask;
   }

   ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
                                       int_type, "looplimiter");
   LLVMBuildStore(
      builder,
      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
      ctx->loop_limiter);
}

static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->ret_in_main = FALSE;
   /* For the main function */
   mask->function_stack_size = 1;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
      mask->cond_mask = mask->switch_mask =
         LLVMConstAllOnes(mask->int_vec_type);

   mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
                                 sizeof(mask->function_stack[0]));
   lp_exec_mask_function_init(mask, 0);
}

static void
lp_exec_mask_fini(struct lp_exec_mask *mask)
{
   FREE(mask->function_stack);
}

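/*
 * Recompute the combined execution mask from the partial masks.
 * exec_mask ends up as the AND of cond_mask, cont_mask, break_mask,
 * switch_mask and ret_mask, where each partial mask is only taken into
 * account while the corresponding kind of control flow is active
 * (otherwise it is all-ones by construction and can be skipped).
 */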
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   boolean has_loop_mask = mask_has_loop(mask);
   boolean has_cond_mask = mask_has_cond(mask);
   boolean has_switch_mask = mask_has_switch(mask);
   boolean has_ret_mask = mask->function_stack_size > 1 ||
                          mask->ret_in_main;

   if (has_loop_mask) {
      /* For loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (has_switch_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->switch_mask,
                                     "switchmask");
   }

   if (has_ret_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (has_cond_mask ||
                     has_loop_mask ||
                     has_switch_mask ||
                     has_ret_mask);
}

static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

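   /*
    * Past LP_MAX_TGSI_NESTING we only keep counting nesting levels (so
    * pushes and pops stay balanced) without tracking any masks; the
    * corresponding pop just decrements the counter again.
    */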
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
      ctx->cond_stack_size++;
      return;
   }
   if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}

static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(ctx->cond_stack_size);
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
   if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}

static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   struct function_ctx *ctx = func_ctx(mask);
   assert(ctx->cond_stack_size);
   --ctx->cond_stack_size;
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
   lp_exec_mask_update(mask);
}

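/*
 * Open a loop: save the current loop state on the loop stack, keep the
 * break mask in an alloca so it survives the loop back-edge, and start
 * the new basic block that ENDLOOP will branch back to.
 */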
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
      ++ctx->loop_stack_size;
      return;
   }

   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
   ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
   ++ctx->loop_stack_size;

   ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, ctx->loop_block);
   LLVMPositionBuilderAtEnd(builder, ctx->loop_block);

   mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");

   lp_exec_mask_update(mask);
}

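/*
 * Handle the TGSI BRK opcode: inside a loop, clear the currently executing
 * channels from break_mask; inside a switch, clear them from switch_mask
 * instead (or zero it entirely when the break unconditionally ends the
 * case, i.e. the next opcode is ENDSWITCH or CASE).
 */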
static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      enum tgsi_opcode opcode =
         bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);

      if (ctx->switch_in_default) {
         /*
          * Stop default execution, but only if this is an unconditional
          * switch. (The condition here is not perfect since dead code after
          * break is allowed, but it should be sufficient since false
          * negatives are just unoptimized - so we don't have to
          * pre-evaluate that.)
          */
         if (break_always && ctx->switch_pc) {
            bld_base->pc = ctx->switch_pc;
            return;
         }
      }

      if (break_always) {
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}

static void lp_exec_continue(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                         mask->exec_mask,
                                         "");

   mask->cont_mask = LLVMBuildAnd(builder,
                                  mask->cont_mask,
                                  exec_mask, "");

   lp_exec_mask_update(mask);
}


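/*
 * Close a loop: store the (possibly updated) break mask back into its
 * alloca, decrement the runtime loop limiter, and emit the conditional
 * back-edge: loop again while any channel is still active and the limiter
 * hasn't run out, otherwise fall through and pop the saved loop state.
 */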
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);

   assert(ctx->loop_stack_size);
   if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      --ctx->loop_stack_size;
      return;
   }

   /*
    * Restore the cont_mask, but don't pop
    */
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, ctx->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   LLVMBuildCondBr(builder,
                   icond, ctx->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   assert(ctx->loop_stack_size);
   --ctx->loop_stack_size;
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
   mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
   ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
   ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
                                           ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}

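/*
 * Open a switch: save the enclosing switch state, then start with an
 * all-zero switch mask (each CASE ORs matching channels back in) and
 * remember the switch value for the CASE comparisons.
 */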
static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size++;
      return;
   }

   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size++;

   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}

static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size--;
      return;
   }

   /* Check if there's a deferred default; if so, execute it now. */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * Re-purpose switch_pc to point to here again, since we stop execution
       * of the deferred default after the next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   }
   else if (ctx->switch_pc && ctx->switch_in_default) {
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}

static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   LLVMValueRef casemask, prevmask;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (!ctx->switch_in_default) {
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
                                             ctx->switch_mask_default, "sw_default_mask");
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}

/*
 * Analyse the default statement in a switch.
 * \return true if default is the last statement, false otherwise
 * \param default_pc_start contains the pc of the instruction to jump to
 *                         if default wasn't last but there's no
 *                         fallthrough into default.
 */
static boolean default_analyse_is_last(struct lp_exec_mask *mask,
                                       struct lp_build_tgsi_context * bld_base,
                                       int *default_pc_start)
{
   unsigned pc = bld_base->pc;
   struct function_ctx *ctx = func_ctx(mask);
   int curr_switch_stack = ctx->switch_stack_size;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return false;
   }

   /* skip over case statements which are together with default */
   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
      pc++;
   }

   while (pc != ~0u && pc < bld_base->num_instructions) {
      enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
      switch (opcode) {
      case TGSI_OPCODE_CASE:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return false;
         }
         break;
      case TGSI_OPCODE_SWITCH:
         curr_switch_stack++;
         break;
      case TGSI_OPCODE_ENDSWITCH:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return true;
         }
         curr_switch_stack--;
         break;
      default:
         ; /* nothing */
      }
      pc++;
   }
   /* should never arrive here */
   assert(0);
   return true;
}

static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc;
   boolean default_is_last;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not always be at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is the last statement in the switch (note that case statements
    * appearing "at the same time" as default don't change that) everything
    * is just fine, update the switch mask and go on. This means we can
    * handle default with fallthrough INTO it without overhead, if it is last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, "case" immediately before default isn't really a
       * fallthrough, however we still have to count them as such as we
       * already have updated the masks.
       * If that happens in practice we could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do switch analysis at switch start time instead).
       */
      enum tgsi_opcode opcode =
         bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not the last statement and there was no fallthrough into
       * it, we record the PC and continue execution at the next case (again,
       * those cases encountered at the same time don't count). At endswitch
       * time, we update the switch mask and go back, executing the code we
       * skipped until the next break (possibly re-executing some code with a
       * changed mask if there was a fallthrough out of default).
       * Finally, if it is not the last statement and there was a fallthrough
       * into it, do the same as with the former case, except instead of
       * skipping the code just execute it without updating the mask, then go
       * back and re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         bld_base->pc = default_exec_pc;
      }
   }
}


/*
 * Stores 'val' into the address pointed to by 'dst_ptr'.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               struct lp_build_context *bld_store,
                               LLVMValueRef val,
                               LLVMValueRef dst_ptr)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = mask->has_mask ? mask->exec_mask : NULL;

   assert(lp_check_value(bld_store->type, val));
   assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val) ||
          LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(dst_ptr))) == LLVMArrayTypeKind);

   if (exec_mask) {
      LLVMValueRef res, dst;

      dst = LLVMBuildLoad(builder, dst_ptr, "");
      res = lp_build_select(bld_store, exec_mask, val, dst);
      LLVMBuildStore(builder, res, dst_ptr);
   } else
      LLVMBuildStore(builder, val, dst_ptr);
}

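/*
 * Handle the TGSI CAL opcode: push a new function frame (saving the return
 * PC and the current ret_mask) and redirect execution to the callee.
 * Calls that would overflow LP_MAX_NUM_FUNCS are silently ignored.
 */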
static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
      return;
   }

   lp_exec_mask_function_init(mask, mask->function_stack_size);
   mask->function_stack[mask->function_stack_size].pc = *pc;
   mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
   mask->function_stack_size++;
   *pc = func;
}

static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef exec_mask;

   if (ctx->cond_stack_size == 0 &&
       ctx->loop_stack_size == 0 &&
       ctx->switch_stack_size == 0 &&
       mask->function_stack_size == 1) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->function_stack_size == 1) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in an if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}

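/* Nothing to do at subroutine entry; lp_exec_mask_call and
 * lp_exec_mask_endsub handle all of the call state.
 */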
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}

static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   struct function_ctx *ctx;

   assert(mask->function_stack_size > 1);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);

   ctx = func_ctx(mask);
   mask->function_stack_size--;

   *pc = ctx->pc;
   mask->ret_mask = ctx->ret_mask;

   lp_exec_mask_update(mask);
}


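/*
 * Return a pointer to the given channel of a TEMP or OUTPUT register:
 * either a GEP into the flat alloca array (when the file is accessed
 * indirectly and thus kept in array form), or the per-channel alloca
 * otherwise.
 */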
static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned file,
             int index,
             unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
   LLVMValueRef var_of_array;

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      array_of_vars = bld->temps;
      var_of_array = bld->temps_array;
      break;
   case TGSI_FILE_OUTPUT:
      array_of_vars = bld->outputs;
      var_of_array = bld->outputs_array;
      break;
   default:
      assert(0);
      return NULL;
   }

   assert(chan < 4);

   if (bld->indirect_files & (1 << file)) {
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
      if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
         LLVMValueRef gep[2];
         gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
         gep[1] = lindex;
         return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
      } else {
         return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
      }
   }
   else {
      assert(index <= bld->bld_base.info->file_max[file]);
      return array_of_vars[index][chan];
   }
}


/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which temporary register
 * \param chan  which channel of the temp register.
 */
LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
                    unsigned index,
                    unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
}

/**
 * Return pointer to an output register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which output register
 * \param chan  which channel of the output register.
 */
LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
                  unsigned index,
                  unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
}

/*
 * If we have indirect addressing in outputs, copy our alloca array
 * to the output slots specified by the caller to make sure
 * our outputs are delivered consistently via the same interface.
 */
static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)
{
   if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
      unsigned index, chan;
      assert(bld->bld_base.info->num_outputs <=
             bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
      for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
         }
      }
   }
}

/**
 * Gather vector.
 * When indexes2 is non-NULL this is a 64-bit gather: the result has twice
 * the vector length, and even/odd elements are loaded via indexes/indexes2
 * respectively.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_context *bld_base,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld = &bld_base->base;
   LLVMValueRef res;
   unsigned i;

   if (indexes2)
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
   else
      res = bld->undef;
   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (indexes2)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
      LLVMValueRef si, di;
      LLVMValueRef index;
      LLVMValueRef scalar_ptr, scalar;

      di = lp_build_const_int32(bld->gallivm, i);
      if (indexes2)
         si = lp_build_const_int32(bld->gallivm, i >> 1);
      else
         si = di;

      if (indexes2 && (i & 1)) {
         index = LLVMBuildExtractElement(builder,
                                         indexes2, si, "");
      } else {
         index = LLVMBuildExtractElement(builder,
                                         indexes, si, "");
      }
      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   }

   if (overflow_mask) {
      if (indexes2) {
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                       bld_base->dbl_bld.int_vec_type, "");
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
                               bld_base->dbl_bld.zero, res);
      } else
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}


/**
 * Scatter/store vector.
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;
   LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}


/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_ind_register *indirect_reg,
                   int index_limit)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   /* use the component of the address register selected by the swizzle
    * (normally X) */
   unsigned swizzle = indirect_reg->Swizzle;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   switch (indirect_reg->File) {
   case TGSI_FILE_ADDRESS:
      rel = LLVMBuildLoad(builder,
                          bld->addr[indirect_reg->Index][swizzle],
                          "load addr reg");
      /* ADDR LLVM values already have LLVM integer type. */
      break;
   case TGSI_FILE_TEMPORARY:
      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
      rel = LLVMBuildLoad(builder, rel, "load temp reg");
      /* TEMP LLVM values always have LLVM float type, but for indirection, the
       * value actually stored is expected to be an integer */
      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
      break;
   default:
      assert(0);
      rel = uint_bld->zero;
   }

   index = lp_build_add(uint_bld, base, rel);

   /*
    * emit_fetch_constant handles constant buffer overflow so this code
    * is pointless for them.
    * Furthermore the D3D10 spec in section 6.5 says:
    * If the constant buffer bound to a slot is larger than the size
    * declared in the shader for that slot, implementations are allowed
    * to return incorrect data (not necessarily 0) for indices that are
    * larger than the declared size but smaller than the buffer size.
    */
   if (reg_file != TGSI_FILE_CONSTANT) {
      assert(index_limit > 0);
      max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                         uint_bld->type, index_limit);

      assert(!uint_bld->type.sign);
      index = lp_build_min(uint_bld, index, max_index);
   }

   return index;
}

static struct lp_build_context *
stype_to_fetch(struct lp_build_tgsi_context * bld_base,
               enum tgsi_opcode_type stype)
{
   struct lp_build_context *bld_fetch;

   switch (stype) {
   case TGSI_TYPE_FLOAT:
   case TGSI_TYPE_UNTYPED:
      bld_fetch = &bld_base->base;
      break;
   case TGSI_TYPE_UNSIGNED:
      bld_fetch = &bld_base->uint_bld;
      break;
   case TGSI_TYPE_SIGNED:
      bld_fetch = &bld_base->int_bld;
      break;
   case TGSI_TYPE_DOUBLE:
      bld_fetch = &bld_base->dbl_bld;
      break;
   case TGSI_TYPE_UNSIGNED64:
      bld_fetch = &bld_base->uint64_bld;
      break;
   case TGSI_TYPE_SIGNED64:
      bld_fetch = &bld_base->int64_bld;
      break;
   case TGSI_TYPE_VOID:
   default:
      assert(0);
      bld_fetch = NULL;
      break;
   }
   return bld_fetch;
}

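/*
 * Compute per-element offsets into an SoA register array laid out as
 * reg0.x[0..length-1], reg0.y[0..length-1], ..., i.e. element e of
 * channel c of register r lives at offset (r * 4 + c) * length + e.
 * For example, with length == 4, channel .y of register 2 covers
 * offsets 36..39.
 */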
static LLVMValueRef
get_soa_array_offsets(struct lp_build_context *uint_bld,
                      LLVMValueRef indirect_index,
                      unsigned chan_index,
                      boolean need_perelement_offset)
{
   struct gallivm_state *gallivm = uint_bld->gallivm;
   LLVMValueRef chan_vec =
      lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
   LLVMValueRef length_vec =
      lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
   LLVMValueRef index_vec;

   /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
   index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
   index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
   index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

   if (need_perelement_offset) {
      LLVMValueRef pixel_offsets;
      unsigned i;
      /* build pixel offset vector: {0, 1, 2, 3, ...} */
      pixel_offsets = uint_bld->undef;
      for (i = 0; i < uint_bld->type.length; i++) {
         LLVMValueRef ii = lp_build_const_int32(gallivm, i);
         pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
                                                ii, ii, "");
      }
      index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
   }
   return index_vec;
}

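/*
 * Fetch a constant register channel. Indirect accesses gather per-element
 * values and force out-of-bounds lanes to zero via overflow_mask; direct
 * accesses load a single scalar and broadcast it across the vector.
 */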
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0u);

   if (reg->Register.Dimension) {
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */
      LLVMValueRef overflow_mask;
      LLVMValueRef index_vec2 = NULL;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                       indirect_index, num_consts);

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef swizzle_vec2;
         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
      }
      /* Gather values from the constant buffer */
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
   }
   else {
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;
      struct lp_build_context *bld_broad = &bld_base->base;
      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");

      if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
         LLVMValueRef scalar2, scalar2_ptr;
         LLVMValueRef shuffles[2];
         index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));

         scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
                                    &index, 1, "");

         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
         scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
         shuffles[0] = lp_build_const_int32(gallivm, 0);
         shuffles[1] = lp_build_const_int32(gallivm, 1);

         res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
         res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
         res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
      } else {
         if (stype == TGSI_TYPE_DOUBLE) {
            LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
            bld_broad = &bld_base->dbl_bld;
         } else if (stype == TGSI_TYPE_UNSIGNED64) {
            LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
            bld_broad = &bld_base->uint64_bld;
         } else if (stype == TGSI_TYPE_SIGNED64) {
            LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
            bld_broad = &bld_base->int64_bld;
         }
         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
         res = lp_build_broadcast_scalar(bld_broad, scalar);
      }
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}

/**
 * Fetch 64-bit values from two separate channels.
 * 64-bit values are stored split across two channels, like xy and zw.
 * This function creates a set of vec_length*2 floats,
 * extracts the values from the two channels,
 * puts them in the correct place, then casts to vec_length 64-bit values.
 */
static LLVMValueRef
emit_fetch_64bit(
   struct lp_build_tgsi_context * bld_base,
   enum tgsi_opcode_type stype,
   LLVMValueRef input,
   LLVMValueRef input2)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->base.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
   }
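   /*
    * The shuffle interleaves the two inputs element-wise: for a length-4
    * vector the indices are {0,4, 1,5, 2,6, 3,7}, so the result pairs
    * input[i] with input2[i] to form each 64-bit value.
    */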
   res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");

   return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
}

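/*
 * Fetch an immediate register channel. Immediates normally live in
 * per-channel vectors; when kept in array form (use_immediates_array or
 * indirect addressing) they are fetched via gather/load instead.
 */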
static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;
   unsigned swizzle = swizzle_in & 0xffff;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec;  /* index into the immediate register array */
         LLVMValueRef index_vec2 = NULL;
         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect,
                                             bld->bld_base.info->file_max[reg->Register.File]);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);
         if (tgsi_type_is_64bit(stype))
            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                               indirect_index,
                                               swizzle_in >> 16,
                                               FALSE);
         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
      } else {
         LLVMValueRef gep[2];
         gep[0] = lp_build_const_int32(gallivm, 0);
         gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
                                              bld->imms_array, gep, 2, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");

         if (tgsi_type_is_64bit(stype)) {
            LLVMValueRef imms_ptr2;
            LLVMValueRef res2;
            gep[1] = lp_build_const_int32(gallivm,
                                          reg->Register.Index * 4 + (swizzle_in >> 16));
            imms_ptr2 = LLVMBuildGEP(builder,
                                     bld->imms_array, gep, 2, "");
            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
   }
   else {
      res = bld->immediates[reg->Register.Index][swizzle];
      if (tgsi_type_is_64bit(stype))
         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}

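/*
 * Fetch an input register channel, either from the per-channel input
 * values or, for indirectly addressed inputs, by gathering from the
 * flat inputs array (with per-element pixel offsets).
 */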
static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec;  /* index into the input reg array */
      LLVMValueRef index_vec2 = NULL;
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle_in >> 16,
                                            TRUE);
      }
      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                    reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");

         res = LLVMBuildLoad(builder, input_ptr, "");
         if (tgsi_type_is_64bit(stype)) {
            LLVMValueRef lindex1;
            LLVMValueRef input_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + (swizzle_in >> 16));
            input_ptr2 = LLVMBuildGEP(builder,
                                      bld->inputs_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, input_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
      else {
         res = bld->inputs[reg->Register.Index][swizzle];
         if (tgsi_type_is_64bit(stype))
            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
      }
   }

   assert(res);

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}


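/*
 * Fetch a geometry shader input through the gs_iface callback; both the
 * vertex index and the attribute index may be indirect. PRIMID is
 * special-cased since it is really a system value, not a regular input.
 */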
static LLVMValueRef
emit_fetch_gs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   unsigned swizzle = swizzle_in & 0xffff;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      /*
       * XXX: this is possibly not quite the right value, since file_max may be
       * larger than the max attrib index, due to it being the max of declared
       * inputs AND the max vertices per prim (which is 6 for tri adj).
       * It should however be safe to use (since we always allocate
       * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
       */
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      /*
       * A fixed 6 should do as well (which is what we allocate).
       */
      int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        index_limit);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                    reg->Dimension.Indirect,
                                    vertex_index,
                                    reg->Register.Indirect,
                                    attrib_index,
                                    swizzle_index);

   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
      LLVMValueRef res2;
      res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                        reg->Dimension.Indirect,
                                        vertex_index,
                                        reg->Register.Indirect,
                                        attrib_index,
                                        swizzle_index);
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}

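/*
 * Fetch a temporary register channel: a gather from the flat temps array
 * for indirect accesses, otherwise a plain load of the channel's alloca.
 */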
static LLVMValueRef
emit_fetch_temporary(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
      LLVMValueRef temps_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle_in >> 16,
                                            TRUE);
      }

      /* cast temps_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

      /* Gather values from the temporary register array */
      res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
   }
   else {
      LLVMValueRef temp_ptr;
      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
      res = LLVMBuildLoad(builder, temp_ptr, "");

      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef temp_ptr2, res2;

         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
         res2 = LLVMBuildLoad(builder, temp_ptr2, "");
         res = emit_fetch_64bit(bld_base, stype, res, res2);
      }
   }

   if (stype == TGSI_TYPE_SIGNED ||
       stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE ||
       stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}

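/*
 * Fetch a system value. Scalar system values (instance id, invocation id)
 * are broadcast to a vector, and the result is bitcast to the requested
 * type when it differs from the value's actual type.
 */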
1660 static LLVMValueRef
1661 emit_fetch_system_value(
1662 struct lp_build_tgsi_context * bld_base,
1663 const struct tgsi_full_src_register * reg,
1664 enum tgsi_opcode_type stype,
1665 unsigned swizzle_in)
1666 {
1667 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1668 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1669 const struct tgsi_shader_info *info = bld->bld_base.info;
1670 LLVMBuilderRef builder = gallivm->builder;
1671 LLVMValueRef res;
1672 enum tgsi_opcode_type atype; // Actual type of the value
1673
1674 assert(!reg->Register.Indirect);
1675
1676 switch (info->system_value_semantic_name[reg->Register.Index]) {
1677 case TGSI_SEMANTIC_INSTANCEID:
1678 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1679 atype = TGSI_TYPE_UNSIGNED;
1680 break;
1681
1682 case TGSI_SEMANTIC_VERTEXID:
1683 res = bld->system_values.vertex_id;
1684 atype = TGSI_TYPE_UNSIGNED;
1685 break;
1686
1687 case TGSI_SEMANTIC_VERTEXID_NOBASE:
1688 res = bld->system_values.vertex_id_nobase;
1689 atype = TGSI_TYPE_UNSIGNED;
1690 break;
1691
1692 case TGSI_SEMANTIC_BASEVERTEX:
1693 res = bld->system_values.basevertex;
1694 atype = TGSI_TYPE_UNSIGNED;
1695 break;
1696
1697 case TGSI_SEMANTIC_PRIMID:
1698 res = bld->system_values.prim_id;
1699 atype = TGSI_TYPE_UNSIGNED;
1700 break;
1701
1702 case TGSI_SEMANTIC_INVOCATIONID:
1703 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1704 atype = TGSI_TYPE_UNSIGNED;
1705 break;
1706
1707 default:
1708 assert(!"unexpected semantic in emit_fetch_system_value");
1709 res = bld_base->base.zero;
1710 atype = TGSI_TYPE_FLOAT;
1711 break;
1712 }
1713
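   /* If the requested type differs from the value's actual type, a plain
    * bitcast suffices since all these types are 32-bit per channel.
    */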
1714 if (atype != stype) {
1715 if (stype == TGSI_TYPE_FLOAT) {
1716 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1717 } else if (stype == TGSI_TYPE_UNSIGNED) {
1718 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1719 } else if (stype == TGSI_TYPE_SIGNED) {
1720 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1721 }
1722 }
1723
1724 return res;
1725 }
1726
1727 /**
1728 * Register fetch with derivatives.
1729 */
1730 static void
1731 emit_fetch_deriv(
1732 struct lp_build_tgsi_soa_context *bld,
1733 LLVMValueRef src,
1734 LLVMValueRef *res,
1735 LLVMValueRef *ddx,
1736 LLVMValueRef *ddy)
1737 {
1738 if (res)
1739 *res = src;
1740
1741 /* TODO: use interpolation coeffs for inputs */
1742
1743 if (ddx)
1744 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1745
1746 if (ddy)
1747 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1748 }
1749
1750 /**
1751  * Store an array of vec-length 64-bit values into two arrays of
1752  * vec_length floats. I.e.,
1753  * if the value is d0, d1, d2, d3 etc., and
1754  * each 64-bit value has two 32-bit pieces x and y,
1755  * then they get stored into the separate channels as:
1756  * chan_ptr  = d0.x, d1.x, d2.x, d3.x
1757  * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1758  */
1759 static void
1760 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1761 LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1762 LLVMValueRef value)
1763 {
1764 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1765 struct gallivm_state *gallivm = bld_base->base.gallivm;
1766 LLVMBuilderRef builder = gallivm->builder;
1767 struct lp_build_context *float_bld = &bld_base->base;
1768 unsigned i;
1769 LLVMValueRef temp, temp2;
1770 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1771 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1772
1773 for (i = 0; i < bld_base->base.type.length; i++) {
1774 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1775 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1776 }
1777
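   /*
    * Split the interleaved 64-bit vector into its two 32-bit halves:
    * 'temp' gathers the even lanes (the x pieces) and 'temp2' the odd
    * lanes (the y pieces), e.g. masks <0,2,4,6> and <1,3,5,7> for a
    * 4-wide vector.
    */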
1778 temp = LLVMBuildShuffleVector(builder, value,
1779 LLVMGetUndef(LLVMTypeOf(value)),
1780 LLVMConstVector(shuffles,
1781 bld_base->base.type.length),
1782 "");
1783 temp2 = LLVMBuildShuffleVector(builder, value,
1784 LLVMGetUndef(LLVMTypeOf(value)),
1785 LLVMConstVector(shuffles2,
1786 bld_base->base.type.length),
1787 "");
1788
1789 lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1790 lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1791 }
1792
1793 /**
1794 * Register store.
1795 */
1796 static void
1797 emit_store_chan(
1798 struct lp_build_tgsi_context *bld_base,
1799 const struct tgsi_full_instruction *inst,
1800 unsigned index,
1801 unsigned chan_index,
1802 LLVMValueRef value)
1803 {
1804 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1805 struct gallivm_state *gallivm = bld_base->base.gallivm;
1806 LLVMBuilderRef builder = gallivm->builder;
1807 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1808 struct lp_build_context *float_bld = &bld_base->base;
1809 struct lp_build_context *int_bld = &bld_base->int_bld;
1810 LLVMValueRef indirect_index = NULL;
1811 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1812
1813    /*
1814     * Apply saturation.
1815     *
1816     * The value is always assumed to be float.
1817     */
1818 if (inst->Instruction.Saturate) {
1819 assert(dtype == TGSI_TYPE_FLOAT ||
1820 dtype == TGSI_TYPE_UNTYPED);
1821 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1822 value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1823 }
1824
1825 if (reg->Register.Indirect) {
1826       /*
1827        * Currently mesa/st doesn't generate indirect stores
1828        * of 64-bit values; it normally uses MOV to do indirect stores.
1829        */
1830 assert(!tgsi_type_is_64bit(dtype));
1831 indirect_index = get_indirect_index(bld,
1832 reg->Register.File,
1833 reg->Register.Index,
1834 &reg->Indirect,
1835 bld->bld_base.info->file_max[reg->Register.File]);
1836 } else {
1837 assert(reg->Register.Index <=
1838 bld_base->info->file_max[reg->Register.File]);
1839 }
1840
1841 if (DEBUG_EXECUTION) {
1842 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1843 }
1844
1845 switch( reg->Register.File ) {
1846 case TGSI_FILE_OUTPUT:
1847 /* Outputs are always stored as floats */
1848 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1849
1850 if (reg->Register.Indirect) {
1851 LLVMValueRef index_vec; /* indexes into the output registers */
1852 LLVMValueRef outputs_array;
1853 LLVMTypeRef fptr_type;
1854
1855 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1856 indirect_index,
1857 chan_index,
1858 TRUE);
1859
1860 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1861 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1862
1863 /* Scatter store values into output registers */
1864 emit_mask_scatter(bld, outputs_array, index_vec, value,
1865 &bld->exec_mask);
1866 }
1867 else {
1868 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1869 chan_index);
1870
1871 if (tgsi_type_is_64bit(dtype)) {
1872 LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1873 chan_index + 1);
1874 emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1875 value);
1876 } else
1877 lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1878 }
1879 break;
1880
1881 case TGSI_FILE_TEMPORARY:
1882 /* Temporaries are always stored as floats */
1883 if (!tgsi_type_is_64bit(dtype))
1884 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1885 else
1886 value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1887
1888 if (reg->Register.Indirect) {
1889 LLVMValueRef index_vec; /* indexes into the temp registers */
1890 LLVMValueRef temps_array;
1891 LLVMTypeRef fptr_type;
1892
1893 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1894 indirect_index,
1895 chan_index,
1896 TRUE);
1897
1898 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1899 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1900
1901 /* Scatter store values into temp registers */
1902 emit_mask_scatter(bld, temps_array, index_vec, value,
1903 &bld->exec_mask);
1904 }
1905 else {
1906 LLVMValueRef temp_ptr;
1907 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1908
1909 if (tgsi_type_is_64bit(dtype)) {
1910 LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1911 reg->Register.Index,
1912 chan_index + 1);
1913 emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1914 value);
1915 }
1916 else
1917 lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1918 }
1919 break;
1920
1921 case TGSI_FILE_ADDRESS:
1922 assert(dtype == TGSI_TYPE_SIGNED);
1923 assert(LLVMTypeOf(value) == int_bld->vec_type);
1924 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1925 lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1926 bld->addr[reg->Register.Index][chan_index]);
1927 break;
1928
1929 default:
1930 assert( 0 );
1931 }
1932
1933 (void)dtype;
1934 }
1935
1936 /*
1937 * Called at the beginning of the translation of each TGSI instruction, to
1938 * emit some debug code.
1939 */
1940 static void
1941 emit_debug(
1942 struct lp_build_tgsi_context * bld_base,
1943 const struct tgsi_full_instruction * inst,
1944 const struct tgsi_opcode_info * info)
1945
1946 {
1947 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1948
1949 if (DEBUG_EXECUTION) {
1950 /*
1951 * Dump the TGSI instruction.
1952 */
1953
1954 struct gallivm_state *gallivm = bld_base->base.gallivm;
1955 char buf[512];
1956 buf[0] = '$';
1957 buf[1] = ' ';
1958 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1959 lp_build_printf(gallivm, buf);
1960
1961       /* Dump the execution mask. */
1962
1963 if (bld->exec_mask.has_mask) {
1964 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
1965 }
1966 }
1967 }
1968
1969 static void
1970 emit_store(
1971 struct lp_build_tgsi_context * bld_base,
1972 const struct tgsi_full_instruction * inst,
1973 const struct tgsi_opcode_info * info,
1974 unsigned index,
1975 LLVMValueRef dst[4])
1976
1977 {
1978 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1979
1980 unsigned writemask = inst->Dst[index].Register.WriteMask;
1981 while (writemask) {
1982 unsigned chan_index = u_bit_scan(&writemask);
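      /* A 64-bit value occupies two channels (xy or zw); the store for
       * each pair is done through channel 0 or 2, so skip the odd
       * channels.
       */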
1983 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1984 continue;
1985 emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1986 }
1987 }
1988
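/*
 * Translate a TGSI texture target to the corresponding pipe texture
 * target. Shadow variants map to the same pipe target as their
 * non-shadow counterparts.
 */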
1989 static unsigned
1990 tgsi_to_pipe_tex_target(unsigned tgsi_target)
1991 {
1992 switch (tgsi_target) {
1993 case TGSI_TEXTURE_BUFFER:
1994 return PIPE_BUFFER;
1995 case TGSI_TEXTURE_1D:
1996 case TGSI_TEXTURE_SHADOW1D:
1997 return PIPE_TEXTURE_1D;
1998 case TGSI_TEXTURE_2D:
1999 case TGSI_TEXTURE_SHADOW2D:
2000 case TGSI_TEXTURE_2D_MSAA:
2001 return PIPE_TEXTURE_2D;
2002 case TGSI_TEXTURE_3D:
2003 return PIPE_TEXTURE_3D;
2004 case TGSI_TEXTURE_CUBE:
2005 case TGSI_TEXTURE_SHADOWCUBE:
2006 return PIPE_TEXTURE_CUBE;
2007 case TGSI_TEXTURE_RECT:
2008 case TGSI_TEXTURE_SHADOWRECT:
2009 return PIPE_TEXTURE_RECT;
2010 case TGSI_TEXTURE_1D_ARRAY:
2011 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2012 return PIPE_TEXTURE_1D_ARRAY;
2013 case TGSI_TEXTURE_2D_ARRAY:
2014 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2015 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2016 return PIPE_TEXTURE_2D_ARRAY;
2017 case TGSI_TEXTURE_CUBE_ARRAY:
2018 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2019 return PIPE_TEXTURE_CUBE_ARRAY;
2020 default:
2021 assert(0);
2022 return PIPE_BUFFER;
2023 }
2024 }
2025
2026
2027 static enum lp_sampler_lod_property
2028 lp_build_lod_property(
2029 struct lp_build_tgsi_context *bld_base,
2030 const struct tgsi_full_instruction *inst,
2031 unsigned src_op)
2032 {
2033 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2034 enum lp_sampler_lod_property lod_property;
2035
2036       /*
2037        * Not much we can do here. We could try catching inputs declared
2038        * with constant interpolation, but it's probably not worth it - for
2039        * TEX opcodes as well as FETCH/LD the lod comes from the same reg
2040        * as the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO
2041        * (just like the constant/immediate recognition below).
2042        * What would be of more value is recognizing temps holding
2043        * broadcasted scalars, but there is no way we can do that.
2044        * Tried asking llvm, without any success (using LLVMIsConstant, even
2045        * though that isn't exactly what we'd need); even something as simple as
2046        * IMM[0] UINT32 (0,-1,0,0)
2047        * MOV TEMP[0] IMM[0].yyyy
2048        * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2049        * doesn't work.
2050        * This means there is ZERO chance this will ever catch a scalar lod
2051        * with traditional tex opcodes or texel fetches, since the lod comes
2052        * from the same reg as the coords (except maybe some test shaders
2053        * using constant coords).
2054        * There's at least hope for sample opcodes as well as size queries.
2055        */
2056 if (reg->Register.File == TGSI_FILE_CONSTANT ||
2057 reg->Register.File == TGSI_FILE_IMMEDIATE) {
2058 lod_property = LP_SAMPLER_LOD_SCALAR;
2059 }
2060 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2061 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2062 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2063 }
2064 else {
2065 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2066 }
2067 }
2068 else {
2069       /* never use scalar (per-quad) lod; the results are just too wrong. */
2070 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2071 }
2072 return lod_property;
2073 }
2074
2075
2076 /**
2077 * High-level instruction translators.
2078 */
2079
2080 static void
2081 emit_tex( struct lp_build_tgsi_soa_context *bld,
2082 const struct tgsi_full_instruction *inst,
2083 enum lp_build_tex_modifier modifier,
2084 LLVMValueRef *texel,
2085 unsigned sampler_reg,
2086 enum lp_sampler_op_type sampler_op)
2087 {
2088 unsigned unit = inst->Src[sampler_reg].Register.Index;
2089 LLVMValueRef oow = NULL;
2090 LLVMValueRef lod = NULL;
2091 LLVMValueRef coords[5];
2092 LLVMValueRef offsets[3] = { NULL };
2093 struct lp_derivatives derivs;
2094 struct lp_sampler_params params;
2095 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2096 unsigned num_derivs, num_offsets, i;
2097 unsigned shadow_coord = 0;
2098 unsigned layer_coord = 0;
2099 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
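   /*
    * sample_key packs the sampler op together with the shadow/offset
    * flags and the lod control/property bits; they are ORed in below as
    * the operands are processed.
    */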
2100
2101 memset(&params, 0, sizeof(params));
2102
2103 if (!bld->sampler) {
2104 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2105 for (i = 0; i < 4; i++) {
2106 texel[i] = bld->bld_base.base.undef;
2107 }
2108 return;
2109 }
2110
2111 switch (inst->Texture.Texture) {
2112 case TGSI_TEXTURE_1D_ARRAY:
2113 layer_coord = 1;
2114 /* fallthrough */
2115 case TGSI_TEXTURE_1D:
2116 num_offsets = 1;
2117 num_derivs = 1;
2118 break;
2119 case TGSI_TEXTURE_2D_ARRAY:
2120 layer_coord = 2;
2121 /* fallthrough */
2122 case TGSI_TEXTURE_2D:
2123 case TGSI_TEXTURE_RECT:
2124 num_offsets = 2;
2125 num_derivs = 2;
2126 break;
2127 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2128 layer_coord = 1;
2129 /* fallthrough */
2130 case TGSI_TEXTURE_SHADOW1D:
2131 shadow_coord = 2;
2132 num_offsets = 1;
2133 num_derivs = 1;
2134 break;
2135 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2136 layer_coord = 2;
2137 shadow_coord = 3;
2138 num_offsets = 2;
2139 num_derivs = 2;
2140 break;
2141 case TGSI_TEXTURE_SHADOW2D:
2142 case TGSI_TEXTURE_SHADOWRECT:
2143 shadow_coord = 2;
2144 num_offsets = 2;
2145 num_derivs = 2;
2146 break;
2147 case TGSI_TEXTURE_CUBE:
2148 num_offsets = 2;
2149 num_derivs = 3;
2150 break;
2151 case TGSI_TEXTURE_3D:
2152 num_offsets = 3;
2153 num_derivs = 3;
2154 break;
2155 case TGSI_TEXTURE_SHADOWCUBE:
2156 shadow_coord = 3;
2157 num_offsets = 2;
2158 num_derivs = 3;
2159 break;
2160 case TGSI_TEXTURE_CUBE_ARRAY:
2161 num_offsets = 2;
2162 num_derivs = 3;
2163 layer_coord = 3;
2164 break;
2165 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2166 num_offsets = 2;
2167 num_derivs = 3;
2168 layer_coord = 3;
2169       shadow_coord = 4; /* shadow coord comes from a separate reg */
2170 break;
2171 case TGSI_TEXTURE_2D_MSAA:
2172 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2173 default:
2174 assert(0);
2175 return;
2176 }
2177
2178    /* Note that lod and especially projected are illegal in a LOT of cases. */
2179 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2180 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2181 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2182 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2183 /* note that shadow cube array with bias/explicit lod does not exist */
2184 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2185 }
2186 else {
2187 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2188 }
2189 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2190 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2191 }
2192 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2193 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2194 }
2195 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2196 }
2197
2198 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2199 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2200 oow = lp_build_rcp(&bld->bld_base.base, oow);
2201 }
2202
2203 for (i = 0; i < num_derivs; i++) {
2204 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2205 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2206 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2207 }
2208 for (i = num_derivs; i < 5; i++) {
2209 coords[i] = bld->bld_base.base.undef;
2210 }
2211
2212 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2213 if (layer_coord) {
2214 if (layer_coord == 3) {
2215 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2216 }
2217 else {
2218 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2219 }
2220 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2221 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2222 }
2223    /* Shadow coord always occupies the 5th slot. */
2224 if (shadow_coord) {
2225 sample_key |= LP_SAMPLER_SHADOW;
2226 if (shadow_coord == 4) {
2227 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2228 }
2229 else {
2230 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2231 }
2232 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2233 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2234 }
2235
2236 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2237 unsigned dim;
2238 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2239 for (dim = 0; dim < num_derivs; ++dim) {
2240 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2241 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2242 }
2243 params.derivs = &derivs;
2244       /*
2245        * We could also check whether all src regs are constant, but I
2246        * doubt such cases exist in practice.
2247        */
2248 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2249 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2250 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2251 }
2252 else {
2253 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2254 }
2255 }
2256 else {
2257 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2258 }
2259 }
2260 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2261
2262    /* we don't handle the 4-offset version of tg4 */
2263 if (inst->Texture.NumOffsets == 1) {
2264 unsigned dim;
2265 sample_key |= LP_SAMPLER_OFFSETS;
2266 for (dim = 0; dim < num_offsets; dim++) {
2267 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2268 }
2269 }
2270
2271 params.type = bld->bld_base.base.type;
2272 params.sample_key = sample_key;
2273 params.texture_index = unit;
2274 params.sampler_index = unit;
2275 params.context_ptr = bld->context_ptr;
2276 params.thread_data_ptr = bld->thread_data_ptr;
2277 params.coords = coords;
2278 params.offsets = offsets;
2279 params.lod = lod;
2280 params.texel = texel;
2281
2282 bld->sampler->emit_tex_sample(bld->sampler,
2283 bld->bld_base.base.gallivm,
2284 &params);
2285 }
2286
2287 static void
2288 emit_sample(struct lp_build_tgsi_soa_context *bld,
2289 const struct tgsi_full_instruction *inst,
2290 enum lp_build_tex_modifier modifier,
2291 boolean compare,
2292 enum lp_sampler_op_type sample_type,
2293 LLVMValueRef *texel)
2294 {
2295 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2296 unsigned texture_unit, sampler_unit;
2297 LLVMValueRef lod = NULL;
2298 LLVMValueRef coords[5];
2299 LLVMValueRef offsets[3] = { NULL };
2300 struct lp_derivatives derivs;
2301 struct lp_sampler_params params;
2302 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2303
2304 unsigned num_offsets, num_derivs, i;
2305 unsigned layer_coord = 0;
2306 unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2307
2308 memset(&params, 0, sizeof(params));
2309
2310 if (!bld->sampler) {
2311 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2312 for (i = 0; i < 4; i++) {
2313 texel[i] = bld->bld_base.base.undef;
2314 }
2315 return;
2316 }
2317
2318    /*
2319     * Unlike old-style tex opcodes, the texture/sampler indices
2320     * always come from src1 and src2, respectively.
2321     */
2322 texture_unit = inst->Src[1].Register.Index;
2323 sampler_unit = inst->Src[2].Register.Index;
2324
2325    /*
2326     * Note that inst->Texture.Texture will contain the number of offsets;
2327     * however, the target information is NOT there and comes from the
2328     * declared sampler views instead.
2329     */
2330 switch (bld->sv[texture_unit].Resource) {
2331 case TGSI_TEXTURE_1D:
2332 num_offsets = 1;
2333 num_derivs = 1;
2334 break;
2335 case TGSI_TEXTURE_1D_ARRAY:
2336 layer_coord = 1;
2337 num_offsets = 1;
2338 num_derivs = 1;
2339 break;
2340 case TGSI_TEXTURE_2D:
2341 case TGSI_TEXTURE_RECT:
2342 num_offsets = 2;
2343 num_derivs = 2;
2344 break;
2345 case TGSI_TEXTURE_2D_ARRAY:
2346 layer_coord = 2;
2347 num_offsets = 2;
2348 num_derivs = 2;
2349 break;
2350 case TGSI_TEXTURE_CUBE:
2351 num_offsets = 2;
2352 num_derivs = 3;
2353 break;
2354 case TGSI_TEXTURE_3D:
2355 num_offsets = 3;
2356 num_derivs = 3;
2357 break;
2358 case TGSI_TEXTURE_CUBE_ARRAY:
2359 layer_coord = 3;
2360 num_offsets = 2;
2361 num_derivs = 3;
2362 break;
2363 default:
2364 assert(0);
2365 return;
2366 }
2367
2368 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2369 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2370 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2371 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2372 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2373 }
2374 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2375 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2376 }
2377 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2378 }
2379 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2380 /* XXX might be better to explicitly pass the level zero information */
2381 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2382 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2383 }
2384
2385 for (i = 0; i < num_derivs; i++) {
2386 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2387 }
2388 for (i = num_derivs; i < 5; i++) {
2389 coords[i] = bld->bld_base.base.undef;
2390 }
2391
2392 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2393 if (layer_coord) {
2394 if (layer_coord == 3)
2395 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2396 else
2397 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2398 }
2399    /* Shadow coord always occupies the 5th slot. */
2400 if (compare) {
2401 sample_key |= LP_SAMPLER_SHADOW;
2402 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2403 }
2404
2405 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2406 unsigned dim;
2407 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2408 for (dim = 0; dim < num_derivs; ++dim) {
2409 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2410 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2411 }
2412 params.derivs = &derivs;
2413       /*
2414        * We could also check whether all src regs are constant, but I
2415        * doubt such cases exist in practice.
2416        */
2417 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2418 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2419 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2420 }
2421 else {
2422 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2423 }
2424 }
2425 else {
2426 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2427 }
2428 }
2429
2430 /* some advanced gather instructions (txgo) would require 4 offsets */
2431 if (inst->Texture.NumOffsets == 1) {
2432 unsigned dim;
2433 sample_key |= LP_SAMPLER_OFFSETS;
2434 for (dim = 0; dim < num_offsets; dim++) {
2435 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2436 }
2437 }
2438 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2439
2440 params.type = bld->bld_base.base.type;
2441 params.sample_key = sample_key;
2442 params.texture_index = texture_unit;
2443 params.sampler_index = sampler_unit;
2444 params.context_ptr = bld->context_ptr;
2445 params.thread_data_ptr = bld->thread_data_ptr;
2446 params.coords = coords;
2447 params.offsets = offsets;
2448 params.lod = lod;
2449 params.texel = texel;
2450
2451 bld->sampler->emit_tex_sample(bld->sampler,
2452 bld->bld_base.base.gallivm,
2453 &params);
2454
2455 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2456 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2457 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2458 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2459 unsigned char swizzles[4];
2460 swizzles[0] = inst->Src[1].Register.SwizzleX;
2461 swizzles[1] = inst->Src[1].Register.SwizzleY;
2462 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2463 swizzles[3] = inst->Src[1].Register.SwizzleW;
2464
2465 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2466 }
2467 }
2468
2469 static void
2470 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2471 const struct tgsi_full_instruction *inst,
2472 LLVMValueRef *texel,
2473 boolean is_samplei)
2474 {
2475 unsigned unit, target;
2476 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2477 LLVMValueRef explicit_lod = NULL;
2478 LLVMValueRef coords[5];
2479 LLVMValueRef offsets[3] = { NULL };
2480 struct lp_sampler_params params;
2481 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2482 unsigned dims, i;
2483 unsigned layer_coord = 0;
2484 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2485
2486 memset(&params, 0, sizeof(params));
2487
2488 if (!bld->sampler) {
2489 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2490 for (i = 0; i < 4; i++) {
2491 texel[i] = coord_undef;
2492 }
2493 return;
2494 }
2495
2496 unit = inst->Src[1].Register.Index;
2497
2498 if (is_samplei) {
2499 target = bld->sv[unit].Resource;
2500 }
2501 else {
2502 target = inst->Texture.Texture;
2503 }
2504
2505 switch (target) {
2506 case TGSI_TEXTURE_1D:
2507 case TGSI_TEXTURE_BUFFER:
2508 dims = 1;
2509 break;
2510 case TGSI_TEXTURE_1D_ARRAY:
2511 layer_coord = 1;
2512 dims = 1;
2513 break;
2514 case TGSI_TEXTURE_2D:
2515 case TGSI_TEXTURE_RECT:
2516 case TGSI_TEXTURE_2D_MSAA:
2517 dims = 2;
2518 break;
2519 case TGSI_TEXTURE_2D_ARRAY:
2520 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2521 layer_coord = 2;
2522 dims = 2;
2523 break;
2524 case TGSI_TEXTURE_3D:
2525 dims = 3;
2526 break;
2527 default:
2528 assert(0);
2529 return;
2530 }
2531
2532    /* always have lod, except for buffers and msaa targets? */
2533 if (target != TGSI_TEXTURE_BUFFER &&
2534 target != TGSI_TEXTURE_2D_MSAA &&
2535 target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
2536 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2537 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2538 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2539 }
2540 /*
2541 * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2542 * would be the sample index.
2543 */
2544
2545 for (i = 0; i < dims; i++) {
2546 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2547 }
2548    /* never use more than 3 coords here, but emit_fetch_texel copies all 5 anyway */
2549 for (i = dims; i < 5; i++) {
2550 coords[i] = coord_undef;
2551 }
2552 if (layer_coord)
2553 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2554
2555 if (inst->Texture.NumOffsets == 1) {
2556 unsigned dim;
2557 sample_key |= LP_SAMPLER_OFFSETS;
2558 for (dim = 0; dim < dims; dim++) {
2559 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2560 }
2561 }
2562 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2563
2564 params.type = bld->bld_base.base.type;
2565 params.sample_key = sample_key;
2566 params.texture_index = unit;
2567    /*
2568     * The sampler is not actually used; set it to 0 so it won't exceed
2569     * PIPE_MAX_SAMPLERS and trigger assertions with d3d10, where the
2570     * sampler view number can exceed this.
2571     */
2572 params.sampler_index = 0;
2573 params.context_ptr = bld->context_ptr;
2574 params.thread_data_ptr = bld->thread_data_ptr;
2575 params.coords = coords;
2576 params.offsets = offsets;
2577 params.derivs = NULL;
2578 params.lod = explicit_lod;
2579 params.texel = texel;
2580
2581 bld->sampler->emit_tex_sample(bld->sampler,
2582 bld->bld_base.base.gallivm,
2583 &params);
2584
2585 if (is_samplei &&
2586 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2587 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2588 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2589 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2590 unsigned char swizzles[4];
2591 swizzles[0] = inst->Src[1].Register.SwizzleX;
2592 swizzles[1] = inst->Src[1].Register.SwizzleY;
2593 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2594 swizzles[3] = inst->Src[1].Register.SwizzleW;
2595
2596 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2597 }
2598 }
2599
2600 static void
2601 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2602 const struct tgsi_full_instruction *inst,
2603 LLVMValueRef *sizes_out,
2604 boolean is_sviewinfo)
2605 {
2606 LLVMValueRef explicit_lod;
2607 enum lp_sampler_lod_property lod_property;
2608 unsigned has_lod;
2609 unsigned i;
2610 unsigned unit = inst->Src[1].Register.Index;
2611 unsigned target, pipe_target;
2612 struct lp_sampler_size_query_params params;
2613
2614 if (is_sviewinfo) {
2615 target = bld->sv[unit].Resource;
2616 }
2617 else {
2618 target = inst->Texture.Texture;
2619 }
2620 switch (target) {
2621 case TGSI_TEXTURE_BUFFER:
2622 case TGSI_TEXTURE_RECT:
2623 case TGSI_TEXTURE_SHADOWRECT:
2624 has_lod = 0;
2625 break;
2626 default:
2627 has_lod = 1;
2628 break;
2629 }
2630
2631 if (!bld->sampler) {
2632 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2633 for (i = 0; i < 4; i++)
2634 sizes_out[i] = bld->bld_base.int_bld.undef;
2635 return;
2636 }
2637
2638 if (has_lod) {
2639 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2640 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2641 }
2642 else {
2643 explicit_lod = NULL;
2644 lod_property = LP_SAMPLER_LOD_SCALAR;
2645 }
2646
2647
2648 pipe_target = tgsi_to_pipe_tex_target(target);
2649
2650 params.int_type = bld->bld_base.int_bld.type;
2651 params.texture_unit = unit;
2652 params.target = pipe_target;
2653 params.context_ptr = bld->context_ptr;
2654 params.is_sviewinfo = TRUE;
2655 params.lod_property = lod_property;
2656 params.explicit_lod = explicit_lod;
2657 params.sizes_out = sizes_out;
2658
2659 bld->sampler->emit_size_query(bld->sampler,
2660 bld->bld_base.base.gallivm,
2661 &params);
2662 }
2663
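/*
 * Check whether the shader is about to end, i.e. none of the next few
 * instructions (if any) do texturing or control flow. Used to skip the
 * early mask check after a kill when the shader finishes soon anyway.
 */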
2664 static boolean
2665 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2666 int pc)
2667 {
2668 unsigned i;
2669
2670 for (i = 0; i < 5; i++) {
2671 enum tgsi_opcode opcode;
2672
2673 if (pc + i >= bld->bld_base.info->num_instructions)
2674 return TRUE;
2675
2676 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2677
2678 if (opcode == TGSI_OPCODE_END)
2679 return TRUE;
2680
2681 if (opcode == TGSI_OPCODE_TEX ||
2682 opcode == TGSI_OPCODE_TXP ||
2683 opcode == TGSI_OPCODE_TXD ||
2684 opcode == TGSI_OPCODE_TXB ||
2685 opcode == TGSI_OPCODE_TXL ||
2686 opcode == TGSI_OPCODE_TXF ||
2687 opcode == TGSI_OPCODE_TXQ ||
2688 opcode == TGSI_OPCODE_TEX2 ||
2689 opcode == TGSI_OPCODE_TXB2 ||
2690 opcode == TGSI_OPCODE_TXL2 ||
2691 opcode == TGSI_OPCODE_SAMPLE ||
2692 opcode == TGSI_OPCODE_SAMPLE_B ||
2693 opcode == TGSI_OPCODE_SAMPLE_C ||
2694 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2695 opcode == TGSI_OPCODE_SAMPLE_D ||
2696 opcode == TGSI_OPCODE_SAMPLE_I ||
2697 opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2698 opcode == TGSI_OPCODE_SAMPLE_L ||
2699 opcode == TGSI_OPCODE_SVIEWINFO ||
2700 opcode == TGSI_OPCODE_CAL ||
2701 opcode == TGSI_OPCODE_IF ||
2702 opcode == TGSI_OPCODE_UIF ||
2703 opcode == TGSI_OPCODE_BGNLOOP ||
2704 opcode == TGSI_OPCODE_SWITCH)
2705 return FALSE;
2706 }
2707
2708 return TRUE;
2709 }
2710
2711
2712
2713 /**
2714 * Kill fragment if any of the src register values are negative.
2715 */
2716 static void
2717 emit_kill_if(
2718 struct lp_build_tgsi_soa_context *bld,
2719 const struct tgsi_full_instruction *inst,
2720 int pc)
2721 {
2722 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2723 const struct tgsi_full_src_register *reg = &inst->Src[0];
2724 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2725 LLVMValueRef mask;
2726 unsigned chan_index;
2727
2728 memset(&terms, 0, sizeof terms);
2729
2730 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2731 unsigned swizzle;
2732
2733 /* Unswizzle channel */
2734 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2735
2736       /* Check that the component has not already been tested. */
2737 assert(swizzle < TGSI_NUM_CHANNELS);
2738 if( !terms[swizzle] )
2739 /* TODO: change the comparison operator instead of setting the sign */
2740 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2741 }
2742
2743 mask = NULL;
2744 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2745 if(terms[chan_index]) {
2746 LLVMValueRef chan_mask;
2747
2748 /*
2749 * If term < 0 then mask = 0 else mask = ~0.
2750 */
2751 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2752
2753 if(mask)
2754 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2755 else
2756 mask = chan_mask;
2757 }
2758 }
2759
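   /* Lanes that are inactive under the current execution mask must not
    * be killed, so force their mask bits back on before updating the
    * fragment mask.
    */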
2760 if (bld->exec_mask.has_mask) {
2761 LLVMValueRef invmask;
2762 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2763 mask = LLVMBuildOr(builder, mask, invmask, "");
2764 }
2765
2766 lp_build_mask_update(bld->mask, mask);
2767 if (!near_end_of_shader(bld, pc))
2768 lp_build_mask_check(bld->mask);
2769 }
2770
2771
2772 /**
2773 * Unconditional fragment kill.
2774 * The only predication is the execution mask which will apply if
2775 * we're inside a loop or conditional.
2776 */
2777 static void
2778 emit_kill(struct lp_build_tgsi_soa_context *bld,
2779 int pc)
2780 {
2781 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2782 LLVMValueRef mask;
2783
2784 /* For those channels which are "alive", disable fragment shader
2785 * execution.
2786 */
2787 if (bld->exec_mask.has_mask) {
2788 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2789 }
2790 else {
2791 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2792 mask = zero;
2793 }
2794
2795 lp_build_mask_update(bld->mask, mask);
2796
2797 if (!near_end_of_shader(bld, pc))
2798 lp_build_mask_check(bld->mask);
2799 }
2800
2801
2802 /**
2803 * Emit code which will dump the value of all the temporary registers
2804 * to stdout.
2805 */
2806 static void
2807 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2808 unsigned file)
2809 {
2810 const struct tgsi_shader_info *info = bld->bld_base.info;
2811 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2812 LLVMBuilderRef builder = gallivm->builder;
2813 LLVMValueRef reg_ptr;
2814 int index;
2815 int max_index = info->file_max[file];
2816
2817 /*
2818 * Some register files, particularly constants, can be very large,
2819 * and dumping everything could make this unusably slow.
2820 */
2821 max_index = MIN2(max_index, 32);
2822
2823 for (index = 0; index <= max_index; index++) {
2824 LLVMValueRef res;
2825 unsigned mask;
2826 int chan;
2827
2828 if (index < 8 * sizeof(unsigned) &&
2829 (info->file_mask[file] & (1u << index)) == 0) {
2830          /* This register was not declared. */
2831 continue;
2832 }
2833
2834 if (file == TGSI_FILE_INPUT) {
2835 mask = info->input_usage_mask[index];
2836 } else {
2837 mask = TGSI_WRITEMASK_XYZW;
2838 }
2839
2840 for (chan = 0; chan < 4; chan++) {
2841 if ((mask & (1 << chan)) == 0) {
2842             /* This channel is not used. */
2843 continue;
2844 }
2845
2846 if (file == TGSI_FILE_CONSTANT) {
2847 struct tgsi_full_src_register reg;
2848 memset(&reg, 0, sizeof reg);
2849 reg.Register.File = file;
2850 reg.Register.Index = index;
2851 reg.Register.SwizzleX = 0;
2852 reg.Register.SwizzleY = 1;
2853 reg.Register.SwizzleZ = 2;
2854 reg.Register.SwizzleW = 3;
2855
2856 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2857 if (!res) {
2858 continue;
2859 }
2860 } else if (file == TGSI_FILE_INPUT) {
2861 res = bld->inputs[index][chan];
2862 if (!res) {
2863 continue;
2864 }
2865 } else if (file == TGSI_FILE_TEMPORARY) {
2866 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2867 assert(reg_ptr);
2868 res = LLVMBuildLoad(builder, reg_ptr, "");
2869 } else if (file == TGSI_FILE_OUTPUT) {
2870 reg_ptr = lp_get_output_ptr(bld, index, chan);
2871 assert(reg_ptr);
2872 res = LLVMBuildLoad(builder, reg_ptr, "");
2873 } else {
2874 assert(0);
2875 continue;
2876 }
2877
2878 emit_dump_reg(gallivm, file, index, chan, res);
2879 }
2880 }
2881 }
2882
2883
2884
2885 void
2886 lp_emit_declaration_soa(
2887 struct lp_build_tgsi_context *bld_base,
2888 const struct tgsi_full_declaration *decl)
2889 {
2890 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2891 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2892 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2893 const unsigned first = decl->Range.First;
2894 const unsigned last = decl->Range.Last;
2895 unsigned idx, i;
2896
2897 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2898
2899 switch (decl->Declaration.File) {
2900 case TGSI_FILE_TEMPORARY:
2901 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2902 assert(last < LP_MAX_INLINED_TEMPS);
2903 for (idx = first; idx <= last; ++idx) {
2904 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2905 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2906 }
2907 }
2908 break;
2909
2910 case TGSI_FILE_OUTPUT:
2911 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2912 for (idx = first; idx <= last; ++idx) {
2913 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2914 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2915 vec_type, "output");
2916 }
2917 }
2918 break;
2919
2920 case TGSI_FILE_ADDRESS:
2921       /* ADDR registers are only allocated with an integer LLVM IR type,
2922        * as they are guaranteed to always hold integers.
2923        * XXX: Not sure if this exception is worthwhile (or the whole idea of
2924        * an ADDR register for that matter).
2925        */
2926 assert(last < LP_MAX_TGSI_ADDRS);
2927 for (idx = first; idx <= last; ++idx) {
2928 assert(idx < LP_MAX_TGSI_ADDRS);
2929 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2930 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2931 }
2932 break;
2933
2934 case TGSI_FILE_SAMPLER_VIEW:
2935       /*
2936        * The target stored here MUST match whatever is actually in
2937        * the currently set sampler views (what about the return type?).
2938        */
2939 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2940 for (idx = first; idx <= last; ++idx) {
2941 bld->sv[idx] = decl->SamplerView;
2942 }
2943 break;
2944
2945 case TGSI_FILE_CONSTANT:
2946 {
2947 /*
2948 * We could trivially fetch the per-buffer pointer when fetching the
2949 * constant, relying on llvm to figure out it's always the same pointer
2950        * anyway. However, doing so results in a huge (more than a factor
2951        * of 10) slowdown in llvm compilation times for some (but not all)
2952        * shaders (more specifically, the IR optimization spends way more
2953        * time in DominatorTree::dominates), at least with llvm 3.1 and 3.3.
2954 */
2955 unsigned idx2D = decl->Dim.Index2D;
2956 LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2957 assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2958 bld->consts[idx2D] =
2959 lp_build_array_get(gallivm, bld->consts_ptr, index2D);
2960 bld->consts_sizes[idx2D] =
2961 lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
2962 }
2963 break;
2964
2965 default:
2966 /* don't need to declare other vars */
2967 break;
2968 }
2969 }
2970
2971
2972 void lp_emit_immediate_soa(
2973 struct lp_build_tgsi_context *bld_base,
2974 const struct tgsi_full_immediate *imm)
2975 {
2976 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2977 struct gallivm_state * gallivm = bld_base->base.gallivm;
2978 LLVMValueRef imms[4];
2979 unsigned i;
2980 const uint size = imm->Immediate.NrTokens - 1;
2981 assert(size <= 4);
2982 switch (imm->Immediate.DataType) {
2983 case TGSI_IMM_FLOAT32:
2984 for( i = 0; i < size; ++i )
2985 imms[i] =
2986 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2987
2988 break;
2989 case TGSI_IMM_FLOAT64:
2990 case TGSI_IMM_UINT64:
2991 case TGSI_IMM_INT64:
2992 case TGSI_IMM_UINT32:
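      /* 64-bit immediates arrive as pairs of 32-bit tokens, so they can
       * share the 32-bit path: each token is bitcast to a float vector
       * and the two halves are recombined on fetch.
       */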
2993 for( i = 0; i < size; ++i ) {
2994 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
2995 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2996 }
2997
2998 break;
2999 case TGSI_IMM_INT32:
3000 for( i = 0; i < size; ++i ) {
3001 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3002 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3003 }
3004
3005 break;
3006 }
3007 for( i = size; i < 4; ++i )
3008 imms[i] = bld_base->base.undef;
3009
3010 if (bld->use_immediates_array) {
3011 unsigned index = bld->num_immediates;
3012 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3013 LLVMBuilderRef builder = gallivm->builder;
3014 LLVMValueRef gep[2];
3015 gep[0] = lp_build_const_int32(gallivm, 0);
3016
3017 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3018 for (i = 0; i < 4; ++i ) {
3019 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3020 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3021 bld->imms_array, gep, 2, "");
3022 LLVMBuildStore(builder, imms[i], imm_ptr);
3023 }
3024 } else {
3025 /* simply copy the immediate values into the next immediates[] slot */
3026 unsigned i;
3027 assert(imm->Immediate.NrTokens - 1 <= 4);
3028 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3029
3030 for(i = 0; i < 4; ++i )
3031 bld->immediates[bld->num_immediates][i] = imms[i];
3032
3033 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3034 unsigned index = bld->num_immediates;
3035 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3036 LLVMBuilderRef builder = gallivm->builder;
3037 LLVMValueRef gep[2];
3038 gep[0] = lp_build_const_int32(gallivm, 0);
3039 for (i = 0; i < 4; ++i ) {
3040 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3041 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3042 bld->imms_array, gep, 2, "");
3043 LLVMBuildStore(builder,
3044 bld->immediates[index][i],
3045 imm_ptr);
3046 }
3047 }
3048 }
3049
3050 bld->num_immediates++;
3051 }
3052
3053 static void
3054 ddx_emit(
3055 const struct lp_build_tgsi_action * action,
3056 struct lp_build_tgsi_context * bld_base,
3057 struct lp_build_emit_data * emit_data)
3058 {
3059 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3060
3061 emit_fetch_deriv(bld, emit_data->args[0], NULL,
3062 &emit_data->output[emit_data->chan], NULL);
3063 }
3064
3065 static void
3066 ddy_emit(
3067 const struct lp_build_tgsi_action * action,
3068 struct lp_build_tgsi_context * bld_base,
3069 struct lp_build_emit_data * emit_data)
3070 {
3071 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3072
3073 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3074 &emit_data->output[emit_data->chan]);
3075 }
3076
3077 static void
3078 kill_emit(
3079 const struct lp_build_tgsi_action * action,
3080 struct lp_build_tgsi_context * bld_base,
3081 struct lp_build_emit_data * emit_data)
3082 {
3083 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3084
3085 emit_kill(bld, bld_base->pc - 1);
3086 }
3087
3088 static void
3089 kill_if_emit(
3090 const struct lp_build_tgsi_action * action,
3091 struct lp_build_tgsi_context * bld_base,
3092 struct lp_build_emit_data * emit_data)
3093 {
3094 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3095
3096 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3097 }
3098
3099 static void
3100 tex_emit(
3101 const struct lp_build_tgsi_action * action,
3102 struct lp_build_tgsi_context * bld_base,
3103 struct lp_build_emit_data * emit_data)
3104 {
3105 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3106
3107 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3108 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3109 }
3110
3111 static void
3112 tex2_emit(
3113 const struct lp_build_tgsi_action * action,
3114 struct lp_build_tgsi_context * bld_base,
3115 struct lp_build_emit_data * emit_data)
3116 {
3117 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3118
3119 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3120 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3121 }
3122
3123 static void
3124 txb_emit(
3125 const struct lp_build_tgsi_action * action,
3126 struct lp_build_tgsi_context * bld_base,
3127 struct lp_build_emit_data * emit_data)
3128 {
3129 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3130
3131 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3132 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3133 }
3134
3135 static void
3136 txb2_emit(
3137 const struct lp_build_tgsi_action * action,
3138 struct lp_build_tgsi_context * bld_base,
3139 struct lp_build_emit_data * emit_data)
3140 {
3141 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3142
3143 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3144 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3145 }
3146
3147 static void
3148 txd_emit(
3149 const struct lp_build_tgsi_action * action,
3150 struct lp_build_tgsi_context * bld_base,
3151 struct lp_build_emit_data * emit_data)
3152 {
3153 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3154
3155 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3156 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3157 }
3158
3159 static void
3160 txl_emit(
3161 const struct lp_build_tgsi_action * action,
3162 struct lp_build_tgsi_context * bld_base,
3163 struct lp_build_emit_data * emit_data)
3164 {
3165 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3166
3167 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3168 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3169 }
3170
3171 static void
3172 txl2_emit(
3173 const struct lp_build_tgsi_action * action,
3174 struct lp_build_tgsi_context * bld_base,
3175 struct lp_build_emit_data * emit_data)
3176 {
3177 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3178
3179 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3180 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3181 }
3182
3183 static void
3184 txp_emit(
3185 const struct lp_build_tgsi_action * action,
3186 struct lp_build_tgsi_context * bld_base,
3187 struct lp_build_emit_data * emit_data)
3188 {
3189 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3190
3191 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3192 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3193 }
3194
3195 static void
3196 tg4_emit(
3197 const struct lp_build_tgsi_action * action,
3198 struct lp_build_tgsi_context * bld_base,
3199 struct lp_build_emit_data * emit_data)
3200 {
3201 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3202
3203 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3204 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3205 }
3206
3207 static void
3208 lodq_emit(
3209 const struct lp_build_tgsi_action * action,
3210 struct lp_build_tgsi_context * bld_base,
3211 struct lp_build_emit_data * emit_data)
3212 {
3213 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3214
3215 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3216 emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3217 }
3218
3219 static void
3220 txq_emit(
3221 const struct lp_build_tgsi_action * action,
3222 struct lp_build_tgsi_context * bld_base,
3223 struct lp_build_emit_data * emit_data)
3224 {
3225 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3226
3227 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3228 }
3229
3230 static void
3231 txf_emit(
3232 const struct lp_build_tgsi_action * action,
3233 struct lp_build_tgsi_context * bld_base,
3234 struct lp_build_emit_data * emit_data)
3235 {
3236 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3237
3238 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3239 }
3240
3241 static void
3242 sample_i_emit(
3243 const struct lp_build_tgsi_action * action,
3244 struct lp_build_tgsi_context * bld_base,
3245 struct lp_build_emit_data * emit_data)
3246 {
3247 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3248
3249 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3250 }
3251
3252 static void
3253 sample_emit(
3254 const struct lp_build_tgsi_action * action,
3255 struct lp_build_tgsi_context * bld_base,
3256 struct lp_build_emit_data * emit_data)
3257 {
3258 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3259
3260 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3261 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3262 }
3263
3264 static void
3265 sample_b_emit(
3266 const struct lp_build_tgsi_action * action,
3267 struct lp_build_tgsi_context * bld_base,
3268 struct lp_build_emit_data * emit_data)
3269 {
3270 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3271
3272 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3273 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3274 }
3275
3276 static void
3277 sample_c_emit(
3278 const struct lp_build_tgsi_action * action,
3279 struct lp_build_tgsi_context * bld_base,
3280 struct lp_build_emit_data * emit_data)
3281 {
3282 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3283
3284 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3285 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3286 }
3287
3288 static void
3289 sample_c_lz_emit(
3290 const struct lp_build_tgsi_action * action,
3291 struct lp_build_tgsi_context * bld_base,
3292 struct lp_build_emit_data * emit_data)
3293 {
3294 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3295
3296 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3297 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3298 }
3299
3300 static void
3301 sample_d_emit(
3302 const struct lp_build_tgsi_action * action,
3303 struct lp_build_tgsi_context * bld_base,
3304 struct lp_build_emit_data * emit_data)
3305 {
3306 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3307
3308 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3309 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3310 }
3311
3312 static void
3313 sample_l_emit(
3314 const struct lp_build_tgsi_action * action,
3315 struct lp_build_tgsi_context * bld_base,
3316 struct lp_build_emit_data * emit_data)
3317 {
3318 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3319
3320 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3321 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3322 }
3323
3324 static void
3325 gather4_emit(
3326 const struct lp_build_tgsi_action * action,
3327 struct lp_build_tgsi_context * bld_base,
3328 struct lp_build_emit_data * emit_data)
3329 {
3330 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3331
3332 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3333 FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
3334 }
3335
3336 static void
3337 sviewinfo_emit(
3338 const struct lp_build_tgsi_action * action,
3339 struct lp_build_tgsi_context * bld_base,
3340 struct lp_build_emit_data * emit_data)
3341 {
3342 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3343
3344 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3345 }
3346
3347 static void
3348 lod_emit(
3349 const struct lp_build_tgsi_action * action,
3350 struct lp_build_tgsi_context * bld_base,
3351 struct lp_build_emit_data * emit_data)
3352 {
3353 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3354
3355 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3356 FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3357 }
3358
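/*
 * Return the effective execution mask: the fragment mask ANDed with the
 * current control-flow execution mask (if there is one).
 */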
3359 static LLVMValueRef
3360 mask_vec(struct lp_build_tgsi_context *bld_base)
3361 {
3362 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3363 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3364 struct lp_exec_mask *exec_mask = &bld->exec_mask;
3365
3366 if (!exec_mask->has_mask) {
3367 return lp_build_mask_value(bld->mask);
3368 }
3369 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
3370 exec_mask->exec_mask, "");
3371 }
3372
3373 static void
3374 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3375 LLVMValueRef ptr,
3376 LLVMValueRef mask)
3377 {
3378 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3379 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3380
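   /* Active lanes of the mask are all ones, i.e. -1, so subtracting the
    * mask increments the counter by one in exactly those lanes.
    */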
3381 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3382
3383 LLVMBuildStore(builder, current_vec, ptr);
3384 }
3385
3386 static void
3387 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3388 LLVMValueRef ptr,
3389 LLVMValueRef mask)
3390 {
3391 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3392 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3393
3394 current_vec = lp_build_select(&bld_base->uint_bld,
3395 mask,
3396 bld_base->uint_bld.zero,
3397 current_vec);
3398
3399 LLVMBuildStore(builder, current_vec, ptr);
3400 }
3401
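/*
 * Mask off lanes that have already emitted the shader's declared maximum
 * number of vertices, so further EMITs in those lanes become no-ops.
 */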
3402 static LLVMValueRef
3403 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3404 LLVMValueRef current_mask_vec,
3405 LLVMValueRef total_emitted_vertices_vec)
3406 {
3407 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3408 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3409 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3410 total_emitted_vertices_vec,
3411 bld->max_output_vertices_vec);
3412
3413 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3414 }
3415
3416 static void
3417 emit_vertex(
3418 const struct lp_build_tgsi_action * action,
3419 struct lp_build_tgsi_context * bld_base,
3420 struct lp_build_emit_data * emit_data)
3421 {
3422 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3423 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3424
3425 if (bld->gs_iface->emit_vertex) {
3426 LLVMValueRef mask = mask_vec(bld_base);
3427 LLVMValueRef total_emitted_vertices_vec =
3428 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3429 mask = clamp_mask_to_max_output_vertices(bld, mask,
3430 total_emitted_vertices_vec);
3431 gather_outputs(bld);
3432 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
3433 bld->outputs,
3434 total_emitted_vertices_vec);
3435 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3436 mask);
3437 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3438 mask);
3439 #if DUMP_GS_EMITS
3440 lp_build_print_value(bld->bld_base.base.gallivm,
3441 " +++ emit vertex masked ones = ",
3442 mask);
3443 lp_build_print_value(bld->bld_base.base.gallivm,
3444 " +++ emit vertex emitted = ",
3445 total_emitted_vertices_vec);
3446 #endif
3447 }
3448 }
3449
3450
3451 static void
3452 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
3453 LLVMValueRef mask)
3454 {
3455 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3456 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3457
3458 if (bld->gs_iface->end_primitive) {
3459 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3460 LLVMValueRef emitted_vertices_vec =
3461 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
3462 LLVMValueRef emitted_prims_vec =
3463 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3464
3465 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3466 emitted_vertices_vec,
3467 uint_bld->zero);
3468          /* We need to combine the current execution mask with the mask
3469             telling us which, if any, execution slots actually have
3470             unemitted primitives; this way we make sure that end_primitive
3471             executes only on the paths that have unflushed vertices. */
3472 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
3473
3474 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
3475 emitted_vertices_vec,
3476 emitted_prims_vec);
3477
3478 #if DUMP_GS_EMITS
3479 lp_build_print_value(bld->bld_base.base.gallivm,
3480 " +++ end prim masked ones = ",
3481 mask);
3482 lp_build_print_value(bld->bld_base.base.gallivm,
3483 " +++ end prim emitted verts1 = ",
3484 emitted_vertices_vec);
3485 lp_build_print_value(bld->bld_base.base.gallivm,
3486 " +++ end prim emitted prims1 = ",
3487 LLVMBuildLoad(builder,
3488 bld->emitted_prims_vec_ptr, ""));
3489 #endif
3490 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
3491 mask);
3492 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
3493 mask);
3494 #if DUMP_GS_EMITS
3495 lp_build_print_value(bld->bld_base.base.gallivm,
3496 " +++ end prim emitted verts2 = ",
3497 LLVMBuildLoad(builder,
3498 bld->emitted_vertices_vec_ptr, ""));
3499 #endif
3500 }
3501
3502 }
3503
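/*
 * Handler for the TGSI ENDPRIM opcode: ends the current primitive on
 * the lanes that are active in the current exec mask.
 */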
3504 static void
3505 end_primitive(
3506 const struct lp_build_tgsi_action * action,
3507 struct lp_build_tgsi_context * bld_base,
3508 struct lp_build_emit_data * emit_data)
3509 {
3510 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3511
3512 if (bld->gs_iface->end_primitive) {
3513 LLVMValueRef mask = mask_vec(bld_base);
3514 end_primitive_masked(bld_base, mask);
3515 }
3516 }
3517
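/*
 * The emit callbacks below implement the TGSI control-flow opcodes
 * (CAL/RET, BRK/CONT, IF/UIF/ELSE/ENDIF, SWITCH/CASE/DEFAULT, loops
 * and subroutines).  They delegate to the lp_exec_mask helpers:
 * divergent control flow is modelled by updating per-lane execution
 * masks rather than by branching per lane.
 */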
3518 static void
3519 cal_emit(
3520 const struct lp_build_tgsi_action * action,
3521 struct lp_build_tgsi_context * bld_base,
3522 struct lp_build_emit_data * emit_data)
3523 {
3524 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3525
3526 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
3527 &bld_base->pc);
3528 }
3529
3530 static void
3531 ret_emit(
3532 const struct lp_build_tgsi_action * action,
3533 struct lp_build_tgsi_context * bld_base,
3534 struct lp_build_emit_data * emit_data)
3535 {
3536 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3537
3538 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
3539 }
3540
3541 static void
3542 brk_emit(
3543 const struct lp_build_tgsi_action * action,
3544 struct lp_build_tgsi_context * bld_base,
3545 struct lp_build_emit_data * emit_data)
3546 {
3547 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3548
3549 lp_exec_break(&bld->exec_mask, bld_base);
3550 }
3551
3552 static void
3553 if_emit(
3554 const struct lp_build_tgsi_action * action,
3555 struct lp_build_tgsi_context * bld_base,
3556 struct lp_build_emit_data * emit_data)
3557 {
3558 LLVMValueRef tmp;
3559 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3560
3561 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
3562 emit_data->args[0], bld->bld_base.base.zero);
3563 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3564 }
3565
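/*
 * UIF is the integer variant of IF: the condition is tested as raw
 * integer bits against zero rather than as a float against 0.0.
 */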
3566 static void
3567 uif_emit(
3568 const struct lp_build_tgsi_action * action,
3569 struct lp_build_tgsi_context * bld_base,
3570 struct lp_build_emit_data * emit_data)
3571 {
3572 LLVMValueRef tmp;
3573 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3574 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3575
3576 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3577 emit_data->args[0], uint_bld->zero);
3578 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3579 }
3580
3581 static void
3582 case_emit(
3583 const struct lp_build_tgsi_action * action,
3584 struct lp_build_tgsi_context * bld_base,
3585 struct lp_build_emit_data * emit_data)
3586 {
3587 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3588
3589 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
3590 }
3591
3592 static void
3593 default_emit(
3594 const struct lp_build_tgsi_action * action,
3595 struct lp_build_tgsi_context * bld_base,
3596 struct lp_build_emit_data * emit_data)
3597 {
3598 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3599
3600 lp_exec_default(&bld->exec_mask, bld_base);
3601 }
3602
3603 static void
3604 switch_emit(
3605 const struct lp_build_tgsi_action * action,
3606 struct lp_build_tgsi_context * bld_base,
3607 struct lp_build_emit_data * emit_data)
3608 {
3609 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3610
3611 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
3612 }
3613
3614 static void
3615 endswitch_emit(
3616 const struct lp_build_tgsi_action * action,
3617 struct lp_build_tgsi_context * bld_base,
3618 struct lp_build_emit_data * emit_data)
3619 {
3620 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3621
3622 lp_exec_endswitch(&bld->exec_mask, bld_base);
3623 }
3624
3625 static void
3626 bgnloop_emit(
3627 const struct lp_build_tgsi_action * action,
3628 struct lp_build_tgsi_context * bld_base,
3629 struct lp_build_emit_data * emit_data)
3630 {
3631 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3632
3633 lp_exec_bgnloop(&bld->exec_mask);
3634 }
3635
3636 static void
3637 bgnsub_emit(
3638 const struct lp_build_tgsi_action * action,
3639 struct lp_build_tgsi_context * bld_base,
3640 struct lp_build_emit_data * emit_data)
3641 {
3642 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3643
3644 lp_exec_mask_bgnsub(&bld->exec_mask);
3645 }
3646
3647 static void
3648 else_emit(
3649 const struct lp_build_tgsi_action * action,
3650 struct lp_build_tgsi_context * bld_base,
3651 struct lp_build_emit_data * emit_data)
3652 {
3653 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3654
3655 lp_exec_mask_cond_invert(&bld->exec_mask);
3656 }
3657
3658 static void
3659 endif_emit(
3660 const struct lp_build_tgsi_action * action,
3661 struct lp_build_tgsi_context * bld_base,
3662 struct lp_build_emit_data * emit_data)
3663 {
3664 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3665
3666 lp_exec_mask_cond_pop(&bld->exec_mask);
3667 }
3668
3669 static void
3670 endloop_emit(
3671 const struct lp_build_tgsi_action * action,
3672 struct lp_build_tgsi_context * bld_base,
3673 struct lp_build_emit_data * emit_data)
3674 {
3675 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3676
3677 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
3678 }
3679
3680 static void
3681 endsub_emit(
3682 const struct lp_build_tgsi_action * action,
3683 struct lp_build_tgsi_context * bld_base,
3684 struct lp_build_emit_data * emit_data)
3685 {
3686 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3687
3688 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
3689 }
3690
3691 static void
3692 cont_emit(
3693 const struct lp_build_tgsi_action * action,
3694 struct lp_build_tgsi_context * bld_base,
3695 struct lp_build_emit_data * emit_data)
3696 {
3697 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3698
3699 lp_exec_continue(&bld->exec_mask);
3700 }
3701
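/*
 * Shader prologue: each register file that is accessed with indirect
 * addressing gets a scratch array on the stack, since indirect
 * indexing needs addressable storage.  Inputs are additionally copied
 * into their array, and for geometry shaders the per-lane emit
 * counters are allocated and zeroed.
 */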
3702 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
3703 {
3704 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3705 struct gallivm_state * gallivm = bld_base->base.gallivm;
3706
3707 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
3708 unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
3709 bld->temps_array = lp_build_alloca_undef(gallivm,
3710 LLVMArrayType(bld_base->base.vec_type, array_size),
3711 "temp_array");
3712 }
3713
3714 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
3715 LLVMValueRef array_size =
3716 lp_build_const_int32(gallivm,
3717 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
3718 bld->outputs_array = lp_build_array_alloca(gallivm,
3719 bld_base->base.vec_type, array_size,
3720 "output_array");
3721 }
3722
3723 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3724 unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
3725 bld->imms_array = lp_build_alloca_undef(gallivm,
3726 LLVMArrayType(bld_base->base.vec_type, array_size),
3727 "imms_array");
3728 }
3729
3730 /* If we have indirect addressing in inputs, we need to copy them into
3731 * our alloca array to be able to iterate over them. */
3732 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
3733 unsigned index, chan;
3734 LLVMTypeRef vec_type = bld_base->base.vec_type;
3735 LLVMValueRef array_size = lp_build_const_int32(gallivm,
3736 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
3737 bld->inputs_array = lp_build_array_alloca(gallivm,
3738 vec_type, array_size,
3739 "input_array");
3740
3741 assert(bld_base->info->num_inputs
3742 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
3743
3744 for (index = 0; index < bld_base->info->num_inputs; ++index) {
3745 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
3746 LLVMValueRef lindex =
3747 lp_build_const_int32(gallivm, index * 4 + chan);
3748 LLVMValueRef input_ptr =
3749 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
3750 &lindex, 1, "");
3751 LLVMValueRef value = bld->inputs[index][chan];
3752 if (value)
3753 LLVMBuildStore(gallivm->builder, value, input_ptr);
3754 }
3755 }
3756 }
3757
3758 if (bld->gs_iface) {
3759 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
3760 bld->emitted_prims_vec_ptr =
3761 lp_build_alloca(gallivm,
3762 uint_bld->vec_type,
3763 "emitted_prims_ptr");
3764 bld->emitted_vertices_vec_ptr =
3765 lp_build_alloca(gallivm,
3766 uint_bld->vec_type,
3767 "emitted_vertices_ptr");
3768 bld->total_emitted_vertices_vec_ptr =
3769 lp_build_alloca(gallivm,
3770 uint_bld->vec_type,
3771 "total_emitted_vertices_ptr");
3772
3773 LLVMBuildStore(gallivm->builder, uint_bld->zero,
3774 bld->emitted_prims_vec_ptr);
3775 LLVMBuildStore(gallivm->builder, uint_bld->zero,
3776 bld->emitted_vertices_vec_ptr);
3777 LLVMBuildStore(gallivm->builder, uint_bld->zero,
3778 bld->total_emitted_vertices_vec_ptr);
3779 }
3780
3781 if (DEBUG_EXECUTION) {
3782 lp_build_printf(gallivm, "\n");
3783 emit_dump_file(bld, TGSI_FILE_CONSTANT);
3784 if (!bld->gs_iface)
3785 emit_dump_file(bld, TGSI_FILE_INPUT);
3786 }
3787 }
3788
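/*
 * Shader epilogue: for geometry shaders, flush any pending vertices
 * with an implicit end_primitive and report the final vertex and
 * primitive counts; for other shader types, copy the (possibly
 * indirectly addressed) outputs back to the caller-provided slots.
 */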
3789 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
3790 {
3791 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3792 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3793
3794 if (DEBUG_EXECUTION) {
3795 /* for debugging */
3796 if (0) {
3797 emit_dump_file(bld, TGSI_FILE_TEMPORARY);
3798 }
3799 emit_dump_file(bld, TGSI_FILE_OUTPUT);
3800 lp_build_printf(bld_base->base.gallivm, "\n");
3801 }
3802
3803 /* If we have indirect addressing in outputs, we need to copy our alloca
3804 * array to the output slots specified by the caller. */
3805 if (bld->gs_iface) {
3806 LLVMValueRef total_emitted_vertices_vec;
3807 LLVMValueRef emitted_prims_vec;
3808 /* implicit end_primitive, needed in case there are any unflushed
3809 vertices in the cache. Note that we must not call end_primitive
3810 here since the exec_mask is not valid at this point. */
3811 end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
3812
3813 total_emitted_vertices_vec =
3814 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3815 emitted_prims_vec =
3816 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3817
3818 bld->gs_iface->gs_epilogue(bld->gs_iface,
3819 &bld->bld_base,
3820 total_emitted_vertices_vec,
3821 emitted_prims_vec);
3822 } else {
3823 gather_outputs(bld);
3824 }
3825 }
3826
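/*
 * Translate a complete TGSI token stream into LLVM IR at the current
 * builder position.  'type' selects the SoA vector type used for all
 * register channels, 'mask' is the shader's outer execution mask, and
 * a non-NULL 'gs_iface' switches the translator into geometry-shader
 * mode (GS inputs are fetched through it and EMIT/ENDPRIM are wired
 * up to it).
 */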
3827 void
3828 lp_build_tgsi_soa(struct gallivm_state *gallivm,
3829 const struct tgsi_token *tokens,
3830 struct lp_type type,
3831 struct lp_build_mask_context *mask,
3832 LLVMValueRef consts_ptr,
3833 LLVMValueRef const_sizes_ptr,
3834 const struct lp_bld_tgsi_system_values *system_values,
3835 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
3836 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
3837 LLVMValueRef context_ptr,
3838 LLVMValueRef thread_data_ptr,
3839 const struct lp_build_sampler_soa *sampler,
3840 const struct tgsi_shader_info *info,
3841 const struct lp_build_tgsi_gs_iface *gs_iface)
3842 {
3843 struct lp_build_tgsi_soa_context bld;
3844
3845 struct lp_type res_type;
3846
3847 assert(type.length <= LP_MAX_VECTOR_LENGTH);
3848 memset(&res_type, 0, sizeof res_type);
3849 res_type.width = type.width;
3850 res_type.length = type.length;
3851 res_type.sign = 1;
3852
3853 /* Setup build context */
3854 memset(&bld, 0, sizeof bld);
3855 lp_build_context_init(&bld.bld_base.base, gallivm, type);
3856 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
3857 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
3858 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
3859 {
3860 struct lp_type dbl_type;
3861 dbl_type = type;
3862 dbl_type.width *= 2;
3863 lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
3864 }
3865 {
3866 struct lp_type uint64_type;
3867 uint64_type = lp_uint_type(type);
3868 uint64_type.width *= 2;
3869 lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
3870 }
3871 {
3872 struct lp_type int64_type;
3873 int64_type = lp_int_type(type);
3874 int64_type.width *= 2;
3875 lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
3876 }
3877 bld.mask = mask;
3878 bld.inputs = inputs;
3879 bld.outputs = outputs;
3880 bld.consts_ptr = consts_ptr;
3881 bld.const_sizes_ptr = const_sizes_ptr;
3882 bld.sampler = sampler;
3883 bld.bld_base.info = info;
3884 bld.indirect_files = info->indirect_files;
3885 bld.context_ptr = context_ptr;
3886 bld.thread_data_ptr = thread_data_ptr;
3887
3888 /*
3889 * If the number of temporaries is rather large then we just
3890 * allocate them as an array right from the start and treat
3891 * them like indirect temporaries.
3892 */
3893 if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
3894 bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
3895 }
3896 /*
3897 * For performance reasons immediates are always backed by a static
3898 * array, but if there are too many of them, we have to fall back to
3899 * a dynamically allocated array.
3900 */
3901 bld.use_immediates_array =
3902 (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
3903 if (bld.use_immediates_array) {
3904 bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
3905 }
3906
3907
3908 bld.bld_base.soa = TRUE;
3909 bld.bld_base.emit_debug = emit_debug;
3910 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
3911 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
3912 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
3913 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
3914 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
3915 bld.bld_base.emit_store = emit_store;
3916
3917 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
3918 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
3919
3920 bld.bld_base.emit_prologue = emit_prologue;
3921 bld.bld_base.emit_epilogue = emit_epilogue;
3922
3923 /* Set opcode actions */
3924 lp_set_default_actions_cpu(&bld.bld_base);
3925
3926 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
3927 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
3928 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
3929 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
3930 bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
3931 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
3932 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
3933 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
3934 bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
3935 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
3936 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
3937 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
3938 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
3939 bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
3940 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
3941 bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
3942 bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
3943 bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
3944 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
3945 bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
3946 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
3947 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
3948 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
3949 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
3950 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
3951 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
3952 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
3953 bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
3954 bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
3955 bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
3956 bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
3957 bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
3958 /* DX10 sampling ops */
3959 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
3960 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
3961 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
3962 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
3963 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
3964 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
3965 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
3966 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
3967 bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
3968 bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
3969 bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
3970
3971
3972 if (gs_iface) {
3973 /* There's no specific default for this because it should always
3974 * be set, but apps using ext_geometry_shader4 quite often forgot
3975 * to set it, so we use MAX_VERTEX_VARYING from that spec as a
3976 * fallback. We could debug_assert if it's not set instead, but
3977 * that's a lot uglier. */
3978 uint max_output_vertices;
3979
3980 /* inputs are always indirect with gs */
3981 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
3982 bld.gs_iface = gs_iface;
3983 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
3984 bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
3985 bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
3986
3987 max_output_vertices =
3988 info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
3989 if (!max_output_vertices)
3990 max_output_vertices = 32;
3991
3992 bld.max_output_vertices_vec =
3993 lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
3994 max_output_vertices);
3995 }
3996
3997 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
3998
3999 bld.system_values = *system_values;
4000
4001 lp_build_tgsi_llvm(&bld.bld_base, tokens);
4002
4003 if (0) {
4004 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
4005 LLVMValueRef function = LLVMGetBasicBlockParent(block);
4006 debug_printf("11111111111111111111111111111 \n");
4007 tgsi_dump(tokens, 0);
4008 lp_debug_dump_value(function);
4009 debug_printf("2222222222222222222222222222 \n");
4010 }
4011
4012 if (0) {
4013 LLVMModuleRef module = LLVMGetGlobalParent(
4014 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
4015 LLVMDumpModule(module);
4016
4017 }
4018 lp_exec_mask_fini(&bld.exec_mask);
4019 }